Load libraries

library("DESeq2")
library("RColorBrewer")
library("pheatmap")
library("tidyverse")
library("pheatmap")
library('corrr')
library(ggcorrplot)
library("FactoMineR")
library("factoextra")

All samples

File retrieval and preparation

Get the metadata, which includes sample name, rna extraction batch, clean up batch, behavioural test, viral load, colony location, and colony ID

metadata <- read.csv("./metadata.csv", row.names=1)

Retrieve first row of the counts file, which will be the sample ID

rawcounts <- read.table("./counts.tsv", nrows=1)
colnames <- as.character(rawcounts)

Get rawcounts

  • row.names: use the first column as the row names
  • col.names: use the column names we retrieved above as column names
  • check.names: in this instance prevents R from prepending an “X” to the column names
  • skip: skip the first row because it’s just the sample ID
rawcounts <- read.table("./counts.tsv", row.names=1, col.names=colnames,
                        check.names=FALSE,skip=1)

This has the effect of putting the rawcounts columns in the same order as the metadata rows. This is required by DESeq2

rawcounts <- select(rawcounts, rownames(metadata))

Check that they are now in the same order

all(rownames(metadata) == colnames(rawcounts))
## [1] TRUE

DESeq2 object

Create the DESeq2 object specifying viral load as the condition of interest

dds <- DESeqDataSetFromMatrix(countData = rawcounts, 
                              colData = metadata,
                              design = ~ viral_load)

Normalisation

dds <- estimateSizeFactors(dds)

Extract the normalised counts
If the normalized argument were specified as FALSE then we would extract the raw counts from the object

normalised_counts <- counts(dds, normalized=TRUE)

Variance stabilising transformation

This is a logarithmic transformation that moderates the variance across the mean
blind = TRUE ensures that the transformation should be blind to the sample information given in the design formula. This is important when performing quality assessment

vsd <- vst(dds, blind=TRUE)

Extract the vst matrix from the object

vsd_matrix <- assay(vsd)

Compute pairwise correlation values

vsd_correlation <- cor(vsd_matrix)

Heatmaps

Viral load

pheatmap(vsd_correlation, annotation = select(metadata, viral_load))

Behavioural test

pheatmap(vsd_correlation, annotation = select(metadata, behavioural_test))

Colony location

pheatmap(vsd_correlation, annotation = select(metadata, colony_location))

PCA

Viral Load

plotPCA(vsd, intgroup="viral_load")

Behavioural test

plotPCA(vsd, intgroup="behavioural_test")

Colony location

plotPCA(vsd, intgroup="colony_location")

Viral load and behavioural test

plotPCA(vsd, intgroup=c("viral_load", "behavioural_test"))

Viral load, behavioural test, and colony location

plotPCA(vsd, intgroup=c("viral_load", "behavioural_test", "colony_location"))

Create pca object

pca <- prcomp(vsd_correlation)

Scree plot

fviz_eig(pca, addlabels = TRUE)

PC1 PC2 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd$viral_load),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC2 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd$behavioural_test),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC2 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd$colony_location),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC3 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd$viral_load),
             repel = TRUE,
             axes = c(1,3)
)

PC1 PC3 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd$behavioural_test),
             repel = TRUE,
             axes = c(1,3),
)

PC1 PC3 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd$colony_location),
             repel = TRUE,
             axes = c(1,3),
)

#PC2 PC3 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd$viral_load),
             repel = TRUE,
             axes = c(2,3),
)

PC2 PC3 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd$behavioural_test),
             repel = TRUE,
             axes = c(2,3),
)

PC2 PC3 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd$colony_location),
             repel = TRUE,
             axes = c(2,3),
)

Cruickshank samples

Preparation

metadata_cruickshank <- subset(metadata, colony_location=="cruickshank")
rawcounts_cruickshank <- select(rawcounts, rownames(metadata_cruickshank))

DESeq2 object

dds_cruickshank <- DESeqDataSetFromMatrix(countData = rawcounts_cruickshank, 
                              colData = metadata_cruickshank,
                              design= ~ viral_load)
dds_cruickshank <- estimateSizeFactors(dds_cruickshank)
vsd_cruickshank <- vst(dds_cruickshank, blind=TRUE)
vsd_matrix_cruickshank <- assay(vsd_cruickshank)
vsd_correlation_cruickshank <- cor(vsd_matrix_cruickshank)

Heatmaps

Viral load

pheatmap(vsd_correlation_cruickshank, 
         annotation = select(metadata_cruickshank, viral_load))

Behavioural test

pheatmap(vsd_correlation_cruickshank, 
         annotation = select(metadata_cruickshank, behavioural_test))

PCA

Viral load

plotPCA(vsd_cruickshank, intgroup="viral_load")

Behavioural test

plotPCA(vsd_cruickshank, intgroup="behavioural_test")

Viral load and behavioural test

plotPCA(vsd_cruickshank, intgroup=c("viral_load", "behavioural_test"))

Create prcomp object

pca <- prcomp(vsd_correlation_cruickshank)

Scree plot

fviz_eig(pca, addlabels = TRUE)

PC1 PC2 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$viral_load),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC2 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$behavioural_test),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC2 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$colony_location),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC3 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$viral_load),
             repel = TRUE,
             axes = c(1,3)
)

PC1 PC3 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$behavioural_test),
             repel = TRUE,
             axes = c(1,3),
)

PC1 PC3 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$colony_location),
             repel = TRUE,
             axes = c(1,3),
)

#PC2 PC3 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$viral_load),
             repel = TRUE,
             axes = c(2,3),
)

PC2 PC3 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$behavioural_test),
             repel = TRUE,
             axes = c(2,3),
)

PC2 PC3 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd_cruickshank$colony_location),
             repel = TRUE,
             axes = c(2,3),
)

Newburgh samples

Preparation

metadata_newburgh <- subset(metadata, colony_location=="newburgh")
rawcounts_newburgh <- select(rawcounts, rownames(metadata_newburgh))

DESeq2 object

dds_newburgh <- DESeqDataSetFromMatrix(countData = rawcounts_newburgh, 
                              colData = metadata_newburgh,
                              design= ~ viral_load)
dds_newburgh <- estimateSizeFactors(dds_newburgh)
vsd_newburgh <- vst(dds_newburgh, blind=TRUE)
vsd_matrix_newburgh <- assay(vsd_newburgh)
vsd_correlation_newburgh <- cor(vsd_matrix_newburgh)

Heatmaps

Viral load

pheatmap(vsd_correlation_newburgh, 
         annotation = select(metadata_newburgh, viral_load))

Behavioural test

pheatmap(vsd_correlation_newburgh, 
         annotation = select(metadata_newburgh, behavioural_test))

PCA

Viral load

plotPCA(vsd_newburgh, intgroup="viral_load")

Behavioural test

plotPCA(vsd_newburgh, intgroup="behavioural_test")

Viral load and behavioural test

plotPCA(vsd_newburgh, intgroup=c("viral_load", "behavioural_test"))

Create prcomp object

pca <- prcomp(vsd_correlation_newburgh)

Scree plot

fviz_eig(pca, addlabels = TRUE)

PC1 PC2 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$viral_load),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC2 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$behavioural_test),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC2 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$colony_location),
             repel = TRUE,
             axes = c(1,2),
)

PC1 PC3 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$viral_load),
             repel = TRUE,
             axes = c(1,3)
)

PC1 PC3 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$behavioural_test),
             repel = TRUE,
             axes = c(1,3),
)

PC1 PC3 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$colony_location),
             repel = TRUE,
             axes = c(1,3),
)

#PC2 PC3 viral load

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$viral_load),
             repel = TRUE,
             axes = c(2,3),
)

PC2 PC3 behavioural rank

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$behavioural_test),
             repel = TRUE,
             axes = c(2,3),
)

PC2 PC3 colony location

fviz_pca_ind(pca,
             habillage=as.factor(vsd_newburgh$colony_location),
             repel = TRUE,
             axes = c(2,3),
)