library("DESeq2")
library("RColorBrewer")
library("pheatmap")
library("tidyverse")
library("pheatmap")
library('corrr')
library(ggcorrplot)
library("FactoMineR")
library("factoextra")
Get the metadata, which includes sample name, rna extraction batch, clean up batch, behavioural test, viral load, colony location, and colony ID
metadata <- read.csv("./metadata.csv", row.names=1)
Retrieve first row of the counts file, which will be the sample ID
rawcounts <- read.table("./counts.tsv", nrows=1)
colnames <- as.character(rawcounts)
Get rawcounts
rawcounts <- read.table("./counts.tsv", row.names=1, col.names=colnames,
check.names=FALSE,skip=1)
This has the effect of putting the rawcounts columns in the same order as the metadata rows. This is required by DESeq2
rawcounts <- select(rawcounts, rownames(metadata))
Check that they are now in the same order
all(rownames(metadata) == colnames(rawcounts))
## [1] TRUE
Create the DESeq2 object specifying viral load as the condition of interest
dds <- DESeqDataSetFromMatrix(countData = rawcounts,
colData = metadata,
design = ~ viral_load)
dds <- estimateSizeFactors(dds)
Extract the normalised counts
If the normalized argument were specified as FALSE then we
would extract the raw counts from the object
normalised_counts <- counts(dds, normalized=TRUE)
This is a logarithmic transformation that moderates the variance
across the mean
blind = TRUE ensures that the transformation should be blind to
the sample information given in the design formula. This is important
when performing quality assessment
vsd <- vst(dds, blind=TRUE)
Extract the vst matrix from the object
vsd_matrix <- assay(vsd)
Compute pairwise correlation values
vsd_correlation <- cor(vsd_matrix)
pheatmap(vsd_correlation, annotation = select(metadata, behavioural_test))
pheatmap(vsd_correlation, annotation = select(metadata, colony_location))
plotPCA(vsd, intgroup="behavioural_test")
plotPCA(vsd, intgroup="colony_location")
pca <- prcomp(vsd_correlation)
fviz_eig(pca, addlabels = TRUE)
fviz_pca_ind(pca,
habillage=as.factor(vsd$behavioural_test),
repel = TRUE,
axes = c(1,2),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd$colony_location),
repel = TRUE,
axes = c(1,2),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd$behavioural_test),
repel = TRUE,
axes = c(1,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd$colony_location),
repel = TRUE,
axes = c(1,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd$behavioural_test),
repel = TRUE,
axes = c(2,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd$colony_location),
repel = TRUE,
axes = c(2,3),
)
metadata_cruickshank <- subset(metadata, colony_location=="cruickshank")
rawcounts_cruickshank <- select(rawcounts, rownames(metadata_cruickshank))
dds_cruickshank <- DESeqDataSetFromMatrix(countData = rawcounts_cruickshank,
colData = metadata_cruickshank,
design= ~ viral_load)
dds_cruickshank <- estimateSizeFactors(dds_cruickshank)
vsd_cruickshank <- vst(dds_cruickshank, blind=TRUE)
vsd_matrix_cruickshank <- assay(vsd_cruickshank)
vsd_correlation_cruickshank <- cor(vsd_matrix_cruickshank)
pheatmap(vsd_correlation_cruickshank,
annotation = select(metadata_cruickshank, behavioural_test))
plotPCA(vsd_cruickshank, intgroup="behavioural_test")
pca <- prcomp(vsd_correlation_cruickshank)
fviz_eig(pca, addlabels = TRUE)
fviz_pca_ind(pca,
habillage=as.factor(vsd_cruickshank$behavioural_test),
repel = TRUE,
axes = c(1,2),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_cruickshank$colony_location),
repel = TRUE,
axes = c(1,2),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_cruickshank$behavioural_test),
repel = TRUE,
axes = c(1,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_cruickshank$colony_location),
repel = TRUE,
axes = c(1,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_cruickshank$behavioural_test),
repel = TRUE,
axes = c(2,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_cruickshank$colony_location),
repel = TRUE,
axes = c(2,3),
)
metadata_newburgh <- subset(metadata, colony_location=="newburgh")
rawcounts_newburgh <- select(rawcounts, rownames(metadata_newburgh))
dds_newburgh <- DESeqDataSetFromMatrix(countData = rawcounts_newburgh,
colData = metadata_newburgh,
design= ~ viral_load)
dds_newburgh <- estimateSizeFactors(dds_newburgh)
vsd_newburgh <- vst(dds_newburgh, blind=TRUE)
vsd_matrix_newburgh <- assay(vsd_newburgh)
vsd_correlation_newburgh <- cor(vsd_matrix_newburgh)
pheatmap(vsd_correlation_newburgh,
annotation = select(metadata_newburgh, behavioural_test))
plotPCA(vsd_newburgh, intgroup="behavioural_test")
pca <- prcomp(vsd_correlation_newburgh)
fviz_eig(pca, addlabels = TRUE)
fviz_pca_ind(pca,
habillage=as.factor(vsd_newburgh$behavioural_test),
repel = TRUE,
axes = c(1,2),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_newburgh$colony_location),
repel = TRUE,
axes = c(1,2),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_newburgh$behavioural_test),
repel = TRUE,
axes = c(1,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_newburgh$colony_location),
repel = TRUE,
axes = c(1,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_newburgh$behavioural_test),
repel = TRUE,
axes = c(2,3),
)
fviz_pca_ind(pca,
habillage=as.factor(vsd_newburgh$colony_location),
repel = TRUE,
axes = c(2,3),
)