library("DESeq2")
library("tidyverse")
library("SummarizedExperiment")
Get the metadata, which includes sample name, rna extraction batch, clean up batch, behavioural test, viral load, colony location, and colony ID
metadata <- read.csv("./metadata.csv", row.names=1)
Retrieve first row of the counts file, which will be the sample ID
rawcounts <- read.table("./counts.tsv", nrows=1)
colnames <- as.character(rawcounts)
Get rawcounts
rawcounts <- read.table("./counts.tsv", row.names=1, col.names=colnames,
check.names=FALSE,skip=1)
This has the effect of putting the rawcounts columns in the same order as the metadata rows. This is required by DESeq2
rawcounts <- select(rawcounts, rownames(metadata))
Convert to matrix
rawcounts <- as.matrix(rawcounts)
Check that they are now in the same order
all(rownames(metadata) == colnames(rawcounts))
## [1] TRUE
Create SummarizedExperiment object
sumEx <- SummarizedExperiment(assay = list(count=rawcounts), colData = metadata)
Remove rows/genes with less than 10 reads mapped for any of the samples
sumEx <- subset(sumEx, rowSums(assay(sumEx)) >= 10)
Add library size by summing all columns
sumEx$library_size <- colSums(assay(sumEx))
Scale read counts according to library size
for(i in 1:33)
assay(sumEx)[,i] <- assay(sumEx)[,i]/sumEx$library_size[i]
Counts per million reads
assay(sumEx) <- assay(sumEx) * 10^6
Create logical vectors for viral load, behavioural test, and colony location
highLoad <- colData(sumEx)$viral_load == 'high'
behaviour0 <- colData(sumEx)$behavioural_test == 'rank 0'
colonyC <- colData(sumEx)$colony_location == 'cruickshank'
Create function to compute p-value for a single gene/row
getPValue <- function(row, con)
{
tt <- t.test(row[con], row[!con])
p <- tt$p.value
return(p)
}
Carry out t tests on low vs high, 0 vs 21 and colony locations
viralPs <- apply(assay(sumEx), 1, getPValue, highLoad)
behaviourPs <- apply(assay(sumEx), 1, getPValue, behaviour0)
colonyPs <- apply(assay(sumEx), 1, getPValue, colonyC)
Annotate p values
rowData(sumEx)$pViral <- viralPs
rowData(sumEx)$pBehaviour <- behaviourPs
rowData(sumEx)$pColony <- colonyPs
Annotate Bonferroni corrections
rowData(sumEx)$pViralAdj <- p.adjust(rowData(sumEx)$pViral, method="bonferroni")
rowData(sumEx)$pBehaviourAdj <- p.adjust(rowData(sumEx)$pBehaviour, method="bonferroni")
rowData(sumEx)$pColonyAdj <- p.adjust(rowData(sumEx)$pColony, method="bonferroni")
Explore adjusted p values
min(rowData(sumEx)$pViralAdj)
## [1] 0.3212292
min(rowData(sumEx)$pBehaviourAdj)
## [1] 1
min(rowData(sumEx)$pColonyAdj)
## [1] 7.757558e-06
nrow(subset(rowData(sumEx), rowData(sumEx)$pColonyAdj < 0.05))
## [1] 72
Subset CruickShank and Newburgh
sumExN <- subset(sumEx, , colony_location == "CruickShank")
sumExN <- subset(sumEx, , colony_location == "newburgh")
Carry out t tests on low vs high, 0 vs 21 and colony locations
viralPs <- apply(assay(sumExN), 1, getPValue, highLoad)
behaviourPs <- apply(assay(sumExN), 1, getPValue, behaviour0)
Annotate p values
rowData(sumExN)$pViral <- viralPs
rowData(sumExN)$pBehaviour <- behaviourPs
Annotate Bonferroni corrections
rowData(sumExN)$pViralAdj <- p.adjust(rowData(sumExN)$pViral, method="bonferroni")
rowData(sumExN)$pBehaviourAdj <- p.adjust(rowData(sumExN)$pBehaviour, method="bonferroni")
Explore adjusted p values
subset(rowData(sumExN), rowData(sumExN)$pViralAdj < 0.05)
## DataFrame with 1 row and 6 columns
## pViral pBehaviour pColony pViralAdj pBehaviourAdj pColonyAdj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## LOC409055 2.18115e-06 0.371877 0.125524 0.0244921 1 1
subset(rowData(sumExN), rowData(sumExN)$pBehaviourAdj < 0.05)
## DataFrame with 1 row and 6 columns
## pViral pBehaviour pColony pViralAdj pBehaviourAdj pColonyAdj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## LOC102655948 0.857197 2.64468e-06 0.250824 1 0.0296971 1
Here’s a paper that mentions how LOC102655948, the gene apparently DE according to behavioural rank, is upregulated by clothianidin, a neonicitinoid