Load libraries

library("DESeq2")
library("tidyverse")
library("SummarizedExperiment")

All samples

File retrieval and preparation

Get the metadata, which includes sample name, rna extraction batch, clean up batch, behavioural test, viral load, colony location, and colony ID

metadata <- read.csv("./metadata.csv", row.names=1)

Retrieve first row of the counts file, which will be the sample ID

rawcounts <- read.table("./counts.tsv", nrows=1)
colnames <- as.character(rawcounts)

Get rawcounts

  • row.names: use the first column as the row names
  • col.names: use the column names we retrieved above as column names
  • check.names: in this instance prevents R from prepending an “X” to the column names
  • skip: skip the first row because it’s just the sample ID
rawcounts <- read.table("./counts.tsv", row.names=1, col.names=colnames,
                        check.names=FALSE,skip=1)

This has the effect of putting the rawcounts columns in the same order as the metadata rows. This is required by DESeq2

rawcounts <- select(rawcounts, rownames(metadata))

Convert to matrix

rawcounts <- as.matrix(rawcounts)

Check that they are now in the same order

all(rownames(metadata) == colnames(rawcounts))
## [1] TRUE

SummarizedExperiment

Create SummarizedExperiment object

sumEx <- SummarizedExperiment(assay = list(count=rawcounts), colData = metadata)

Remove rows/genes with less than 10 reads mapped for any of the samples

sumEx <- subset(sumEx, rowSums(assay(sumEx)) >= 10)

Add library size by summing all columns

sumEx$library_size <- colSums(assay(sumEx))

Scale read counts according to library size

for(i in 1:33)
    assay(sumEx)[,i] <- assay(sumEx)[,i]/sumEx$library_size[i]

Counts per million reads

assay(sumEx) <- assay(sumEx) * 10^6

t-tests (crude exploration)

Create logical vectors for viral load, behavioural test, and colony location

highLoad <- colData(sumEx)$viral_load == 'high'
behaviour0 <- colData(sumEx)$behavioural_test == 'rank 0'
colonyC <- colData(sumEx)$colony_location == 'cruickshank'

Create function to compute p-value for a single gene/row

getPValue <- function(row, con)
{  
    tt <- t.test(row[con], row[!con])  
    p <- tt$p.value
    return(p)
}

Carry out t tests on low vs high, 0 vs 21 and colony locations

viralPs <- apply(assay(sumEx), 1, getPValue, highLoad)
behaviourPs <- apply(assay(sumEx), 1, getPValue, behaviour0)
colonyPs <- apply(assay(sumEx), 1, getPValue, colonyC)

Annotate p values

rowData(sumEx)$pViral <- viralPs
rowData(sumEx)$pBehaviour <- behaviourPs
rowData(sumEx)$pColony <- colonyPs

Annotate Bonferroni corrections

rowData(sumEx)$pViralAdj <- p.adjust(rowData(sumEx)$pViral, method="bonferroni")
rowData(sumEx)$pBehaviourAdj <- p.adjust(rowData(sumEx)$pBehaviour, method="bonferroni")
rowData(sumEx)$pColonyAdj <- p.adjust(rowData(sumEx)$pColony, method="bonferroni")

Explore adjusted p values

min(rowData(sumEx)$pViralAdj)
## [1] 0.3212292
min(rowData(sumEx)$pBehaviourAdj)
## [1] 1
min(rowData(sumEx)$pColonyAdj)
## [1] 7.757558e-06
nrow(subset(rowData(sumEx), rowData(sumEx)$pColonyAdj < 0.05))
## [1] 72

Cruickshank vs Newburgh

Subset CruickShank and Newburgh

sumExN <- subset(sumEx, , colony_location == "CruickShank")
sumExN <- subset(sumEx, , colony_location == "newburgh")

Carry out t tests on low vs high, 0 vs 21 and colony locations

viralPs <- apply(assay(sumExN), 1, getPValue, highLoad)
behaviourPs <- apply(assay(sumExN), 1, getPValue, behaviour0)

Annotate p values

rowData(sumExN)$pViral <- viralPs
rowData(sumExN)$pBehaviour <- behaviourPs

Annotate Bonferroni corrections

rowData(sumExN)$pViralAdj <- p.adjust(rowData(sumExN)$pViral, method="bonferroni")
rowData(sumExN)$pBehaviourAdj <- p.adjust(rowData(sumExN)$pBehaviour, method="bonferroni")

Explore adjusted p values

subset(rowData(sumExN), rowData(sumExN)$pViralAdj < 0.05)
## DataFrame with 1 row and 6 columns
##                pViral pBehaviour   pColony pViralAdj pBehaviourAdj pColonyAdj
##             <numeric>  <numeric> <numeric> <numeric>     <numeric>  <numeric>
## LOC409055 2.18115e-06   0.371877  0.125524 0.0244921             1          1
subset(rowData(sumExN), rowData(sumExN)$pBehaviourAdj < 0.05)
## DataFrame with 1 row and 6 columns
##                 pViral  pBehaviour   pColony pViralAdj pBehaviourAdj pColonyAdj
##              <numeric>   <numeric> <numeric> <numeric>     <numeric>  <numeric>
## LOC102655948  0.857197 2.64468e-06  0.250824         1     0.0296971          1

Here’s a paper that mentions how LOC102655948, the gene apparently DE according to behavioural rank, is upregulated by clothianidin, a neonicitinoid

First insights into the honey bee (Apis mellifera) brain lipidome and its neonicotinoid-induced alterations associated with reduced self-grooming behavior

LOC409055