library("DESeq2")
library("tidyverse")
library("SummarizedExperiment")
library("ggplot2")
library("ggrepel")
Get the metadata, which includes sample name, rna extraction batch, clean up batch, behavioural test, viral load, colony location, and colony ID
metadata <- read.csv("./metadata.csv", row.names=1)
Retrieve first row of the counts file, which will be the sample ID
rawcounts <- read.table("./counts.tsv", nrows=1)
colnames <- as.character(rawcounts)
Get rawcounts
rawcounts <- read.table("./counts.tsv", row.names=1, col.names=colnames,
check.names=FALSE,skip=1)
This has the effect of putting the rawcounts columns in the same order as the metadata rows. This is required by DESeq2
rawcounts <- select(rawcounts, rownames(metadata))
rawcounts <- as.matrix(rawcounts)
Check that they are now in the same order
all(rownames(metadata) == colnames(rawcounts))
## [1] TRUE
Create SummarizedExperiment object
sum_ex <- SummarizedExperiment(assay = list(count=rawcounts), colData = metadata)
Remove rows/genes with less than 4 reads mapped for any of the samples
sum_ex <- subset(sum_ex, rowSums(assay(sum_ex)) >= 4)
Two objects, one to examine viral load and the other to examine behavioural rank each with colony location as a covariate and
Run DESeq
deseq2_viral <- DESeq(deseq2_viral)
deseq2_behavioural <- DESeq(deseq2_behavioural)
Extract results
results_viral <- results(deseq2_viral, contrast=c("viral_load", "high", "low"))
results_behavioural <- results(deseq2_behavioural, contrast=c("behavioural_test", "rank 0", "rank 21"))
Lines of significance shown at alpha = 0.05 and log2 fold changes of
1 and -1
ID | Direction (high) | Biotype | Description | Gene length | Chromosome | L2FC |
---|---|---|---|---|---|---|
LOC113218947 | Up | lncRNA | uncharacterized LOC113218947 | 1255 | LG8 | 2.0 |
Subset colony location = Cruickshank
sum_ex_cruickshank <- subset(sum_ex, , colony_location == "cruickshank")
Two objects, one to examine viral load and the other to examine behavioural rank each with colony location as a covariate and
Run DESeq
deseq2_viral_cruickshank <- DESeq(deseq2_viral_cruickshank)
deseq2_behavioural_cruickshank <- DESeq(deseq2_behavioural_cruickshank)
Extract results
results_viral_cruickshank <- results(deseq2_viral_cruickshank, contrast=c("viral_load", "high", "low"))
results_behavioural_cruickshank <- results(deseq2_behavioural_cruickshank, contrast=c("behavioural_test", "rank 0", "rank 21"))
Lines of significance shown at alpha = 0.05 and log2 fold changes of
1 and -1
ID | Direction (high) | Biotype | Description | Gene length | Chromosome | L2FC |
---|---|---|---|---|---|---|
Apid1 | Down | protein_coding | apidaecin 1 | 1712 | LG16 | -4.6 |
LOC552073 | Down | protein_coding | arylsulfatase B | 7141 | LG2 | -1.1 |
Subset colony location = Newburgh
sum_ex_newburgh <- subset(sum_ex, , colony_location == "newburgh")
Two objects, one to examine viral load and the other to examine behavioural rank each with colony location as a covariate and
Run DESeq
deseq2_viral_newburgh <- DESeq(deseq2_viral_newburgh)
deseq2_behavioural_newburgh <- DESeq(deseq2_behavioural_newburgh)
Extract results
results_viral_newburgh <- results(deseq2_viral_newburgh, contrast=c("viral_load", "high", "low"))
results_behavioural_newburgh <- results(deseq2_behavioural_newburgh, contrast=c("behavioural_test", "rank 0", "rank 21"))
Lines of significance shown at alpha = 0.05 and log2 fold changes of
1 and -1
ID | Direction (high) | Biotype | Description | Gene length | Chromosome | L2FC |
---|---|---|---|---|---|---|
Apid1 | Up | protein_coding | apidaecin 1 | 1712 | LG16 | 5.8 |
Hbg2 | Up | protein_coding | alpha-glucosidase | 1743 | LG8 | 5.3 |
LOC100578156 | Up | protein_coding | uncharacterized LOC100578156, | 2289 | LG5 | 1.3 |
LOC102654076 | Up | lncRNA | uncharacterized LOC102654076 | 21364 | LG9 | 1.3 |
LOC113218549 | Up | lncRNA | uncharacterized LOC113218549 | 37214 | LG4 | 5.7 |
LOC113218947 | Up | lncRNA | uncharacterized LOC113218947 | 1255 | LG8 | 2.3 |
LOC406142 | Up | protein_coding | hymenoptaecin | 1640 | LG6 | 3.8 |
LOC406144 | Up | protein_coding | abaecin | 866 | LG10 | 3.0 |
LOC551263 | Up | protein_coding | monocarboxylate transporter 13, | 11240 | LG8 | 1.2 |
LOC724899 | Down | protein_coding | lysozyme | 1516 | LG13 | -1.8 |
PPO | Down | protein_coding | phenoloxidase subunit A3 | 3665 | LG14 | -1.8 |
LOC102655375 | Down | lncRNA | uncharacterized LOC102655375 | 3167 | LG8 | -1.3 |
LOC102655319 | Down | protein_coding | inner centromere protein A | 5515 | LG9 | -1.3 |
LOC408544 | Down | protein_coding | lysyl oxidase homolog 4, | 10351 | LG15 | -1.1 |