Bulk RNA-seq Analysis Report

Grieco Mariachiara mariachiara.grieco@studenti.unimi.it

30 September 2021

Introduction

In this report, a bulk-RNA seq analysis is performed comparing samples from colon, heart and liver Three samples from each tissue are retrieved from Recount2 and are analyzed for finding differentially expressed genes that characterize one tissue with respect to the other ones.

# Importing libraries --------------
library(limma)
library(edgeR)
library(DESeq2)
library(recount)
library(dplyr)
library(ggplot2)
library(reshape2)
library(RColorBrewer)
library(viridis)
library(gridExtra)
library(grid)
library(org.Hs.eg.db)
library(clusterProfiler)

Loading data

Data for each tissue are in the “Ranged Summarized Experiment” format of Recount2.

load(paste0(wd,"rse_gene_liver_9_scaled.Rdata"))
liver <- rse
liver_counts <- as.data.frame(assay(liver)[,2:4])
liver_sample_id <- colnames(liver_counts)
colnames(liver_counts) <- c('rep2','rep3','rep4')
liver_coldata <-as.data.frame(liver@colData)
liver_gtex_sampid <- liver_coldata$sampid[2:4]

load(paste0(wd,"rse_gene_heart_6_scaled.Rdata"))
heart <- rse
heart_counts <- as.data.frame(assay(heart)[,c(9,10,1)])
heart_sample_id <- colnames(heart_counts)
colnames(heart_counts) <- c('rep9','rep10','rep1')
heart_coldata <-as.data.frame(heart@colData)
heart_gtex_sampid <- heart_coldata$sampid[c(9,10,1)]

load(paste0(wd,"rse_gene_colon_7_scaled.Rdata"))
colon <- rse
colon_counts <- as.data.frame(assay(colon)[,2:4])
colon_sample_id <- colnames(colon_counts)
colnames(colon_counts) <- c('rep2','rep3','rep4')
colon_coldata <-as.data.frame(colon@colData)
colon_gtex_sampid <- colon_coldata$sampid[2:4]

Preprocessing

From the tables, we will:

select the columns we are interested in
remove all genes with length < 200
remove all mitochondrial genes
merge them into a single count table for subsequent analyses

Here we check that counts have already been normalized/scaled (to 40M reads per column) (e.g., check that the number of reads is less than or equal to 40 million)

colSums(liver_counts)/ 1e6

##     rep2     rep3     rep4 
## 36.16472 34.51705 39.72422

colSums(heart_counts) / 1e6

##     rep9    rep10     rep1 
## 39.51230 39.50502 38.78892

colSums(colon_counts)/ 1e6

##     rep2     rep3     rep4 
## 38.31695 36.18829 38.61814

Now we remove genes shorter than 200 bp. rowData is a dataframe object describing the rows. Row names, if present, become the row names of the SummarizedExperiment object. In the rowData are included: gene_id, bp_length and symbol (if present). Row names are the gene_id.

# The number of rows of the DataFrame must equal the number of rows of the matrices in assays.
# We can check this by doing:
dim(rowData(liver))

## [1] 58037     3

dim(assay(liver))

## [1] 58037    10

In this case, since all row data are identical – they represent the Gencode V25 comprehensive annotation - we can just use one of the tree to identify the short genes and the mt ones, to carry out then the filtering.

shortgenes <- as.data.frame(subset(rowData(liver), bp_length < 200))
dim(shortgenes)

## [1] 7341    3

Selecting mt genes: If a gene is both “short” and on the MT DNA, it has to be considered among the “short” genes and not a MT gene.

mt_genes <- as.data.frame(subset(rowRanges(liver), seqnames == 'chrM' & bp_length >= 200))
dim(mt_genes)

## [1] 15  8

Just for check, we can ensure that also in the heart and colon rowData the number of short genes is equal to the liver one.

dim(subset(rowData(heart), bp_length < 200))[1]

## [1] 7341

#colon
dim(subset(rowData(colon), bp_length < 200))[1]

## [1] 7341

We can create a function to find the short and the mitochondrial genes previosly selected, that can be applied on the tree datasets we have.

find_short <- function(count_table, short) {
  counts_short <- count_table %>% filter(row.names(count_table) %in% short$gene_id)
  return(counts_short)
}

find_mt <- function(count_table, mt) {
  counts_mt <- count_table %>% filter(row.names(count_table) %in% mt$gene_id)
  return(counts_mt)
}

  
liver_shortgenes <- find_short(liver_counts,shortgenes)
liver_mtgenes <- find_mt(liver_counts, mt_genes)

heart_shortgenes <- find_short(heart_counts,shortgenes)
heart_mtgenes <- find_mt(heart_counts, mt_genes)

colon_shortgenes <- find_short(colon_counts,shortgenes)
colon_mtgenes <- find_mt(colon_counts, mt_genes)


liver_counts_table <- data.frame('tissue' = 'liver',
                           'column' = colnames(liver_counts),
                           'tot_short_genes' = rep(dim(shortgenes)[1],3),
                           'tot_mt_genes' = rep(dim(mt_genes)[1],3),
                           'tot_reads' = colSums(liver_counts),
                           'tot_reads_short_genes' = colSums(liver_shortgenes),
                           'perc_reads_short_genes' = round((colSums(liver_shortgenes)/colSums(liver_counts))*100,2),
                           'tot_reads_mt_genes' = colSums(liver_mtgenes),
                           'perc_reads_mt_genes' = round((colSums(liver_mtgenes)/colSums(liver_counts))*100,2)
)

#heart 
heart_counts_table <- data.frame('tissue' = 'heart',
                           'column' = colnames(heart_counts),
                           'tot_short_genes' = rep(dim(shortgenes)[1],3),
                           'tot_mt_genes' = rep(dim(mt_genes)[1],3),
                           'tot_reads' = colSums(heart_counts),
                           'tot_reads_short_genes' = colSums(heart_shortgenes),
                           'perc_reads_short_genes' = round((colSums(heart_shortgenes)/colSums(heart_counts))*100,2),
                           'tot_reads_mt_genes' = colSums(heart_mtgenes),
                           'perc_reads_mt_genes' = round((colSums(heart_mtgenes)/colSums(heart_counts))*100,2)
                           
)

#colon
colon_counts_table <- data.frame('tissue' = 'colon',
                           'column' = colnames(colon_counts),
                           'tot_short_genes' = rep(dim(shortgenes)[1],3),
                           'tot_mt_genes' = rep(dim(mt_genes)[1],3),
                           'tot_reads' = colSums(colon_counts),
                           'tot_reads_short_genes' = colSums(colon_shortgenes),
                           'perc_reads_short_genes' = round((colSums(colon_shortgenes)/colSums(colon_counts))*100,2),
                           'tot_reads_mt_genes' = colSums(colon_mtgenes),
                           'perc_reads_mt_genes' = round((colSums(colon_mtgenes)/colSums(colon_counts))*100,2)
                           
)


counts_table <- rbind(liver_counts_table,heart_counts_table,colon_counts_table)

Now we create a new counts table containing the counts of NO short and NO mt genes for each rep in each tissue for subsequential analysis. The resulting total number of gene to keep should be equal to 58037 (tot genes) -7341 (short) -15 (mt) = 50681

liver_count_filtered <- liver_counts %>% filter(!row.names(liver_counts) %in% shortgenes$gene_id) 
liver_count_filtered <- liver_count_filtered %>% filter(!row.names(liver_count_filtered) %in% mt_genes$gene_id) 

heart_count_filtered <- heart_counts %>% filter(!row.names(heart_counts) %in% shortgenes$gene_id) 
heart_count_filtered <- heart_count_filtered %>% filter(!row.names(heart_count_filtered) %in% mt_genes$gene_id) 

colon_count_filtered <- colon_counts %>% filter(!row.names(colon_counts) %in% shortgenes$gene_id) 
colon_count_filtered <- colon_count_filtered %>% filter(!row.names(colon_count_filtered) %in% mt_genes$gene_id) 

counts <- cbind(liver_count_filtered, heart_count_filtered, colon_count_filtered)
colnames(counts) <- c('liver2','liver3','liver4',
                      'heart9','heart10','heart1',
                      'colon2','colon3','colon4')

DEG analysis using EdgeR

The DEG analysis will be performed using EdgeR. Firstly, the count table needs to be converted into a DGEList object, which contains all the data stored in a better way for usage with edgeR and allows to subsequently add further data derived from the following steps of the analysis itself. Then, we will set the design matrix.

de_object <- DGEList(counts = counts)

Now, we have to label the samples:

tissue <- factor(c(rep('liver',3),rep('heart',3),rep('colon',3)),levels = c('liver','heart','colon'))
de_object$samples$group <- tissue

table(rowSums(de_object$counts==0)==9)

## 
## FALSE  TRUE 
## 41248  9433

Remove altogether all genes with low or zero counts.

exprs2keep <- filterByExpr(de_object, group=tissue)
de_object_filtered <- de_object[exprs2keep,, keep.lib.sizes=FALSE]
dim(de_object)

## [1] 50681     9

dim(de_object_filtered)

## [1] 23454     9

Store in a vector the log of the counts per million before normalization with the “cpm” function:

logcpm_no_norm <- as.data.frame(cpm(de_object_filtered, log=TRUE))

melt_logcpm_no_norm <- melt(logcpm_no_norm)
colnames(melt_logcpm_no_norm) <- c('sample','log2_no_norm_count')

melt_logcpm_no_norm_liver <- melt(logcpm_no_norm[,1:3])
colnames(melt_logcpm_no_norm_liver) <- c('sample','log2_no_norm_count')
melt_logcpm_no_norm_liver$tissue <- 'liver'

melt_logcpm_no_norm_heart <- melt(logcpm_no_norm[,4:6])
colnames(melt_logcpm_no_norm_heart) <- c('sample','log2_no_norm_count')
melt_logcpm_no_norm_heart$tissue <- 'heart'

melt_logcpm_no_norm_colon <- melt(logcpm_no_norm[,7:9])
colnames(melt_logcpm_no_norm_colon) <- c('sample','log2_no_norm_count')
melt_logcpm_no_norm_colon$tissue <- 'colon'

melt_logcpm_no_norm_tissue <- rbind(melt_logcpm_no_norm_liver,
                                 melt_logcpm_no_norm_heart,
                                 melt_logcpm_no_norm_colon)
melt_logcpm_no_norm_tissue$tissue <- factor(melt_logcpm_no_norm_tissue$tissue,
                                         levels = c('liver','heart','colon'))

#This is the palette used to plot the three tissue in all downstream analysis
mypalette <- brewer.pal(8,'Set3')[c(3,4,7)]

ggplot(melt_logcpm_no_norm_tissue, aes(x=sample, y = log2_no_norm_count, fill = tissue)) + 
  geom_boxplot(outlier.shape = NA, notch = TRUE) +
  scale_fill_manual(values = mypalette,
                    name = 'Tissue',
                    labels = c('Liver','Heart','Colon')) +
  scale_x_discrete(labels = c(liver_sample_id,heart_sample_id,colon_sample_id)) +
  xlab('samples') + ylab('log2(counts)') +
  ggtitle('Not normalized counts') +
  theme_bw()

TMM normalization

TMM normalization is the default method used by edgeR to normalize data, based on a trimmed scaling factor for each column (sample).

de_object_normalized <- calcNormFactors(de_object_filtered, method = "TMM")

log2CPM

The read counts are again stored as the log(CPM).

logcpm_norm <- as.data.frame(cpm(de_object_normalized, log=TRUE))

melt_logcpm_norm <- melt(logcpm_norm)
colnames(melt_logcpm_norm) <- c('sample','log2_norm_count')

melt_logcpm_norm_liver <- melt(logcpm_norm[,1:3])
colnames(melt_logcpm_norm_liver) <- c('sample','log2_norm_count')
melt_logcpm_norm_liver$tissue <- 'liver'

melt_logcpm_norm_heart <- melt(logcpm_norm[,4:6])
colnames(melt_logcpm_norm_heart) <- c('sample','log2_norm_count')
melt_logcpm_norm_heart$tissue <- 'heart'

melt_logcpm_norm_colon <- melt(logcpm_norm[,7:9])
colnames(melt_logcpm_norm_colon) <- c('sample','log2_norm_count')
melt_logcpm_norm_colon$tissue <- 'colon'

melt_logcpm_norm_tissue <- rbind(melt_logcpm_norm_liver,
                                 melt_logcpm_norm_heart,
                                 melt_logcpm_norm_colon)
melt_logcpm_norm_tissue$tissue <- factor(melt_logcpm_norm_tissue$tissue,
                                         levels = c('liver','heart','colon'))

ggplot(melt_logcpm_norm_tissue, aes(x=sample, y = log2_norm_count, fill = tissue)) + 
  geom_boxplot(outlier.shape = NA, notch = TRUE) +
  scale_fill_manual(values = mypalette,
                    name = 'Tissue',
                    labels = c('Liver','Heart','Colon')) +
  scale_x_discrete(labels = c(liver_sample_id,heart_sample_id,colon_sample_id)) +
  xlab('samples') + ylab('log2(normalized_counts)') +
  theme_bw()

Experimental design

When we set the experimental design we can use the intercept in the linear model to specify a reference condition. In this case, the tissue compared are different and there is no a reference condition of one with respect to the others, so no intercept is used in the linear model. The features in the model are the tissue to which the replicates belong and are selected as retrieved in building the dataset for this analysis: liver, heart and colon.

design <- model.matrix(~0 + group, data=de_object_normalized$samples)
colnames(design) <- levels(de_object_normalized$samples$group)
design

##         liver heart colon
## liver2      1     0     0
## liver3      1     0     0
## liver4      1     0     0
## heart9      0     1     0
## heart10     0     1     0
## heart1      0     1     0
## colon2      0     0     1
## colon3      0     0     1
## colon4      0     0     1
## attr(,"assign")
## [1] 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"

Exploratory Analysis

edgeR contains a function plotMDS, which operates on a DGEList object and generates a two-dimensional MDS representation of the samples. The default distance between two samples can be interpreted as the “typical” log fold change between the two samples, for the genes that are most different between them (by default, the top 500 genes, but this can be modified as shown below).

MDS

plotMDS(logcpm_norm, 
        labels = c(liver_sample_id,heart_sample_id,colon_sample_id), 
        col = rep(mypalette, each = 3),
        cex = 0.85,
        xlim = c(-8,5),
        main = 'MDS of log2(CPM) using log fold change as distance')
legend(-8, 3.5, legend = c("Liver", "Heart", "Colon"), fill = mypalette)

PCA plot

plotMDS(logcpm_norm, 
        labels = c(liver_sample_id,heart_sample_id,colon_sample_id), 
        col = rep(mypalette, each = 3),
        cex = 0.85,
        xlim = c(-8,5),
        main = 'PCA of log2(CPM) using log fold change as distance',
        gene.selection = 'common')
legend(-8, 2.5, legend = c("Liver", "Heart", "Colon"), fill = mypalette)

We can also perform MDS on manually calculated distances, using the R function cmdscale and the Euclidean distance as distance.

sample_dist <- as.matrix(dist(t(logcpm_norm)))
mds <- data.frame(cmdscale(sample_dist))
mds$tissue <- tissue

p <- ggplot(mds, aes(X1,X2,color=tissue,shape=tissue)) + 
  geom_point(size=4) +
  scale_color_manual(values = mypalette,
                    name = 'Tissue',
                    labels = c('Liver','Heart','Colon'))+
  scale_shape_manual(values = c(17,18,19),
                     name = 'Tissue',
                    labels = c('Liver','Heart','Colon')) +
  theme_bw()

grid.arrange(textGrob('MDS of log2(CPM) using Euclidean distance as distance',
                      gp = gpar(fontsize = 1.5*10, fontface = "bold")),
             p, 
             heights = c(0.1, 1))

plotMDS(logcpm_norm, 
        labels = c(rep("male",3),c("male","female","female"),c("female","male","female")), 
        col = rep(mypalette, each = 3),
        cex = 0.85,
        xlim = c(-8,5),
        main = 'PCA of log2(CPM) using log fold change as distance',
        gene.selection = 'common')
legend(-8, 2.5, legend = c("Liver", "Heart", "Colon"), fill = mypalette)

BCV

In order to fit the model used by edgeR it’s necessary to evaluate if the normalized counts can be modeled with a Negative Binomial distribution, which is used by edgeR itself to model the variability of counts. In order to do that, in the plot below we can observe as estimate for the dispersion of the NB the Biological Coefficient of Variation (BCV):

de_object_normalized <- estimateDisp(de_object_normalized,design)
plotBCV(de_object_normalized)

The datasets have the following common dispersion estimate

de_object_normalized$common.dispersion

## [1] 0.2480655

Model fitting

Fit the data to the “generalized linear” model we designed

fit <- glmQLFit(de_object_normalized, design)

Finding DE

In this case, genes will be considered “DE” if:

their FDR < 0.01
their log-fold change is greater than 0 or lower than 0 (the lfc value is a threshold for the absolute value).

The comparisons between tissues are:

Liver versus heart
Liver versus colon
Heart versus colon

Liver versus heart

qlf.1vs2 <- glmQLFTest(fit, contrast=c(1,-1,0))

# select the significant ones with corrected pvalue < 0.01:
summary(decideTests(qlf.1vs2, p.value = 0.01, lfc = 0))

##        1*liver -1*heart
## Down               1779
## NotSig            19306
## Up                 2369

# Filtering and storing up and down regulated genes
deg.1vs2 <- topTags(qlf.1vs2, n=20000, adjust.method = "BH", sort.by = "PValue", p.value = 0.01)$table
up.genes.1vs2 <- row.names(deg.1vs2[deg.1vs2$logFC > 0,])
down.genes.1vs2 <- row.names(deg.1vs2[deg.1vs2$logFC < 0,])
deg.1vs2

Liver versus colon

qlf.1vs3 <- glmQLFTest(fit, contrast=c(1,0,-1))

# select the significant ones with corrected pvalue < 0.01:
summary(decideTests(qlf.1vs3, p.value = 0.01, lfc = 0))

##        1*liver -1*colon
## Down               1966
## NotSig            18950
## Up                 2538

deg.1vs3 <- topTags(qlf.1vs3, n=20000, adjust.method = "BH", sort.by = "PValue", p.value = 0.01)$table
up.genes.1vs3 <- row.names(deg.1vs3[deg.1vs3$logFC > 0,])
down.genes.1vs3 <- row.names(deg.1vs3[deg.1vs3$logFC < 0,])
deg.1vs3

Heart versus colon

qlf.2vs3 <- glmQLFTest(fit, contrast=c(0,1,-1))

# select the significant ones with corrected pvalue < 0.01:
summary(decideTests(qlf.2vs3, p.value = 0.01, lfc = 0))

##        1*heart -1*colon
## Down                439
## NotSig            22371
## Up                  644

deg.2vs3 <- topTags(qlf.2vs3, n=20000, adjust.method = "BH", sort.by = "PValue", p.value = 0.01)$table
up.genes.2vs3 <- row.names(deg.2vs3[deg.2vs3$logFC > 0,])
down.genes.2vs3 <- row.names(deg.2vs3[deg.2vs3$logFC < 0,])
deg.2vs3

Extracting list of genes up and down in each tissue:

# Liver
up_genes_liver <- intersect(up.genes.1vs2, up.genes.1vs3)
down_genes_liver <- intersect(down.genes.1vs2, down.genes.1vs3)

# Heart 
up_genes_heart <- intersect(down.genes.1vs2, up.genes.2vs3)
down_genes_heart <- intersect(up.genes.1vs2, down.genes.2vs3)

# Colon
up_genes_colon <- intersect(down.genes.1vs3, down.genes.2vs3)
down_genes_colon <- intersect(up.genes.1vs3, up.genes.2vs3)

Export results to .xlsx file

create_de_df <- function(v1, v2, cols_name){
  sq <- seq(max(length(v1), length(v2)))
  df <- data.frame(v1[sq],
                   v2[sq])
  colnames(df) <- cols_name
  return(df)
}


a_1vs2 <- create_de_df(up.genes.1vs2, down.genes.1vs2, c('up_liver_vs_heart','down_liver_vs_heart'))
b_1vs3 <- create_de_df(up.genes.1vs3, down.genes.1vs3, c('up_liver_vs_colon', 'down_liver_vs_colon'))
c_2vs3 <- create_de_df(up.genes.2vs3, down.genes.2vs3, c('up_heart_vs_colon', 'down_heart_vs_colon'))

d_de_liver <- create_de_df(up_genes_liver, down_genes_liver, c('up_liver','down_liver'))
e_de_heart <- create_de_df(up_genes_heart, down_genes_heart, c('up_heart','down_heart'))
f_de_colon <- create_de_df(up_genes_colon, down_genes_colon, c('up_colon','down_colon'))

xlsx::write.xlsx(a_1vs2, paste(wd,"966292_DE_genes.xlsx"), 
                 sheetName = "Liver vs heart",
                 col.names = TRUE, 
                 row.names = FALSE, 
                 append = FALSE, 
                 showNA=FALSE)

xlsx::write.xlsx(b_1vs3, paste(wd,"966292_DE_genes.xlsx"), 
                 sheetName = "Liver vs colon",
                 col.names = TRUE, 
                 row.names = FALSE, 
                 append = TRUE, 
                 showNA=FALSE)

xlsx::write.xlsx(c_2vs3, paste(wd,"966292_DE_genes.xlsx"), 
                 sheetName = "Heart vs colon",
                 col.names = TRUE, 
                 row.names = FALSE, 
                 append = TRUE, 
                 showNA=FALSE)

xlsx::write.xlsx(d_de_liver, paste(wd,"966292_DE_genes.xlsx"), 
                 sheetName = "DEG Liver",
                 col.names = TRUE, 
                 row.names = FALSE, 
                 append = TRUE, 
                 showNA=FALSE)

xlsx::write.xlsx(e_de_heart, paste(wd,"966292_DE_genes.xlsx"), 
                 sheetName = "DEG Heart",
                 col.names = TRUE, 
                 row.names = FALSE, 
                 append = TRUE, 
                 showNA=FALSE)

xlsx::write.xlsx(f_de_colon, paste(wd,"966292_DE_genes.xlsx"), 
                 sheetName = "DEG Colon",
                 col.names = TRUE, 
                 row.names = FALSE, 
                 append = TRUE, 
                 showNA=FALSE)

GO analysis on DE genes

In order to perform functional enrichment for this analysis, function enrichGO() contained in the package clusterProfiler is used.

Firstly, we have to remove the version from the ENSG ID.

up_genes_liver_noversion <- sapply(strsplit(up_genes_liver, "\\."), "[[", 1)
up_genes_heart_noversion <- sapply(strsplit(up_genes_heart, "\\."), "[[", 1)
up_genes_colon_noversion <- sapply(strsplit(up_genes_colon, "\\."), "[[", 1)

go_liver <- enrichGO(up_genes_liver_noversion, keyType = "ENSEMBL", 
                                      OrgDb = org.Hs.eg.db,
                                      ont = 'BP')
dotplot(go_liver)

go_heart <- enrichGO(up_genes_heart_noversion, keyType = "ENSEMBL", 
                                      OrgDb = org.Hs.eg.db,
                                      ont = 'BP')
dotplot(go_heart)

go_colon <- enrichGO(up_genes_colon_noversion, keyType = "ENSEMBL", 
                                      OrgDb = org.Hs.eg.db,
                                      ont = 'BP')
dotplot(go_colon)

Now we select the genes up-regulated in tissue 1 (liver) in both comparison and sort them by the FDR:

up_1vs2_FDR <- data.frame(deg.1vs2[deg.1vs2$logFC > 0,]$FDR)
up_1vs2_FDR$ENSG <- row.names(deg.1vs2[deg.1vs2$logFC > 0,])
row.names(up_1vs2_FDR) <- up_1vs2_FDR$ENSG

up_1vs3_FDR <- data.frame(deg.1vs3[deg.1vs3$logFC > 0,]$FDR)
up_1vs3_FDR$ENSG <- row.names(deg.1vs3[deg.1vs3$logFC > 0,])
row.names(up_1vs3_FDR) <- up_1vs3_FDR$ENSG

up_both <- merge(up_1vs2_FDR,up_1vs3_FDR)
sort_up_both <- up_both[order(up_both$deg.1vs2.deg.1vs2.logFC...0....FDR,up_both$deg.1vs3.deg.1vs3.logFC...0....FDR),]

Session info

sessionInfo()

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8          LC_NUMERIC=C                 
##  [3] LC_TIME=en_GB.UTF-8           LC_COLLATE=en_US.UTF-8       
##  [5] LC_MONETARY=en_GB.UTF-8       LC_MESSAGES=en_US.UTF-8      
##  [7] LC_PAPER=en_GB.UTF-8          LC_NAME=en_GB.UTF-8          
##  [9] LC_ADDRESS=en_GB.UTF-8        LC_TELEPHONE=en_GB.UTF-8     
## [11] LC_MEASUREMENT=en_GB.UTF-8    LC_IDENTIFICATION=en_GB.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] clusterProfiler_4.2.2       org.Hs.eg.db_3.14.0        
##  [3] AnnotationDbi_1.56.2        gridExtra_2.3              
##  [5] viridis_0.6.2               viridisLite_0.4.0          
##  [7] RColorBrewer_1.1-2          reshape2_1.4.4             
##  [9] ggplot2_3.3.5               dplyr_1.0.7                
## [11] recount_1.20.0              DESeq2_1.34.0              
## [13] SummarizedExperiment_1.24.0 Biobase_2.54.0             
## [15] MatrixGenerics_1.6.0        matrixStats_0.61.0         
## [17] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
## [19] IRanges_2.28.0              S4Vectors_0.32.3           
## [21] BiocGenerics_0.40.0         edgeR_3.36.0               
## [23] limma_3.50.1                rmdformats_1.0.3           
## [25] knitr_1.37                 
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2               tidyselect_1.1.1         RSQLite_2.2.9           
##   [4] htmlwidgets_1.5.4        BiocParallel_1.28.3      scatterpie_0.1.7        
##   [7] munsell_0.5.0            codetools_0.2-18         rentrez_1.2.3           
##  [10] withr_2.4.3              colorspace_2.0-2         GOSemSim_2.20.0         
##  [13] filelock_1.0.2           highr_0.9                rstudioapi_0.13         
##  [16] rJava_1.0-6              DOSE_3.20.1              labeling_0.4.2          
##  [19] GenomeInfoDbData_1.2.7   polyclip_1.10-0          bit64_4.0.5             
##  [22] farver_2.1.0             downloader_0.4           treeio_1.18.1           
##  [25] vctrs_0.3.8              generics_0.1.2           xfun_0.30               
##  [28] BiocFileCache_2.2.1      R6_2.5.1                 graphlayouts_0.8.0      
##  [31] locfit_1.5-9.5           bitops_1.0-7             cachem_1.0.6            
##  [34] fgsea_1.20.0             gridGraphics_0.5-1       DelayedArray_0.20.0     
##  [37] assertthat_0.2.1         BiocIO_1.4.0             scales_1.1.1            
##  [40] ggraph_2.0.5             nnet_7.3-15              derfinder_1.28.0        
##  [43] enrichplot_1.14.2        gtable_0.3.0             xlsx_0.6.5              
##  [46] tidygraph_1.2.0          rlang_1.0.1              genefilter_1.76.0       
##  [49] splines_4.1.2            lazyeval_0.2.2           rtracklayer_1.54.0      
##  [52] GEOquery_2.62.2          checkmate_2.0.0          yaml_2.2.2              
##  [55] GenomicFeatures_1.46.4   backports_1.4.1          qvalue_2.26.0           
##  [58] Hmisc_4.6-0              tools_4.1.2              bookdown_0.25           
##  [61] ggplotify_0.1.0          ellipsis_0.3.2           jquerylib_0.1.4         
##  [64] Rcpp_1.0.8               plyr_1.8.6               base64enc_0.1-3         
##  [67] progress_1.2.2           zlibbioc_1.40.0          purrr_0.3.4             
##  [70] RCurl_1.98-1.5           prettyunits_1.1.1        rpart_4.1-15            
##  [73] bumphunter_1.36.0        GenomicFiles_1.30.0      ggrepel_0.9.1           
##  [76] cluster_2.1.0            magrittr_2.0.2           data.table_1.14.2       
##  [79] DO.db_2.9                xlsxjars_0.6.1           patchwork_1.1.1         
##  [82] hms_1.1.1                evaluate_0.14            xtable_1.8-4            
##  [85] XML_3.99-0.8             jpeg_0.1-9               compiler_4.1.2          
##  [88] biomaRt_2.50.3           tibble_3.1.6             shadowtext_0.1.1        
##  [91] crayon_1.4.2             htmltools_0.5.2          ggfun_0.0.6             
##  [94] tzdb_0.2.0               Formula_1.2-4            tidyr_1.2.0             
##  [97] geneplotter_1.72.0       aplot_0.1.3              DBI_1.1.2               
## [100] tweenr_1.0.2             dbplyr_2.1.1             MASS_7.3-53.1           
## [103] rappdirs_0.3.3           Matrix_1.4-0             readr_2.1.2             
## [106] cli_3.1.1                parallel_4.1.2           derfinderHelper_1.28.0  
## [109] igraph_1.2.11            pkgconfig_2.0.3          GenomicAlignments_1.30.0
## [112] foreign_0.8-80           xml2_1.3.3               foreach_1.5.2           
## [115] ggtree_3.2.1             annotate_1.72.0          bslib_0.3.1             
## [118] rngtools_1.5.2           XVector_0.34.0           yulab.utils_0.0.4       
## [121] doRNG_1.8.2              stringr_1.4.0            VariantAnnotation_1.40.0
## [124] digest_0.6.29            Biostrings_2.62.0        rmarkdown_2.13          
## [127] fastmatch_1.1-3          tidytree_0.3.9           htmlTable_2.4.0         
## [130] restfulr_0.0.13          curl_4.3.2               Rsamtools_2.10.0        
## [133] rjson_0.2.21             nlme_3.1-152             lifecycle_1.0.1         
## [136] jsonlite_1.7.3           BSgenome_1.62.0          fansi_1.0.2             
## [139] pillar_1.7.0             lattice_0.20-41          KEGGREST_1.34.0         
## [142] fastmap_1.1.0            httr_1.4.2               survival_3.2-7          
## [145] GO.db_3.14.0             glue_1.6.1               png_0.1-7               
## [148] iterators_1.0.14         bit_4.0.4                ggforce_0.3.3           
## [151] stringi_1.7.6            sass_0.4.0               blob_1.2.2              
## [154] latticeExtra_0.6-29      memoise_2.0.1            ape_5.6-2