Pathway enrichment

Here we explored whether different pathways were enriched in in vitro only and in vivo only hits.

load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/GeneID2Symbol/geneInforTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/HomoloGene/homologeneTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/MSigDB/MSigDB.RData")
load("DataSummary/Pooled_GeneLevel_log2FC.RData")

source("/sibcb2/bioinformatics/Script/PathwayEnrichment.R")
source("/sibcb2/bioinformatics/Script/IDconverter.R")

In vivo only hits

mPG = intersect(rownames(maM), rownames(hmM))
negGenes = grep("NonTarget", mPG, value = TRUE)
mirGenes = grep("mmu-mir",   mPG, value = TRUE)
mPG   = setdiff(mPG, c(negGenes, mirGenes))

dM  = data.frame(gene = mPG, 
                 ma = maM[mPG, "Vivo_vs_Plasmid"] - maM[mPG, "Vitro_vs_Plasmid"],
                 hm = hmM[mPG, "Vivo_vs_Plasmid"] - hmM[mPG, "Vitro_vs_Plasmid"])

difLogFC = setNames((dM$ma + dM$hm)/2, dM$gene)

mVivoGene = names(sort(difLogFC)[1:500])
hVivoGene = setdiff(HM_IDAnnotation(mVivoGene, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
hPG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")

pwListDB = MSigDB[c(6, 16)]
x = pHyperListDB(hVivoGene, pwListDB, hPG)
x = arrange(x, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
x[1:10, 1:7]
                                                                         PathwayName
1                                           KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION
2                              REACTOME_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
3                                                 HALLMARK_OXIDATIVE_PHOSPHORYLATION
4  REACTOME_ANTIGEN_PRESENTATION_FOLDING_ASSEMBLY_AND_PEPTIDE_LOADING_OF_CLASS_I_MHC
5                                               REACTOME_CITRIC_ACID_CYCLE_TCA_CYCLE
6                                                       KEGG_CITRATE_CYCLE_TCA_CYCLE
7                                                               BIOCARTA_RHO_PATHWAY
8                                                          BIOCARTA_CHEMICAL_PATHWAY
9                                     REACTOME_ANTIGEN_PROCESSING_CROSS_PRESENTATION
10                                                     REACTOME_ER_PHAGOSOME_PATHWAY
          DataBase SignatureSize PathwaySize Overlap       Pvalue      Qvalue
1  MSigDB_c2_cp_v5           374          47       8 1.148259e-06 0.001584598
2  MSigDB_c2_cp_v5           374         107      11 7.291330e-06 0.005031018
3  MSigDB_h_all_v5           374         191      15 1.062673e-05 0.004888294
4  MSigDB_c2_cp_v5           374          17       4 3.288299e-05 0.011344630
5  MSigDB_c2_cp_v5           374          18       4 4.466297e-05 0.012326980
6  MSigDB_c2_cp_v5           374          30       5 5.692381e-05 0.013092476
7  MSigDB_c2_cp_v5           374          32       5 8.352298e-05 0.016465959
8  MSigDB_c2_cp_v5           374          21       4 1.001337e-04 0.017273055
9  MSigDB_c2_cp_v5           374          66       7 1.426027e-04 0.021865746
10 MSigDB_c2_cp_v5           374          53       6 2.162405e-04 0.029841193

In vitro only hits

Tx = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
mShared = sapply(strsplit(rownames(Tx)[Tx$group == "vitro_only"], " "), function(z) z[2])
mPG = sapply(strsplit(rownames(Tx), " "), function(z) z[2])
Shared = setdiff(HM_IDAnnotation(mShared, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
PG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")

y = pHyperListDB(Shared, pwListDB, PG)
y = arrange(y, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
LongName = "REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S"
y[!(y$PathwayName %in% LongName), 1:7]
                                                                  PathwayName
1                                                 REACTOME_METABOLISM_OF_MRNA
2      REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
3                  REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
4                                                               KEGG_RIBOSOME
5                                           REACTOME_PEPTIDE_CHAIN_ELONGATION
6                                                  REACTOME_METABOLISM_OF_RNA
7        REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
8                            REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
9                                                        REACTOME_TRANSLATION
10                                              REACTOME_INFLUENZA_LIFE_CYCLE
11                                                       HALLMARK_E2F_TARGETS
13 REACTOME_FORMATION_OF_THE_TERNARY_COMPLEX_AND_SUBSEQUENTLY_THE_43S_COMPLEX
14                                                        REACTOME_CELL_CYCLE
15                                                   REACTOME_DNA_REPLICATION
16                                                REACTOME_CELL_CYCLE_MITOTIC
17                                       REACTOME_MRNA_SPLICING_MINOR_PATHWAY
18      REACTOME_ABORTIVE_ELONGATION_OF_HIV1_TRANSCRIPT_IN_THE_ABSENCE_OF_TAT
19                                    REACTOME_ELONGATION_ARREST_AND_RECOVERY
20                       REACTOME_FORMATION_OF_RNA_POL_II_ELONGATION_COMPLEX_
21                                         HALLMARK_UNFOLDED_PROTEIN_RESPONSE
22                             REACTOME_METABOLISM_OF_LIPIDS_AND_LIPOPROTEINS
23                               REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS
24                                             REACTOME_MITOTIC_M_M_G1_PHASES
25                                              REACTOME_MEMBRANE_TRAFFICKING
26                                            REACTOME_METABOLISM_OF_PROTEINS
          DataBase SignatureSize PathwaySize Overlap       Pvalue       Qvalue
1  MSigDB_c2_cp_v5          1498         136     111 9.285565e-08 7.766109e-07
2  MSigDB_c2_cp_v5          1498          72      63 2.240920e-07 1.862933e-06
3  MSigDB_c2_cp_v5          1498          73      63 7.279819e-07 6.015659e-06
4  MSigDB_c2_cp_v5          1498          60      52 4.189618e-06 3.431260e-05
5  MSigDB_c2_cp_v5          1498          60      52 4.189618e-06 3.431260e-05
6  MSigDB_c2_cp_v5          1498         163     126 5.003623e-06 4.061765e-05
7  MSigDB_c2_cp_v5          1498          72      60 1.612820e-05 1.297779e-04
8  MSigDB_c2_cp_v5          1498          72      60 1.612820e-05 1.297779e-04
9  MSigDB_c2_cp_v5          1498          94      75 4.576286e-05 3.650448e-04
10 MSigDB_c2_cp_v5          1498          91      72 1.070983e-04 8.493999e-04
11 MSigDB_h_all_v5          1498         102      79 2.123148e-04 1.674254e-03
13 MSigDB_c2_cp_v5          1498          32      27 1.341371e-03 1.045815e-02
14 MSigDB_c2_cp_v5          1498         178     128 1.423581e-03 1.103675e-02
15 MSigDB_c2_cp_v5          1498         115      85 2.001602e-03 1.543135e-02
16 MSigDB_c2_cp_v5          1498         154     111 2.411496e-03 1.848814e-02
17 MSigDB_c2_cp_v5          1498          41      33 2.942138e-03 2.243177e-02
18 MSigDB_c2_cp_v5          1498          17      15 3.235390e-03 2.453208e-02
19 MSigDB_c2_cp_v5          1498          21      18 3.854617e-03 2.906760e-02
20 MSigDB_c2_cp_v5          1498          29      24 3.895851e-03 2.921888e-02
21 MSigDB_h_all_v5          1498          47      37 4.056055e-03 3.025598e-02
22 MSigDB_c2_cp_v5          1498          67      51 4.250320e-03 3.153463e-02
23 MSigDB_c2_cp_v5          1498          11      10 5.064556e-03 3.737480e-02
24 MSigDB_c2_cp_v5          1498         101      74 5.282358e-03 3.877476e-02
25 MSigDB_c2_cp_v5          1498          28      23 5.507916e-03 4.021653e-02
26 MSigDB_c2_cp_v5          1498         163     115 6.803383e-03 4.941405e-02

Concordance check

The in vovo-only genes were also defined using Z-score and permutation p-values. Here, we compared the concordance of two sets.

mVivo = sapply(strsplit(rownames(Tx)[Tx$group == "vivo_only"], " "), function(z) z[2])
mShared = intersect(mVivo, mVivoGene)
cat(length(mVivo), " genes were found using Z-score and p-value\n",
    length(mVivoGene), " genes werer found using Log2 fold-change\n",
    length(mShared), " genes shared.\n", sep = "")
528 genes were found using Z-score and p-value
500 genes werer found using Log2 fold-change
125 genes shared.

sessionInfo

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.7 (Maipo)

Matrix products: default
BLAS: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRblas.so
LAPACK: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRlapack.so

locale:
[1] C

attached base packages:
[1] parallel  grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] dplyr_1.0.5         survcomp_1.32.0     prodlim_2019.11.13 
 [4] RColorBrewer_1.1-2  gridExtra_2.3       survHD_0.99.1      
 [7] survC1_1.0-2        Hmisc_4.3-0         Formula_1.2-3      
[10] lattice_0.20-38     penalized_0.9-51    survival_3.1-8     
[13] Biobase_2.42.0      BiocGenerics_0.28.0 VennDiagram_1.6.20 
[16] futile.logger_1.4.3 ggrepel_0.8.1       ggplot2_3.2.1      
[19] rmarkdown_2.0      

loaded via a namespace (and not attached):
 [1] splines_3.5.1        rmeta_3.0            highr_0.8           
 [4] latticeExtra_0.6-28  bootstrap_2019.6     yaml_2.2.0          
 [7] survivalROC_1.0.3    pillar_1.5.1         backports_1.1.5     
[10] glue_1.4.2           digest_0.6.23        checkmate_1.9.4     
[13] colorspace_1.4-1     htmltools_0.5.1.1    Matrix_1.2-18       
[16] pkgconfig_2.0.3      purrr_0.3.3          scales_1.1.0        
[19] lava_1.6.7           htmlTable_1.13.3     tibble_3.1.0        
[22] generics_0.0.2       farver_2.0.1         ellipsis_0.3.0      
[25] withr_2.1.2          nnet_7.3-12          lazyeval_0.2.2      
[28] magrittr_1.5         crayon_1.3.4         evaluate_0.14       
[31] fansi_0.4.0          foreign_0.8-74       SuppDists_1.1-9.5   
[34] tools_3.5.1          data.table_1.12.8    formatR_1.7         
[37] lifecycle_1.0.0      stringr_1.4.0        munsell_0.5.0       
[40] cluster_2.1.0        lambda.r_1.2.4       compiler_3.5.1      
[43] rlang_0.4.10         rstudioapi_0.10      htmlwidgets_1.5.1   
[46] base64enc_0.1-3      labeling_0.3         gtable_0.3.0        
[49] DBI_1.1.0            R6_2.4.1             knitr_1.26          
[52] utf8_1.1.4           futile.options_1.0.1 KernSmooth_2.23-16  
[55] stringi_1.4.3        Rcpp_1.0.3           vctrs_0.3.7         
[58] rpart_4.1-15         acepack_1.4.1        tidyselect_1.1.0    
[61] xfun_0.18