Here we explored whether different pathways were enriched in in vitro only and in vivo only hits.
load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/GeneID2Symbol/geneInforTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/HomoloGene/homologeneTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/MSigDB/MSigDB.RData")
load("DataSummary/Pooled_GeneLevel_log2FC.RData")
source("/sibcb2/bioinformatics/Script/PathwayEnrichment.R")
source("/sibcb2/bioinformatics/Script/IDconverter.R")
mPG = intersect(rownames(maM), rownames(hmM))
negGenes = grep("NonTarget", mPG, value = TRUE)
mirGenes = grep("mmu-mir", mPG, value = TRUE)
mPG = setdiff(mPG, c(negGenes, mirGenes))
dM = data.frame(gene = mPG,
ma = maM[mPG, "Vivo_vs_Plasmid"] - maM[mPG, "Vitro_vs_Plasmid"],
hm = hmM[mPG, "Vivo_vs_Plasmid"] - hmM[mPG, "Vitro_vs_Plasmid"])
difLogFC = setNames((dM$ma + dM$hm)/2, dM$gene)
mVivoGene = names(sort(difLogFC)[1:500])
hVivoGene = setdiff(HM_IDAnnotation(mVivoGene, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
hPG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
pwListDB = MSigDB[c(6, 16)]
x = pHyperListDB(hVivoGene, pwListDB, hPG)
x = arrange(x, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
x[1:10, 1:7]
PathwayName
1 KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION
2 REACTOME_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
3 HALLMARK_OXIDATIVE_PHOSPHORYLATION
4 REACTOME_ANTIGEN_PRESENTATION_FOLDING_ASSEMBLY_AND_PEPTIDE_LOADING_OF_CLASS_I_MHC
5 REACTOME_CITRIC_ACID_CYCLE_TCA_CYCLE
6 KEGG_CITRATE_CYCLE_TCA_CYCLE
7 BIOCARTA_RHO_PATHWAY
8 BIOCARTA_CHEMICAL_PATHWAY
9 REACTOME_ANTIGEN_PROCESSING_CROSS_PRESENTATION
10 REACTOME_ER_PHAGOSOME_PATHWAY
DataBase SignatureSize PathwaySize Overlap Pvalue
1 MSigDB_c2_cp_v5 374 47 8 1.148259e-06
2 MSigDB_c2_cp_v5 374 107 11 7.291330e-06
3 MSigDB_h_all_v5 374 191 15 1.062673e-05
4 MSigDB_c2_cp_v5 374 17 4 3.288299e-05
5 MSigDB_c2_cp_v5 374 18 4 4.466297e-05
6 MSigDB_c2_cp_v5 374 30 5 5.692381e-05
7 MSigDB_c2_cp_v5 374 32 5 8.352298e-05
8 MSigDB_c2_cp_v5 374 21 4 1.001337e-04
9 MSigDB_c2_cp_v5 374 66 7 1.426027e-04
10 MSigDB_c2_cp_v5 374 53 6 2.162405e-04
Qvalue
1 0.001584598
2 0.005031018
3 0.004888294
4 0.011344630
5 0.012326980
6 0.013092476
7 0.016465959
8 0.017273055
9 0.021865746
10 0.029841193
Tx = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
mShared = sapply(strsplit(rownames(Tx)[Tx$group == "vitro_only"], " "), function(z) z[2])
mPG = sapply(strsplit(rownames(Tx), " "), function(z) z[2])
Shared = setdiff(HM_IDAnnotation(mShared, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
PG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
y = pHyperListDB(Shared, pwListDB, PG)
y = arrange(y, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
LongName = "REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S"
y[!(y$PathwayName %in% LongName), 1:7]
PathwayName
1 REACTOME_METABOLISM_OF_MRNA
2 REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
3 REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
4 KEGG_RIBOSOME
5 REACTOME_PEPTIDE_CHAIN_ELONGATION
6 REACTOME_METABOLISM_OF_RNA
7 REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
8 REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
9 REACTOME_TRANSLATION
10 REACTOME_INFLUENZA_LIFE_CYCLE
11 HALLMARK_E2F_TARGETS
13 REACTOME_FORMATION_OF_THE_TERNARY_COMPLEX_AND_SUBSEQUENTLY_THE_43S_COMPLEX
14 REACTOME_CELL_CYCLE
15 REACTOME_DNA_REPLICATION
16 REACTOME_CELL_CYCLE_MITOTIC
17 REACTOME_MRNA_SPLICING_MINOR_PATHWAY
18 REACTOME_ABORTIVE_ELONGATION_OF_HIV1_TRANSCRIPT_IN_THE_ABSENCE_OF_TAT
19 REACTOME_ELONGATION_ARREST_AND_RECOVERY
20 REACTOME_FORMATION_OF_RNA_POL_II_ELONGATION_COMPLEX_
21 HALLMARK_UNFOLDED_PROTEIN_RESPONSE
22 REACTOME_METABOLISM_OF_LIPIDS_AND_LIPOPROTEINS
23 REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS
24 REACTOME_MITOTIC_M_M_G1_PHASES
25 REACTOME_MEMBRANE_TRAFFICKING
26 REACTOME_METABOLISM_OF_PROTEINS
DataBase SignatureSize PathwaySize Overlap Pvalue
1 MSigDB_c2_cp_v5 1498 136 111 9.285565e-08
2 MSigDB_c2_cp_v5 1498 72 63 2.240920e-07
3 MSigDB_c2_cp_v5 1498 73 63 7.279819e-07
4 MSigDB_c2_cp_v5 1498 60 52 4.189618e-06
5 MSigDB_c2_cp_v5 1498 60 52 4.189618e-06
6 MSigDB_c2_cp_v5 1498 163 126 5.003623e-06
7 MSigDB_c2_cp_v5 1498 72 60 1.612820e-05
8 MSigDB_c2_cp_v5 1498 72 60 1.612820e-05
9 MSigDB_c2_cp_v5 1498 94 75 4.576286e-05
10 MSigDB_c2_cp_v5 1498 91 72 1.070983e-04
11 MSigDB_h_all_v5 1498 102 79 2.123148e-04
13 MSigDB_c2_cp_v5 1498 32 27 1.341371e-03
14 MSigDB_c2_cp_v5 1498 178 128 1.423581e-03
15 MSigDB_c2_cp_v5 1498 115 85 2.001602e-03
16 MSigDB_c2_cp_v5 1498 154 111 2.411496e-03
17 MSigDB_c2_cp_v5 1498 41 33 2.942138e-03
18 MSigDB_c2_cp_v5 1498 17 15 3.235390e-03
19 MSigDB_c2_cp_v5 1498 21 18 3.854617e-03
20 MSigDB_c2_cp_v5 1498 29 24 3.895851e-03
21 MSigDB_h_all_v5 1498 47 37 4.056055e-03
22 MSigDB_c2_cp_v5 1498 67 51 4.250320e-03
23 MSigDB_c2_cp_v5 1498 11 10 5.064556e-03
24 MSigDB_c2_cp_v5 1498 101 74 5.282358e-03
25 MSigDB_c2_cp_v5 1498 28 23 5.507916e-03
26 MSigDB_c2_cp_v5 1498 163 115 6.803383e-03
Qvalue
1 7.766109e-07
2 1.862933e-06
3 6.015659e-06
4 3.431260e-05
5 3.431260e-05
6 4.061765e-05
7 1.297779e-04
8 1.297779e-04
9 3.650448e-04
10 8.493999e-04
11 1.674254e-03
13 1.045815e-02
14 1.103675e-02
15 1.543135e-02
16 1.848814e-02
17 2.243177e-02
18 2.453208e-02
19 2.906760e-02
20 2.921888e-02
21 3.025598e-02
22 3.153463e-02
23 3.737480e-02
24 3.877476e-02
25 4.021653e-02
26 4.941405e-02
The in vovo-only genes were also defined using Z-score and permutation p-values. Here, we compared the concordance of two sets.
mVivo = sapply(strsplit(rownames(Tx)[Tx$group == "vivo_only"], " "), function(z) z[2])
mShared = intersect(mVivo, mVivoGene)
cat(length(mVivo), " genes were found using Z-score and p-value\n",
length(mVivoGene), " genes werer found using Log2 fold-change\n",
length(mShared), " genes shared.\n", sep = "")
528 genes were found using Z-score and p-value
500 genes werer found using Log2 fold-change
125 genes shared.
R version 4.1.0 (2021-05-18)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.6 (Maipo)
Matrix products: default
BLAS/LAPACK: /sibcb2/bioinformatics/software/Miniconda3/lib/libopenblasp-r0.3.15.so
locale:
[1] C
attached base packages:
[1] parallel grid stats graphics grDevices utils
[7] datasets methods base
other attached packages:
[1] survcomp_1.42.0 prodlim_2019.11.13 gridExtra_2.3
[4] survHD_0.99.1 survC1_1.0-3 Hmisc_4.5-0
[7] Formula_1.2-4 lattice_0.20-44 penalized_0.9-52
[10] survival_3.2-11 Biobase_2.52.0 BiocGenerics_0.38.0
[13] stringr_1.5.0 dplyr_1.1.2 scales_1.2.0
[16] RColorBrewer_1.1-3 pheatmap_1.0.12 VennDiagram_1.6.20
[19] futile.logger_1.4.3 ggrepel_0.9.1 ggplot2_3.4.2
[22] rmarkdown_2.20
loaded via a namespace (and not attached):
[1] survivalROC_1.0.3 nlme_3.1-152 tools_4.1.0
[4] backports_1.2.1 bslib_0.2.5.1 utf8_1.2.2
[7] R6_2.5.1 KernSmooth_2.23-20 rpart_4.1-15
[10] rmeta_3.0 DBI_1.1.3 mgcv_1.8-36
[13] colorspace_2.0-3 nnet_7.3-16 withr_2.5.0
[16] tidyselect_1.2.1 downlit_0.4.2 compiler_4.1.0
[19] cli_3.6.1 formatR_1.11 htmlTable_2.2.1
[22] labeling_0.4.2 bookdown_0.33 sass_0.4.0
[25] checkmate_2.0.0 digest_0.6.29 foreign_0.8-81
[28] base64enc_0.1-3 jpeg_0.1-9 pkgconfig_2.0.3
[31] htmltools_0.5.2 parallelly_1.32.0 fastmap_1.1.0
[34] highr_0.9 htmlwidgets_1.5.4 rlang_1.1.1
[37] rstudioapi_0.15.0 SuppDists_1.1-9.7 jquerylib_0.1.4
[40] farver_2.1.1 generics_0.1.3 jsonlite_1.8.7
[43] distill_1.5 magrittr_2.0.3 Matrix_1.3-4
[46] Rcpp_1.0.9 munsell_0.5.0 fansi_1.0.3
[49] lifecycle_1.0.3 stringi_1.7.8 yaml_2.3.5
[52] listenv_0.8.0 splines_4.1.0 knitr_1.42
[55] pillar_1.9.0 future.apply_1.9.0 codetools_0.2-18
[58] futile.options_1.0.1 glue_1.6.2 evaluate_0.20
[61] latticeExtra_0.6-29 lambda.r_1.2.4 data.table_1.14.2
[64] png_0.1-7 vctrs_0.6.3 bootstrap_2019.6
[67] gtable_0.3.0 future_1.26.1 cachem_1.0.6
[70] xfun_0.37 tibble_3.2.1 memoise_2.0.1
[73] cluster_2.1.2 lava_1.6.10 globals_0.15.1