Here we explored whether different pathways were enriched in in vitro only and in vivo only hits.
load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/GeneID2Symbol/geneInforTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/HomoloGene/homologeneTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/MSigDB/MSigDB.RData")
load("DataSummary/Pooled_GeneLevel_log2FC.RData")
source("/sibcb2/bioinformatics/Script/PathwayEnrichment.R")
source("/sibcb2/bioinformatics/Script/IDconverter.R")
mPG = intersect(rownames(maM), rownames(hmM))
negGenes = grep("NonTarget", mPG, value = TRUE)
mirGenes = grep("mmu-mir", mPG, value = TRUE)
mPG = setdiff(mPG, c(negGenes, mirGenes))
dM = data.frame(gene = mPG,
ma = maM[mPG, "Vivo_vs_Plasmid"] - maM[mPG, "Vitro_vs_Plasmid"],
hm = hmM[mPG, "Vivo_vs_Plasmid"] - hmM[mPG, "Vitro_vs_Plasmid"])
difLogFC = setNames((dM$ma + dM$hm)/2, dM$gene)
mVivoGene = names(sort(difLogFC)[1:500])
hVivoGene = setdiff(HM_IDAnnotation(mVivoGene, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
hPG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
pwListDB = MSigDB[c(6, 16)]
x = pHyperListDB(hVivoGene, pwListDB, hPG)
x = arrange(x, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
x[1:10, 1:7]
PathwayName
1 KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION
2 REACTOME_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
3 HALLMARK_OXIDATIVE_PHOSPHORYLATION
4 REACTOME_ANTIGEN_PRESENTATION_FOLDING_ASSEMBLY_AND_PEPTIDE_LOADING_OF_CLASS_I_MHC
5 REACTOME_CITRIC_ACID_CYCLE_TCA_CYCLE
6 KEGG_CITRATE_CYCLE_TCA_CYCLE
7 BIOCARTA_RHO_PATHWAY
8 BIOCARTA_CHEMICAL_PATHWAY
9 REACTOME_ANTIGEN_PROCESSING_CROSS_PRESENTATION
10 REACTOME_ER_PHAGOSOME_PATHWAY
DataBase SignatureSize PathwaySize Overlap Pvalue Qvalue
1 MSigDB_c2_cp_v5 374 47 8 1.148259e-06 0.001584598
2 MSigDB_c2_cp_v5 374 107 11 7.291330e-06 0.005031018
3 MSigDB_h_all_v5 374 191 15 1.062673e-05 0.004888294
4 MSigDB_c2_cp_v5 374 17 4 3.288299e-05 0.011344630
5 MSigDB_c2_cp_v5 374 18 4 4.466297e-05 0.012326980
6 MSigDB_c2_cp_v5 374 30 5 5.692381e-05 0.013092476
7 MSigDB_c2_cp_v5 374 32 5 8.352298e-05 0.016465959
8 MSigDB_c2_cp_v5 374 21 4 1.001337e-04 0.017273055
9 MSigDB_c2_cp_v5 374 66 7 1.426027e-04 0.021865746
10 MSigDB_c2_cp_v5 374 53 6 2.162405e-04 0.029841193
Tx = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
mShared = sapply(strsplit(rownames(Tx)[Tx$group == "vitro_only"], " "), function(z) z[2])
mPG = sapply(strsplit(rownames(Tx), " "), function(z) z[2])
Shared = setdiff(HM_IDAnnotation(mShared, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
PG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
y = pHyperListDB(Shared, pwListDB, PG)
y = arrange(y, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
LongName = "REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S"
y[!(y$PathwayName %in% LongName), 1:7]
PathwayName
1 REACTOME_METABOLISM_OF_MRNA
2 REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
3 REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
4 KEGG_RIBOSOME
5 REACTOME_PEPTIDE_CHAIN_ELONGATION
6 REACTOME_METABOLISM_OF_RNA
7 REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
8 REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
9 REACTOME_TRANSLATION
10 REACTOME_INFLUENZA_LIFE_CYCLE
11 HALLMARK_E2F_TARGETS
13 REACTOME_FORMATION_OF_THE_TERNARY_COMPLEX_AND_SUBSEQUENTLY_THE_43S_COMPLEX
14 REACTOME_CELL_CYCLE
15 REACTOME_DNA_REPLICATION
16 REACTOME_CELL_CYCLE_MITOTIC
17 REACTOME_MRNA_SPLICING_MINOR_PATHWAY
18 REACTOME_ABORTIVE_ELONGATION_OF_HIV1_TRANSCRIPT_IN_THE_ABSENCE_OF_TAT
19 REACTOME_ELONGATION_ARREST_AND_RECOVERY
20 REACTOME_FORMATION_OF_RNA_POL_II_ELONGATION_COMPLEX_
21 HALLMARK_UNFOLDED_PROTEIN_RESPONSE
22 REACTOME_METABOLISM_OF_LIPIDS_AND_LIPOPROTEINS
23 REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS
24 REACTOME_MITOTIC_M_M_G1_PHASES
25 REACTOME_MEMBRANE_TRAFFICKING
26 REACTOME_METABOLISM_OF_PROTEINS
DataBase SignatureSize PathwaySize Overlap Pvalue Qvalue
1 MSigDB_c2_cp_v5 1498 136 111 9.285565e-08 7.766109e-07
2 MSigDB_c2_cp_v5 1498 72 63 2.240920e-07 1.862933e-06
3 MSigDB_c2_cp_v5 1498 73 63 7.279819e-07 6.015659e-06
4 MSigDB_c2_cp_v5 1498 60 52 4.189618e-06 3.431260e-05
5 MSigDB_c2_cp_v5 1498 60 52 4.189618e-06 3.431260e-05
6 MSigDB_c2_cp_v5 1498 163 126 5.003623e-06 4.061765e-05
7 MSigDB_c2_cp_v5 1498 72 60 1.612820e-05 1.297779e-04
8 MSigDB_c2_cp_v5 1498 72 60 1.612820e-05 1.297779e-04
9 MSigDB_c2_cp_v5 1498 94 75 4.576286e-05 3.650448e-04
10 MSigDB_c2_cp_v5 1498 91 72 1.070983e-04 8.493999e-04
11 MSigDB_h_all_v5 1498 102 79 2.123148e-04 1.674254e-03
13 MSigDB_c2_cp_v5 1498 32 27 1.341371e-03 1.045815e-02
14 MSigDB_c2_cp_v5 1498 178 128 1.423581e-03 1.103675e-02
15 MSigDB_c2_cp_v5 1498 115 85 2.001602e-03 1.543135e-02
16 MSigDB_c2_cp_v5 1498 154 111 2.411496e-03 1.848814e-02
17 MSigDB_c2_cp_v5 1498 41 33 2.942138e-03 2.243177e-02
18 MSigDB_c2_cp_v5 1498 17 15 3.235390e-03 2.453208e-02
19 MSigDB_c2_cp_v5 1498 21 18 3.854617e-03 2.906760e-02
20 MSigDB_c2_cp_v5 1498 29 24 3.895851e-03 2.921888e-02
21 MSigDB_h_all_v5 1498 47 37 4.056055e-03 3.025598e-02
22 MSigDB_c2_cp_v5 1498 67 51 4.250320e-03 3.153463e-02
23 MSigDB_c2_cp_v5 1498 11 10 5.064556e-03 3.737480e-02
24 MSigDB_c2_cp_v5 1498 101 74 5.282358e-03 3.877476e-02
25 MSigDB_c2_cp_v5 1498 28 23 5.507916e-03 4.021653e-02
26 MSigDB_c2_cp_v5 1498 163 115 6.803383e-03 4.941405e-02
The in vovo-only genes were also defined using Z-score and permutation p-values. Here, we compared the concordance of two sets.
mVivo = sapply(strsplit(rownames(Tx)[Tx$group == "vivo_only"], " "), function(z) z[2])
mShared = intersect(mVivo, mVivoGene)
cat(length(mVivo), " genes were found using Z-score and p-value\n",
length(mVivoGene), " genes werer found using Log2 fold-change\n",
length(mShared), " genes shared.\n", sep = "")
528 genes were found using Z-score and p-value
500 genes werer found using Log2 fold-change
125 genes shared.
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.7 (Maipo)
Matrix products: default
BLAS: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRblas.so
LAPACK: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRlapack.so
locale:
[1] C
attached base packages:
[1] parallel grid stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] dplyr_1.0.5 survcomp_1.32.0 prodlim_2019.11.13
[4] RColorBrewer_1.1-2 gridExtra_2.3 survHD_0.99.1
[7] survC1_1.0-2 Hmisc_4.3-0 Formula_1.2-3
[10] lattice_0.20-38 penalized_0.9-51 survival_3.1-8
[13] Biobase_2.42.0 BiocGenerics_0.28.0 VennDiagram_1.6.20
[16] futile.logger_1.4.3 ggrepel_0.8.1 ggplot2_3.2.1
[19] rmarkdown_2.0
loaded via a namespace (and not attached):
[1] splines_3.5.1 rmeta_3.0 highr_0.8
[4] latticeExtra_0.6-28 bootstrap_2019.6 yaml_2.2.0
[7] survivalROC_1.0.3 pillar_1.5.1 backports_1.1.5
[10] glue_1.4.2 digest_0.6.23 checkmate_1.9.4
[13] colorspace_1.4-1 htmltools_0.5.1.1 Matrix_1.2-18
[16] pkgconfig_2.0.3 purrr_0.3.3 scales_1.1.0
[19] lava_1.6.7 htmlTable_1.13.3 tibble_3.1.0
[22] generics_0.0.2 farver_2.0.1 ellipsis_0.3.0
[25] withr_2.1.2 nnet_7.3-12 lazyeval_0.2.2
[28] magrittr_1.5 crayon_1.3.4 evaluate_0.14
[31] fansi_0.4.0 foreign_0.8-74 SuppDists_1.1-9.5
[34] tools_3.5.1 data.table_1.12.8 formatR_1.7
[37] lifecycle_1.0.0 stringr_1.4.0 munsell_0.5.0
[40] cluster_2.1.0 lambda.r_1.2.4 compiler_3.5.1
[43] rlang_0.4.10 rstudioapi_0.10 htmlwidgets_1.5.1
[46] base64enc_0.1-3 labeling_0.3 gtable_0.3.0
[49] DBI_1.1.0 R6_2.4.1 knitr_1.26
[52] utf8_1.1.4 futile.options_1.0.1 KernSmooth_2.23-16
[55] stringi_1.4.3 Rcpp_1.0.3 vctrs_0.3.7
[58] rpart_4.1-15 acepack_1.4.1 tidyselect_1.1.0
[61] xfun_0.18