Pathway analysis of in genome-wdie CRISPR hits

Pathway enrichment

Here we explored whether different pathways were enriched in in vitro only and in vivo only hits.

load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/GeneID2Symbol/geneInforTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/IDbase/HomoloGene/homologeneTable.RData")
load("/sibcb2/bioinformatics/KnowledgeBase/MSigDB/MSigDB.RData")
load("DataSummary/Pooled_GeneLevel_log2FC.RData")

source("/sibcb2/bioinformatics/Script/PathwayEnrichment.R")
source("/sibcb2/bioinformatics/Script/IDconverter.R")

In vivo only hits

mPG = intersect(rownames(maM), rownames(hmM))
negGenes = grep("NonTarget", mPG, value = TRUE)
mirGenes = grep("mmu-mir",   mPG, value = TRUE)
mPG   = setdiff(mPG, c(negGenes, mirGenes))

dM  = data.frame(gene = mPG, 
         ma = maM[mPG, "Vivo_vs_Plasmid"] - maM[mPG, "Vitro_vs_Plasmid"],
         hm = hmM[mPG, "Vivo_vs_Plasmid"] - hmM[mPG, "Vitro_vs_Plasmid"])

difLogFC = setNames((dM$ma + dM$hm)/2, dM$gene)

mVivoGene = names(sort(difLogFC)[1:500])
hVivoGene = setdiff(HM_IDAnnotation(mVivoGene, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
hPG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")

pwListDB = MSigDB[c(6, 16)]
x = pHyperListDB(hVivoGene, pwListDB, hPG)
x = arrange(x, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
x[1:10, 1:7]
                                                                         PathwayName
1                                           KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION
2                              REACTOME_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT
3                                                 HALLMARK_OXIDATIVE_PHOSPHORYLATION
4  REACTOME_ANTIGEN_PRESENTATION_FOLDING_ASSEMBLY_AND_PEPTIDE_LOADING_OF_CLASS_I_MHC
5                                               REACTOME_CITRIC_ACID_CYCLE_TCA_CYCLE
6                                                       KEGG_CITRATE_CYCLE_TCA_CYCLE
7                                                               BIOCARTA_RHO_PATHWAY
8                                                          BIOCARTA_CHEMICAL_PATHWAY
9                                     REACTOME_ANTIGEN_PROCESSING_CROSS_PRESENTATION
10                                                     REACTOME_ER_PHAGOSOME_PATHWAY
          DataBase SignatureSize PathwaySize Overlap       Pvalue
1  MSigDB_c2_cp_v5           374          47       8 1.148259e-06
2  MSigDB_c2_cp_v5           374         107      11 7.291330e-06
3  MSigDB_h_all_v5           374         191      15 1.062673e-05
4  MSigDB_c2_cp_v5           374          17       4 3.288299e-05
5  MSigDB_c2_cp_v5           374          18       4 4.466297e-05
6  MSigDB_c2_cp_v5           374          30       5 5.692381e-05
7  MSigDB_c2_cp_v5           374          32       5 8.352298e-05
8  MSigDB_c2_cp_v5           374          21       4 1.001337e-04
9  MSigDB_c2_cp_v5           374          66       7 1.426027e-04
10 MSigDB_c2_cp_v5           374          53       6 2.162405e-04
        Qvalue
1  0.001584598
2  0.005031018
3  0.004888294
4  0.011344630
5  0.012326980
6  0.013092476
7  0.016465959
8  0.017273055
9  0.021865746
10 0.029841193

In vitro only hits

Tx = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
mShared = sapply(strsplit(rownames(Tx)[Tx$group == "vitro_only"], " "), function(z) z[2])
mPG = sapply(strsplit(rownames(Tx), " "), function(z) z[2])
Shared = setdiff(HM_IDAnnotation(mShared, geneInforTable, homologeneTable)[, "Homolog_Human"], "")
PG = setdiff(HM_IDAnnotation(mPG, geneInforTable, homologeneTable)[, "Homolog_Human"], "")

y = pHyperListDB(Shared, pwListDB, PG)
y = arrange(y, Pvalue) %>% filter(PathwaySize > 10) %>% filter(Qvalue < 0.05)
LongName = "REACTOME_ACTIVATION_OF_THE_MRNA_UPON_BINDING_OF_THE_CAP_BINDING_COMPLEX_AND_EIFS_AND_SUBSEQUENT_BINDING_TO_43S"
y[!(y$PathwayName %in% LongName), 1:7]
                                                                  PathwayName
1                                                 REACTOME_METABOLISM_OF_MRNA
2      REACTOME_NONSENSE_MEDIATED_DECAY_ENHANCED_BY_THE_EXON_JUNCTION_COMPLEX
3                  REACTOME_INFLUENZA_VIRAL_RNA_TRANSCRIPTION_AND_REPLICATION
4                                                               KEGG_RIBOSOME
5                                           REACTOME_PEPTIDE_CHAIN_ELONGATION
6                                                  REACTOME_METABOLISM_OF_RNA
7        REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE
8                            REACTOME_3_UTR_MEDIATED_TRANSLATIONAL_REGULATION
9                                                        REACTOME_TRANSLATION
10                                              REACTOME_INFLUENZA_LIFE_CYCLE
11                                                       HALLMARK_E2F_TARGETS
13 REACTOME_FORMATION_OF_THE_TERNARY_COMPLEX_AND_SUBSEQUENTLY_THE_43S_COMPLEX
14                                                        REACTOME_CELL_CYCLE
15                                                   REACTOME_DNA_REPLICATION
16                                                REACTOME_CELL_CYCLE_MITOTIC
17                                       REACTOME_MRNA_SPLICING_MINOR_PATHWAY
18      REACTOME_ABORTIVE_ELONGATION_OF_HIV1_TRANSCRIPT_IN_THE_ABSENCE_OF_TAT
19                                    REACTOME_ELONGATION_ARREST_AND_RECOVERY
20                       REACTOME_FORMATION_OF_RNA_POL_II_ELONGATION_COMPLEX_
21                                         HALLMARK_UNFOLDED_PROTEIN_RESPONSE
22                             REACTOME_METABOLISM_OF_LIPIDS_AND_LIPOPROTEINS
23                               REACTOME_GOLGI_ASSOCIATED_VESICLE_BIOGENESIS
24                                             REACTOME_MITOTIC_M_M_G1_PHASES
25                                              REACTOME_MEMBRANE_TRAFFICKING
26                                            REACTOME_METABOLISM_OF_PROTEINS
          DataBase SignatureSize PathwaySize Overlap       Pvalue
1  MSigDB_c2_cp_v5          1498         136     111 9.285565e-08
2  MSigDB_c2_cp_v5          1498          72      63 2.240920e-07
3  MSigDB_c2_cp_v5          1498          73      63 7.279819e-07
4  MSigDB_c2_cp_v5          1498          60      52 4.189618e-06
5  MSigDB_c2_cp_v5          1498          60      52 4.189618e-06
6  MSigDB_c2_cp_v5          1498         163     126 5.003623e-06
7  MSigDB_c2_cp_v5          1498          72      60 1.612820e-05
8  MSigDB_c2_cp_v5          1498          72      60 1.612820e-05
9  MSigDB_c2_cp_v5          1498          94      75 4.576286e-05
10 MSigDB_c2_cp_v5          1498          91      72 1.070983e-04
11 MSigDB_h_all_v5          1498         102      79 2.123148e-04
13 MSigDB_c2_cp_v5          1498          32      27 1.341371e-03
14 MSigDB_c2_cp_v5          1498         178     128 1.423581e-03
15 MSigDB_c2_cp_v5          1498         115      85 2.001602e-03
16 MSigDB_c2_cp_v5          1498         154     111 2.411496e-03
17 MSigDB_c2_cp_v5          1498          41      33 2.942138e-03
18 MSigDB_c2_cp_v5          1498          17      15 3.235390e-03
19 MSigDB_c2_cp_v5          1498          21      18 3.854617e-03
20 MSigDB_c2_cp_v5          1498          29      24 3.895851e-03
21 MSigDB_h_all_v5          1498          47      37 4.056055e-03
22 MSigDB_c2_cp_v5          1498          67      51 4.250320e-03
23 MSigDB_c2_cp_v5          1498          11      10 5.064556e-03
24 MSigDB_c2_cp_v5          1498         101      74 5.282358e-03
25 MSigDB_c2_cp_v5          1498          28      23 5.507916e-03
26 MSigDB_c2_cp_v5          1498         163     115 6.803383e-03
         Qvalue
1  7.766109e-07
2  1.862933e-06
3  6.015659e-06
4  3.431260e-05
5  3.431260e-05
6  4.061765e-05
7  1.297779e-04
8  1.297779e-04
9  3.650448e-04
10 8.493999e-04
11 1.674254e-03
13 1.045815e-02
14 1.103675e-02
15 1.543135e-02
16 1.848814e-02
17 2.243177e-02
18 2.453208e-02
19 2.906760e-02
20 2.921888e-02
21 3.025598e-02
22 3.153463e-02
23 3.737480e-02
24 3.877476e-02
25 4.021653e-02
26 4.941405e-02

Concordance check

The in vovo-only genes were also defined using Z-score and permutation p-values. Here, we compared the concordance of two sets.

mVivo = sapply(strsplit(rownames(Tx)[Tx$group == "vivo_only"], " "), function(z) z[2])
mShared = intersect(mVivo, mVivoGene)
cat(length(mVivo), " genes were found using Z-score and p-value\n",
  length(mVivoGene), " genes werer found using Log2 fold-change\n",
  length(mShared), " genes shared.\n", sep = "")
528 genes were found using Z-score and p-value
500 genes werer found using Log2 fold-change
125 genes shared.

sessionInfo

R version 4.1.0 (2021-05-18)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.6 (Maipo)

Matrix products: default
BLAS/LAPACK: /sibcb2/bioinformatics/software/Miniconda3/lib/libopenblasp-r0.3.15.so

locale:
[1] C

attached base packages:
[1] parallel  grid      stats     graphics  grDevices utils    
[7] datasets  methods   base     

other attached packages:
 [1] survcomp_1.42.0     prodlim_2019.11.13  gridExtra_2.3      
 [4] survHD_0.99.1       survC1_1.0-3        Hmisc_4.5-0        
 [7] Formula_1.2-4       lattice_0.20-44     penalized_0.9-52   
[10] survival_3.2-11     Biobase_2.52.0      BiocGenerics_0.38.0
[13] stringr_1.5.0       dplyr_1.1.2         scales_1.2.0       
[16] RColorBrewer_1.1-3  pheatmap_1.0.12     VennDiagram_1.6.20 
[19] futile.logger_1.4.3 ggrepel_0.9.1       ggplot2_3.4.2      
[22] rmarkdown_2.20     

loaded via a namespace (and not attached):
 [1] survivalROC_1.0.3    nlme_3.1-152         tools_4.1.0         
 [4] backports_1.2.1      bslib_0.2.5.1        utf8_1.2.2          
 [7] R6_2.5.1             KernSmooth_2.23-20   rpart_4.1-15        
[10] rmeta_3.0            DBI_1.1.3            mgcv_1.8-36         
[13] colorspace_2.0-3     nnet_7.3-16          withr_2.5.0         
[16] tidyselect_1.2.1     downlit_0.4.2        compiler_4.1.0      
[19] cli_3.6.1            formatR_1.11         htmlTable_2.2.1     
[22] labeling_0.4.2       bookdown_0.33        sass_0.4.0          
[25] checkmate_2.0.0      digest_0.6.29        foreign_0.8-81      
[28] base64enc_0.1-3      jpeg_0.1-9           pkgconfig_2.0.3     
[31] htmltools_0.5.2      parallelly_1.32.0    fastmap_1.1.0       
[34] highr_0.9            htmlwidgets_1.5.4    rlang_1.1.1         
[37] rstudioapi_0.15.0    SuppDists_1.1-9.7    jquerylib_0.1.4     
[40] farver_2.1.1         generics_0.1.3       jsonlite_1.8.7      
[43] distill_1.5          magrittr_2.0.3       Matrix_1.3-4        
[46] Rcpp_1.0.9           munsell_0.5.0        fansi_1.0.3         
[49] lifecycle_1.0.3      stringi_1.7.8        yaml_2.3.5          
[52] listenv_0.8.0        splines_4.1.0        knitr_1.42          
[55] pillar_1.9.0         future.apply_1.9.0   codetools_0.2-18    
[58] futile.options_1.0.1 glue_1.6.2           evaluate_0.20       
[61] latticeExtra_0.6-29  lambda.r_1.2.4       data.table_1.14.2   
[64] png_0.1-7            vctrs_0.6.3          bootstrap_2019.6    
[67] gtable_0.3.0         future_1.26.1        cachem_1.0.6        
[70] xfun_0.37            tibble_3.2.1         memoise_2.0.1       
[73] cluster_2.1.2        lava_1.6.10          globals_0.15.1