Permutation p-values

We use Myc and Pten as examples to show how permutation p-values are calculated.

library("ggplot2")
load("DataSummary/Pooled_MA_Zscore.RData")
attach(Pooled_MA_Zscore)
source("/sibcb2/bioinformatics/Script/PathwayEnrichment.R")
gene = c("Myc", "Pten")

control_sgRNA = rownames(annT)[annT$group == "control"]
control_list  = list()
for(i in 1:5000)
    control_list[[i]] = sample(control_sgRNA, 6)

Log2FC = sort(FcM[, "Vitro_vs_Plasmid"])
randomT = data.frame(controlZS = SimpleRankTest(Log2FC, control_list))
test_zscore = zM[gene, "Vitro_vs_Plasmid"]
test_pvalue = pM[gene, "Vitro_vs_Plasmid"]

  p <- ggplot(randomT, aes(controlZS) )
  p <- p + theme_bw() + labs(x = "Z-score for control sgRNAs", y = "Density", title = "") 
  p <- p + geom_line(stat="density", size = 0.6) + theme(legend.title=element_blank()) + xlim(-5, 5)
  p <- p + geom_vline(xintercept = test_zscore, size = 1, linetype = "longdash", color = c("blue", "red"))
  p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
  p <- p + geom_text(x = -3, y = 0.3, label = paste("Myc\np =", test_pvalue[1]), color = "blue")
  p <- p + geom_text(x =  4, y = 0.3, label = paste("Pten\np =", test_pvalue[2]), color = "red")
  print(p)

sortRank = 1:length(Log2FC)
test_zscore = signif(test_zscore, 4)
MycRank  = sortRank[names(Log2FC) %in% rownames(annT)[annT$Plate == "Myc"]]
PtenRank = sortRank[names(Log2FC) %in% rownames(annT)[annT$Plate == "Pten"]]

p <- ggplot(data = data.frame(sortRank), aes(x = sortRank))
p <- p + theme_bw() + labs(x = "", y = "", title = paste("Myc, Z-score =", test_zscore[1]))
p <- p + xlim(1, max(sortRank))
p <- p + geom_vline(data = data.frame(MycRank), aes(xintercept = MycRank), size = 1.5, color = "blue")
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p <- p + theme(axis.ticks = element_blank(), axis.text.x = element_blank())
print(p)

p <- ggplot(data = data.frame(sortRank), aes(x = sortRank))
p <- p + theme_bw() + labs(x = "", y = "", title = paste("Pten, Z-score =", test_zscore[2]))
p <- p + xlim(1, max(sortRank))
p <- p + geom_vline(data = data.frame(PtenRank), aes(xintercept = PtenRank), size = 1.5, color = "red")
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p <- p + theme(axis.ticks = element_blank(), axis.text.x = element_blank())
print(p)

sessionInfo

sessionInfo()
R version 4.1.0 (2021-05-18)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.7 (Maipo)

Matrix products: default
BLAS/LAPACK: /sibcb2/bioinformatics/software/Miniconda3/lib/libopenblasp-r0.3.15.so

locale:
[1] C

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.3.5 rmarkdown_2.9

loaded via a namespace (and not attached):
 [1] highr_0.9         bslib_0.2.5.1     compiler_4.1.0    pillar_1.6.1     
 [5] jquerylib_0.1.4   tools_4.1.0       digest_0.6.27     jsonlite_1.7.2   
 [9] evaluate_0.14     lifecycle_1.0.0   tibble_3.1.2      gtable_0.3.0     
[13] pkgconfig_2.0.3   rlang_0.4.11      DBI_1.1.1         yaml_2.2.1       
[17] xfun_0.24         withr_2.4.2       stringr_1.4.0     dplyr_1.0.7      
[21] knitr_1.33        generics_0.1.0    sass_0.4.0        vctrs_0.3.8      
[25] grid_4.1.0        tidyselect_1.1.1  glue_1.4.2        R6_2.5.0         
[29] fansi_0.5.0       farver_2.1.0      purrr_0.3.4       magrittr_2.0.1   
[33] scales_1.1.1      ellipsis_0.3.2    htmltools_0.5.1.1 assertthat_0.2.1 
[37] colorspace_2.0-2  labeling_0.4.2    utf8_1.2.1        stringi_1.6.2    
[41] munsell_0.5.0     crayon_1.4.1