To validate genetic hits which were identified from the genome-wide screens, we selected 1034 in vivo and in vitro gene candidates, cloned a validation library containing 11,340 sgRNAs (10,340 targeting and 1,000 controls), and re-screened with greater coverage.
We firstly compared fold-change between genome-wide screen and focused screens.
library("ggplot2")
library("ggrepel")
load("DataSummary/Pooled_MA_Zscore.RData")
load("DataSummary/MA_RES.RData")
load("DataSummary/WG_Deplected.RData")
T1 = aggregate(Pooled_MA_Zscore$FcM, by = list(factor(Pooled_MA_Zscore$annT$Plate)), FUN="median")
M1 = as.matrix(T1[,c("Vitro_vs_Plasmid", "Vivo_vs_Pre")])
rownames(M1) = as.character(T1[,1])
M2 = maRES$LogFCM[, c("vitro_vs_plasmid", "Post_vs_Pre")]
sharedGenes = intersect(rownames(M1), rownames(M2))
M1 = M1[sharedGenes, ]
M2 = M2[sharedGenes, ]
colnames(M1) = colnames(M2) = c("vitro", "vivo")
vennTable = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
group = vennTable[paste("Mm", sharedGenes), "group"]
for(i in 1:2){
for(j in 1:2){
WG = M1[,i]
FS = M2[,j]
res = data.frame(gene = sharedGenes, WG, FS, group)
xtext = c("vitro", "vivo")[i]
ytext = c("vitro", "vivo")[j]
xlab = paste("Log2 Fold-change (whole-genome)\n", xtext)
ylab = paste("Log2 Fold-change (Focused)\n", ytext)
p <- ggplot(res, aes(x = WG, y = FS, color = group))
p <- p + theme_bw() + labs(x = xlab, y = ylab, title = paste(xtext, "vs.", ytext, "in MA"))
p <- p + theme(plot.title = element_text(hjust = 0.5)) + geom_point(size = 0.5)
p <- p + geom_smooth(aes(group = 5), method='lm', formula= y~x, color = "black", linetype = "longdash")
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p <- p + scale_colour_manual(values = c("NS" = "grey", "vivo_only" = "red", "vitro_only" = "blue", "shared" = "green"))
print(p)
}
}
rm(list = ls())
load("DataSummary/Pooled_HM_Zscore.RData")
load("DataSummary/HM_RES.RData")
load("DataSummary/WG_Deplected.RData")
T1 = aggregate(Pooled_HM_Zscore$FcM, by = list(factor(Pooled_HM_Zscore$annT$Plate)), FUN="median")
M1 = as.matrix(T1[,c("Vitro_vs_Plasmid", "Vivo_vs_Pre")])
rownames(M1) = as.character(T1[,1])
M2 = hmRES$LogFCM[, c("vitro_vs_plasmid", "Post_vs_Pre")]
sharedGenes = intersect(rownames(M1), rownames(M2))
M1 = M1[sharedGenes, ]
M2 = M2[sharedGenes, ]
colnames(M1) = colnames(M2) = c("vitro", "vivo")
vennTable = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
group = vennTable[paste("Mm", sharedGenes), "group"]
par(mfrow=c(2,2))
for(i in 1:2){
for(j in 1:2){
WG = M1[,i]
FS = M2[,j]
res = data.frame(gene = sharedGenes, WG, FS, group)
xtext = c("vitro", "vivo")[i]
ytext = c("vitro", "vivo")[j]
xlab = paste("Log2 Fold-change (whole-genome)\n", xtext)
ylab = paste("Log2 Fold-change (Focused)\n", ytext)
p <- ggplot(res, aes(x = WG, y = FS, color = group))
p <- p + theme_bw() + labs(x = xlab, y = ylab, title = paste(xtext, "vs.", ytext, "in HM"))
p <- p + theme(plot.title = element_text(hjust = 0.5)) + geom_point(size = 0.5)
p <- p + geom_smooth(aes(group = 5), method='lm', formula= y~x, color = "black", linetype = "longdash")
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p <- p + scale_colour_manual(values = c("NS" = "grey", "vivo_only" = "red", "vitro_only" = "blue", "shared" = "green"))
print(p)
}
}
rm(list = ls())
library("VennDiagram")
load("DataSummary/Focused_Depleted.RData")
hm_vitro <- hmList$vitro_vs_plasmid
hm_vivo <- hmList$Post_vs_Pre
ma_vitro <- maList$vitro_vs_plasmid
ma_vivo <- maList$Post_vs_Pre
xGene = PG
m1 = (xGene %in% ma_vitro)
m2 = (xGene %in% hm_vitro)
m3 = (xGene %in% ma_vivo)
m4 = (xGene %in% hm_vivo)
mm = cbind(m1, m2, m3, m4) + 0
colnames(mm) = c("ma_vitro", "hm_vitro", "ma_vivo", "hm_vivo")
vitro = rowSums(mm[, c(1,2)])
vivo = rowSums(mm[, c(3,4)])
group = rep("ND", length(xGene))
group[(vitro == 0) & (vivo > 0)] = "vivo_only"
group[(vitro > 0) & (vivo == 0)] = "vitro_only"
group[(vitro > 0) & (vivo > 0)] = "shared"
mergedT = data.frame(gene = paste(xGene, "Mm"), mm, group)
write.table(mergedT, file = "out/FS_Venn.txt", row.names = FALSE, col.names = TRUE, sep = "\t")
# Venn plot
sL = list(ma_vitro = ma_vitro, hm_vitro = hm_vitro, ma_vivo = ma_vivo, hm_vivo = hm_vivo)
groupColor = c("#08519c", "#ce1256", "#54278f", "#006d2c")
venn.plot <- draw.quad.venn(
area1 = sum(mm[,1]),
area2 = sum(mm[,2]),
area3 = sum(mm[,3]),
area4 = sum(mm[,4]),
n12 = sum(rowSums(mm[, c(1,2)]) == 2),
n13 = sum(rowSums(mm[, c(1,3)]) == 2),
n14 = sum(rowSums(mm[, c(1,4)]) == 2),
n23 = sum(rowSums(mm[, c(2,3)]) == 2),
n24 = sum(rowSums(mm[, c(2,4)]) == 2),
n34 = sum(rowSums(mm[, c(3,4)]) == 2),
n123 = sum(rowSums(mm[, c(1,2,3)]) == 3),
n124 = sum(rowSums(mm[, c(1,2,4)]) == 3),
n134 = sum(rowSums(mm[, c(1,3,4)]) == 3),
n234 = sum(rowSums(mm[, c(2,3,4)]) == 3),
n1234 = sum(rowSums(mm[, c(1,2,3,4)]) == 4),
category = colnames(mm),
fill = c("white", "white", "white", "white"),
lty = "solid",
lwd = 3,
cex = 2,
cat.cex = 1,
col = groupColor,
cat.col = groupColor,
euler = TRUE)
Tg = read.table(file = "out/WG_Venn.txt",row.names= 1, header = TRUE, sep = "\t")
rownames(Tg) = gsub("Mm ", "", rownames(Tg))
Tf = read.table(file = "out/FS_Venn.txt",row.names= 1, header = TRUE, sep = "\t")
rownames(Tf) = gsub(" Mm", "", rownames(Tf))
sharedGenes = intersect(rownames(Tg), rownames(Tf))
cat("Number of hits in genome-wide screen: ", nrow(Tg), "\n",
"Number of genes covered by focused screen: ", nrow(Tf), "\n",
"Number of genes shared:", length(sharedGenes), "\n", sep = "")
Number of hits in genome-wide screen: 2789
Number of genes covered by focused screen: 1035
Number of genes shared:828
Tg = Tg[sharedGenes, ]
Tf = Tf[sharedGenes, ]
sx = table(Tg$group, Tf$group)
sx
ND shared vitro_only vivo_only
shared 13 248 6 3
vitro_only 14 331 14 1
vivo_only 77 72 7 42
cat("Number of in vivo genes validated: ",
sum(rowSums(sx)[c("shared", "vivo_only")]), "\n",
"Number of genes validated: ",
sum(sx[c("shared", "vivo_only"),c("shared", "vivo_only")]), "\n", sep = "")
Number of in vivo genes validated: 468
Number of genes validated: 365
load("DataSummary/MA_RES.RData")
load("DataSummary/HM_RES.RData")
mergedT = read.table(file = "out/FS_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
gene = rownames(mergedT) = gsub(" Mm", "", rownames(mergedT))
oTable = data.frame(gene, maDiff = maRES$LogFCM[gene, "Post_vs_vitro"], hmDiff = hmRES$LogFCM[gene, "Post_vs_vitro"], group = mergedT[gene, "group"])
p <- ggplot(oTable, aes(x = maDiff, y = hmDiff, color = group))
p <- p + theme_bw() + labs(x = "Log2 Fold change\n(vivo - vitro, MA)", y = "Log2 Fold change\n(vivo - vitro, HM)", title = "")
p <- p + geom_point(size = 1.5, shape = 1) + theme(plot.title = element_text(hjust = 0.5))
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p <- p + scale_colour_manual(values = c("vivo_only" = "#dd1c77", "vitro_only" = "#3182bd", "shared" = "black", "ND" = "grey"))
p <- p + theme(legend.title=element_blank())
print(p)
p <- ggplot(oTable[oTable$group == "vivo_only", ], aes(x = maDiff, y = hmDiff, color = group, label = gene))
p <- p + theme_bw() + labs(x = "Log2 Fold change\n(vivo - vitro, MA)", y = "Log2 Fold change\n(vivo - vitro, HM)", title = "In vivo only hits")
p <- p + xlim(-8, -2) + ylim(-8, -2) + theme(plot.title = element_text(hjust = 0.5))
p <- p + geom_text_repel(size = 4, box.padding = unit(0.35, "lines"), point.padding = unit(0.3, "lines"), segment.size = 0, color = "red")
print(p)
Many of the validated hits are associated with survival in human AML, as it is shown here Survival analysis. Below, in vivo only gene FERMT3
is shown as an example.
library("survHD")
source("/sibcb2/bioinformatics/Script/Survival_v2.R")
load("DataSummary/Survival_DataSet_New.RData")
ex <- ExS_List$GSE10358_ExS["FERMT3", ]
survTable <- Surv_List$GSE10358_survival
highGroup <- names(ex)[ex > quantile(ex, 3/4)]
lowGroup <- names(ex)[ex < quantile(ex, 1/4)]
hTable <- survTable[highGroup, ]
lTable <- survTable[lowGroup, ]
score <- c(ex[highGroup], ex[lowGroup])
dTable <- rbind(hTable, lTable)
plotKMStratifyBy("median", y=Surv(dTable$survivalTime, dTable$death), linearriskscore = score)
oTable = read.table(file = "out/FS_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
knitr::kable(oTable[1:5, ])
ma_vitro | hm_vitro | ma_vivo | hm_vivo | group | |
---|---|---|---|---|---|
Rraga Mm | 1 | 1 | 1 | 1 | shared |
Pabpc1 Mm | 1 | 1 | 1 | 1 | shared |
Wdr43 Mm | 1 | 1 | 1 | 1 | shared |
Cdipt Mm | 1 | 1 | 1 | 1 | shared |
Vwa9 Mm | 1 | 1 | 1 | 1 | shared |
ND shared vitro_only vivo_only
233 693 37 72
You can download the results of focused CRISPR screening here Focused_screen_FC_pvalue.xlsx.
R version 4.1.0 (2021-05-18)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.6 (Maipo)
Matrix products: default
BLAS/LAPACK: /sibcb2/bioinformatics/software/Miniconda3/lib/libopenblasp-r0.3.15.so
locale:
[1] C
attached base packages:
[1] parallel grid stats graphics grDevices utils
[7] datasets methods base
other attached packages:
[1] survcomp_1.42.0 prodlim_2019.11.13 gridExtra_2.3
[4] survHD_0.99.1 survC1_1.0-3 Hmisc_4.5-0
[7] Formula_1.2-4 lattice_0.20-44 penalized_0.9-52
[10] survival_3.2-11 Biobase_2.52.0 BiocGenerics_0.38.0
[13] stringr_1.5.0 dplyr_1.1.2 scales_1.2.0
[16] RColorBrewer_1.1-3 pheatmap_1.0.12 VennDiagram_1.6.20
[19] futile.logger_1.4.3 ggrepel_0.9.1 ggplot2_3.4.2
[22] rmarkdown_2.20
loaded via a namespace (and not attached):
[1] survivalROC_1.0.3 nlme_3.1-152 tools_4.1.0
[4] backports_1.2.1 bslib_0.2.5.1 utf8_1.2.2
[7] R6_2.5.1 KernSmooth_2.23-20 rpart_4.1-15
[10] rmeta_3.0 DBI_1.1.3 mgcv_1.8-36
[13] colorspace_2.0-3 nnet_7.3-16 withr_2.5.0
[16] tidyselect_1.2.1 downlit_0.4.2 compiler_4.1.0
[19] cli_3.6.1 formatR_1.11 htmlTable_2.2.1
[22] labeling_0.4.2 bookdown_0.33 sass_0.4.0
[25] checkmate_2.0.0 digest_0.6.29 foreign_0.8-81
[28] base64enc_0.1-3 jpeg_0.1-9 pkgconfig_2.0.3
[31] htmltools_0.5.2 parallelly_1.32.0 fastmap_1.1.0
[34] highr_0.9 htmlwidgets_1.5.4 rlang_1.1.1
[37] rstudioapi_0.15.0 SuppDists_1.1-9.7 jquerylib_0.1.4
[40] farver_2.1.1 generics_0.1.3 jsonlite_1.8.7
[43] distill_1.5 magrittr_2.0.3 Matrix_1.3-4
[46] Rcpp_1.0.9 munsell_0.5.0 fansi_1.0.3
[49] lifecycle_1.0.3 stringi_1.7.8 yaml_2.3.5
[52] listenv_0.8.0 splines_4.1.0 knitr_1.42
[55] pillar_1.9.0 future.apply_1.9.0 codetools_0.2-18
[58] futile.options_1.0.1 glue_1.6.2 evaluate_0.20
[61] latticeExtra_0.6-29 lambda.r_1.2.4 data.table_1.14.2
[64] png_0.1-7 vctrs_0.6.3 bootstrap_2019.6
[67] gtable_0.3.0 future_1.26.1 cachem_1.0.6
[70] xfun_0.37 tibble_3.2.1 memoise_2.0.1
[73] cluster_2.1.2 lava_1.6.10 globals_0.15.1