To validate genetic hits which were identified from the genome-wide screens, we selected 1034 in vivo and in vitro gene candidates, cloned a validation library containing 11,340 sgRNAs (10,340 targeting and 1,000 controls), and re-screened with greater coverage.

Quality control

We firstly compared fold-change between genome-wide screen and focused screens.

MA cell line

library("ggplot2")
library("ggrepel")

load("DataSummary/Pooled_MA_Zscore.RData")
load("DataSummary/MA_RES.RData")
load("DataSummary/WG_Deplected.RData")

T1 = aggregate(Pooled_MA_Zscore$FcM, by = list(factor(Pooled_MA_Zscore$annT$Plate)), FUN="median")
M1 = as.matrix(T1[,c("Vitro_vs_Plasmid", "Vivo_vs_Pre")])
rownames(M1) = as.character(T1[,1])

M2 = maRES$LogFCM[, c("vitro_vs_plasmid", "Post_vs_Pre")]
sharedGenes = intersect(rownames(M1), rownames(M2))
M1 = M1[sharedGenes, ]
M2 = M2[sharedGenes, ]
colnames(M1) = colnames(M2) = c("vitro", "vivo")

vennTable = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
group = vennTable[paste("Mm", sharedGenes), "group"]
for(i in 1:2){
    for(j in 1:2){
        WG = M1[,i]
        FS = M2[,j]
        res = data.frame(gene = sharedGenes, WG, FS, group)
        xtext = c("vitro", "vivo")[i]
        ytext = c("vitro", "vivo")[j]
        xlab  = paste("Log2 Fold-change (whole-genome)\n", xtext)
        ylab  = paste("Log2 Fold-change (Focused)\n", ytext)
        p <- ggplot(res, aes(x = WG, y = FS, color = group))
        p <- p + theme_bw() + labs(x = xlab, y = ylab, title = paste(xtext, "vs.", ytext, "in MA"))
        p <- p + theme(plot.title = element_text(hjust = 0.5)) + geom_point(size = 0.5)
        p <- p + geom_smooth(aes(group = 5), method='lm', formula= y~x, color = "black", linetype = "longdash")
        p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
        p <- p + scale_colour_manual(values = c("NS" = "grey", "vivo_only" = "red", "vitro_only" = "blue", "shared" = "green"))
        print(p)
    }
}

HM cell line

rm(list = ls())
load("DataSummary/Pooled_HM_Zscore.RData")
load("DataSummary/HM_RES.RData")
load("DataSummary/WG_Deplected.RData")

T1 = aggregate(Pooled_HM_Zscore$FcM, by = list(factor(Pooled_HM_Zscore$annT$Plate)), FUN="median")
M1 = as.matrix(T1[,c("Vitro_vs_Plasmid", "Vivo_vs_Pre")])
rownames(M1) = as.character(T1[,1])

M2 = hmRES$LogFCM[, c("vitro_vs_plasmid", "Post_vs_Pre")]
sharedGenes = intersect(rownames(M1), rownames(M2))
M1 = M1[sharedGenes, ]
M2 = M2[sharedGenes, ]
colnames(M1) = colnames(M2) = c("vitro", "vivo")

vennTable = read.table(file = "out/WG_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
group = vennTable[paste("Mm", sharedGenes), "group"]
par(mfrow=c(2,2))
for(i in 1:2){
    for(j in 1:2){
        WG = M1[,i]
        FS = M2[,j]
        res = data.frame(gene = sharedGenes, WG, FS, group)
        xtext = c("vitro", "vivo")[i]
        ytext = c("vitro", "vivo")[j]
        xlab  = paste("Log2 Fold-change (whole-genome)\n", xtext)
        ylab  = paste("Log2 Fold-change (Focused)\n", ytext)
        p <- ggplot(res, aes(x = WG, y = FS, color = group))
        p <- p + theme_bw() + labs(x = xlab, y = ylab, title = paste(xtext, "vs.", ytext, "in HM"))
        p <- p + theme(plot.title = element_text(hjust = 0.5)) + geom_point(size = 0.5)
        p <- p + geom_smooth(aes(group = 5), method='lm', formula= y~x, color = "black", linetype = "longdash")
        p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
        p <- p + scale_colour_manual(values = c("NS" = "grey", "vivo_only" = "red", "vitro_only" = "blue", "shared" = "green"))
        print(p)
    }
}

Assignment of CRISPR hits

Venn

rm(list = ls())
library("VennDiagram")
load("DataSummary/Focused_Depleted.RData")

hm_vitro <- hmList$vitro_vs_plasmid
hm_vivo  <- hmList$Post_vs_Pre
ma_vitro <- maList$vitro_vs_plasmid
ma_vivo  <- maList$Post_vs_Pre

xGene = PG
m1    = (xGene %in% ma_vitro)
m2    = (xGene %in% hm_vitro)
m3    = (xGene %in% ma_vivo)
m4    = (xGene %in% hm_vivo)
mm    = cbind(m1, m2, m3, m4) + 0
colnames(mm) = c("ma_vitro", "hm_vitro", "ma_vivo", "hm_vivo")

vitro = rowSums(mm[, c(1,2)])
vivo  = rowSums(mm[, c(3,4)])
group  = rep("ND", length(xGene))
group[(vitro == 0) & (vivo >  0)] = "vivo_only"
group[(vitro >  0) & (vivo == 0)] = "vitro_only"
group[(vitro >  0) & (vivo >  0)] = "shared"

mergedT = data.frame(gene = paste(xGene, "Mm"), mm, group)
write.table(mergedT, file = "out/FS_Venn.txt", row.names = FALSE, col.names = TRUE, sep = "\t")

# Venn plot
sL = list(ma_vitro = ma_vitro, hm_vitro = hm_vitro, ma_vivo = ma_vivo, hm_vivo = hm_vivo)
groupColor = c("#08519c", "#ce1256", "#54278f", "#006d2c")

venn.plot <- draw.quad.venn(
    area1 = sum(mm[,1]),
    area2 = sum(mm[,2]),
    area3 = sum(mm[,3]),
    area4 = sum(mm[,4]),
    n12   = sum(rowSums(mm[, c(1,2)]) == 2),
    n13   = sum(rowSums(mm[, c(1,3)]) == 2),
    n14   = sum(rowSums(mm[, c(1,4)]) == 2),
    n23   = sum(rowSums(mm[, c(2,3)]) == 2),
    n24   = sum(rowSums(mm[, c(2,4)]) == 2),
    n34   = sum(rowSums(mm[, c(3,4)]) == 2),
    n123  = sum(rowSums(mm[, c(1,2,3)]) == 3),
    n124  = sum(rowSums(mm[, c(1,2,4)]) == 3),
    n134  = sum(rowSums(mm[, c(1,3,4)]) == 3),
    n234  = sum(rowSums(mm[, c(2,3,4)]) == 3),
    n1234 = sum(rowSums(mm[, c(1,2,3,4)]) == 4),
    category = colnames(mm),
    fill = c("white", "white", "white", "white"),
    lty  = "solid",
    lwd  = 3,
    cex  = 2,
    cat.cex = 1,
    col     = groupColor,
    cat.col = groupColor,
    euler   = TRUE)

Genome-wide vs. Focused screening

Tg = read.table(file = "out/WG_Venn.txt",row.names= 1, header = TRUE, sep = "\t")
rownames(Tg) = gsub("Mm ", "", rownames(Tg))

Tf = read.table(file = "out/FS_Venn.txt",row.names= 1, header = TRUE, sep = "\t")
rownames(Tf) = gsub(" Mm", "", rownames(Tf))

sharedGenes = intersect(rownames(Tg), rownames(Tf))

cat("Number of hits in genome-wide screen: ", nrow(Tg), "\n", 
        "Number of genes covered by focused screen: ", nrow(Tf), "\n", 
        "Number of genes shared:", length(sharedGenes), "\n", sep = "")
Number of hits in genome-wide screen: 2789
Number of genes covered by focused screen: 1035
Number of genes shared:828
Tg = Tg[sharedGenes, ]
Tf = Tf[sharedGenes, ]

sx = table(Tg$group, Tf$group)
sx
            
              ND shared vitro_only vivo_only
  shared      13    248          6         3
  vitro_only  14    331         14         1
  vivo_only   77     72          7        42
cat("Number of in vivo genes validated: ", 
        sum(rowSums(sx)[c("shared", "vivo_only")]), "\n", 
      "Number of genes validated: ", 
      sum(sx[c("shared", "vivo_only"),c("shared", "vivo_only")]), "\n", sep = "")
Number of in vivo genes validated: 468
Number of genes validated: 365

Scatter plot

load("DataSummary/MA_RES.RData")
load("DataSummary/HM_RES.RData")
mergedT = read.table(file = "out/FS_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
gene = rownames(mergedT) = gsub(" Mm", "", rownames(mergedT))
oTable = data.frame(gene, maDiff = maRES$LogFCM[gene, "Post_vs_vitro"], hmDiff = hmRES$LogFCM[gene, "Post_vs_vitro"], group = mergedT[gene, "group"])

    p <- ggplot(oTable, aes(x = maDiff, y = hmDiff, color = group))
    p <- p + theme_bw()  + labs(x = "Log2 Fold change\n(vivo - vitro, MA)", y = "Log2 Fold change\n(vivo - vitro, HM)", title = "")
    p <- p + geom_point(size = 1.5, shape = 1) + theme(plot.title = element_text(hjust = 0.5))
    p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
    p <- p + scale_colour_manual(values = c("vivo_only" = "#dd1c77", "vitro_only" = "#3182bd", "shared" = "black", "ND" = "grey"))
    p <- p + theme(legend.title=element_blank())
    print(p)

    p <- ggplot(oTable[oTable$group == "vivo_only", ], aes(x = maDiff, y = hmDiff, color = group, label = gene))
    p <- p + theme_bw() + labs(x = "Log2 Fold change\n(vivo - vitro, MA)", y = "Log2 Fold change\n(vivo - vitro, HM)", title = "In vivo only hits")
    p <- p + xlim(-8, -2) + ylim(-8, -2) + theme(plot.title = element_text(hjust = 0.5))
    p <- p + geom_text_repel(size = 4, box.padding = unit(0.35, "lines"), point.padding = unit(0.3, "lines"), segment.size = 0, color = "red")
    print(p)

Survival analysis

Many of the validated hits are associated with survival in human AML, as it is shown here Survival analysis. Below, in vivo only gene FERMT3 is shown as an example.

library("survHD")
source("/sibcb2/bioinformatics/Script/Survival_v2.R")
load("DataSummary/Survival_DataSet_New.RData")

ex <- ExS_List$GSE10358_ExS["FERMT3", ]
survTable <- Surv_List$GSE10358_survival

highGroup <- names(ex)[ex > quantile(ex, 3/4)]
lowGroup <- names(ex)[ex < quantile(ex, 1/4)]
hTable <- survTable[highGroup, ]
lTable <- survTable[lowGroup,  ]
score <- c(ex[highGroup], ex[lowGroup])
dTable <- rbind(hTable, lTable)

plotKMStratifyBy("median", y=Surv(dTable$survivalTime, dTable$death), linearriskscore = score)

Output

oTable = read.table(file = "out/FS_Venn.txt", row.names = 1, header = TRUE, sep = "\t")
knitr::kable(oTable[1:5, ])
ma_vitro hm_vitro ma_vivo hm_vivo group
Rraga Mm 1 1 1 1 shared
Pabpc1 Mm 1 1 1 1 shared
Wdr43 Mm 1 1 1 1 shared
Cdipt Mm 1 1 1 1 shared
Vwa9 Mm 1 1 1 1 shared
summary(factor(oTable$group))
        ND     shared vitro_only  vivo_only 
       233        693         37         72 

You can download the results of focused CRISPR screening here Focused_screen_FC_pvalue.xlsx.

sessionInfo

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.7 (Maipo)

Matrix products: default
BLAS: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRblas.so
LAPACK: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRlapack.so

locale:
[1] C

attached base packages:
[1] parallel  grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] stringr_1.4.0       viper_1.16.0        scales_1.1.0       
 [4] pheatmap_1.0.12     dplyr_1.0.5         survcomp_1.32.0    
 [7] prodlim_2019.11.13  RColorBrewer_1.1-2  gridExtra_2.3      
[10] survHD_0.99.1       survC1_1.0-2        Hmisc_4.3-0        
[13] Formula_1.2-3       lattice_0.20-38     penalized_0.9-51   
[16] survival_3.1-8      Biobase_2.42.0      BiocGenerics_0.28.0
[19] VennDiagram_1.6.20  futile.logger_1.4.3 ggrepel_0.8.1      
[22] ggplot2_3.2.1       rmarkdown_2.0      

loaded via a namespace (and not attached):
 [1] mixtools_1.1.0       splines_3.5.1        rmeta_3.0           
 [4] highr_0.8            latticeExtra_0.6-28  bootstrap_2019.6    
 [7] yaml_2.2.0           survivalROC_1.0.3    pillar_1.5.1        
[10] backports_1.1.5      glue_1.4.2           digest_0.6.23       
[13] checkmate_1.9.4      colorspace_1.4-1     htmltools_0.5.1.1   
[16] Matrix_1.2-18        pkgconfig_2.0.3      purrr_0.3.3         
[19] lava_1.6.7           htmlTable_1.13.3     tibble_3.1.0        
[22] generics_0.0.2       farver_2.0.1         ellipsis_0.3.0      
[25] withr_2.1.2          nnet_7.3-12          lazyeval_0.2.2      
[28] magrittr_1.5         crayon_1.3.4         evaluate_0.14       
[31] fansi_0.4.0          segmented_1.1-0      MASS_7.3-51.5       
[34] class_7.3-15         foreign_0.8-74       SuppDists_1.1-9.5   
[37] tools_3.5.1          data.table_1.12.8    formatR_1.7         
[40] lifecycle_1.0.0      munsell_0.5.0        cluster_2.1.0       
[43] lambda.r_1.2.4       e1071_1.7-3          compiler_3.5.1      
[46] rlang_0.4.10         rstudioapi_0.10      htmlwidgets_1.5.1   
[49] base64enc_0.1-3      labeling_0.3         gtable_0.3.0        
[52] DBI_1.1.0            R6_2.4.1             knitr_1.26          
[55] utf8_1.1.4           futile.options_1.0.1 KernSmooth_2.23-16  
[58] stringi_1.4.3        Rcpp_1.0.3           vctrs_0.3.7         
[61] rpart_4.1-15         acepack_1.4.1        tidyselect_1.1.0    
[64] xfun_0.18