loading data

For each gene, fold-changes were determined using pooled counts. To report the biological effect of targeting each gene, we summarized the collective changes of the 6 sgRNAs targeting that gene as a Z-score using the Wilcoxon rank-sum test. To determine statistical significance, a permutation p-value was calculated from the background distribution of Z-scores generated by randomly picking 6 control sgRNAs out of the 1,000 control sgRNAs internal to the library. A worked example can be found in "P-value and Z-score calculation".

library("ggplot2")
library("ggrepel")
library("VennDiagram")
library("pheatmap")
library("RColorBrewer")
library("scales")

## Load the pooled MA screen summary from disk.
load("DataSummary/Pooled_MA_Zscore.RData")
## attach() puts the components of Pooled_MA_Zscore on the search path; the
## bare names used below (FcM, zM, pM) are presumably among them -- confirm.
## Later chunks rely on this side effect.
attach(Pooled_MA_Zscore)
head(FcM) ## sgRNA-level fold-change
             Pre_vs_Plasmid Vitro_vs_Plasmid Vivo_vs_Plasmid Vitro_vs_Pre
MGLibA_00001      0.4368053       0.05901595        2.505846   -0.3777894
MGLibA_00002      2.6068334       4.60182632        2.305935    1.9949929
MGLibA_00003     -4.2028304      -8.34872815       -8.348728   -4.1458977
MGLibA_00004     -1.2025237      -0.32075123        1.952999    0.8817725
MGLibA_00005     -0.6230805       0.43797214       -2.463307    1.0610526
MGLibA_00006     -0.2139968       0.57372911        1.658612    0.7877259
             Vivo_vs_Pre
MGLibA_00001   2.0690406
MGLibA_00002  -0.3008987
MGLibA_00003  -4.1458977
MGLibA_00004   3.1555224
MGLibA_00005  -1.8402268
MGLibA_00006   1.8726090
## Rows are genes, columns are the same five comparisons as in FcM.
head(zM)  ## gene-level Z-score
              Pre_vs_Plasmid Vitro_vs_Plasmid Vivo_vs_Plasmid Vitro_vs_Pre
0610007P14Rik      0.2109059       -0.3610930      -0.3542764   -0.9686669
0610009B22Rik     -0.8923795        0.7196534      -0.5768965    1.6677904
0610009D07Rik     -3.0760999       -3.7051443      -3.1719717   -1.3361115
0610009O20Rik      0.1873497        0.9600445       0.3275840    1.0606361
0610010F05Rik     -0.7796139       -0.7064171      -0.2322089    0.2572793
0610010K14Rik     -1.3236388        0.1289626      -1.3674298    0.8035464
              Vivo_vs_Pre
0610007P14Rik  -1.1504427
0610009B22Rik  -0.4077684
0610009D07Rik  -0.7648521
0610009O20Rik  -0.1928692
0610010F05Rik   0.8005765
0610010K14Rik  -0.4290117
## Same layout as zM: one row per gene, one column per comparison.
head(pM)  ## gene-level permutation p-value
              Pre_vs_Plasmid Vitro_vs_Plasmid Vivo_vs_Plasmid Vitro_vs_Pre
0610007P14Rik         0.8388           0.7270          0.7246       0.3460
0610009B22Rik         0.3762           0.4864          0.5706       0.0892
0610009D07Rik         0.0020           0.0004          0.0012       0.1890
0610009O20Rik         0.8560           0.3498          0.7514       0.3034
0610010F05Rik         0.4390           0.4884          0.8100       0.8064
0610010K14Rik         0.1916           0.9120          0.1762       0.4416
              Vivo_vs_Pre
0610007P14Rik      0.2644
0610009B22Rik      0.6910
0610009D07Rik      0.4548
0610009O20Rik      0.8386
0610010F05Rik      0.4336
0610010K14Rik      0.6766

Quality control

As shown below, we could identify genes known to be essential for bone marrow homing (Cxcr4) or integrin signaling (Fermt3 and Tln1) as only depleted in the in vivo arm of the screen. In contrast, known mTORC1 regulators Pten and Tsc1 were significantly enriched and known essential genes such as Myc and Myb were significantly depleted (permutation p-value < 0.01), in both the in vitro and in vivo arms.

MA cell line

## Volcano plot of the MA in vitro arm: per-gene median log2 fold-change
## against -log10 permutation p-value, using column 2 of FcM / zM / pM
## (printed above as "Vitro_vs_Plasmid" for the MA object).
## FcM, zM, pM and annT are resolved through the search path (see attach()).

## Collapse sgRNA-level fold-changes to one median per level of annT$Plate.
## NOTE(review): the result is indexed by gene symbol below, so annT$Plate
## presumably holds the target gene of each sgRNA -- confirm.
fc <- FcM[, 2]
aT_MA_vitro <- aggregate(fc, by = list(factor(annT$Plate)), FUN = "median")
colnames(aT_MA_vitro) <- c("ID", "log2FC")
rownames(aT_MA_vitro) <- as.character(aT_MA_vitro[, 1])

## Control genes highlighted and labelled in red.
Kgenes <- c("Myc", "Myb", "Hmgcs1", "Fermt3", "Cxcr4", "Pten", "Tsc1", "Tsc2", "Tln1")
gene   <- rownames(zM)

## Note: the `pvalue` column holds -log10(p), not the raw p-value.
mT_MA_vitro <- data.frame(
  gene,
  log2FC = as.numeric(aT_MA_vitro[gene, 2]),
  score  = zM[gene, 2],
  pvalue = -log10(pM[gene, 2])
)

## Split genes at p = 0.05 on the -log10 scale. The original used strict
## `>` and `<`, so a gene with p exactly 0.05 fell into neither subset and
## disappeared from the plot; it is now kept with the non-significant genes.
## dplyr::filter is called explicitly because a bare filter() resolves to
## stats::filter unless dplyr happens to be attached.
sig_cut <- -log10(0.05)
mT1 <- dplyr::filter(mT_MA_vitro, pvalue > sig_cut)      # p <  0.05
mT2 <- dplyr::filter(mT_MA_vitro, pvalue <= sig_cut)     # p >= 0.05
mT3 <- dplyr::filter(mT_MA_vitro, gene %in% Kgenes)

## Layers are drawn in order: background (grey), significant (light blue),
## control genes (red) with repelled labels. Layers without their own aes()
## inherit x/y from the base plot.
p <- ggplot(mT2, aes(x = log2FC, y = pvalue)) +
  theme_bw() +
  labs(x = "Log2 fold-change", y = "-log10 Permutation P-value",
       title = "In vitro hits (MA)") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT1, color = "#99cfe0", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black", nudge_y = -0.1)
print(p)

## Volcano plot of the MA in vivo arm: per-gene median log2 fold-change
## against -log10 permutation p-value, using column 3 of FcM / zM / pM
## (printed above as "Vivo_vs_Plasmid" for the MA object).
## NOTE(review): the Venn diagram and the in vivo/in vitro comparison further
## down use "Vivo_vs_Pre" for the in vivo arm -- confirm column 3 is intended.

## Collapse sgRNA-level fold-changes to one median per level of annT$Plate.
## NOTE(review): annT$Plate presumably identifies the target gene -- confirm.
fc <- FcM[, 3]
aT_MA_vivo <- aggregate(fc, by = list(factor(annT$Plate)), FUN = "median")
colnames(aT_MA_vivo) <- c("ID", "log2FC")
rownames(aT_MA_vivo) <- as.character(aT_MA_vivo[, 1])

## Control genes highlighted and labelled in red.
Kgenes <- c("Myc", "Myb", "Hmgcs1", "Fermt3", "Cxcr4", "Pten", "Tsc1", "Tsc2", "Tln1")
gene   <- rownames(zM)

## Note: the `pvalue` column holds -log10(p), not the raw p-value.
mT_MA_vivo <- data.frame(
  gene,
  log2FC = as.numeric(aT_MA_vivo[gene, 2]),
  score  = zM[gene, 3],
  pvalue = -log10(pM[gene, 3])
)

## Split genes at p = 0.05 on the -log10 scale. The original used strict
## `>` and `<`, so a gene with p exactly 0.05 fell into neither subset and
## disappeared from the plot; it is now kept with the non-significant genes.
## dplyr::filter is called explicitly because a bare filter() resolves to
## stats::filter unless dplyr happens to be attached.
sig_cut <- -log10(0.05)
mT1 <- dplyr::filter(mT_MA_vivo, pvalue > sig_cut)       # p <  0.05
mT2 <- dplyr::filter(mT_MA_vivo, pvalue <= sig_cut)      # p >= 0.05
mT3 <- dplyr::filter(mT_MA_vivo, gene %in% Kgenes)

## Layers: background (grey), significant (light blue), control genes (red)
## with repelled labels. Layers without their own aes() inherit x/y.
p <- ggplot(mT2, aes(x = log2FC, y = pvalue)) +
  theme_bw() +
  labs(x = "Log2FC", y = "-log10 Permutation P-value",
       title = "In vivo hits (MA)") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT1, color = "#99cfe0", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black", nudge_y = -0.1)
print(p)

HM cell line

## Volcano plot of the HM in vitro arm (column 2 of FcM / zM / pM).
## NOTE(review): column 2 is presumably "Vitro_vs_Plasmid", as in the MA
## object printed earlier -- the HM column order is not shown; confirm.

## Start from an empty workspace and switch to the HM data. rm() does not
## detach the previously attached MA object; the HM object attached here is
## placed ahead of it on the search path, so FcM / zM / pM now resolve to HM.
rm(list = ls())
load("DataSummary/Pooled_HM_Zscore.RData")
attach(Pooled_HM_Zscore)

## Collapse sgRNA-level fold-changes to one median per level of annT$Plate.
## NOTE(review): annT$Plate presumably identifies the target gene -- confirm.
fc <- FcM[, 2]
aT_HM_vitro <- aggregate(fc, by = list(factor(annT$Plate)), FUN = "median")
colnames(aT_HM_vitro) <- c("ID", "log2FC")
rownames(aT_HM_vitro) <- as.character(aT_HM_vitro[, 1])

## Control genes highlighted and labelled in red.
Kgenes <- c("Myc", "Myb", "Hmgcs1", "Fermt3", "Cxcr4", "Pten", "Tsc1", "Tsc2", "Tln1")
gene   <- rownames(zM)

## Note: the `pvalue` column holds -log10(p), not the raw p-value.
mT_HM_vitro <- data.frame(
  gene,
  log2FC = as.numeric(aT_HM_vitro[gene, 2]),
  score  = zM[gene, 2],
  pvalue = -log10(pM[gene, 2])
)

## Split genes at p = 0.05 on the -log10 scale. The original used strict
## `>` and `<`, so a gene with p exactly 0.05 fell into neither subset and
## disappeared from the plot; it is now kept with the non-significant genes.
## dplyr::filter is called explicitly because a bare filter() resolves to
## stats::filter unless dplyr happens to be attached.
sig_cut <- -log10(0.05)
mT1 <- dplyr::filter(mT_HM_vitro, pvalue > sig_cut)      # p <  0.05
mT2 <- dplyr::filter(mT_HM_vitro, pvalue <= sig_cut)     # p >= 0.05
mT3 <- dplyr::filter(mT_HM_vitro, gene %in% Kgenes)

## Layers: background (grey), significant (light blue), control genes (red)
## with repelled labels. Layers without their own aes() inherit x/y.
p <- ggplot(mT2, aes(x = log2FC, y = pvalue)) +
  theme_bw() +
  labs(x = "Log2 fold-change", y = "-log10 Permutation P-value",
       title = "In vitro hits (HM)") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT1, color = "#99cfe0", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black", nudge_y = -0.1)
print(p)

## Volcano plot of the HM in vivo arm (column 3 of FcM / zM / pM).
## NOTE(review): in the MA object column 3 is "Vivo_vs_Plasmid", whereas the
## Venn diagram and comparison sections use "Vivo_vs_Pre" -- confirm which
## comparison is intended here; the HM column order is not shown.

## Collapse sgRNA-level fold-changes to one median per level of annT$Plate.
## NOTE(review): annT$Plate presumably identifies the target gene -- confirm.
fc <- FcM[, 3]
aT_HM_vivo <- aggregate(fc, by = list(factor(annT$Plate)), FUN = "median")
colnames(aT_HM_vivo) <- c("ID", "log2FC")
rownames(aT_HM_vivo) <- as.character(aT_HM_vivo[, 1])

## Control genes highlighted and labelled in red.
Kgenes <- c("Myc", "Myb", "Hmgcs1", "Fermt3", "Cxcr4", "Pten", "Tsc1", "Tsc2", "Tln1")
gene   <- rownames(zM)

## Note: the `pvalue` column holds -log10(p), not the raw p-value.
mT_HM_vivo <- data.frame(
  gene,
  log2FC = as.numeric(aT_HM_vivo[gene, 2]),
  score  = zM[gene, 3],
  pvalue = -log10(pM[gene, 3])
)

## Split genes at p = 0.05 on the -log10 scale. The original used strict
## `>` and `<`, so a gene with p exactly 0.05 fell into neither subset and
## disappeared from the plot; it is now kept with the non-significant genes.
## dplyr::filter is called explicitly because a bare filter() resolves to
## stats::filter unless dplyr happens to be attached.
sig_cut <- -log10(0.05)
mT1 <- dplyr::filter(mT_HM_vivo, pvalue > sig_cut)       # p <  0.05
mT2 <- dplyr::filter(mT_HM_vivo, pvalue <= sig_cut)      # p >= 0.05
mT3 <- dplyr::filter(mT_HM_vivo, gene %in% Kgenes)

## Layers: background (grey), significant (light blue), control genes (red)
## with repelled labels. Layers without their own aes() inherit x/y.
p <- ggplot(mT2, aes(x = log2FC, y = pvalue)) +
  theme_bw() +
  labs(x = "Log2FC", y = "-log10 Permutation P-value",
       title = "In vivo hits (HM)") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT1, color = "#99cfe0", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black", nudge_y = -0.1)
print(p)

Venn diagram

## Build the gene-by-screen membership table behind the Venn diagram and
## write it to out/WG_Venn.txt.
rm(list = ls())
load("DataSummary/CRISPR_Public.RData")
load("DataSummary/WG_Deplected.RData")

## Hit lists taken from the "Vitro_vs_Plasmid" and "Vivo_vs_Pre" entries of
## the MA and HM lists.
ma_vitro <- maList[["Vitro_vs_Plasmid"]]
ma_vivo  <- maList[["Vivo_vs_Pre"]]
hm_vitro <- hmList[["Vitro_vs_Plasmid"]]
hm_vivo  <- hmList[["Vivo_vs_Pre"]]

## Every gene that appears in at least one of the four lists.
xGene <- unique(c(ma_vitro, ma_vivo, hm_vitro, hm_vivo))

## 0/1 indicator matrix: one row per gene, one column per screen arm.
mm <- cbind(
  ma_vitro = xGene %in% ma_vitro,
  hm_vitro = xGene %in% hm_vitro,
  ma_vivo  = xGene %in% ma_vivo,
  hm_vivo  = xGene %in% hm_vivo
) + 0

## Number of in vitro / in vivo arms in which each gene is a hit.
vitro <- rowSums(mm[, c("ma_vitro", "hm_vitro")])
vivo  <- rowSums(mm[, c("ma_vivo", "hm_vivo")])

## A gene absent from both in vivo arms is "vitro_only", one absent from both
## in vitro arms is "vivo_only", everything else is "shared".
group <- ifelse(vivo == 0, "vitro_only",
                ifelse(vitro == 0, "vivo_only", "shared"))

oTable <- data.frame(xGene = paste("Mm", xGene), mm, group)
write.table(oTable, file = "out/WG_Venn.txt", row.names = FALSE, col.names = TRUE, sep = "\t")

    ## Four-set Venn diagram of the membership matrix `mm` (one 0/1 column per
    ## screen arm). Set sizes and all pairwise, triple and quadruple overlaps
    ## are counted directly from the matrix.
    groupColor <- c("#08519c", "#ce1256", "#54278f", "#006d2c")

    ## Number of genes flagged in every one of the given columns of `mm`.
    n_shared <- function(cols) {
        sum(rowSums(mm[, cols]) == length(cols))
    }

    venn.plot <- draw.quad.venn(
        area1 = sum(mm[, 1]),
        area2 = sum(mm[, 2]),
        area3 = sum(mm[, 3]),
        area4 = sum(mm[, 4]),
        n12   = n_shared(c(1, 2)),
        n13   = n_shared(c(1, 3)),
        n14   = n_shared(c(1, 4)),
        n23   = n_shared(c(2, 3)),
        n24   = n_shared(c(2, 4)),
        n34   = n_shared(c(3, 4)),
        n123  = n_shared(c(1, 2, 3)),
        n124  = n_shared(c(1, 2, 4)),
        n134  = n_shared(c(1, 3, 4)),
        n234  = n_shared(c(2, 3, 4)),
        n1234 = n_shared(c(1, 2, 3, 4)),
        category = colnames(mm),
        fill = rep("white", 4),
        lty  = "solid",
        lwd  = 3,
        cex  = 2,
        cat.cex = 1,
        col     = groupColor,
        cat.col = groupColor,
        euler   = TRUE)

Comparison in vivo/in vitro

## Assemble one table (mT) with per-gene median log2 fold-changes for the four
## arms (MA/HM x in vitro/in vivo), plus the gene subsets highlighted in the
## scatter plots that follow. Statement order matters: FcM and annT are
## resolved through the search path and change meaning after each attach().
rm(list = ls())
load("DataSummary/Pooled_MA_Zscore.RData")
attach(Pooled_MA_Zscore)
## From here until the HM attach(), FcM / annT refer to the MA data.
fc = FcM[, "Vitro_vs_Plasmid"]
## Median fold-change per level of annT$Plate.
## NOTE(review): rows are later looked up by gene symbol, so annT$Plate
## presumably holds the target gene of each sgRNA -- confirm.
aT_MA_vitro = aggregate(fc, by = list(factor(annT$Plate)), FUN="median")
colnames(aT_MA_vitro) = c("ID", "log2FC")
rownames(aT_MA_vitro) = as.character(aT_MA_vitro[,1])

fc = FcM[, "Vivo_vs_Pre"]
aT_MA_vivo = aggregate(fc, by = list(factor(annT$Plate)), FUN="median")
colnames(aT_MA_vivo) = c("ID", "log2FC")
rownames(aT_MA_vivo) = as.character(aT_MA_vivo[,1])

## Attaching the HM object places it ahead of the MA object on the search
## path, so FcM / annT / zM below resolve to the HM data.
load("DataSummary/Pooled_HM_Zscore.RData")
attach(Pooled_HM_Zscore)

fc = FcM[, "Vitro_vs_Plasmid"]
aT_HM_vitro = aggregate(fc, by = list(factor(annT$Plate)), FUN="median")
colnames(aT_HM_vitro) = c("ID", "log2FC")
rownames(aT_HM_vitro) = as.character(aT_HM_vitro[,1])

fc = FcM[, "Vivo_vs_Pre"]
aT_HM_vivo = aggregate(fc, by = list(factor(annT$Plate)), FUN="median")
colnames(aT_HM_vivo) = c("ID", "log2FC")
rownames(aT_HM_vivo) = as.character(aT_HM_vivo[,1])

## Gene universe is taken from the HM Z-score matrix (the most recently
## attached zM); the MA tables are indexed with the same names below.
gene   = rownames(zM)
## Control genes labelled in red in every scatter plot.
Kgenes = c("Fermt3","Cxcr4","Pten", "Tsc1", "Tsc2", "Tln1")

load("DataSummary/WG_Deplected.RData")
ma_vitro = maList[["Vitro_vs_Plasmid"]]
ma_vivo  = maList[["Vivo_vs_Pre"]]
hm_vitro = hmList[["Vitro_vs_Plasmid"]]
hm_vivo  = hmList[["Vivo_vs_Pre"]]

## Per-cell-line "in vivo only" hits: in the in vivo list but not the in
## vitro list of the same cell line.
ma_vivo_only = setdiff(ma_vivo, ma_vitro)
hm_vivo_only = setdiff(hm_vivo, hm_vitro)

## Re-read the Venn table written by the Venn diagram section; this chunk
## therefore requires out/WG_Venn.txt to exist. The "Mm " prefix added when
## the table was written is stripped to recover plain gene symbols.
wgTable = read.table(file = "out/WG_Venn.txt", row.names = NULL, header = TRUE)
vivo  = gsub("Mm ", "", as.character(filter(wgTable, group == "vivo_only")$xGene))
vitro = gsub("Mm ", "", as.character(filter(wgTable, group == "vitro_only")$xGene))

## One row per gene; a gene missing from an aggregate table yields NA there.
mT = data.frame(gene, 
                log2FC_HM_vivo = as.numeric(aT_HM_vivo[gene, 2]), 
                log2FC_HM_vitro = as.numeric(aT_HM_vitro[gene, 2]),
                log2FC_MA_vivo = as.numeric(aT_MA_vivo[gene, 2]),
                log2FC_MA_vitro = as.numeric(aT_MA_vitro[gene, 2])
                )
## Subsets overlaid on the plots below:
##   mT1 / mT2 -- "vivo_only" / "vitro_only" genes from the Venn table
##   mT3       -- control genes (Kgenes)
##   mT4 / mT5 -- in vivo only hits of MA / HM respectively
mT1    = filter(mT, gene %in% vivo)
mT2    = filter(mT, gene %in% vitro)
mT3    = filter(mT, gene %in% Kgenes)
mT4    = filter(mT, gene %in% ma_vivo_only)
mT5    = filter(mT, gene %in% hm_vivo_only)

## HM: in vivo (x) against in vitro (y) median log2 fold-change per gene.
## Green: HM in vivo only hits (mT5); red with labels: control genes (mT3).
## Overlay layers inherit the x/y mapping of the base plot.
p <- ggplot(mT, aes(x = log2FC_HM_vivo, y = log2FC_HM_vitro)) +
  theme_bw() +
  labs(x = "Log2FC in vivo", y = "Log2FC in vitro", title = "HM") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT5, color = "#228B22", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black")
print(p)

## MA: in vivo (x) against in vitro (y) median log2 fold-change per gene.
## Green: MA in vivo only hits (mT4); red with labels: control genes (mT3).
## Overlay layers inherit the x/y mapping of the base plot.
p2 <- ggplot(mT, aes(x = log2FC_MA_vivo, y = log2FC_MA_vitro)) +
  theme_bw() +
  labs(x = "Log2FC in vivo", y = "Log2FC in vitro", title = "MA") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT4, color = "#228B22", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black")
print(p2)

## In vivo arm: HM (x) against MA (y) median log2 fold-change per gene.
## Blue: "vivo_only" genes from the Venn table (mT1); red with labels:
## control genes (mT3). Overlay layers inherit the base x/y mapping.
p3 <- ggplot(mT, aes(x = log2FC_HM_vivo, y = log2FC_MA_vivo)) +
  theme_bw() +
  labs(x = "Log2FC  (HM)", y = "Log2FC (MA)", title = "In vivo") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT1, color = "#3182bd", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black")
print(p3)

## In vitro arm: HM (x) against MA (y) median log2 fold-change per gene.
## Blue: "vitro_only" genes from the Venn table (mT2); red with labels:
## control genes (mT3). Overlay layers inherit the base x/y mapping.
p4 <- ggplot(mT, aes(x = log2FC_HM_vitro, y = log2FC_MA_vitro)) +
  theme_bw() +
  labs(x = "HM", y = "MA", title = "In vitro") +
  xlim(-10, 10) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(size = 0.1, color = "grey") +
  geom_point(data = mT2, color = "#3182bd", size = 0.2) +
  geom_point(data = mT3, color = "red", size = 0.4) +
  geom_text_repel(data = mT3, aes(label = gene), size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines"),
                  segment.size = 0.2, color = "black")
print(p4)

Compared to public data

## Heatmap comparing our depleted hits (the *_HS lists) with the first 20
## columns of the CRISPR_Public matrix. Rows are grouped into vitro_only,
## shared and vivo_only and, within each group, ordered by row sum.
rm(list = ls())
load("DataSummary/CRISPR_Public.RData")
load("DataSummary/WG_Deplected.RData")
load("DataSummary/MSigDB.RData")

ma_vitro <- maList_HS[["Vitro_vs_Plasmid"]]
ma_vivo  <- maList_HS[["Vivo_vs_Pre"]]
hm_vitro <- hmList_HS[["Vitro_vs_Plasmid"]]
hm_vivo  <- hmList_HS[["Vivo_vs_Pre"]]

## Union of all hits, restricted to genes that are rows of CRISPR_Public.
xGene <- intersect(
  unique(c(ma_vitro, ma_vivo, hm_vitro, hm_vivo)),
  rownames(CRISPR_Public)
)

## 0/1 indicator matrix: one row per gene, one column per screen arm.
mm <- cbind(
  ma_vitro = xGene %in% ma_vitro,
  hm_vitro = xGene %in% hm_vitro,
  ma_vivo  = xGene %in% ma_vivo,
  hm_vivo  = xGene %in% hm_vivo
) + 0
vitro <- rowSums(mm[, c("ma_vitro", "hm_vitro")])
vivo  <- rowSums(mm[, c("ma_vivo", "hm_vivo")])

## `group` and `humanTable` are reused by later chunks.
group <- ifelse(vivo == 0, "vitro_only",
                ifelse(vitro == 0, "vivo_only", "shared"))
humanTable <- data.frame(gene = xGene, mm, group)

## 4 indicator columns + 20 public-screen columns + group = 25 columns.
subM    <- CRISPR_Public[xGene, 1:20]
mergedT <- data.frame(mm, subM, group)

## Every column except the trailing `group` label is numeric.
value_cols <- setdiff(colnames(mergedT), "group")

## Rows of one group, ordered by increasing row sum over the numeric columns.
sort_group <- function(label) {
  block <- mergedT[mergedT$group == label, ]
  block[order(rowSums(as.matrix(block[, value_cols]))), ]
}
reT <- do.call(rbind, lapply(c("vitro_only", "shared", "vivo_only"), sort_group))

## Row annotations: hit group and membership in the KEGG ribosome gene set.
reGroup  <- reT$group
RS_Index <- rownames(reT) %in% MSigDB$MSigDB_c2_cp_kegg$KEGG_RIBOSOME
RIBOSOME <- ifelse(RS_Index, "YES", "NO")
annotation_row <- data.frame(reGroup, RIBOSOME, row.names = rownames(reT))

reM <- as.matrix(reT[, value_cols])
ann_colors <- list(
  reGroup  = c(vitro_only = "#deebf7", shared = "#9ecae1", vivo_only = "#3182bd"),
  RIBOSOME = c(YES = "black", NO = "white")
)
pheatmap(reM,
         main = "",
         annotation_row = annotation_row,
         annotation_colors = ann_colors,
         border_color = "grey60",
         color = c("#f5f5f5", "#f5f5f5", "#dd1c77"),
         legend_breaks = c(-1, 0, 1),
         legend_labels = c("NC", "NC", "Depleted"),
         fontsize_row = 6,
         show_rownames = FALSE, show_colnames = TRUE,
         cluster_rows  = FALSE, cluster_cols  = FALSE)

Let’s check what fraction of the genes in each of the three groups has been previously identified.

## Cross-tabulate hit group (rows) against whether the row sum over columns
## 5:24 of reM -- the 20 CRISPR_Public columns -- is positive (columns).
table(annotation_row[,1], rowSums(reM[,5:24]) > 0)
            
             FALSE TRUE
  shared        65  435
  vitro_only   341 1157
  vivo_only    266  155

We observed that in vitro-only and shared gene sets were largely represented in the previously published studies on AML (PMID:27760321, PMID:28162770 and PMID:29478914). In contrast, only a small fraction of in vivo-only genes has been previously identified.

Pathway enrichment

Details can be found here Pathway enrichment.

Essential genes

Different researchers may define essential genes as different gene sets. Here we compared our CRISPR hits to three sets:

  1. Essential in culture: Genes deemed essential in multiple cultured cell lines based on shRNA screen data.
  2. Essential in culture: Genes deemed essential in multiple cultured cell lines based on CRISPR/Cas screen data.
  3. Essential in mice: Genes where homozygous knockout in mice results in pre-, peri- or post-natal lethality. The mouse phenotypes were reported by Jackson Labs.

These gene sets were curated by Mark Daly’s group at MGH.

## Load the three reference sets of essential genes and flag, for every gene
## in humanTable, whether it belongs to each set.

## First column of a headerless, tab-separated file, as a character vector.
read_gene_list <- function(path) {
  tbl <- read.table(file = path, row.names = NULL, header = FALSE, sep = "\t")
  as.character(tbl[[1]])
}

core    <- read_gene_list("DataSummary/gene_list/core_essentials_hart.tsv")
culture <- read_gene_list("DataSummary/gene_list/CEGv2_subset_universe.tsv")
mice    <- read_gene_list("DataSummary/gene_list/mgi_essential.tsv")

humanTable[["core"]]    <- humanTable$gene %in% core
humanTable[["culture"]] <- humanTable$gene %in% culture
humanTable[["mice"]]    <- humanTable$gene %in% mice

## Hit group (rows) against membership in the `core` gene list (columns).
table(humanTable$group, humanTable$core)
            
             FALSE TRUE
  shared       457   43
  vitro_only  1360  138
  vivo_only    417    4
## Hit group (rows) against membership in the `culture` gene list (columns).
table(humanTable$group, humanTable$culture)
            
             FALSE TRUE
  shared       381  119
  vitro_only  1096  402
  vivo_only    410   11
## Hit group (rows) against membership in the `mice` gene list (columns).
table(humanTable$group, humanTable$mice)
            
             FALSE TRUE
  shared       379  121
  vitro_only  1188  310
  vivo_only    332   89

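For the two culture-based definitions, the fraction of essential genes among shared/in vitro-only hits is almost 10 times higher than among in vivo-only hits (core: 8.6% and 9.2% vs. 1.0%; culture: 23.8% and 26.8% vs. 2.6%). In contrast, genes essential in mice make up a comparable fraction of all three groups (24.2%, 20.7% and 21.1%, respectively).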

Output

## Re-read the Venn membership table written earlier (first column becomes
## the row names) and show its first five rows.
oTable <- read.table(
  file = "out/WG_Venn.txt",
  row.names = 1,
  header = TRUE,
  sep = "\t"
)
knitr::kable(oTable[seq_len(5), ])
ma_vitro hm_vitro ma_vivo hm_vivo group
Mm 0610009D07Rik 1 0 0 0 vitro_only
Mm 1110004E09Rik 1 1 0 0 vitro_only
Mm 1110008L16Rik 1 0 1 0 shared
Mm 1110037F02Rik 1 0 0 0 vitro_only
Mm 1200014J11Rik 1 0 0 0 vitro_only
## NOTE(review): `group` is not taken from oTable. It is the global vector
## created in the public-data comparison (hits restricted to genes present in
## CRISPR_Public), which is why the counts equal the row totals of the tables
## above. Confirm whether oTable$group was intended here.
summary(factor(group))
    shared vitro_only  vivo_only 
       500       1498        421 

You can download the results of genome-wide CRISPR screening here Genome-wide_screen_Zscore_pvalue.xlsx.

sessionInfo

## Record the R version and the attached/loaded package versions for this run.
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.7 (Maipo)

Matrix products: default
BLAS: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRblas.so
LAPACK: /sibcb2/bioinformatics/software/BcbioNG/anaconda/lib/R/lib/libRlapack.so

locale:
[1] C

attached base packages:
[1] parallel  grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] stringr_1.4.0       viper_1.16.0        scales_1.1.0       
 [4] pheatmap_1.0.12     dplyr_1.0.5         survcomp_1.32.0    
 [7] prodlim_2019.11.13  RColorBrewer_1.1-2  gridExtra_2.3      
[10] survHD_0.99.1       survC1_1.0-2        Hmisc_4.3-0        
[13] Formula_1.2-3       lattice_0.20-38     penalized_0.9-51   
[16] survival_3.1-8      Biobase_2.42.0      BiocGenerics_0.28.0
[19] VennDiagram_1.6.20  futile.logger_1.4.3 ggrepel_0.8.1      
[22] ggplot2_3.2.1       rmarkdown_2.0      

loaded via a namespace (and not attached):
 [1] mixtools_1.1.0       splines_3.5.1        rmeta_3.0           
 [4] highr_0.8            latticeExtra_0.6-28  bootstrap_2019.6    
 [7] yaml_2.2.0           survivalROC_1.0.3    pillar_1.5.1        
[10] backports_1.1.5      glue_1.4.2           digest_0.6.23       
[13] checkmate_1.9.4      colorspace_1.4-1     htmltools_0.5.1.1   
[16] Matrix_1.2-18        pkgconfig_2.0.3      purrr_0.3.3         
[19] lava_1.6.7           htmlTable_1.13.3     tibble_3.1.0        
[22] generics_0.0.2       farver_2.0.1         ellipsis_0.3.0      
[25] withr_2.1.2          nnet_7.3-12          lazyeval_0.2.2      
[28] magrittr_1.5         crayon_1.3.4         evaluate_0.14       
[31] fansi_0.4.0          segmented_1.1-0      MASS_7.3-51.5       
[34] class_7.3-15         foreign_0.8-74       SuppDists_1.1-9.5   
[37] tools_3.5.1          data.table_1.12.8    formatR_1.7         
[40] lifecycle_1.0.0      munsell_0.5.0        cluster_2.1.0       
[43] lambda.r_1.2.4       e1071_1.7-3          compiler_3.5.1      
[46] rlang_0.4.10         rstudioapi_0.10      htmlwidgets_1.5.1   
[49] base64enc_0.1-3      labeling_0.3         gtable_0.3.0        
[52] DBI_1.1.0            R6_2.4.1             knitr_1.26          
[55] utf8_1.1.4           futile.options_1.0.1 KernSmooth_2.23-16  
[58] stringi_1.4.3        Rcpp_1.0.3           vctrs_0.3.7         
[61] rpart_4.1-15         acepack_1.4.1        tidyselect_1.1.0    
[64] xfun_0.18