The mHap map of common cancers: Cancer MHBs are associated with DEGs in Pan-cancer

Figure5. Cancer MHBs are associated with differentially expressed genes in pan-cancer. (A) Enrichment of cancer MHB-associated genes in differentially expressed genes. The forest plot shows the enrichment of cancer MHB-associated genes, excluding DMR-related genes, in DEGs. (B) Pathway enrichment of MHB-associated genes in pan-cancer. The upper panel displays the gene set enrichment of shared upregulated genes. The bottom panel illustrates the differences in methylation and expression profiles of selected genes, annotated in the G2/M checkpoint or MYC activity pathway across cancer types.(C) Pathway crosstalk analysis for pan-cancer. (D) The Kaplan-Meier survival curves compare overall survival between patient subgroups stratified by high/low expression (median cutoff) of RRM2 and SLC2A1 genes in CESC, LIHC, LUAD, and PAAD of TCGA dataset.

loading required packages:

library(tidyverse)
library(stringr)
library(data.table)
library(ggplot2)
library(rGREAT)
library(regioneR)
library(openxlsx)
library(patchwork)
library(clusterProfiler)
library(msigdbr)
library(igraph)

Figure 5A

# 1. Load data
# 1.1 load TCGA CGI-DMR: HM450K_CGI_DMR 
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/TCGA_DMR/HM450K_TCGA_CGI_DMR_GR.RData")
# 1.2 load TCGA DEGs: TCGA_DE_res 
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/TCGA_Exp/DEGs/TCGA_DE_res.RData")
# 1.3 Load MHB
mhb_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/"
mhb <- list.files(mhb_path)
MHB.GR <- lapply(mhb,function(x){fread(paste0(mhb_path,x)) %>% toGRanges()}) 
names(MHB.GR) <- str_split_i(mhb,"\\.",i=1) %>% str_split_i("_",i=2)
# 1.4 rGREAT annotation
rGREAT_genes <- fread("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/Methy/GREATv4.genes.promoter_UD1000.hg19.bed.gz")

# 2. DMR vs DEGs
id.methyl <- names(mcols(HM450K_CGI_DMR)) %>% setdiff("OV")
DMEG <- lapply(id.methyl,function(x){
        # 1. CGI-DMR
        test <- HM450K_CGI_DMR[,x]
        colnames(mcols(test))<-"Type"
        DMgene <- subset(test) %>%  great(cores=10,"msigdb:H", "GREAT:hg19") %>%  getRegionGeneAssociations() %>% 
                         as.data.frame() %>%  mutate(annotated_genes=lapply(annotated_genes,function(x){
                         x %>% paste(collapse=",")
                         }) %>% do.call(rbind,.) %>% as.character(),
                        dist_to_TSS = lapply(dist_to_TSS,function(x){
                                            x %>% paste(collapse=",")
                        }) %>% do.call(rbind,.) %>% as.character()) %>% 
                        separate_rows(annotated_genes,dist_to_TSS,sep=",",convert=TRUE) %>%
                        as.data.frame() %>% 
                       filter(abs(dist_to_TSS)<=1000) %>%          ### restrict to promoter regions TSS+/- 1K
                       group_by(annotated_genes) %>% 
                       summarise(annotated_genes=annotated_genes,Type=paste0(unique(Type),collapse=",")) %>% 
                       distinct() %>% filter(!str_detect(Type,","))
        # 2. DEGs
        if(x=="PDAC"){
            Eid = "PAAD"
        }else {
           Eid = x
        }
        DEG <- TCGA_DE_res %>% dplyr::select(c("gene",Eid)) %>% dplyr::rename(Type=2) 
        #######
        # 2 Fisher exact test

        ##2.1 Hypo vs. Up
        # Hypo + up
        aa1 <- DMgene %>% filter(Type=="Hypo") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Up") %>% pull(gene))  %>% length()
        # Hypo + NC
        bb1 <- DMgene %>% filter(Type=="Hypo") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "NC") %>% pull(gene) )  %>% length()
        # NC + Up 
        cc1 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Up") %>% pull(gene) )  %>% length()
        # NC + NC
        dd1 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "NC") %>% pull(gene) )  %>% length()
        
        ##2.2 Hyper vs DN
        # Hyper + DN
        aa2 <- DMgene %>% filter(Type=="Hyper") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Down") %>% pull(gene))  %>% length()
        # Hyper + NC
        bb2 <- DMgene %>% filter(Type=="Hyper") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "NC") %>% pull(gene) )  %>% length()
        # NC + DN
        cc2 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Down") %>% pull(gene) )  %>% length()
        # NC + NC
        dd2 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect( DEG %>% filter(Type %in% "NC") %>% pull(gene))  %>% length()
        
        ##2.3 Hyper vs UP
        # Hyper + UP
        aa3 <- DMgene %>% filter(Type=="Hyper") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Up") %>% pull(gene))  %>% length()
        # Hyper + NC
        bb3 <- DMgene %>% filter(Type=="Hyper") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "NC") %>% pull(gene) )  %>% length()
        # NC + UP
        cc3 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Up") %>% pull(gene) )  %>% length()
        # NC + NC
        dd3 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect( DEG %>% filter(Type %in% "NC") %>% pull(gene))  %>% length()
        
        ##2.4 Hypo vs DN
        # Hypo + DN
        aa4 <- DMgene %>% filter(Type=="Hypo") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Down") %>% pull(gene))  %>% length()
        # Hyper + NC
        bb4 <- DMgene %>% filter(Type=="Hypo") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "NC") %>% pull(gene) )  %>% length()
        # NC + DN
        cc4 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect(DEG %>% filter(Type %in% "Down") %>% pull(gene) )  %>% length()
        # NC + NC
        dd4 <- DMgene %>% filter(Type=="NC") %>% pull(annotated_genes) %>% 
                intersect( DEG %>% filter(Type %in% "NC") %>% pull(gene))  %>% length()
        
        # res
        Hypo_Up <- fisher.test(matrix(c(aa1,bb1,cc1,dd1),ncol=2,nrow=2)) %>% 
                broom::tidy() %>% mutate(A=aa1,B=bb1,C=cc1,D=dd1,Type="Hypo_Up")
        Hypo_DN<- fisher.test(matrix(c(aa4,bb4,cc4,dd4),ncol=2,nrow=2)) %>% 
                broom::tidy() %>% mutate(A=aa4,B=bb4,C=cc4,D=dd4,Type="Hypo_DN")
        Hyper_DN <- fisher.test(matrix(c(aa2,bb2,cc2,dd2),ncol=2,nrow=2)) %>% 
                broom::tidy() %>% mutate(A=aa2,B=bb2,C=cc2,D=dd2,Type="Hyper_DN")
        Hyper_UP <- fisher.test(matrix(c(aa3,bb3,cc3,dd3),ncol=2,nrow=2)) %>% 
                broom::tidy() %>% mutate(A=aa3,B=bb3,C=cc3,D=dd3,Type="Hyper_UP")

        res <- rbind(Hypo_Up,Hypo_DN,Hyper_DN,Hyper_UP)
})
names(DMEG) <- id.methyl
res_DMEG <- do.call(rbind,DMEG) %>% mutate(tissue=rep(id.methyl,each=4),
            logFDR=-log10(p.adjust(p.value,method="fdr",length(p.value))))

# 3. MHB (remove DMR) vs DEGs  
id.com <- c("BRCA","COAD","ESCA","COADREAD","HNSC","LIHC","LUAD","LUSC","STES","THCA","PDAC","CESC")
nonDM <- lapply(id.com,function(x){
        # 1.DMR
        test <- HM450K_CGI_DMR[,x]
        colnames(mcols(test))<-"Type"
        DMgene <- subset(test) %>%  great(cores=10,"msigdb:H", "GREAT:hg19") %>%  
                getRegionGeneAssociations() %>% as.data.frame() %>% 
                mutate(annotated_genes=lapply(annotated_genes,function(x){
                x %>% paste(collapse=",")
                }) %>% do.call(rbind,.) %>% as.character(),
                dist_to_TSS = lapply(dist_to_TSS,function(x){
                                            x %>% paste(collapse=",")
                }) %>% do.call(rbind,.) %>% as.character()) %>% 
                separate_rows(annotated_genes,dist_to_TSS,sep=",",convert=TRUE) %>%
                as.data.frame() %>% 
                filter(abs(dist_to_TSS)<=1000) %>%     ### restrict to promoter regions TSS+/- 1K
                group_by(annotated_genes) %>% 
                summarise(annotated_genes=annotated_genes,Type=paste0(unique(Type),collapse=",")) %>% 
                distinct() %>% filter(!str_detect(Type,","))     
        NoDMG <- DMgene %>% filter(Type == "NC") %>% pull(annotated_genes)
        # 2.DEG
        if(x=="PDAC"){
            Eid = "PAAD"
        }else {
           Eid = x
        }
        DEG <- TCGA_DE_res %>% dplyr::select(c("gene",Eid)) %>% dplyr::rename(Type=2) %>% 
                filter(Type %in% c("Up","Down"))
        Exp_NC <- TCGA_DE_res %>% dplyr::select(c("gene",Eid)) %>% dplyr::rename(Type=2) %>% 
                filter(Type %in% "NC")
        # Non DMR + DEG
        nonDM.DEG <- DEG %>% filter(gene %in% NoDMG)
        # 3. MHB
        if(x=="PDAC"){
            MHBid = "PACA"
        }else if(x=="COADREAD") {
            MHBid = "COAD"
        }else if(x=="LUSC" | x =="LUAD"){
            MHBid = "NSCLC"
        }else if (x=="STES") {
            MHBid = "STAD"
        }else{
            MHBid = x
        }
        data <- MHB.GR[[MHBid]]
        if (!is.null(data)){
            MHGenes <-  data %>% great(cores=10,"msigdb:H", "GREAT:hg19") %>%  getRegionGeneAssociations() %>% 
                    as.data.frame() %>%  mutate(annotated_genes=lapply(annotated_genes,function(x){
                    x %>% paste(collapse=",")
                    }) %>% do.call(rbind,.) %>% as.character(),
                    dist_to_TSS = lapply(dist_to_TSS,function(x){
                                            x %>% paste(collapse=",")
                    }) %>% do.call(rbind,.) %>% as.character()) %>% 
                    separate_rows(annotated_genes,dist_to_TSS,sep=",",convert=TRUE) %>%
                    as.data.frame() %>%
                    pull(annotated_genes) %>% unique()
        # fisher test  in NonDMR regions
        # MHB vs DEGs
        aa <-  intersect(MHGenes,nonDM.DEG %>% pull(gene)) %>% length()
        bb <-  intersect(MHGenes,NoDMG) %>% intersect(Exp_NC %>% pull(gene)) %>% length()
        cc <-  setdiff(NoDMG,MHGenes) %>% intersect(DEG %>% pull(gene)) %>% length()
        dd <-  setdiff(NoDMG,MHGenes) %>% intersect(Exp_NC %>% pull(gene)) %>% length()
        ##genes
        res <- intersect(MHGenes,nonDM.DEG %>% pull(gene))
        
        ##MHB vs Up
        aa1 <-  nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Up")  %>% nrow()
        bb1 <- intersect(MHGenes,NoDMG) %>% intersect(Exp_NC %>% pull(gene)) %>% length()
        cc1<-  nonDM.DEG %>% filter(!(gene %in% MHGenes),Type=="Up")  %>% nrow()
        dd1 <-  setdiff(NoDMG,MHGenes) %>% intersect(Exp_NC %>% pull(gene)) %>% length()
        ##genes
        res1 <-  nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Up") %>% pull(gene)

        # MHB vs DN
        aa2 <-  nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Down")  %>% nrow()
        bb2 <-  intersect(MHGenes,NoDMG) %>% intersect(Exp_NC %>% pull(gene)) %>% length()
        cc2 <-  nonDM.DEG %>% filter(!(gene %in% MHGenes),Type=="Down")  %>% nrow()
        dd2 <- setdiff(NoDMG,MHGenes) %>% intersect(Exp_NC %>% pull(gene)) %>% length()
        ##genes
        res2 <- nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Down") %>% pull(gene)

        # UP vs DN  (MHB genes)
        aa3 <-  nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Up")  %>% nrow()
        bb3 <-  nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Down")  %>% nrow()
        cc3 <-  nonDM.DEG %>% filter(!(gene %in% MHGenes),Type=="Up")  %>% nrow()
        dd3 <- nonDM.DEG %>% filter(!(gene %in% MHGenes),Type=="Down")  %>% nrow()
        # genes
        res3 <- nonDM.DEG %>% filter(gene %in% MHGenes,Type=="Down") %>% pull(gene)

        signif1 <- fisher.test(matrix(c(aa,bb,cc,dd),ncol=2,nrow=2)) %>% 
                    broom::tidy() %>% mutate(A=aa,B=bb,C=cc,D=dd,
                                    Type="NonDMR.MHB.DEG",NonDMR.MHB.DEG= paste0(res,collapse=","))
        signif2 <- fisher.test(matrix(c(aa1,bb1,cc1,dd1),ncol=2,nrow=2)) %>% 
                    broom::tidy() %>% mutate(A=aa1,B=bb1,C=cc1,D=dd1,
                                    Type="NonDMR.MHB.UP",NonDMR.MHB.DEG= paste0(res1,collapse=","))
        signif3 <- fisher.test(matrix(c(aa2,bb2,cc2,dd2),ncol=2,nrow=2)) %>% 
                    broom::tidy() %>% mutate(A=aa2,B=bb2,C=cc2,D=dd2,
                                    Type="NonDMR.MHB.DN",NonDMR.MHB.DEG= paste0(res2,collapse=","))
        signif4 <- fisher.test(matrix(c(bb3,aa3,dd3,cc3),ncol=2,nrow=2)) %>% 
                    broom::tidy() %>% mutate(A=aa3,B=bb3,C=cc3,D=dd3,
                                    Type="NonDMR.MHB.DN.UP",NonDMR.MHB.DEG= paste0(res3,collapse=","))
        
        rbind(signif1,signif2,signif3,signif4)
        }
    })
names(nonDM) <- id.com
res_nonDM <- do.call(rbind,nonDM) %>% mutate(tissue=rep(id.com,each=4))

# PLOT
# DMRs vs DEGs
res1 <- res_DMEG %>% filter(Type!="Total") %>% 
                    mutate(logFDR = ifelse(logFDR>20,20,logFDR),
                            DM = str_split_i(Type,"_",i=1),
                            DE = toupper(str_split_i(Type,"_",i=2))) %>% 
                    filter(tissue %in% id.com )
p1<- ggplot(res1,
        aes(x=logFDR,y=fct_rev(tissue),fill=DE)) + 
        geom_bar(stat="identity", position=position_dodge()) + 
        scale_fill_brewer(palette="Set1",direction=-1) +
        geom_vline(aes(xintercept = 1.3),color="gray",linetype="dashed", size = 0.5)+
        xlab('-log10(FDR)') + ylab(" ") + 
        theme_classic() +
        theme(legend.position = "top",
              axis.text = element_text(size=15,color="black"),
              axis.title.x = element_text(size=20,color="black"),
              axis.ticks.length=unit(0.2, "cm") ) +
        facet_wrap(~DM,scale="free") 
print(p1)

# MHBs(nonDMR) vs DEGs 
res3 <- res_nonDM %>% filter(Type=="NonDMR.MHB.UP" | Type=="NonDMR.MHB.DN") %>% 
                    mutate(logP=-log10(p.value),
                            logFDR=-log10(p.adjust(p.value,method="fdr",length(p.value))),
                            logFDR = ifelse(logFDR>8,8,logFDR),
                            DE = fct_inorder(str_split_i(Type,"\\.",i=3)),
                            signif = ifelse(logP>1.3,"1","0")) 
p3<- ggplot(res3,aes(x=estimate,y=fct_rev(tissue),col=signif)) + 
        geom_point(aes(size=4,col=signif),shape=18) +
        geom_errorbarh(aes(xmax = conf.high, xmin = conf.low), height = 0,size=1)+
        scale_color_brewer(palette="Set1",direction=-1) +
        geom_vline(aes(xintercept = 1),color="gray",linetype="dashed", size = 0.5)+
        scale_x_continuous(limits=c(0.1, 3.2), breaks = seq(0, 3.5, 0.5))+
        xlab('Odds Ratio') + ylab(" ") + 
        theme_classic() +
        theme(legend.position = "top",
              axis.text = element_text(size=15,color="black"),
              axis.title.x = element_text(size=20,color="black"),
              axis.ticks.length=unit(0.2, "cm") ) +
        facet_wrap(~DE,scale="free_y")
print(p3)

Figure 5B

Applying priority index (Pi) prioritization approach for identifying pan-cancer MHB targets at the gene and pathway crosstalk levels.

# Priority index (Pi) prioritization approach 
files= fread("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/Pi/source.txt",header=F)$V1
for (i in files){source(i)}

# Input: Log2FC & FDR of MHB-related genes in pan-cancer.
# specify built-in data placeholder
placeholder <- "http://www.comptransmed.pro/bigdata_ctm"
# load Preprocessed data and get summary data (2 column) 
## RNA-seq : Pan-cancer
path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/MHB_NonDMR.DEG/"
RNA.files <- list.files(path,pattern="_FC.txt.gz") %>% str_split_i("_",i=1)
ls_df_res  <-lapply(RNA.files,function(x){
              fread(paste0(path, x,"_MHB_NonDMR_DEGs_FC.txt.gz")) %>% 
                    mutate(wFC=-log10(FDR)*abs(logFC)) %>% 
                    dplyr::select(V1,wFC) %>% dplyr::rename(gene=V1) %>% 
                    arrange(-wFC) %>% as_tibble()
            })
names(ls_df_res) <- RNA.files
# define high-quality protein interaction network: KEGG or STRING
  if(1){
        network.customised <- oDefineNet(network="KEGG", 
                                         placeholder=placeholder) %>%
                              igraph::as.undirected()
  }else{
        network.customised <- oDefineNet(network="STRING_high", 
                                      STRING.only=c("experimental_score",
                                                    "database_score"),
                                      placeholder=placeholder)
  }

# All known genes (primarily sourced from UCSC)
guid <- NULL
  if(1){
        GR.Gene <- oRDS("UCSC_knownGene", placeholder=placeholder, guid=guid)
        if(1){
                gene_info <- oRDS("org.Hs.eg",placeholder=placeholder, guid=guid)$info
                ind <- match(names(GR.Gene), gene_info$Symbol)
                GR.Gene <- GR.Gene[!is.na(ind)]
            }
        ## remove gene at chrX and chrY
        if(0){
            df <- as.data.frame(GR.Gene) %>% as_tibble()
            ind <- which(!(df$seqnames %in% c('chrX','chrY')))
            GR.Gene <- GR.Gene[ind]
        }
        ## restricted to GR.Gene filter the node
        network.customised <- network.customised %>%
                              oNetInduce(nodes_query=names(GR.Gene),
                                         largest.comp=F)
  }

# ls_pNode: prepare predictors
ls_pNode <- pbapply::pblapply(ls_df_res, function(x){
                  pNode  <- x %>% as.data.frame() %>% 
                                  oPierAnno(list_pNode=NULL, 
                                  network.customised = network.customised,
                                  placeholder=placeholder, guid=guid)
})
# do prioritisation
dTarget <- oPierMatrix(ls_pNode, displayBy="pvalue", aggregateBy="fishers",
                       placeholder=placeholder, guid=guid)
# dT: restricted to GR.Gene and rating>0
df_GR_gene <- GR.Gene %>% as.data.frame() %>% as_tibble()
dT <- dTarget
dT$priority <- dTarget$priority %>% filter(rating>0) %>% 
                semi_join(df_GR_gene, by=c('name'='Symbol'))  %>% dplyr::mutate(rank=seq(n()))
dT$predictor <- dTarget$predictor[dT$priority %>% pull(name),]
# output
# dT$priority %>% openxlsx::write.xlsx("PI_Priority_mhb.xlsx")

# 1. load MHB.DEG.NonDMR genes 
rna_path <- "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/TCGA_Exp/DEGs/"
rna.files <- list.files(rna_path,pattern="TCGA_DEG_")
rna.log2fc <- lapply(rna.files,function(x){fread(paste0(rna_path,x)) %>% 
                                            dplyr::select(1,3,4) %>% 
                                            mutate(DE=ifelse(FDR<=0.05 & logFC >=1,"up",
                                                    ifelse(FDR<=0.05 & logFC <= -1, "down","NC")))})
names(rna.log2fc) <- rna.files %>% str_split_i("\\.",i=1) %>% str_split_i("_",i=3)
# MHB_NonDMR.DEG
MHB_NonDMR.DEG <- read_gmt("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/DEG.MHB.NonDM.gmt.gz")
MHB_NonDMR.DEG.FC <- lapply(names(MHB_NonDMR.DEG),function(x){
            if(x=="PDAC"){
                y="PAAD"
            }else{
                y=x
            }           
      rna.log2fc[[y]] %>% filter(V1 %in% MHB_NonDMR.DEG[[x]]) %>% mutate(DE=ifelse(logFC>0,"up","down"),tissue=x)
    })
names(MHB_NonDMR.DEG.FC) <- names(MHB_NonDMR.DEG) %>% str_replace_all("PDAC","PAAD")

# 2. Load pi ranking results
seed.genes <- read.xlsx("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/PI_Priority_mhb.xlsx")
cancer_type <- names(seed.genes)[-c(1:5)]
network_genes <- lapply(cancer_type,function(x){
        seed.genes %>% dplyr::select(name,rank,x) %>% rename_with(~c("name","rank","seed")) %>% filter(seed==1)    
})
names(network_genes) <- cancer_type %>% str_replace_all("PDAC","PAAD")

# RNA + NoMHB
rna.log2fc.pi <- MHB_NonDMR.DEG.FC[names(network_genes)]
rna.log2fc.pi.rank <- lapply(names(network_genes),function(x){ 

                       rna.log2fc.pi[[x]] %>% dplyr::rename(name=V1) %>% 
                       inner_join(network_genes[[x]],by="name") %>% 
                       arrange(rank) 
                       })

names(rna.log2fc.pi.rank) <- names(network_genes)
res <- do.call(rbind,rna.log2fc.pi.rank) %>% as.data.frame() %>% group_by(name,rank) %>% 
                            summarise(tissue_count = n(),
                                      tissue = paste0(tissue,collapse=","),
                                      FC = paste0(DE,collapse=","),
                                      DE = paste0(unique(DE),collapse=",")) %>% 
                            arrange(rank,desc(tissue_count))
fwrite(res,file="DEG.MHB.NoDMR.Pi.ranking.txt",sep="\t",quote= F,row.names=F)

# showing top 500 
up_rank <- res %>% filter(DE=="up") %>% head(500)
down_rank <- res %>% filter(DE=="down") %>% head(500)

p1 <- ggplot(up_rank,aes(x=fct_inorder(name),y=tissue_count)) +
      geom_bar(stat="identity",fill="firebrick") +
      scale_y_continuous(expand=c(0,0),limits=c(0,12)) +
      labs(x="",y="Number of tissue") +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 90)) 
p2 <- ggplot(down_rank,aes(x=fct_inorder(name),y=tissue_count)) +
      geom_bar(stat="identity",fill="steelblue") +
      scale_y_continuous(expand=c(0,0),limits=c(0,12)) +
      labs(x="",y="Number of tissue") +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 90)) 
p<- wrap_plots(list(p1,p2),ncol=1,nrow=2) + plot_layout(guides = 'collect')
print(p)

# 3. Enrichment
## Here we use C2 & H gene sets as an example:
m_t2g <- msigdbr(species = "Homo sapiens", category = "C2") %>% 
            filter(!(gs_subcat %in% c("CGP","CP"))) %>% 
            dplyr::select(gs_name, entrez_gene) 
m_t2g1 <- msigdbr(species = "Homo sapiens", category = "H") %>% 
             dplyr::select(gs_name, entrez_gene)
# merging
m_t2g <- rbind(m_t2g1,m_t2g) 

# convert : the existence of genes more than 6 tissues. 
up_freq <- res %>% filter(DE=="up") %>% filter(tissue_count>=6) 
genes = bitr(up_freq$name,               
            fromType="SYMBOL",             
            toType="ENTREZID",              
            OrgDb="org.Hs.eg.db")               
# universe = read_gmt("KEGG_genes.gmt",
#                from="SYMBOL",
#               to = "ENTREZ",
#               orgdb="org.Hs.eg.db")

em <- enricher(genes$ENTREZID, TERM2GENE=m_t2g,pAdjustMethod= "BH") %>% as.data.frame() 

p <- em %>% filter(ID %in% c("HALLMARK_E2F_TARGETS","HALLMARK_G2M_CHECKPOINT","PID_MYC_ACTIV_PATHWAY")) %>% 
        ggplot(aes(x=-log10(p.adjust),y=fct_rev(ID))) +
            geom_bar(stat="identity",fill="orange") +
            scale_x_continuous(expand=c(0,0),position="top",
                        limits=c(0,25),
                        breaks=seq(0,25,5)) +
            scale_y_discrete(labels= function(x) str_wrap(x,width=45)) +
            labs(y="",x="-log10(FDR)") +
            geom_vline(aes(xintercept = 2),color="gray",linetype="dashed", size = 0.5)+
            theme_bw() +
            theme(panel.background=element_blank(),
                    panel.grid=element_blank())
print(p)

# 4. LogFC and delta beta value of target genes
# RNA MHB_NonDMR.DEG
## G2M 
Genes.G2M <-  em %>% filter(ID =="HALLMARK_G2M_CHECKPOINT" ) %>% pull(geneID) %>% 
                    str_split_1("/") %>% bitr(fromType="ENTREZID",
                    toType="SYMBOL",OrgDb="org.Hs.eg.db") %>% pull(SYMBOL) %>% c("MYC")
Genes.MYC <- em %>% filter(ID =="PID_MYC_ACTIV_PATHWAY" ) %>% pull(geneID) %>% 
                    str_split_1("/") %>% bitr(fromType="ENTREZID",
                    toType="SYMBOL",OrgDb="org.Hs.eg.db") %>% pull(SYMBOL)
rna.log2FC.G2M <- lapply(names(MHB_NonDMR.DEG.FC),function(x){ 
                        MHB_NonDMR.DEG.FC[[x]] %>% 
                        filter(V1 %in% Genes.G2M) %>%
                        mutate(tissue=x) })
names(rna.log2FC.G2M) <- names(MHB_NonDMR.DEG.FC)
## MYC
rna.log2FC.MYC <- lapply(names(MHB_NonDMR.DEG.FC),function(x){ 
                        MHB_NonDMR.DEG.FC[[x]] %>% 
                        filter(V1 %in% Genes.MYC) %>%
                        mutate(tissue=x) })
names(rna.log2FC.MYC) <- names(MHB_NonDMR.DEG.FC)
# DNA methylation MHB_NonDMR.DEG
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/TCGA_DMR/TCGA_TSS_UD1K_Methylation.RData")
Mregion <- fread("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/Methy/GREATv4.genes.promoter_UD1000.hg19.bed.gz") %>% 
            dplyr::rename("name"="V5") %>% toGRanges()
MP.TvN <- pbapply::pblapply(names(rList),function(x){
        ##id 
        sample <- rList[[x]] %>% colnames() %>% str_split_i("-",i=4) 
        sample <- ifelse(str_detect(sample,"11"),"Normal","Tumor")
        ## data table
        data <- rList[[x]]  %>% as.data.frame() 
        names(data) <- paste0(names(data),"_",sample)
        data %>% summarise(MP.T = rowMeans(across(contains("Tumor")),na.rm=T),
                          MP.N = rowMeans(across(contains("Normal")),na.rm=T)) %>% 
                                dplyr::select(MP.T,MP.N) %>% 
                                mutate(SYMBOL = Mregion$name)
    })
names(MP.TvN) <- names(rList)
## G2M
DNAme.G2M <- lapply(names(MHB_NonDMR.DEG.FC),function(x){ 
                        MP.TvN[[x]] %>% 
                        filter(SYMBOL %in% rna.log2FC.G2M[[x]]$V1 ) %>%
                        mutate(tissue=x)  })
names(DNAme.G2M) <- names(MHB_NonDMR.DEG.FC)
## MYC
DNAme.MYC <- lapply(names(MHB_NonDMR.DEG.FC),function(x){ 
                        MP.TvN[[x]] %>% 
                        filter(SYMBOL %in% rna.log2FC.MYC[[x]]$V1 ) %>%
                        mutate(tissue=x)  })
names(DNAme.MYC) <- names(MHB_NonDMR.DEG.FC)

### RES
res_logfc.g2m <- do.call(rbind,rna.log2FC.G2M) %>% as.data.frame() %>% 
            mutate(V1=factor(V1,levels=seed.genes$name)) 
res_logfc.myc <- do.call(rbind,rna.log2FC.MYC) %>% as.data.frame() %>% 
            mutate(V1=factor(V1,levels=seed.genes$name)) 
##
res_DNAme.g2m <- do.call(rbind,DNAme.G2M) %>% as.data.frame() %>%
             mutate(SYMBOL=factor(SYMBOL,levels=seed.genes$name)) 
res_DNAme.myc <- do.call(rbind,DNAme.MYC) %>% as.data.frame() %>%
             mutate(SYMBOL=factor(SYMBOL,levels=seed.genes$name))
# PLOT
## RNA 
 p1 <-  res_logfc.g2m %>% ggplot(aes(x=V1,y=fct_rev(tissue))) +
        geom_tile(aes(fill=logFC)) +
        scale_fill_gradientn(colours = c("darkblue","white","darkred"),
                        limits=c(-8, 8), breaks = seq(-8, 8, 4)) + 
        theme_bw() +
        theme( axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5),
        panel.background =element_blank())
 p12 <- res_logfc.myc %>% ggplot(aes(x=V1,y=fct_rev(tissue))) +
        geom_tile(aes(fill=logFC)) +
        scale_fill_gradientn(colours = c("darkblue","white","darkred"),
                        limits=c(-8, 8), breaks = seq(-8, 8, 4)) + 
        theme_bw() +
        theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5),
        panel.background =element_blank())
## DNAme
data1 <- res_DNAme.g2m %>% 
         mutate(delta = MP.T - MP.N,tissue=fct_rev(tissue)) 
 p21 <-  data1 %>% ggplot(aes(x=SYMBOL,y=tissue)) +
        geom_tile(aes(fill=delta)) +
        scale_fill_gradientn(colours = c('#7b3294','grey','#008837'),
                        limits=c(-0.2, 0.2), breaks = seq(-0.2, 0.2, 0.1)) + 
        theme_bw() +
        theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5),
        panel.background =element_blank()) 
data2 <- res_DNAme.myc %>% 
         mutate(delta = MP.T - MP.N,tissue=fct_rev(tissue)) 
 p22 <-  data2 %>% ggplot(aes(x=SYMBOL,y=tissue)) +
        geom_tile(aes(fill=delta)) +
        scale_fill_gradientn(colours = c('#7b3294','grey','#008837'),
                        limits=c(-0.2, 0.2), breaks = seq(-0.2, 0.2, 0.1)) + 
        theme_bw() +
        theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5),
        panel.background =element_blank())
p<- wrap_plots(list(p1,p12,p21,p22),ncol=2,nrow=2) + plot_layout(guides = 'collect')
print(p)

Figure 5C

# define KEGG-merged gene interaction network
ig.KEGG.category <- oRDS('ig.KEGG.category', placeholder=placeholder, guid=guid)
vec_categories <- ig.KEGG.category %>% pull(category) %>% str_replace(' ','') %>% str_c('KEGGc_',.)

ls_ig <- lapply(vec_categories, function(network){
    g <- oDefineNet(network=network, placeholder=placeholder, guid=guid)
})

ig  <- oCombineNet(ls_ig, combineBy='union', attrBy="intersect", verbose=TRUE)
ig2 <- oNetInduce(ig, nodes_query=V(ig)$name, largest.comp=T) %>% as.undirected()

# crosstalk identification
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig5/dTarget_PI_mhb.RData")
subg <- oPierSubnet(dT, network.customised=ig2, priority.quantile= 0.1,
                    subnet.size=50, placeholder=placeholder, guid=guid)

df_subg <- dT$priority %>% inner_join(oIG2TB(subg,'nodes') %>% dplyr::select(name), by='name')
# df_subg %>% openxlsx::write.xlsx('PI_subg_nodes_mhb.xlsx')
# output plot: oVisEvidenceAdv
subg <- subg %>% oLayout(c("layout_with_kk","graphlayouts.layout_with_stress")[2])
gp_rating_evidence <- oVisEvidenceAdv(dT, nodes=V(subg)$name, g=subg, 
                                      node.info="smart", colormap="spectral.top", 
                                      node.color.alpha=0.8, node.size.range=7, 
                                      edge.color='black', edge.color.alpha=0.2, edge.curve=0.01,
                                      node.label.size=2.5, node.label.color='black',
                                      node.label.alpha=0.8, node.label.padding=0.1, 
                                      node.label.arrow=0, node.label.force=1)
print(gp_rating_evidence)