The mHap map of common cancers: Cancer MHBs are associated with DEGs

Figure 3. Cancer MHBs are associated with dysregulation of gene expression. (A) Annotation of MHBs to DMRs. Hyper, hyper-DMR; Hypo, hypo-DMR; NC, no significant change; ND, not determined. (B-C) The enrichment of MHBs in hyper- and hypo-DMRs, respectively. (D) Association of MHBs and dysregulation of gene expression in ESCC. (E) A heatmap shows the mean methylation and expression of upregulated genes that also contains MHBs. (F) A heatmap shows the transcription factors (TF) activities between ESCC and normal esophageal tissues. (G). KLF5 binding profiles from ESCC TE5 cell line ChIP-seq data.

loading required packages:

library(tidyverse)
library(stringr)
library(data.table)
library(ggplot2)
library(pheatmap)
library("LOLA")
library(GenomicRanges)
library(reshape2)
library(rGREAT)
library(regioneR)
library(ComplexHeatmap)
library(circlize)
library(decoupleR)
library(limma)

Identification of DMRs

The Differentially methylated regions (DMRs) were identified by metilene.

# prepare input 
metilene_input.pl --in1  breast_T_Merged_CpG.bedGraph \
                  --in2  breast_N_Merged_CpG.bedGraph \
                  --out  metilene_input/breast_metilene_input.bedGraph \
                  --h1 g1_breast_T \
                  --h2 g2_breast_N
# call DMRs
metilene_linux64 -t 10 -c 2 -m 5 -a g1 -b g2 metilene_input/breast_metilene_input.bedGraph | \
        sort -V -k1,1 -k2,2n  | \
        awk '{OFS="\t"} {if ($4<=0.05 && $5<0.1) {print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,"Hypo"} 
                        else if($4<=0.05 && $5>0.1){print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,"Hyper"} 
                        else {print $0,"NC"}}' > metilene_res/breast_de_novo_DMR.bed

Figure 3A

# 2. tissue type MHB overlap with tissue type DMR 
DMR_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/metilene/output"
tsMHB_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/"
for i in `ls $DMR_path/*_DMR.bed` 
do
        a=$(echo ${i##*/} |sed 's/_de_novo_DMR.bed//g')
       if [[ "${a}" == *"breast"* ]];then
                tag_T="MHB_BRCA.bed"
        elif [[ "${a}" == *"colon"* ]];then
                tag_T="MHB_COAD.bed"
        elif [[ "${a}" == *"esophagus"* ]];then
                tag_T="MHB_ESCA.bed"
        elif [[ "${a}" == *"head_and_neck"* ]];then
                tag_T="MHB_HNSC.bed"
        elif [[ "${a}" == *"lung"* ]];then
                tag_T="MHB_NSCLC.bed"
        elif [[ "${a}" == *"liver"* ]];then
                  tag_T="MHB_LIHC.bed"
          elif [[ "${a}" == *"pancreas"* ]];then
                    tag_T="MHB_PACA.bed"
        elif [[ "${a}" == *"ovary"* ]];then
                    tag_T="MHB_OV.bed"
        elif [[ "${a}" == *"stomach"* ]];then
                tag_T="MHB_STAD.bed"
        elif [[ "${a}" == *"thyroid"* ]];then
                tag_T="MHB_THCA.bed"
        fi
        tsMHB=${tsMHB_path}/${tag_T}
        bedtools intersect -a ${tsMHB} -b  ${i} -wa -wb -loj | \
        awk '{OFS="\t"} {if ($14==".") {print $1,$2,$3,"'"${a}"'","NC"} else {print $1,$2,$3,"'"${a}"'",$14}}'|\
        bedtools merge -i - -c 4,5,5 -o distinct,distinct,count_distinct | \
        sort -k1,1 -k2,2n | \
        awk '{OFS="\t"} {if($6!=1) {print $1,$2,$3,$4,$5,$6,"ND"}else{print $0,$5}}' \
                    > /sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/metilene/\
                    tsMHB_DMR_OverlaptsMHB_DMR_Overlap/${a}_MHB_DMR.txt
done

path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/metilene/tsMHB_DMR_Overlap/"
files=list.files(path)

MHB_DMR_CS <- lapply(files,function(x){
            mx <- read.table(paste0(path,x),sep="\t",header=F)
            mx %>% group_by(V4,V7) %>% summarize(DMR_count=n())
})
names(MHB_DMR_CS) = files %>% str_split_i(pattern="_",i=1)

MHB_DMR_frq = as.data.frame(do.call(rbind,MHB_DMR_CS)) %>% mutate(type=paste0(V4,"_cancer")) %>% 
            group_by(type) %>% mutate(Freq=100*DMR_count/sum(DMR_count))

p<-ggplot(MHB_DMR_frq,aes(x=type,y=DMR_count)) +
          geom_bar(aes(fill=V7),stat="identity",position = position_fill(reverse = TRUE))+
          labs(x="",y="Percent (%)") +
          scale_fill_brewer(palette = "Set1") +
          scale_y_continuous(breaks=c(0,0.25,0.5,0.75,1),labels=100*c(0,0.25,0.5,0.75,1))+
          theme_bw()+
          theme(panel.background = element_blank(),panel.grid =element_blank(),
                axis.ticks.length=unit(5,'pt'),axis.text.x =element_text(angle=90,vjust=0.5),
                plot.title=element_text(hjust=0.5),legend.position = "top") +
          guides(fill=guide_legend(title=""))
print(p)

Figure 3B

mhb_clusters="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/mhb_clusters/"
background="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/MHB_11_cancers_clusters.bed.gz"
## Hyper DMR
files = list.files(mhb_clusters)
locResults = lapply(files,function(i){
                  mhb=fread(paste0(mhb_clusters,i)) %>% toGRanges()
                  # Universe_Sets Background
                  universe_set=fread(background) %>% toGRanges()
                  # Load regionDB 
                  states=loadRegionDB("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/metilene/LOLA/LOLA_Hyper")
                  # Runing LOLA
                  runLOLA(mhb, universe_set, states, cores=10)
})
names(locResults) <- files
data <- lapply(files,function(i){
       mx = locResults[[i]][,c("pValueLog","filename")] %>% 
            mutate(Type=i,q=p.adjust(p=10^(-pValueLog),method="BH",n=10)) %>% 
            dplyr::select(-pValueLog)
})
data = as.data.frame(do.call(rbind,data))
dta = dcast(data,Type~filename) %>% column_to_rownames(var="Type")

dta = dta[c("BRCA_cluster.bed","CESC_cluster.bed","COAD_cluster.bed","ESCA_cluster.bed",
            "HNSC_cluster.bed","LIHC_cluster.bed","NSCLC_cluster.bed","OV_cluster.bed",
            "PACA_cluster.bed","STAD_cluster.bed", "THCA_cluster.bed","cluster12.bed",
            "cluster13.bed","cluster14.bed","cluster15.bed","cluster16.bed"),]
dta_t = -log10(dta)
# filter FDR < 0.01
dta_t[dta_t<2]<-NA
# ranking 
rank_t = function(x) {
    m = rank(array(-x),na.last="keep",ties.method = "min")
    return(m)
}
data_rank= as.data.frame(t(apply(dta_t,1,rank_t)))
names(data_rank) = gsub(".bed","",names(dta_t))
rownames(data_rank) = gsub(".bed.gz","",rownames(data_rank))

# PLOT
p<- pheatmap(data_rank, main="MHBs Overlap with Hyper-DMRs",
                show_colnames=T,
                show_rownames=T,
                cluster_rows=F,
                cluster_cols=F,
                scale="none",
                angle_col="90",
                color=colorRampPalette(colors = c("firebrick","white","darkblue"))(1000))
print(p)

Figure 3C

# Hypo DMR
locResults = lapply(files,function(i){
                 mhb=fread(paste0(mhb_clusters,i)) %>% toGRanges()
                  # Universe_Sets Background
                  universe_set=fread(background) %>% toGRanges()
                  # Load regionDB
                  states=loadRegionDB("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/metilene/LOLA/LOLA_Hypo")
                  # Runing LOLA
                  runLOLA(mhb, universe_set, states, cores=10)
})
names(locResults) <- files

data <- lapply(files,function(i){
  mx = locResults[[i]][,c("pValueLog","filename")] %>% 
        mutate(Type=i,q=p.adjust(p=10^(-pValueLog),method="BH",n=10)) %>% 
        dplyr::select(-pValueLog)
})
dta = as.data.frame(do.call(rbind,data)) %>% dcast(Type~filename) %>% column_to_rownames(var="Type")

dta = dta[c("BRCA_cluster.bed","CESC_cluster.bed","COAD_cluster.bed","ESCA_cluster.bed",
            "HNSC_cluster.bed","LIHC_cluster.bed","NSCLC_cluster.bed","OV_cluster.bed",
            "PACA_cluster.bed","STAD_cluster.bed", "THCA_cluster.bed","cluster12.bed",
            "cluster13.bed","cluster14.bed","cluster15.bed","cluster16.bed"),]
dta_t = -log10(dta)
# filter FDR < 0.01 and then ranking 
dta_t[dta_t<2]<-NA
rank_t = function(x) {
    m = rank(array(-x),na.last="keep",ties.method = "min")
    return(m)
}
data_rank= as.data.frame(t(apply(dta_t,1,rank_t)))
names(data_rank) = names(dta_t)
names(data_rank) = gsub(".bed","",names(dta_t))
rownames(data_rank) = gsub(".bed.gz","",rownames(data_rank))

# PLOT
p<- pheatmap(data_rank,main="MHBs Overlap with Hypo-DMRs",
             show_colnames=T,show_rownames=T,
             cluster_rows=F,cluster_cols=F,
             scale="none",angle_col="90",
             color=colorRampPalette(colors = c("firebrick","white","darkblue"))(1000))
print(p)

Figure 3D-E

Tss_data <- fread("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/Methy/GREATv4.genes.promoter_UD1000.hg19.bed.gz") %>%
            rename_with(~c("chr","start","end","strand","genes"))

# Step1: MHB
ESCC_T_MHB <- "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/public/MHB_ESCC.bed.gz"
ESCC_N_MHB <- "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/public/MHB_Esophagus_N.bed.gz"
# MHB gene annotation
MHBGenes_TSS <- lapply(c(ESCC_T_MHB,ESCC_N_MHB),function(y){
                   fread(y) %>% toGRanges() %>% great("MSigDB:H","GREAT:hg19") %>% getRegionGeneAssociations() %>% 
                    as.data.frame() %>%  mutate(annotated_genes=lapply(annotated_genes,function(x){
                                                x %>% paste(collapse=",")
                                             }) %>% do.call(rbind,.) %>% as.character(),
                                             dist_to_TSS = lapply(dist_to_TSS,function(x){
                                                x %>% paste(collapse=",")
                                             }) %>% do.call(rbind,.) %>% as.character()) %>%
              separate_rows(annotated_genes,dist_to_TSS,sep=",",convert=TRUE) %>% 
              mutate(Type=ifelse(str_detect(y,"ESCC"),"esophagus_T","esophagus_N"))})
names(MHBGenes_TSS) =c("esophagus_T","esophagus_N")

# step2: Load DEGs
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/RNA/ESCC_log2TPM_Wilcox.RData")
ESCC_DEGs <- ESCC_log2TPM_Wilcox 
DEGs_up <-  ESCC_DEGs %>% filter(P_DEG=="Up") %>% rownames_to_column(var="genes") %>% pull(genes) %>% unique()
DEGs_down <-  ESCC_DEGs %>% filter(P_DEG=="Down") %>% rownames_to_column(var="genes") %>% pull(genes) %>% unique()

# Step 3: Load DMR  (Wilcoxon or Metilene)
## 3.1 ESCC DNAme promoter +/-  TSS 1K (Differentially of Methylation of Promoters)
DNAme_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/Methy/TSS1K_DNAme"
files = list.files(DNAme_path,pattern="_TSS1k.txt.gz")
files = files[str_detect(files,"esophagus")]

ESCC_TSS_DNAme <- lapply(files,function(x) {
        fread(paste0(DNAme_path,"/",x))  %>% 
        transmute(chr=Chr,start=Start -1,
                  end=End,beta=ifelse(tBase>=10,round(mBase/tBase,3),NA),
                  Type=str_replace_all(x,"_TSS1k.txt","")) %>%
            cbind(Tss_data %>% dplyr::select("genes")) %>% dplyr::select(genes,beta,Type) 
    })
ESCC_DNAme <- do.call(rbind,ESCC_TSS_DNAme) %>% as.data.frame()  %>% 
            pivot_wider(names_from=Type,values_from=beta,values_fn=mean) %>% 
            column_to_rownames(var="genes")  %>% na.omit()

# Wilcoxon test between T vs N
Diff_DNAme <- function(x){
        pData = names(x)
        Normal = as.numeric(as.character(na.omit(x[grep("N",pData,value=T)])))
        Tumor = as.numeric(as.character(na.omit(x[grep("T",pData,value=T)])))
        delta = mean(Tumor) - mean(Normal)
       N <- mean(Normal) ; T <- mean(Tumor)
        p_value =wilcox.test(Normal,Tumor,paired=T)$p.value
        res = c(delta,N,T,p_value)
        return(res)
  }
res<- as.data.frame(t(apply(ESCC_DNAme,1,Diff_DNAme)))  
names(res) =c("delta","Mean_N","Mean_T","P_value") 
ESCC_DNAme_res = cbind(ESCC_DNAme,res) %>% mutate(FDR = p.adjust(P_value,method="BH",n=length(P_value)),
                    DM = ifelse(delta>0.1 & FDR <= 0.05,"Hyper",
                                ifelse(delta< -0.1 & FDR <= 0.05,"Hypo","NC")),
                    T_beta = ifelse(Mean_T>=0.8,"High",ifelse(Mean_T<=0.2,"Low","Intermediate"))) 

## 3.2 Call DMR metilene 
DMR_Metilene <- fread("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/Methy/ESCC_T_N_de_novo_DMR.bed.gz") %>%
               toGRanges() %>% great("MSigDB:H","GREAT:hg19") %>% getRegionGeneAssociations() %>% 
               as.data.frame() %>%  mutate(annotated_genes=lapply(annotated_genes,function(x){
                                                x %>% paste(collapse=",")
                                             }) %>% do.call(rbind,.) %>% as.character(),
                                             dist_to_TSS = lapply(dist_to_TSS,function(x){
                                                x %>% paste(collapse=",")
                                             }) %>% do.call(rbind,.) %>% as.character()) %>% 
                                             separate_rows(annotated_genes,dist_to_TSS,sep=",",convert=TRUE)

## 3.3 Non-DMR  ## the genes are not regulated by DMR
common_genes <- intersect(rownames(ESCC_log2TPM),Tss_data %>% pull(genes))
# Non DMR genes
if(1){       ## from metilene DMR 
   DMR_genes <- DMR_Metilene %>% filter(V11 != "NC") %>%  pull(annotated_genes) %>% unique()  ##
   Non_DMR_genes <- setdiff(common_genes, DMR_genes) 
}else {      ## from Wilcoxon test
   Non_DMR_genes <- ESCC_DNAme_res %>%  filter(DM == "NC") %>% 
                    rownames_to_column(var="genes") %>% pull(genes) %>% unique()
   Non_DMR_genes <- intersect(Non_DMR_genes,common_genes)
}
# Revome the promoter delta Beta > 0.1
genes_N <- ESCC_DNAme_res %>% filter(abs(delta) < 0.1) %>% 
            rownames_to_column(var="genes") %>% pull(genes) %>% unique()
Non_DMR_genes <- intersect(Non_DMR_genes,genes_N)  
# save
Non_DMR_Genes_TSS <- Tss_data %>% filter(genes %in% Non_DMR_genes) %>% 
                    transmute(chr=chr,start=start+500,end=end-499,gene=genes)
write.table(Non_DMR_Genes_TSS,file="ESCC_Non_DMR_GENES_TSS_500.bed",
                            sep="\t",row.names=F,quote=F,col.names=F)

# 4.Integration analysis of MHB, DMRs, DEGs 
DNAme_MHB_Exp <- lapply(c("esophagus_T","esophagus_N"),function(x){
          
          mhbgenes <- MHBGenes_TSS[[x]] %>% pull(annotated_genes) %>%  unique()
          # 1932
          DEGs_up <- ESCC_DEGs %>% rownames_to_column(var="genes") %>% 
                    filter(genes %in% common_genes,P_DEG == "Up")  %>% pull(genes) %>% unique()
          # 837
          DEGs_down <- ESCC_DEGs %>% rownames_to_column(var="genes") %>% 
                    filter(genes %in% common_genes, P_DEG == "Down")  %>% pull(genes) %>% unique()
          NonDMR_DEG_up <- intersect(DEGs_up,Non_DMR_genes)
          NonDMR_DEG_DN <-  intersect(DEGs_down,Non_DMR_genes)

         # 2 x 2 table
         a <- intersect(mhbgenes,NonDMR_DEG_up) %>% length()  # 22
         b <- setdiff(NonDMR_DEG_up,mhbgenes) %>% length()    # 106

         d <- intersect(mhbgenes,NonDMR_DEG_DN) %>% length()  # 0
         e <- setdiff(NonDMR_DEG_DN,mhbgenes) %>% length()    # 25
         
         cat(a,b,d,e,"\n")
         fisher.test(matrix(c(d,e,a,b),ncol=2,nrow=2))%>% broom::tidy()
})

22 106 0 25 
33 95 0 25

# show case the cancer mhb associated genes with up-regulation and the methylation levels of promoter without any changes.
mhbgenes <-  mhbgenes <- MHBGenes_TSS[["esophagus_T"]] %>% pull(annotated_genes) %>%  unique()
NonDMR_DEG_up <- intersect(DEGs_up,Non_DMR_genes)
hint <- intersect(mhbgenes,NonDMR_DEG_up)
# sample heatmap preparation
data.DNAme <- ESCC_DNAme_res %>% dplyr::select(contains("esophagus")) %>% 
                                 rownames_to_column(var="genes") %>%
                                 filter(genes %in% hint)  %>% arrange(genes) %>%
                                  column_to_rownames(var="genes") %>% 
                                  rename_with(~gsub("esophagus_","",.x))
data.RNA <- ESCC_log2TPM_Wilcox %>% dplyr::select(contains("GSM")) %>% 
                                 rename_with(~str_split_i(.x,"_",i=2)) %>% 
                                 rownames_to_column(var="genes") %>% 
                                 filter(genes %in% hint)  %>% arrange(genes) %>% 
                                 column_to_rownames(var="genes") 

# 5. PLOT DNA methylation & Gene expression in MHB-associated up-regulated genes
## Annotation 
mhb_N <- MHBGenes_TSS[["esophagus_N"]] %>% pull(annotated_genes) %>% unique()   
mhb_Anno <- data.frame(gene=hint,Normal= (hint %in% mhb_N),Tumor="TRUE")
## DNAme
column_ha_1 = HeatmapAnnotation(Assay = rep("WGBS",20) ,
                                Type = ifelse(str_detect( names(data.DNAme),"N"),"Normal","Tumor") )
row_ha_1 = rowAnnotation(MHB_T = rep(1,22), MHB_N= ifelse(str_detect(mhb_Anno$Normal,"TRUE"),1,0))
colfun11 <- colorRamp2(breaks = c(0, 0.5, 1), colors = c('darkblue', 'white', 'firebrick'))
## RNA
column_ha_2 = HeatmapAnnotation(Assay = rep("RNA",20) , 
                                Type = ifelse(str_detect( names(data.DNAme),"N"),"Normal","Tumor") )
row_ha_2 = rowAnnotation(MHB_T = rep(1,22), MHB_N= ifelse(str_detect(mhb_Anno$Normal,"TRUE"),1,0))
colfun2 <- colorRamp2(breaks = c(-3, 0, 3), colors = c('darkblue', 'white', 'firebrick'))
data.RNA.scale = t(scale(t(as.matrix(data.RNA))))

p11 <- Heatmap(as.matrix(data.DNAme),name="Beta",
                col=colfun11,cluster_columns= F,
                top_annotation = column_ha_1, left_annotation = row_ha_1)
p21 <- Heatmap(data.RNA.scale,name="log2TPM(scaled)",
                col=colfun2,cluster_columns= F,
                top_annotation = column_ha_2)
p<- p11+p21
print(p)

Figure 3F

# Step 1: get all regulons from human
net <- decoupleR::get_collectri(organism='human', split_complexes=FALSE)
TFregul <- net %>% pull(source) %>% unique() 
regulon_gmt <- lapply(TFregul,function(x){net %>% filter(source==x) %>% pull(target)})
names(regulon_gmt)<-TFregul
# write gmt
write_gmt <- function(x,filename) {
    output<-file(filename,open = "wt")
    name <- names(x)
    lapply(name,function(y){
    writeLines(paste(c(y,"NA",x[[y]]),collapse = "\t"),con = output)
     })
    close(output)
}
write_gmt(regulon_gmt,file="TFregulon.gmt")

# Step 2: MHB
ESCC_T_MHB <- "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/public/MHB_ESCC.bed.gz"
ESCC_N_MHB <- "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/public/MHB_Esophagus_N.bed.gz"
# MHB gene annotation
MHBGenes_TSS <- lapply(c(ESCC_T_MHB,ESCC_N_MHB),function(y){
              fread(y) %>% toGRanges() %>% great("MSigDB:H","GREAT:hg19") %>% getRegionGeneAssociations() %>% 
              as.data.frame() %>%  
              mutate(annotated_genes=lapply(annotated_genes,function(x){
                                                x %>% paste(collapse=",")
                                             }) %>% do.call(rbind,.) %>% as.character(),
                                             dist_to_TSS = lapply(dist_to_TSS,function(x){
                                                x %>% paste(collapse=",")
                                             }) %>% do.call(rbind,.) %>% as.character()) %>% 
                                             separate_rows(annotated_genes,dist_to_TSS,sep=",",convert=TRUE) %>%
                                           mutate(Type=ifelse(str_detect(y,"ESCC"),"esophagus_T","esophagus_N"))})
names(MHBGenes_TSS) =c("esophagus_T","esophagus_N")
MHBGenes_T <- MHBGenes_TSS[["esophagus_T"]]  %>% pull(annotated_genes) %>% unique()
MHBGenes_N <- MHBGenes_TSS[["esophagus_N"]]  %>% pull(annotated_genes) %>% unique()

# step 2.1 Select regulons, regulated by MHB
if(!dir.exists("regulon_MHB"))
  dir.create("regulon_MHB")
# Tumor
for ( i in names(regulon_gmt)){
    gene <- regulon_gmt[[i]] 
    regulon_MHB<- MHBGenes_TSS[["esophagus_T"]] %>% filter(annotated_genes %in% gene)
    write.table(regulon_MHB, file=paste0("regulon_MHB/",i,"_regulon_targets_MHB_T.bed"),quote=F,
                                        row.names=F,col.names=F,sep="\t")
}
# step 2.2 do enrichment of regulons in MHBGene_T
regulons <- read_gmt("TFregulon.gmt", 
    from = "SYMBOL", to = "ENTREZ", orgdb = "org.Hs.eg.db") 
regulons_MHB_enrichment <-fread(ESCC_T_MHB) %>%  toGRanges() %>% great(regulons, "hg19") %>%
                            getEnrichmentTable() %>% filter(p_value_hyper<=0.05)
# saving 
write.table(regulons_MHB_enrichment,file="regulons_MHB_enrichment_significant.txt",sep="\t",quote=F,row.names=F)

# Step 3: TF Activity inference with Univariate Linear Model (ULM)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig3/ESCC/RNA/ESCC_log2TPM_Wilcox.RData")
sample_acts <- run_ulm(mat=ESCC_log2TPM, net=net, .source='source', .target='target',
                  .mor='mor', minsize = 5)  %>% filter(p_value<0.05)
# Visualization
sample_acts_mat <- sample_acts %>%
  pivot_wider(id_cols = 'condition', names_from = 'source',
              values_from = 'score') %>%
  column_to_rownames('condition') %>%
  as.matrix() %>% t() %>% na.omit() %>% t()

# Get TF activity 
regulon_mhb_tfs <- regulons_MHB_enrichment  %>% pull(id) %>% intersect(colnames(sample_acts_mat))
tfs <- sample_acts %>%
          filter(source %in% regulon_mhb_tfs ) %>%     #### only contains the MHB enriched regulons
          group_by(source) %>%
          summarise(std = sd(score)) %>%
          arrange(-abs(std)) %>%          
          pull(source)
sample_acts_mat <- sample_acts_mat[,tfs]
# Scale per sample
sample_acts_mat_scale <- scale(sample_acts_mat)
# Ranking
Tx <- sample_acts_mat %>% t() %>% as.data.frame() %>% dplyr::select(!contains("T7")) %>% 
                           mutate(T=rowMeans(across(contains("_T")),na.rm=T),
                                  N=rowMeans(across(contains("_N")),na.rm=T),
                                  cluster=ifelse(T>N,"1","2")) %>%
                          rownames_to_column(var="name") %>% arrange(cluster,name)
gene_order <- Tx %>% pull(name)
sample_acts_mat_scale <- sample_acts_mat_scale[,gene_order]
# Choose color palette
palette_length = 100
my_color = colorRampPalette(c("darkblue", "white","firebrick"))(palette_length)

my_breaks <- c(seq(-3, 0, length.out=ceiling(palette_length/2) + 1),
               seq(0.05, 3, length.out=floor(palette_length/2)))

# Plot
p<- pheatmap(t(sample_acts_mat_scale), 
              cluster_col=F, 
              cluster_row=F,
              border_color = NA, 
              color=my_color,
              breaks = my_breaks)
print(p)