Figure 2. Cancer MHBs are enriched in accessible chromatin regions. (A) Overlapping of cancer MHBs with regions of open chromatin in matched cancer types. ATAC-seq peaks were defined previously and downloaded from NCI’s Genomic Data Commons40. (B) Annotation of MHBs to regions of DNA methylation states, including UMR, LMR, PDR, and HMR. (C-D) The enrichment of MHBs in LMR and PMD, respectively. Enrichment test was performed by R package LOLA, with the union of MHBs serving as the background set. The resulting adjusted P-values for significant enrichment were ranked across all cancer types (FDR < 0.01). Non-significant results are shown in grey. (E) Genomic regions with MHBs are more enriched in regions of open chromatin when controlling for mean methylation levels. The enrichment score was calculated as the percentage of CpGs covered by open chromatin regions. (F) Enrichment of MHBs in ChIA-PET in a disease status-specific manner. The enrichment score was calculated as the ratio of observed to expected overlaps between MHBs and ChIA-PET regions with permutation test.
loading required packages:
# Input CpGs
data_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/Methyl_bdg/"
samples=list.files(data_path,pattern="Count_CpG.bedGraph.gz")
# hg19 CGI
session <- browserSession()
genome(session) <- "hg19"
query <- ucscTableQuery(session, table = "cpgIslandExt")
CpGislands.gr <- track(query)
genome(CpGislands.gr) <- NA
# Remove CGI +/-5K CpGs
CpGislands.gr <-suppressWarnings(resize(CpGislands.gr, 5000, fix="center"))
for ( i in samples){
# Load Methylation
x <- fread(paste0(data_path,i)) %>% toGRanges()
names(mcols(x)) = c("M","Um")
mcols(x)[,"T"] = mcols(x)[,1] + mcols(x)[,2]
ranges(x) <- end(x)
mcols(x) <- mcols(x)[,c("T","M")]
tag <- gsub("_Merged_Count_CpG.bedGraph","",i)
# PMD
PMDsegments<-segmentPMDs(m=x, chr.sel="chr22",seqLengths=sLengths,num.cores=10)
# FDR cut-off
stats <- suppressWarnings(calculateFDRs(m=x, CGIs=CpGislands.gr,
PMDs=PMDsegments, num.cores=10))
FDR.cutoff <- 5
m.sel <- 0.5
n.sel=as.integer(names(stats$FDRs[as.character(m.sel), ]
[stats$FDRs[as.character(m.sel), ]<FDR.cutoff])[1])
# UMR LMR
UMRLMRsegments <- segmentUMRsLMRs(m=x, meth.cutoff=m.sel,
nCpG.cutoff=n.sel, PMDs=PMDsegments,
num.cores=10, myGenomeSeq=Hsapiens,minCover=5,
seqLengths=sLengths)
# Saving
# PMD
write.table(as.data.frame(PMDsegments[PMDsegments$type=="PMD"])[c(1:3)],file=paste0("res/",tag,"_PMDs.txt"),
sep="\t",quote=F,col.names=F,row.names=F)
# LMR & UMR
write.table(granges(UMRLMRsegments[UMRLMRsegments$type=="LMR",]),file=paste0("res/",tag,"_LMRs.txt"),
sep="\t",quote=F,col.names=F,row.names=F)
write.table(granges(UMRLMRsegments[UMRLMRsegments$type=="UMR",]),file=paste0("res/",tag,"_UMRs.txt"),
sep="\t",quote=F,col.names=F,row.names=F)
}
# The Frequency of MHB in ATAC-peaks
cancer_types <- list.files("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/") %>%
str_split_i(pattern="[_.]",i=2) %>% setdiff(c("OV","PACA"))
mn = list()
for ( sample in cancer_types ){
mhb_peak = fread(paste0("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/MHB_",sample,".bed.gz")) %>%
toGRanges()
mhb_tag = sample
# Read the TCGA ATAC-seq Peaks
ATAC_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/ATAC/"
if (mhb_tag == "BRCA"){
i = "BRCA_hg19_peak.bed.gz"
}else if(mhb_tag == "CESC"){
i = "CESC_hg19_peak.bed.gz"
}else if (mhb_tag == "COAD"){
i = "COAD_hg19_peak.bed.gz"
}else if (mhb_tag == "ESCA"){
i = "ESCA_hg19_peak.bed.gz"
}else if (mhb_tag =="HNSC"){
i = "HNSC_hg19_peak.bed.gz"
}else if (mhb_tag == "LIHC"){
i = "LIHC_hg19_peak.bed.gz"
}else if(mhb_tag == "NSCLC"){
i = "LUAD_hg19_peak.bed.gz"
}else if (mhb_tag == "STAD"){
i = "STAD_hg19_peak.bed.gz"
}else if(mhb_tag == "THCA"){
i = "THCA_hg19_peak.bed.gz"
}
tag = strsplit(i,"_")[[1]][1]
# ATAC-seq peaks
assign(tag,fread(paste0(ATAC_path,i)) %>% toGRanges())
# Cancer specific MHB
x=findOverlaps(get(tag),mhb_peak,type = "any", ignore.strand = TRUE)
# Overlapping
cM=length(unique(subjectHits(x)))
# total MHB
cL=length(mhb_peak)
m = round(100*cM/cL,2)
mn[[tag]] = m
}
# add LUSC
i="LUSC_hg19_peak.bed.gz"
tag = strsplit(i,"_")[[1]][1]
assign(tag,fread(paste0(ATAC_path,i)) %>% toGRanges())
x=findOverlaps(get(tag),mhb_peak,type = "any", ignore.strand = TRUE)
cM=length(unique(subjectHits(x)))
cL=length(mhb_peak)
m = round(100*cM/cL,0)
mn[[tag]] = m
MHB_Ov_ATAC=as.data.frame(do.call(rbind,mn))
MHB_Ov_ATAC$type = rownames(MHB_Ov_ATAC)
MHB_Ov_ATAC = MHB_Ov_ATAC[order(rownames(MHB_Ov_ATAC)),]
# PLOT
p<-ggplot(MHB_Ov_ATAC,aes(type,V1)) +
geom_bar(stat="identity",fill="steelblue",width=0.8)+
scale_y_continuous(expand=c(0,0),limits=c(0,80))+
labs(y="The Frequency of Cancer-type MHBs \n Overlap with TCGA ATAC peaks (%)")+
theme_bw() +
theme(panel.grid= element_blank(),panel.background=element_blank(),
panel.border=element_blank(),axis.line=element_line(color="black"),
axis.title.x=element_blank(),
axis.text.x = element_text(color="black",size=10,angle=45,vjust=1,hjust=1),
axis.text.y = element_text(color="black",size=10)) +
geom_text(aes(label=paste0(V1,"%")), vjust=-0.3, size=3.5,color="black")
print(p)
# The Frequency of MHB in genomic features.(PMD, LMR, UMR, HMR)
cancer_type = list.files("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/")
mn =list()
for (i in cancer_type ){
mhb = fread(paste0("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/",i)) %>% toGRanges()
mhb_tag = i %>% str_split_i(pattern="[_.]",i=2)
# Read the cancer type PMD HMR LMR UMR regions
MR_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/genomic_segments/"
if (mhb_tag == "BRCA"){
tag = "breast_T"
}else if(mhb_tag == "CESC"){
tag = "cervix_T"
}else if (mhb_tag == "COAD"){
tag = "colon_T"
}else if (mhb_tag == "ESCA"){
tag = "esophagus_T"
}else if (mhb_tag =="HNSC"){
tag = "head_and_neck_T"
}else if (mhb_tag == "LIHC"){
tag = "liver_T"
}else if(mhb_tag == "NSCLC"){
tag = "lung_T"
}else if (mhb_tag == "STAD"){
tag = "stomach_T"
}else if(mhb_tag == "THCA"){
tag = "thyroid_T"
}else if(mhb_tag == "OV"){
tag = "ovary_T"
}else if(mhb_tag == "PACA"){
tag = "pancreas_T"
}
# Genomic feature regions
GFR = fread(paste0(MR_path,tag,"_genomic_segments.bed.gz")) %>% toGRanges()
# Cancer-type MHB overlaps
# PMD
x1=findOverlaps(subset(GFR,V4=="PMD"),mhb,type = "any", ignore.strand = TRUE)
m1=length(unique(subjectHits(x1)))
# HMR
x2=findOverlaps(subset(GFR,V4=="HMR"),mhb,type = "any", ignore.strand = TRUE)
m2=length(unique(subjectHits(x2)))
# LMR
x3=findOverlaps(subset(GFR,V4=="LMR"),mhb,type = "any", ignore.strand = TRUE)
m3=length(unique(subjectHits(x3)))
# UMR
x4=findOverlaps(subset(GFR,V4=="UMR"),mhb,type = "any", ignore.strand = TRUE)
m4=length(unique(subjectHits(x4)))
m = data.frame(freq = c(m1,m2,m3,m4)/sum(m1+m2+m3+m4))
rownames(m) = c("PMD","HMR","LMR","UMR")
names(m) = tag
mn[[tag]] = m
}
res = do.call(cbind,mn) %>% as.data.frame() %>% rownames_to_column(var="segments") %>%
pivot_longer(!segments,names_to="variable",values_to="value")
# PLOT
p<-ggplot(res,aes(x=variable,y=value*100,fill=factor(segments,level=c("HMR","PMD","LMR","UMR"))))+
geom_bar(stat="identity",position="stack")+
scale_fill_brewer(palette = "Set1") +
ggtitle("The Genomic distribution of Cancer-type MHBs")+
labs(x="",y="Percent (%)") +
theme_bw()+
theme(panel.grid = element_blank(),panel.background=element_blank(),
axis.ticks.length=unit(5,'pt'),axis.line=element_line(color="black"),
axis.title.x=element_blank(),
axis.text.x = element_text(color="black",size=10,angle=45,vjust=1,hjust=1),
axis.text.y = element_text(color="black",size=10),
axis.ticks = element_line(color="black")) +
guides(fill=guide_legend(title=NULL))
print(p)
mhb = "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/tumor/"
background="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/MHB_11_cancers_clusters.bed.gz"
files = list.files(mhb)
# LOLA enrichment
locResults = lapply(files,function(i){
# Input
mhb=fread(paste0(mhb,i)) %>% toGRanges()
# Universe_Sets Background
universe_set=fread(background) %>% toGRanges()
# Genomic features
states=loadRegionDB("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/genomic_segments/LOLA")
# Runing LOLA
runLOLA(mhb, universe_set, states, cores=10)
})
names(locResults) <-files
# enrichment output
res = lapply(files,function(i){
mx = locResults[[i]][,c("qValue","filename")]
mx$Type = i
mx
})
data <- as.data.frame(do.call(rbind,res))
# filter FDR < 0.01
data <- data %>% mutate(qValue=ifelse(qValue<=0.01,qValue,NA),qValue=-log10(qValue)) %>%
pivot_wider(names_from=filename,values_from=qValue)
# significance ranking
rank_t = function(x) {
m = rank(array(-x),na.last="keep",ties.method = "min")
return(m)
}
# LMR
lmr <- as.data.frame(t(apply(data %>% dplyr::select(contains("LMR")),1,rank_t)))
rownames(lmr) <- files
names(lmr) <- data %>% dplyr::select(contains("LMR")) %>% names()
lmr <- lmr %>% dplyr::select(sort(names(.)))
names(lmr) <- str_split_i(names(lmr),"\\.",i=1)
rownames(lmr) <- str_split_i(rownames(lmr),"\\.",i=1)
# PMD
pmd <- as.data.frame(t(apply(data %>% dplyr::select(contains("PMD")),1,rank_t)))
rownames(pmd) <- files
names(pmd) <- data %>% dplyr::select(contains("PMD")) %>% names()
pmd <- pmd %>% dplyr::select(sort(names(.)))
names(pmd) <- str_split_i(names(pmd),"\\.",i=1)
rownames(pmd) <- str_split_i(rownames(pmd),"\\.",i=1)
# PLOT LMR, PMD
p1<- pheatmap(lmr,
main="Cancer MHBs vs. LMRs",
show_colnames=T,show_rownames=T,
cluster_rows=F,cluster_cols=F,
scale="none",angle_col="90",
color=colorRampPalette(colors = c("firebrick","white","darkblue"))(1000))
print(p1)
p2<- pheatmap(pmd,
main="Cancer MHBs vs. PMDs",
show_colnames=T,show_rownames=T,
cluster_rows=F,cluster_cols=F,
scale="none",angle_col="90",
color=colorRampPalette(colors = c("firebrick","white","darkblue"))(1000))
print(p2)
# Cancer Type ATAC-seq Peak
ATAC_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/ATAC/"
# Genomic segments
segments_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/genomic_segments"
# CpG sites
hg19_CpG="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/genomic_segments/hg19_CpG.bed"
for i in `ls ${segments_path}/*_T_MHB_genomic_segments.bed`
do
a=${i##*/}
if [[ "${a}" == *"breast"* ]];then
tag="BRCA_hg19_peak.bed"
elif [[ "${a}" == *"cervix"* ]];then
tag="CESC_hg19_peak.bed"
elif [[ "${a}" == *"colon"* ]];then
tag="COAD_hg19_peak.bed"
elif [[ "${a}" == *"esophagus"* ]];then
tag="ESCA_hg19_peak.bed"
elif [[ "${a}" == *"head_and_neck"* ]];then
tag="HNSC_hg19_peak.bed"
elif [[ "${a}" == *"lung"* ]];then
tag="LUAD_hg19_peak.bed"
elif [[ "${a}" == *"liver"* ]];then
tag="LIHC_hg19_peak.bed"
elif [[ "${a}" == *"stomach"* ]];then
tag="STAD_hg19_peak.bed"
elif [[ "${a}" == *"thyroid"* ]];then
tag="THCA_hg19_peak.bed"
fi
# MHB LMR UMR PMD HMR
less ${i} |grep MHB | bedtools intersect -a ${hg19_CpG} -b - | bedtools coverage -a - -b \
${ATAC_path}/${tag} > ${tag%%_*}_segments_ATAC_MHB_coverage.bed
less ${i} |grep LMR | bedtools intersect -a ${hg19_CpG} -b - | bedtools coverage -a - -b \
${ATAC_path}/${tag} > ${tag%%_*}_segments_ATAC_LMR_coverage.bed
less ${i} |grep UMR | bedtools intersect -a ${hg19_CpG} -b - | bedtools coverage -a - -b \
${ATAC_path}/${tag} > ${tag%%_*}_segments_ATAC_UMR_coverage.bed
less ${i} |grep PMD | bedtools intersect -a ${hg19_CpG} -b - | bedtools coverage -a - -b \
${ATAC_path}/${tag} > ${tag%%_*}_segments_ATAC_PMD_coverage.bed
less ${i} |grep HMR | bedtools intersect -a ${hg19_CpG} -b - | bedtools coverage -a - -b \
${ATAC_path}/${tag} > ${tag%%_*}_segments_ATAC_HMR_coverage.bed
done
# add LUSC
less ${segments_path}/lung_T_MHB_genomic_segments.bed |grep MHB | bedtools intersect -a ${hg19_CpG} -b - | \
bedtools coverage -a - -b ${ATAC_path}/LUSC_hg19_peak.bed >LUSC_segments_ATAC_MHB_coverage.bed
less ${segments_path}/lung_T_MHB_genomic_segments.bed |grep LMR | bedtools intersect -a ${hg19_CpG} -b - | \
bedtools coverage -a - -b ${ATAC_path}/LUSC_hg19_peak.bed > LUSC_segments_ATAC_LMR_coverage.bed
less ${segments_path}/lung_T_MHB_genomic_segments.bed |grep UMR | bedtools intersect -a ${hg19_CpG} -b - | \
bedtools coverage -a - -b ${ATAC_path}/LUSC_hg19_peak.bed > LUSC_segments_ATAC_UMR_coverage.bed
less ${segments_path}/lung_T_MHB_genomic_segments.bed |grep PMD | bedtools intersect -a ${hg19_CpG} -b - | \
bedtools coverage -a - -b ${ATAC_path}/LUSC_hg19_peak.bed > LUSC_segments_ATAC_PMD_coverage.bed
less ${segments_path}/lung_T_MHB_genomic_segments.bed |grep HMR | bedtools intersect -a ${hg19_CpG} -b - | \
bedtools coverage -a - -b ${ATAC_path}/LUSC_hg19_peak.bed > LUSC_segments_ATAC_HMR_coverage.bed
The Genomic segments overlap with TCGA ATAC-seq peaks in CpG base pairs level
path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/ATAC_segment_coverage/"
samples=list.files(path,pattern="_MHB_coverage.bed")
# Methylation
beta_path="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/Methyl_bdg/"
CpG_segment_ATAC_rate<-lapply(samples,function(x){
if(str_detect(x,"BRCA")){
beta<-paste0(beta_path,"breast_T_Merged_Count_CpG.bedGraph.gz")
}else if(str_detect(x,"CESC")){
beta<-paste0(beta_path,"cervix_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"COAD")){
beta<-paste0(beta_path,"colon_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"ESCA")){
beta<-paste0(beta_path,"esophagus_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"HNSC")){
beta<-paste0(beta_path,"head_and_neck_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"LIHC")){
beta<-paste0(beta_path,"liver_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"LUAD")){
beta<-paste0(beta_path,"lung_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"LUSC")){
beta<-paste0(beta_path,"lung_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"STAD")){
beta<-paste0(beta_path,"stomach_T_Merged_Count_CpG.bedGraph.gz")
} else if(str_detect(x,"THCA")){
beta<-paste0(beta_path,"thyroid_T_Merged_Count_CpG.bedGraph.gz")
}
# read mhb data
mhb_coverage<- fread(paste0(path,x)) %>%
dplyr::select(c(1:3)) %>% mutate(MHB="MHB")
# Genomic features were divided into MHB overlapping or not
gf <- c("LMR","UMR","PMD","HMR")
Genomic_coverage <- lapply(gf, function(m) {
tag<-str_replace_all(x,"MHB",m)
fread(paste0(path,tag)) %>% dplyr::select(c(1:3,6,7)) %>% mutate(Type=m) %>%
left_join(mhb_coverage,by=c("V1","V2","V3")) %>%
mutate(MHB = ifelse(is.na(MHB),paste0(m,"_NonMHB"),paste0(m,"_MHB")))
})
coverage <- do.call(rbind,Genomic_coverage) %>% as.data.frame()
MM <- fread(beta) %>% transmute(V1=V1,V2=V2,V3=V3,V4=ifelse(V4+V5>=10,round(V4/(V4+V5),2),NA))
# merge
left_join(coverage,MM,by=intersect(names(coverage),names(MM))) %>% drop_na()
})
names(CpG_segment_ATAC_rate) <- samples %>% str_split_i(pattern="_",i=1)
# In cancer type
Cancer_segment_ATAC<- lapply(names(CpG_segment_ATAC_rate),function(i){
CpG_segment_ATAC_rate[[i]] %>% dplyr::rename("olen"="V6","tlen"="V7","Beta"="V4") %>% arrange(Beta)
} )
names(Cancer_segment_ATAC) <- names(CpG_segment_ATAC_rate)
# All the Cancer merged
Tx=do.call(rbind,Cancer_segment_ATAC)
Tx$Tag = sapply(strsplit(rownames(Tx),"\\."),function(x) x[1])
### HMR
tHMR = Tx[Tx$Type == "HMR",] %>% arrange(Beta)
tHMR$Cluster = paste0("cluster_",rep(1:10,each=trunc(nrow(tHMR)/10)+1))[1:nrow(tHMR)]
trHMR = as.data.frame(tHMR[c("olen","tlen","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=sum))
cluster_tHMR = as.data.frame(tHMR[c("Beta","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=median))
trHMR = inner_join(trHMR,cluster_tHMR,by=c("Cluster","MHB")) %>% mutate(Freq=100*round(olen/tlen,4))
### PMD
tPMD = Tx[Tx$Type == "PMD",] %>% arrange(Beta)
tPMD$Cluster = paste0("cluster_",rep(1:10,each=trunc(nrow(tPMD)/10)+1))[1:nrow(tPMD)]
trPMD = as.data.frame(tPMD[c("olen","tlen","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=sum))
cluster_tPMD = as.data.frame(tPMD[c("Beta","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=median))
trPMD = inner_join(trPMD,cluster_tPMD,by=c("Cluster","MHB")) %>% mutate(Freq=100*round(olen/tlen,4))
### LMR
tLMR = Tx[Tx$Type == "LMR",] %>% arrange(Beta)
tLMR$Cluster = paste0("cluster_",rep(1:10,each=trunc(nrow(tLMR)/10)+1))[1:nrow(tLMR)]
trLMR = as.data.frame(tLMR[c("olen","tlen","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=sum))
cluster_tLMR = as.data.frame(tLMR[c("Beta","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=median))
trLMR = inner_join(trLMR,cluster_tLMR,by=c("Cluster","MHB")) %>% mutate(Freq=100*round(olen/tlen,4))
###UMR
tUMR = Tx[Tx$Type == "UMR",] %>% arrange(Beta)
tUMR$Cluster = paste0("cluster_",rep(1:10,each=trunc(nrow(tUMR)/10)+1))[1:nrow(tUMR)]
trUMR = as.data.frame(tUMR[c("olen","tlen","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=sum))
cluster_tUMR = as.data.frame(tUMR[c("Beta","Cluster","MHB")] %>% group_by(Cluster,MHB) %>% summarise_each(funs=median))
trUMR = inner_join(trUMR,cluster_tUMR,by=c("Cluster","MHB")) %>% mutate(Freq=100*round(olen/tlen,4))
TMR= do.call(rbind,list("UMR"=trUMR,"LMR"=trLMR,"PMD"=trPMD,"HMR"=trHMR))
TMR$Segment = factor(sapply(strsplit(rownames(TMR),"\\."),function(x) x[1]),levels=c("UMR","LMR","PMD","HMR"))
save(TMR,file="/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/Genomic_segments_ATAC_MHB.RData")
# load pre-processed data
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig2/Genomic_segments_ATAC_MHB.RData")
p<- TMR %>% mutate(Type=str_split_i(MHB,pattern="_",i=2),Segment=fct_inorder(Segment)) %>%
ggplot(aes(x=Beta,y=Freq,color=Type)) +
geom_smooth(aes(color=Type),se=F,linewidth=0.8) +
scale_y_continuous(expand=c(0,0),limits=c(0,80))+
scale_x_continuous(limits=c(0,1))+
scale_color_manual(values=c("#df8f44","#00a1d5")) +
labs(x="Mean Methylation",y="The Percentage of CpGs covered by TCGA ATAC-seq Peaks(%)",
title="Pan-Cancer") +
theme_bw()+
theme(panel.grid = element_blank(),panel.background = element_blank(),
axis.ticks.length=unit(5,'pt'), axis.line=element_line(color="black"),
plot.title = element_text(hjust=0.5,vjust=0.5),
strip.background.x = element_rect(color="black",fill="gray", size=0.8, linetype="solid")) +
geom_vline(aes(xintercept = 0.05),colour="black",linetype="dashed") +
facet_wrap(~Segment,scales="fixed")
print(p)
Enrichment of MHBs in ChIA-PET by permutation test, the ChIApet_enrichment.R script can be downloaded from here.
Rscript ChIApet_enrichment.R \
--forward MCF-7_BRCA_ChIA_PET_Forward.bed \
--reverse MCF-7_BRCA_ChIA_PET_Reverse.bed \
--genome hg19.chrom.sizes \
--testBED BRCA_MHB \
--nTimes 1000 \
--tmp ./ \
--outFile BRCA_MHB_ChIA_enrichment.txt
done
# Enrichment score ChIA-PET MCF7/MCF-10A
# The scores are estimated by the fold enrichment in Pol II ChIA-PET of between MHB and random regions.
MCF7_N=28.17;MCF7_T=40.06
MCF10A_N=60;MCF10A_T=53.62
data = data.frame(MCF7=c(MCF7_N,MCF7_T),MCF10A=c(MCF10A_N,MCF10A_T)) %>%
mutate(Type=factor(c("Normal_MHB","Tumor_MHB"),levels=c("Tumor_MHB","Normal_MHB"))) %>%
pivot_longer(!Type,names_to="variable",values_to="value")
p<-ggplot(data,aes(x=variable,y=value,fill=Type))+
geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Set1") +
labs(y="Enrichment Score")+
theme_bw()+
theme(panel.grid= element_blank(),panel.background=element_blank(),
panel.border=element_blank(),axis.line=element_line(color="black"),
axis.title.x=element_blank(),axis.text.x = element_text(color="black",size=10,
angle=45,vjust=1,hjust=1),
axis.text.y = element_text(color="black",size=10),
axis.ticks = element_line(color="black"),axis.ticks.length = unit(5, "pt"))+
geom_text(aes(label=value), vjust=-0.3, size=3.5,color="black")
print(p)