Pseudotime analysis using Monocle2

Loading packages¶

library(Seurat)
library(monocle)
library(data.table)

Loading data¶

# Restore the saved objects from disk. The path is read with load(), so the
# file is treated as a save() image regardless of its ".rds" suffix; the
# assignment below presumably relies on it providing `project_monocle`.
load(file = "data/MacrophageRDS/macrophage.rds")

# Seed the random number generator for the rest of the session.
set.seed(seed = 123)

# Work on the object under the shorter name `project`.
project <- project_monocle

# Show the object's summary.
project

An object of class Seurat 
26082 features across 381 samples within 1 assay 
Active assay: RNA (26082 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 3 dimensional reductions calculated: pca, tsne, umap

# Plot the cells using DimPlot's defaults.
DimPlot(object = project)

# CD34 expression per cell, coloured from light grey ("grey95") to red ("red3").
FeaturePlot(
  object = project,
  features = "CD34",
  label = TRUE,
  cols = c("grey95", "red3"),
  pt.size = 1
)

# Genes shown in the dot plot, in display order.
features <- c(
  "CD34", "MYB", "KLF1", "GATA1", "PF4", "GP9",
  "MPO", "LYZ", "HLA-DRA", "CCR2", "CD14", "MRC1"
)
DotPlot(object = project, features = features) + RotatedAxis()

Warning message:
“Scaling data with a low number of groups may produce misleading results”

Building a new CDS object¶

# Inputs for the monocle CellDataSet, all taken from the Seurat object.

# Raw counts of the RNA assay. The accessor GetAssayData() is used instead of
# reaching into the assay's `@counts` slot, and the counts are no longer
# expanded to a dense matrix before being converted back to a sparse one.
data <- as(
  GetAssayData(project, assay = "RNA", layer = "counts"),
  "sparseMatrix"
)

# Cell-level metadata table of the Seurat object.
pd <- project@meta.data

# Gene annotation table: one row per feature, with the feature name used both
# as the row name and as the `gene_short_name` column.
fd <- data.frame(
  gene_short_name = row.names(project),
  row.names = row.names(project)
)

head(pd)

head(fd)

# Wrap both annotation tables in AnnotatedDataFrame objects before handing
# them to newCellDataSet() as phenoData / featureData.
pd <- new("AnnotatedDataFrame", data = pd)
fd <- new("AnnotatedDataFrame", data = fd)

# Assemble the CellDataSet:
#   data                - expression data matrix for the experiment
#   phenoData           - attributes of individual cells
#   featureData         - attributes of features (e.g. genes)
#   lowerDetectionLimit - minimum expression level that constitutes true
#                         expression
#   expressionFamily    - VGAM family function to be used for expression
cds <- newCellDataSet(
  data,
  phenoData = pd,
  featureData = fd,
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)

# Print a summary of the new object.
cds

CellDataSet (storageMode: environment)
assayData: 26082 features, 381 samples 
  element names: exprs 
protocolData: none
phenoData
  sampleNames: YS5_TKR180900693_HT3YJCCXY_L4_sc82
    YS5_TKR180900693_HT3YJCCXY_L4_sc88 ... L3_FKDL190665156.1a_sc48
    (381 total)
  varLabels: orig.ident nCount_RNA ... Size_Factor (11 total)
  varMetadata: labelDescription
featureData
  featureNames: A1BG A1BG-AS1 ... ZZZ3 (26082 total)
  fvarLabels: gene_short_name
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:

Estimating size factors and dispersions¶

# Size factors are estimated first, then dispersions; each call returns the
# updated CellDataSet, which replaces `cds`.
cds <- estimateSizeFactors(object = cds)
cds <- estimateDispersions(object = cds)

Warning message:
“`group_by_()` was deprecated in dplyr 0.7.0.
ℹ Please use `group_by()` instead.
ℹ See vignette('programming') for more help
ℹ The deprecated feature was likely used in the monocle package.
  Please report the issue to the authors.”
Warning message:
“`select_()` was deprecated in dplyr 0.7.0.
ℹ Please use `select()` instead.
ℹ The deprecated feature was likely used in the monocle package.
  Please report the issue to the authors.”
Removing 282 outliers

# Run gene detection with an expression threshold of 0.1 and keep the
# updated CellDataSet.
cds <- detectGenes(cds, min_expr = 0.1)

# Preview the first rows of the feature annotation ...
head(x = fData(cds))

# ... and of the cell annotation.
head(x = pData(cds))

Constructing Single Cell Trajectories¶

Step 1: choosing genes that define progress¶

# Select 2000 variable features on the Seurat object with the "vst" method.
project <- FindVariableFeatures(
  object = project,
  selection.method = "vst",
  nfeatures = 2000
)

Warning message:
“The following arguments are not used: nselect”

# Variable features previously selected on the Seurat object.
gene_var <- VariableFeatures(project)

head(gene_var)

# Save the gene list as tab-separated text (the file keeps its ".xls" name).
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
write.table(
  gene_var,
  file = "data/MacrophageRDS/monocle_DEG.xls",
  col.names = TRUE,
  row.names = FALSE,
  sep = "\t",
  quote = FALSE
)

# De-duplicated gene list, registered on the CellDataSet as the ordering genes.
ordergene <- as.vector(unique(gene_var))
cds <- setOrderingFilter(cds, ordergene)

plot_ordering_genes(cds)

Warning message:
“Transformation introduced infinite values in continuous y-axis”

Step 2: reducing the dimensionality of the data¶

# Reduce the data to two components with DDRTree. The algorithm is chosen via
# `reduction_method`; the previous `method =` spelling is not a formal argument
# of reduceDimension() and was silently absorbed by `...`.
cds <- reduceDimension(
  cds,
  max_components = 2,
  reduction_method = "DDRTree"
)

Found more than one class "dist" in cache; using the first, from namespace 'spam'

Also defined by ‘BiocGenerics’

Found more than one class "dist" in cache; using the first, from namespace 'spam'

Also defined by ‘BiocGenerics’

Step 3: ordering cells along the trajectory¶

# Order the cells along the trajectory; no root state is supplied here, so
# orderCells() uses its default root.
cds <- orderCells(cds)

Warning message in graph.dfs(dp_mst, root = root_cell, neimode = "all", unreachable = FALSE, :
“Argument `neimode' is deprecated; use `mode' instead”
Warning message in graph.dfs(dp_mst, root = root_cell, neimode = "all", unreachable = FALSE, :
“Argument `neimode' is deprecated; use `mode' instead”

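# Optional: choose the root explicitly. After inspecting the "State" plot,
# replace <state> with the State the trajectory should start from and re-run:
# cds <- orderCells(cds, root_state = <state>)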

Visualization¶

By Pseudotime¶

# Trajectory coloured by pseudotime. Point size is set through `cell_size`;
# the previous `size = 1` is not a formal argument of plot_cell_trajectory()
# and did not change the point size.
plot_cell_trajectory(
  cds,
  color_by = "Pseudotime",
  cell_size = 1,
  show_backbone = TRUE
)

By cell type¶

# Trajectory coloured by the CellType metadata column. Point size is set
# through `cell_size`; the previous `size = 1` is not a formal argument of
# plot_cell_trajectory() and did not change the point size.
plot_cell_trajectory(
  cds,
  color_by = "CellType",
  cell_size = 1,
  show_backbone = TRUE
)

By cell State¶

# Trajectory coloured by State. Point size is set through `cell_size`; the
# previous `size = 1` is not a formal argument of plot_cell_trajectory() and
# did not change the point size.
plot_cell_trajectory(
  cds,
  color_by = "State",
  cell_size = 1,
  show_backbone = TRUE
)

# Same trajectory coloured by CellType, one panel per State in a single row.
plot_cell_trajectory(cds, color_by = "CellType") +
  facet_wrap("~State", nrow = 1)

Displaying specific genes¶

# First five ordering genes, used as example genes in the plots below.
keygenes <- head(ordergene, 5)

# Expression of the example genes along pseudotime. The CellDataSet is
# subset with an explicit row index (`cds[keygenes, ]`) in every call, so the
# pseudotime plots and the jitter plots select genes the same way.
plot_genes_in_pseudotime(cds[keygenes, ], color_by = "State")

plot_genes_in_pseudotime(cds[keygenes, ], color_by = "CellType")

plot_genes_in_pseudotime(cds[keygenes, ], color_by = "Pseudotime")

# Jitter plots of the same genes, grouped and coloured by the same variable.
plot_genes_jitter(cds[keygenes, ], color_by = "State", grouping = "State")

plot_genes_jitter(cds[keygenes, ], color_by = "CellType", grouping = "CellType")

plot_genes_jitter(
  cds[keygenes, ],
  color_by = "Pseudotime",
  grouping = "Pseudotime"
)

Finding trajectory-associated genes¶

# The full model used here is "~sm.ns(Pseudotime)". Other variables noted by
# the author for fullModelFormulaStr: "Pseudotime", "CellType", "Cluster".

# Test the ordering genes for differential expression along pseudotime
# (single core), then sort the result by q-value, smallest first.
Deg_Pseudotime <- differentialGeneTest(
  cds[ordergene, ],
  cores = 1,
  fullModelFormulaStr = "~sm.ns(Pseudotime)"
)
Deg_Pseudotime <- Deg_Pseudotime[order(Deg_Pseudotime$qval), ]

head(Deg_Pseudotime)

# Save the sorted table as tab-separated text (the file keeps its ".xls"
# name). FALSE is spelled out because F is an ordinary, reassignable variable.
write.table(
  Deg_Pseudotime,
  file = "data/MacrophageRDS/Diff_pseudotime_heatmap.xls",
  sep = "\t",
  row.names = FALSE
)

# Show a single gene: store log2(expression + 1) of S100A8 as a cell-level
# column and colour the trajectory by it.
pData(cds)$S100A8 <- log2(exprs(cds)["S100A8", ] + 1)
plot_cell_trajectory(cds, color_by = "S100A8")

# Heatmap of the top pseudotime-associated genes (Deg_Pseudotime is already
# sorted by q-value). head() returns at most 50 names, so a shorter result
# table no longer produces NA entries the way `[1:50]` did; as.character()
# guarantees the CellDataSet is indexed by gene name even if the column is a
# factor.
top50 <- head(as.character(Deg_Pseudotime$gene_short_name), 50)
plot_pseudotime_heatmap(
  cds[top50, ],
  num_clusters = 4,
  cores = 1,
  show_rownames = TRUE
)

# Pseudotime curves for the (up to) four highest-ranked genes in a 2 x 2 grid.
plot_genes_in_pseudotime(cds[head(top50, 4), ], nrow = 2, ncol = 2)

Branched expression analysis modeling (BEAM)¶

# Run BEAM for branch point 1 on the whole CellDataSet.
# NOTE(review): `cores = 8` is hard-coded while every other step uses one
# core — confirm the machine has 8 cores available.
BEAM_res <- BEAM(cds, branch_point = 1, cores = 8)

# Sort by q-value, smallest first.
BEAM_res <- BEAM_res[order(BEAM_res$qval), ]

head(BEAM_res)

# Branched heatmap of the highest-ranked genes. head() returns at most 50 row
# names, so a result with fewer than 50 rows no longer yields NA names the way
# `BEAM_res[1:50, ]` did. TRUE/FALSE replace the reassignable T/F.
plot_genes_branched_heatmap(
  cds[head(row.names(BEAM_res), 50), ],
  branch_point = 1,
  num_clusters = 4,
  cores = 1,
  use_gene_short_name = TRUE,
  show_rownames = TRUE,
  return_heatmap = FALSE
)

# jupyter nbconvert --to html Monocle2-Macrophage.ipynb --template classic

	orig.ident	nCount_RNA	nFeature_RNA	RNA_snn_res.0.5	seurat_clusters	Site	Stage.x	CellType	UMAP1.x	UMAP2.x
	<fct>	<dbl>	<int>	<fct>	<fct>	<chr>	<chr>	<chr>	<dbl>	<dbl>
YS5_TKR180900693_HT3YJCCXY_L4_sc82	YS5	449236	6552	2	2	YS	CS17	GMP	-2.625685	-1.1097157
YS5_TKR180900693_HT3YJCCXY_L4_sc88	YS5	41766	2543	0	0	YS	CS17	Monocyte	5.644469	-2.6879447
YS5_TKR180900693_HT3YJCCXY_L4_sc89	YS5	192476	5350	4	4	YS	CS17	Myeloblast	-1.116260	0.3711908
B2_TKR180900710_HT3YJCCXY_L6_sc3	B2	201498	6002	2	2	Blood	CS15	YSMP	-4.758543	-1.2244828
B2_TKR180900710_HT3YJCCXY_L6_sc5	B2	16128	3480	4	4	Blood	CS15	GMP	-2.054222	0.0809534
B2_TKR180900710_HT3YJCCXY_L6_sc6	B2	84374	4627	2	2	Blood	CS15	YSMP	-3.903346	-1.8806273

	gene_short_name
	<chr>
A1BG	A1BG
A1BG-AS1	A1BG-AS1
A1CF	A1CF
A2M	A2M
A2M-AS1	A2M-AS1
A2ML1	A2ML1

	gene_short_name	num_cells_expressed
	<chr>	<int>
A1BG	A1BG	33
A1BG-AS1	A1BG-AS1	0
A1CF	A1CF	7
A2M	A2M	174
A2M-AS1	A2M-AS1	9
A2ML1	A2ML1	2

	orig.ident	nCount_RNA	nFeature_RNA	RNA_snn_res.0.5	seurat_clusters	Site	Stage.x	CellType	UMAP1.x	UMAP2.x	Size_Factor	num_genes_expressed
	<fct>	<dbl>	<int>	<fct>	<fct>	<chr>	<chr>	<chr>	<dbl>	<dbl>	<dbl>	<int>
YS5_TKR180900693_HT3YJCCXY_L4_sc82	YS5	449236	6552	2	2	YS	CS17	GMP	-2.625685	-1.1097157	4.1286239	6552
YS5_TKR180900693_HT3YJCCXY_L4_sc88	YS5	41766	2543	0	0	YS	CS17	Monocyte	5.644469	-2.6879447	0.3838430	2543
YS5_TKR180900693_HT3YJCCXY_L4_sc89	YS5	192476	5350	4	4	YS	CS17	Myeloblast	-1.116260	0.3711908	1.7689166	5350
B2_TKR180900710_HT3YJCCXY_L6_sc3	B2	201498	6002	2	2	Blood	CS15	YSMP	-4.758543	-1.2244828	1.8518317	6002
B2_TKR180900710_HT3YJCCXY_L6_sc5	B2	16128	3480	4	4	Blood	CS15	GMP	-2.054222	0.0809534	0.1482215	3480
B2_TKR180900710_HT3YJCCXY_L6_sc6	B2	84374	4627	2	2	Blood	CS15	YSMP	-3.903346	-1.8806273	0.7754243	4627

	status	family	pval	qval	gene_short_name	num_cells_expressed	use_for_ordering
	<chr>	<chr>	<dbl>	<dbl>	<chr>	<int>	<lgl>
S100A8	OK	negbinomial.size	2.136287e-192	4.272574e-189	S100A8	269	TRUE
S100A12	OK	negbinomial.size	2.553230e-154	2.553230e-151	S100A12	111	TRUE
S100A9	OK	negbinomial.size	8.338810e-153	5.559207e-150	S100A9	333	TRUE
MNDA	OK	negbinomial.size	1.198348e-139	5.991740e-137	MNDA	264	TRUE
FCN1	OK	negbinomial.size	4.838441e-131	1.935377e-128	FCN1	206	TRUE
ANGPT1	OK	negbinomial.size	1.016048e-125	3.386827e-123	ANGPT1	197	TRUE