Pseudotime analysis using Monocle2

loading packages

In [2]:
library(Seurat)
library(monocle)
library(data.table)

loading data

In [10]:
load("data/MacrophageRDS/macrophage.rds")
set.seed(123)
project = project_monocle
In [11]:
project
An object of class Seurat 
26082 features across 381 samples within 1 assay 
Active assay: RNA (26082 features, 2000 variable features)
 3 layers present: counts, data, scale.data
 3 dimensional reductions calculated: pca, tsne, umap
In [12]:
DimPlot(project)
In [16]:
FeaturePlot(object = project, features = 'CD34', label = TRUE, cols=c('grey95', 'red3'), pt.size=1)
In [17]:
features <- c("CD34","MYB","KLF1","GATA1","PF4","GP9","MPO","LYZ","HLA-DRA","CCR2","CD14","MRC1")
DotPlot(project, features = features) + RotatedAxis()
Warning message:
“Scaling data with a low number of groups may produce misleading results”

building new CDS object

In [19]:
data <- as(as.matrix(project@assays$RNA@counts),'sparseMatrix')
pd <- project@meta.data
fd <- data.frame(gene_short_name = row.names(project),row.names=row.names(project))
In [20]:
head(pd)
A data.frame: 6 × 10
orig.identnCount_RNAnFeature_RNARNA_snn_res.0.5seurat_clustersSiteStage.xCellTypeUMAP1.xUMAP2.x
<fct><dbl><int><fct><fct><chr><chr><chr><dbl><dbl>
YS5_TKR180900693_HT3YJCCXY_L4_sc82YS5449236655222YS CS17GMP -2.625685-1.1097157
YS5_TKR180900693_HT3YJCCXY_L4_sc88YS5 41766254300YS CS17Monocyte 5.644469-2.6879447
YS5_TKR180900693_HT3YJCCXY_L4_sc89YS5192476535044YS CS17Myeloblast-1.116260 0.3711908
B2_TKR180900710_HT3YJCCXY_L6_sc3B2 201498600222BloodCS15YSMP -4.758543-1.2244828
B2_TKR180900710_HT3YJCCXY_L6_sc5B2 16128348044BloodCS15GMP -2.054222 0.0809534
B2_TKR180900710_HT3YJCCXY_L6_sc6B2 84374462722BloodCS15YSMP -3.903346-1.8806273
In [21]:
head(fd)
A data.frame: 6 × 1
gene_short_name
<chr>
A1BGA1BG
A1BG-AS1A1BG-AS1
A1CFA1CF
A2MA2M
A2M-AS1A2M-AS1
A2ML1A2ML1
In [22]:
pd <- new("AnnotatedDataFrame",data=pd)
fd <- new("AnnotatedDataFrame",data=fd)
cds <- newCellDataSet(data, #expression data matrix for an experiment
                      phenoData = pd, #data frame containing attributes of individual cells
                      featureData = fd, #data frame containing attributes of features (e.g. genes)
                      lowerDetectionLimit = 0.5, #the minimum expression level that consistitutes true expression
                      expressionFamily = negbinomial.size()) #the VGAM family function to be used for expression
In [23]:
cds
CellDataSet (storageMode: environment)
assayData: 26082 features, 381 samples 
  element names: exprs 
protocolData: none
phenoData
  sampleNames: YS5_TKR180900693_HT3YJCCXY_L4_sc82
    YS5_TKR180900693_HT3YJCCXY_L4_sc88 ... L3_FKDL190665156.1a_sc48
    (381 total)
  varLabels: orig.ident nCount_RNA ... Size_Factor (11 total)
  varMetadata: labelDescription
featureData
  featureNames: A1BG A1BG-AS1 ... ZZZ3 (26082 total)
  fvarLabels: gene_short_name
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:  

Estimating size factors and dispersions

In [24]:
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)
Warning message:
“`group_by_()` was deprecated in dplyr 0.7.0.
 Please use `group_by()` instead.
 See vignette('programming') for more help
 The deprecated feature was likely used in the monocle package.
  Please report the issue to the authors.”
Warning message:
“`select_()` was deprecated in dplyr 0.7.0.
 Please use `select()` instead.
 The deprecated feature was likely used in the monocle package.
  Please report the issue to the authors.”
Removing 282 outliers

In [25]:
cds <- detectGenes(cds,min_expr = 0.1)
In [26]:
head(fData(cds))
A data.frame: 6 × 2
gene_short_namenum_cells_expressed
<chr><int>
A1BGA1BG 33
A1BG-AS1A1BG-AS1 0
A1CFA1CF 7
A2MA2M 174
A2M-AS1A2M-AS1 9
A2ML1A2ML1 2
In [27]:
head(pData(cds))
A data.frame: 6 × 12
orig.identnCount_RNAnFeature_RNARNA_snn_res.0.5seurat_clustersSiteStage.xCellTypeUMAP1.xUMAP2.xSize_Factornum_genes_expressed
<fct><dbl><int><fct><fct><chr><chr><chr><dbl><dbl><dbl><int>
YS5_TKR180900693_HT3YJCCXY_L4_sc82YS5449236655222YS CS17GMP -2.625685-1.10971574.12862396552
YS5_TKR180900693_HT3YJCCXY_L4_sc88YS5 41766254300YS CS17Monocyte 5.644469-2.68794470.38384302543
YS5_TKR180900693_HT3YJCCXY_L4_sc89YS5192476535044YS CS17Myeloblast-1.116260 0.37119081.76891665350
B2_TKR180900710_HT3YJCCXY_L6_sc3B2 201498600222BloodCS15YSMP -4.758543-1.22448281.85183176002
B2_TKR180900710_HT3YJCCXY_L6_sc5B2 16128348044BloodCS15GMP -2.054222 0.08095340.14822153480
B2_TKR180900710_HT3YJCCXY_L6_sc6B2 84374462722BloodCS15YSMP -3.903346-1.88062730.77542434627

Constructing Single Cell Trajectories

Step 1: choosing genes that define progress

In [28]:
project <- FindVariableFeatures(project,selection.method = "vst",nfeatures = 2000)
Warning message:
“The following arguments are not used: nselect”
In [29]:
gene_var <- VariableFeatures(project)
In [30]:
head(gene_var)
  1. 'PRG2'
  2. 'PRTN3'
  3. 'AZU1'
  4. 'S100A8'
  5. 'ELANE'
  6. 'DEFA4'
In [31]:
write.table(gene_var,file="data/MacrophageRDS/monocle_DEG.xls",col.names = T,row.names = F,sep="\t",quote = F)
In [32]:
ordergene <- as.vector(unique(gene_var))
cds <- setOrderingFilter(cds,ordergene)
In [33]:
plot_ordering_genes(cds)
Warning message:
“Transformation introduced infinite values in continuous y-axis”

Step 2: reducing the dimensionality of the data

In [35]:
cds <- reduceDimension(cds, max_components = 2, method = 'DDRTree')
Found more than one class "dist" in cache; using the first, from namespace 'spam'

Also defined by ‘BiocGenerics’

Found more than one class "dist" in cache; using the first, from namespace 'spam'

Also defined by ‘BiocGenerics’

step 3: order cells along the trajectory

In [36]:
cds = orderCells(cds)
Warning message in graph.dfs(dp_mst, root = root_cell, neimode = "all", unreachable = FALSE, :
“Argument `neimode' is deprecated; use `mode' instead”
Warning message in graph.dfs(dp_mst, root = root_cell, neimode = "all", unreachable = FALSE, :
“Argument `neimode' is deprecated; use `mode' instead”
In [37]:
#choose the root
#cds <- orderCells(cds,root_state=?)

Visualization

By Pseudotime

In [38]:
plot_cell_trajectory(cds,color_by="Pseudotime", size=1,show_backbone=TRUE)

By cell type

In [39]:
plot_cell_trajectory(cds,color_by="CellType", size=1,show_backbone=TRUE)

By cell State

In [40]:
plot_cell_trajectory(cds, color_by = "State",size=1,show_backbone=TRUE)
In [41]:
plot_cell_trajectory(cds, color_by = "CellType") + facet_wrap("~State", nrow = 1)

Displaying specific genes

In [42]:
keygenes <- head(ordergene,5)
plot_genes_in_pseudotime(cds[keygenes],color_by = "State")
In [43]:
plot_genes_in_pseudotime(cds[keygenes],color_by = "CellType")
In [44]:
plot_genes_in_pseudotime(cds[keygenes],color_by = "Pseudotime")
In [45]:
plot_genes_jitter(cds[keygenes,],color_by = "State",grouping = "State")
In [46]:
plot_genes_jitter(cds[keygenes,],color_by = "CellType",grouping = "CellType")
In [47]:
plot_genes_jitter(cds[keygenes,],color_by = "Pseudotime",grouping = "Pseudotime")

Finding trajectory-associated genes

In [48]:
#fullModelFormulaStr = "~sm.ns(Pseudotime)"  "Pseudotime","CellType","Cluster"
In [49]:
Deg_Pseudotime <- differentialGeneTest(cds[ordergene,],cores = 1,fullModelFormulaStr = "~sm.ns(Pseudotime)")
Deg_Pseudotime <- Deg_Pseudotime[order(Deg_Pseudotime$qval),]
In [50]:
head(Deg_Pseudotime)
A data.frame: 6 × 7
statusfamilypvalqvalgene_short_namenum_cells_expresseduse_for_ordering
<chr><chr><dbl><dbl><chr><int><lgl>
S100A8OKnegbinomial.size2.136287e-1924.272574e-189S100A8 269TRUE
S100A12OKnegbinomial.size2.553230e-1542.553230e-151S100A12111TRUE
S100A9OKnegbinomial.size8.338810e-1535.559207e-150S100A9 333TRUE
MNDAOKnegbinomial.size1.198348e-1395.991740e-137MNDA 264TRUE
FCN1OKnegbinomial.size4.838441e-1311.935377e-128FCN1 206TRUE
ANGPT1OKnegbinomial.size1.016048e-1253.386827e-123ANGPT1 197TRUE
In [51]:
write.table(Deg_Pseudotime,file="data/MacrophageRDS/Diff_pseudotime_heatmap.xls",sep="\t",row.names = F)
In [54]:
# show single gene
pData(cds)$S100A8 =log2(exprs(cds)['S100A8',]+1)
plot_cell_trajectory(cds,color_by = "S100A8")
In [55]:
# heatmap
top50 = (Deg_Pseudotime$gene_short_name)[1:50]
plot_pseudotime_heatmap(cds[top50,],num_clusters = 4,cores = 1,show_rownames = T)
In [56]:
plot_genes_in_pseudotime(cds[top50[1:4],],nrow= 2,ncol = 2)

Branched expression analysis modeling (BEAM)

In [57]:
BEAM_res <- BEAM(cds, branch_point = 1, cores = 8)
In [58]:
BEAM_res <- BEAM_res[order(BEAM_res$qval),]
In [59]:
head(BEAM_res)
A data.frame: 6 × 7
statusfamilypvalqvalgene_short_namenum_cells_expresseduse_for_ordering
<chr><chr><dbl><dbl><chr><int><lgl>
PRTN3OKnegbinomial.size4.818417e-1041.255005e-99PRTN3 187TRUE
AZU1OKnegbinomial.size4.626611e-1016.025236e-97AZU1 191TRUE
S100A8OKnegbinomial.size 2.332694e-922.025245e-88S100A8269TRUE
MPOOKnegbinomial.size 4.910403e-853.197409e-81MPO 305TRUE
MS4A3OKnegbinomial.size 2.015427e-751.049876e-71MS4A3 160TRUE
LYZOKnegbinomial.size 8.302534e-743.604130e-70LYZ 361TRUE
In [60]:
plot_genes_branched_heatmap(cds[row.names(BEAM_res[1:50,]),],branch_point = 1,
                            num_clusters =4,cores = 1,use_gene_short_name = T,
                            show_rownames = T, return_heatmap = F)
In [ ]:
# jupyter nbconvert --to html Monocle2-Macrophage.ipynb --template classic