These are the widgets shown in my ABACBS 2022 poster demonstrating
the langevitour
package.
→ langevitour package home page
Points are cells. Axes are varimax rotated principal components.
library(langevitour)
library(tidyverse)
library(Seurat)
library(scDblFinder)
set.seed(1)
# Download data from Gene Expression Omnibus (GEO) ====
dir.create("data", showWarnings=FALSE)
# Download from GEO can be slow.
options(timeout=3600)
if (!file.exists("data/GSE96583_RAW.tar") ||
file.info("data/GSE96583_RAW.tar")$size != 76195840) {
download.file(
"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE96583&format=file",
"data/GSE96583_RAW.tar")
}
if (!file.exists("data/GSE96583_batch2.total.tsne.df.tsv.gz") ||
file.info("data/GSE96583_batch2.total.tsne.df.tsv.gz")$size != 756342) {
download.file(
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE96nnn/GSE96583/suppl/GSE96583_batch2.total.tsne.df.tsv.gz",
"data/GSE96583_batch2.total.tsne.df.tsv.gz")
}
if (!file.exists("data/GSE96583_batch2.genes.tsv.gz") ||
file.info("data/GSE96583_batch2.genes.tsv.gz")$size != 277054) {
download.file(
"https://ftp.ncbi.nlm.nih.gov/geo/series/GSE96nnn/GSE96583/suppl/GSE96583_batch2.genes.tsv.gz",
"data/GSE96583_batch2.genes.tsv.gz")
}
untar("data/GSE96583_RAW.tar", exdir="data")
# Load the data ====
cells <- read.table("data/GSE96583_batch2.total.tsne.df.tsv.gz", sep="\t")
counts <- cbind(
ReadMtx("data/GSM2560248_2.1.mtx.gz", "data/GSM2560248_barcodes.tsv.gz", "data/GSE96583_batch2.genes.tsv.gz"),
ReadMtx("data/GSM2560249_2.2.mtx.gz", "data/GSM2560249_barcodes.tsv.gz", "data/GSE96583_batch2.genes.tsv.gz"))
colnames(counts) <- rownames(cells)
# Seurat processing ====
obj <- CreateSeuratObject(counts, meta.data=cells)
obj$percentRibo <- PercentageFeatureSet(obj, pattern = "^(RPL|RPS)")
obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000)
obj <- ScaleData(obj)
obj <- RunPCA(obj, "RNA")
obj <- RunUMAP(obj, dims=1:10)
## Varimax rotation ====
nDim <- 10
loadings <- obj$pca@feature.loadings[,1:nDim]
scores <- obj$pca@cell.embeddings[,1:nDim]
# Find varimax rotation (without Kaiser normalization)
rotation <- varimax(loadings, normalize=FALSE)$rotmat
# Tidy the rotation: flip components to have positive skew
flips <- ifelse(colSums((loadings %*% rotation) ** 3) < 0, -1, 1)
rotation <- sweep(rotation, 2, flips, '*')
# Tidy the rotation: order by variance of scores
rotation <- rotation[, rev(order(colSums((scores %*% rotation)**2)))]
colnames(rotation) <- paste0("C",1:nDim)
vmLoadings <- loadings %*% rotation
vmScores <- scores %*% rotation
## Denoising ====
denoisedScores <- knnDenoise(vmScores, k=30, steps=2)
## Doublet detection ====
#
# Some doublets have already been identified based on genetic differences between individuals.
#
# Attempt to identify further doublets where the two cells came from the same indiviudal.
#
# Detection is done on the unstimulated and stimulated cells independently since there will be no cells that are a mixture of unstimulated and stimulated.
#
labelDoublets <- function(obj) {
reco <- recoverDoublets(
obj$pca@cell.embeddings,
doublets=obj$multiplets == "doublet",
samples=table(obj$ind), # Relative proportions of cells from each sample, here individual donors
transposed=TRUE)
case_when(
reco$known ~ "known",
reco$predicted ~ "predicted",
TRUE ~ "singlet")
}
doubletLabel <-
split(seq_len(ncol(obj)), obj$stim) |>
map(~labelDoublets(obj[,.])) |>
unsplit(obj$stim)
## Simplified labels ====
typeLabel <- case_when(
doubletLabel != "singlet" ~ "Doublet",
obj$cell == "B cells" ~ "B cell",
obj$cell == "CD4 T cells" ~ "CD4 T cell",
obj$cell == "CD8 T cells" ~ "CD8 T cell",
obj$cell == "NK cells" ~ "NK cell",
obj$cell == "CD14+ Monocytes" ~ "Monocyte",
obj$cell == "FCGR3A+ Monocytes" ~ "Monocyte",
TRUE ~ "Other"
) |> factor(c(
"Doublet",
"CD4 T cell",
"CD8 T cell",
"NK cell",
"B cell",
"Monocyte",
"Other"
))
## Write out results ====
#
# Only 5,000 cells are kept out of 29,065, so langevitour can run at a very smooth framerate.
#
keep <- sample.int(ncol(obj), 5000)
cellsOut <- tibble(
barcode = gsub("-1","",colnames(obj)[keep]),
nCount = obj$nCount_RNA[keep],
nFeature = obj$nFeature_RNA[keep],
percentRibo = obj$percentRibo[keep],
individual = as.integer(factor(obj$ind))[keep],
stim = obj$stim[keep],
type = typeLabel[keep],
doublet = doubletLabel[keep],
umapX = as.vector(obj$umap@cell.embeddings[keep,1]),
umapY = as.vector(obj$umap@cell.embeddings[keep,2]))
saveRDS(file="out.rds", object=list(
cells = cellsOut,
vmLoadings = vmLoadings,
vmScores = vmScores[keep,],
denoisedScores = denoisedScores[keep,],
stdev = Stdev(obj)))
## 2022-11-19 This code requires the development version of langevitour
# remotes::install_github("pfh/langevitour")
library(tidyverse)
library(crosstalk)
library(DT)
library(langevitour)
out <- readRDS("out.rds")
cells <- out$cells
shared <- SharedData$new(cells)
stimLabel <- factor(ifelse(cells$stim=="stim","S","U"), c("U","S"))
crossLabel <- fct_cross(stimLabel, cells$type, sep=" ")
h <- c(0,2,1,3,4,5,0)/6*360
c <- c(1,1,1,1,1,1,0)*100
colsCrossLabel <- hcl(h=rep(h,each=2), c=rep(c,each=2), l=rep(c(40,70),7), fixup=TRUE)
colsTypeLabel <- hcl(h=h, c=c, l=50, fixup=TRUE)
langevitour(
out$vmScores,
crossLabel,
levelColors=colsCrossLabel,
state=list(labelInactive=list("U Doublet","S Doublet")),
link=shared,
width=900, height=700)
# A table widget, using the SharedData object
datatable(
shared,
rownames=FALSE, width="100%",
class='compact cell-border hover', extensions='Buttons',
options=list(dom='Bfrtip',buttons=c('copy','csv','excel'))) |>
formatRound("percentRibo", 0) |>
formatRound(c("umapX","umapY"), 2)
If you select some cells in the langevitour
widget, only
selected cells will be shown in the table. You can also use the table to
select cells, which will then be highlighted in the
langevitour
widget. It is also possible to have linked
brushing with plotly widgets, but there is currently a performance
problem, so I have made a separate demonstration of this.
Points are genes. Axes are samples. Extra axes have been defined for average gene expression, the difference between upper and lower molars, and time.
samples <- read_csv("teeth.csv")
counts <- read_csv("teeth-read-counts.csv") |>
column_to_rownames("gene") |>
as.matrix()
colnames(counts) <- paste0(
rep(c("L","U"),8),
rep(1:8,each=2))
library(edgeR)
keep <- rowMeans(counts) >= 50
dge <- DGEList(counts[keep,]) |> calcNormFactors()
lcpm <- cpm(dge, log=TRUE)
rnaseqShared = SharedData$new(tibble(
Gene=rownames(lcpm)))
extraAxes <- cbind(
Average = rep(1/16,16),
"Upper vs Lower" = rep(c(-1,1)/8,8),
Time = samples$day-mean(samples$day))
axisColors <- hcl(l=rep(c(40,60),8), c=100, h=rep(seq(0,180,length.out=8),each=2), fixup=TRUE)
langevitour(
lcpm,
name=rownames(lcpm),
extraAxes=extraAxes,
axisColors=axisColors,
scale=15, pointSize=2,
link=rnaseqShared,
state=list(heat=-2,guide=2,guideType="pca",labelInactive=list("Average")),
width=900, height=700)
datatable(
rnaseqShared,
rownames=FALSE, width="50%",
class='compact cell-border hover', extensions='Buttons',
options=list(dom='Bfrtip',buttons=c('copy','csv','excel')))