Developer guide: Extending a data wrapper class
Source:vignettes/dev_wrapper_subclass.Rmd
dev_wrapper_subclass.Rmd
The existing data object wrapper classes are not yet very flexible, but they can be extended to support custom use cases.
Here, we extend the SeuratWrapper
class so that it supports multiple cell sets, to deal with the fact that the Seurat FindClusters
overwrites its results each time.
library(vitessceR)
#' Subclass of SeuratWrapper to deal with cell sets.
#' @title MyCustomSeuratWrapper Class
#' @docType class
#' @description
#' Subclass of SeuratWrapper.
MyCustomSeuratWrapper <- R6::R6Class("MyCustomSeuratWrapper",
inherit = SeuratWrapper,
public = list(
#' @field cell_sets The cell sets.
#' @keywords internal
cell_sets = NULL,
#' @description
#' Create a wrapper around a Seurat object.
#' @param obj The object to wrap.
#' @param cell_sets A list of cell sets.
#' @param ... Parameters inherited from `SeuratWrapper`.
#' @return A new `SeuratWrapper` object.
initialize = function(obj, cell_sets, ...) {
super$initialize(obj, ...)
self$cell_sets <- cell_sets
},
#' @description
#' Create a list representing the cluster assignments in the cell set list.
#' @return A list that can be converted to JSON.
#' @keywords internal
create_cell_sets_list = function() {
obj <- self$obj
cells <- Seurat::Idents(obj)
# https://s3.amazonaws.com/vitessce-data/0.0.31/master_release/linnarsson/linnarsson.cell-sets.json
cell_sets_list <- list(
datatype = jsonlite::unbox("cell"),
version = jsonlite::unbox("0.1.3"),
tree = list()
)
if(!is.na(self$cell_sets)) {
for(cell_set_name in names(self$cell_sets)) {
cell_set <- self$cell_sets[[cell_set_name]]
cell_set_meta_node <- list(
name = jsonlite::unbox(cell_set_name),
children = list()
)
cell_set_annotations <- cell_set
cell_set_annotation_scores <- NA
cluster_names <- sort(unique(cell_set_annotations))
for(cluster_name in cluster_names) {
cells_in_cluster <- names(cells[cell_set_annotations == cluster_name])
# TODO: find out if there is a way to return NULL
make_null_tuples <- function(x) { list(jsonlite::unbox(x), jsonlite::unbox(NA)) }
cells_in_cluster_with_score <- purrr::map(cells_in_cluster, make_null_tuples)
cluster_node <- list(
name = jsonlite::unbox(cluster_name),
set = cells_in_cluster_with_score
)
cell_set_meta_node$children <- append(cell_set_meta_node$children, list(cluster_node))
}
cell_sets_list$tree <- append(cell_sets_list$tree, list(cell_set_meta_node))
}
}
cell_sets_list
}
)
)
Next, we can preprocess the dataset.
library(Seurat)
# Download example dataset
url <- "https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz"
dir.create("seurat")
download.file(url, destfile = "seurat/filtered_gene_bc_matrices.tar.gz")
untar("seurat/filtered_gene_bc_matrices.tar.gz", exdir = "seurat")
# Load example dataset
pbmc.data <- Read10X(data.dir = "seurat/filtered_gene_bc_matrices/hg19")
# Process example dataset (run PCA and cluster)
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
pbmc <- FindNeighbors(pbmc, dims = 1:10)
We can run FindClusters
with different algorithms and save the clustering results to new variables between each algorithm, since the results in the Seurat object will be overwritten each time.
louvain_clusters <- slot(FindClusters(pbmc, algorithm = 1, resolution = 0.3), "meta.data")$seurat_clusters
slm_clusters <- slot(FindClusters(pbmc, algorithm = 3), "meta.data")$seurat_clusters
cell_sets_list <- list(louvain = louvain_clusters, SLM = slm_clusters)
With this list of cell sets, we can now create an instance of our custom SeuratWrapper subclass:
my_wrapped_object <- MyCustomSeuratWrapper$new(pbmc, cell_sets = cell_sets_list, out_dir = "out")
Now, we create the Vitessce config as usual:
# Create Vitessce view config
vc <- VitessceConfig$new(schema_version = "1.0.16", name = "My config")
dataset <- vc$add_dataset("My dataset")$add_object(my_wrapped_object)
scatterplot <- vc$add_view(dataset, Component$SCATTERPLOT, mapping = "pca")
status <- vc$add_view(dataset, Component$STATUS)
desc <- vc$add_view(dataset, Component$DESCRIPTION)
desc <- desc$set_props(description = "Visualization of a Seurat object containing the PBMC 3K dataset.")
cell_sets <- vc$add_view(dataset, Component$OBS_SETS)
vc$layout(hconcat(
vconcat(scatterplot),
vconcat(cell_sets, vconcat(desc, status))
))
# Render the Vitessce widget
vc$widget(theme = "light")