@@ -16,7 +16,7 @@ knitr::opts_chunk$set(
1616)
1717```
1818
19- ``` {r setup , echo=FALSE, message=FALSE}
19+ ``` {r load_packages , echo=FALSE, message=FALSE}
2020library(SuperCellCyto)
2121library(parallel)
2222library(BiocParallel)
@@ -97,7 +97,7 @@ with each sample containing 10,000 cells.
9797Hence in total, we will have a toy dataset containing 15 markers and
989830,000 cells.
9999
100- ``` {r}
100+ ``` {r simulate_data }
101101n_markers <- 15
102102n_samples <- 3
103103dat <- simCytoData(nmarkers = n_markers, ncells = rep(10000, n_samples))
@@ -107,7 +107,7 @@ head(dat)
107107For our toy dataset, we will transform our data using arcsinh transformation.
108108We will use the base R ` asinh ` function to do this:
109109
110- ``` {r}
110+ ``` {r arcsinh_transformation }
111111# Specify which columns are the markers to transform
112112marker_cols <- paste0("Marker_", seq_len(n_markers))
113113# The co-factor for arc-sinh
@@ -130,7 +130,7 @@ We will also create a column *Cell_id_dummy* which uniquely identify each cell.
130130It will have values such as ` Cell_1, Cell_2, ` all the way until ` Cell_x `
131131where x is the number of cells in the dataset.
132132
133- ``` {r}
133+ ``` {r create_cell_id }
134134dat$Cell_id_dummy <- paste0("Cell_", seq_len(nrow(dat)))
135135head(dat$Cell_id_dummy, n = 10)
136136```
@@ -139,13 +139,13 @@ By default, the `simCytoData` function will generate cells for multiple samples,
139139and the resulting ` data.table ` object will already have a column
140140called *Sample* that denotes the sample the cells come from.
141141
142- ``` {r}
142+ ``` {r check_sample_col }
143143unique(dat$Sample)
144144```
145145
146146Let's take note of the sample and cell id column for later.
147147
148- ``` {r}
148+ ``` {r set_colnames }
149149sample_col <- "Sample"
150150cell_id_col <- "Cell_id_dummy"
151151```
@@ -167,7 +167,7 @@ your data, then make sure you specify them in a vector that you later pass to
167167For this tutorial, we will use all the arcsinh transformed markers in the
168168toy data.
169169
170- ``` {r}
170+ ``` {r run_supercellcyto }
171171supercells <- runSuperCellCyto(
172172 dt = dat,
173173 markers = marker_cols_asinh,
@@ -178,13 +178,13 @@ supercells <- runSuperCellCyto(
178178
179179Let's dig deeper into the object it created:
180180
181- ``` {r}
181+ ``` {r check_supercells_class }
182182class(supercells)
183183```
184184
185185It is a list containing 3 elements:
186186
187- ``` {r}
187+ ``` {r check_supercells_names }
188188names(supercells)
189189```
190190
@@ -207,7 +207,7 @@ supercell.
207207These are calculated by taking the average of the marker expression of
208208all the cells contained within a supercell.
209209
210- ``` {r}
210+ ``` {r show_supercell_expr_matrix }
211211head(supercells$supercell_expression_matrix)
212212```
213213
@@ -224,7 +224,7 @@ variable).
224224
225225Let's have a look at ` SuperCellId ` :
226226
227- ``` {r}
227+ ``` {r show_supercell_ids }
228228head(unique(supercells$supercell_expression_matrix$SuperCellId))
229229```
230230
@@ -234,7 +234,7 @@ a sample) used to uniquely identify each supercell in a sample.
234234Notably, you may encounter this (` SuperCell_1 ` , ` SuperCell_2 ` ) being repeated
235235across different samples, e.g.,
236236
237- ``` {r}
237+ ``` {r show_supercell_1_ids }
238238supercell_ids <- unique(supercells$supercell_expression_matrix$SuperCellId)
239239supercell_ids[grep("SuperCell_1_", supercell_ids)]
240240```
@@ -253,7 +253,7 @@ This aids in differentiating the supercells in different samples.
253253` supercell_cell_map ` maps each cell in our dataset to the supercell it
254254belongs to.
255255
256- ``` {r}
256+ ``` {r show_supercell_cell_map }
257257head(supercells$supercell_cell_map)
258258```
259259
@@ -280,7 +280,7 @@ As each sample will be processed by a parallel job, we don't want a job that
280280processes a large sample to also be assigned other smaller samples if possible.
281281If you want to know more about how this feature works, please refer to our manuscript.
282282
283- ``` {r}
283+ ``` {r run_supercellcyto_parallel }
284284supercell_par <- runSuperCellCyto(
285285 dt = dat,
286286 markers = marker_cols_asinh,
@@ -343,7 +343,7 @@ toy dataset, we will regenerate the supercells using gamma of 10 and 50.
343343The function to do this is ` recomputeSupercells ` .
344344We will store the output in a list, one element per gamma value.
345345
346- ``` {r}
346+ ``` {r recompute_supercells }
347347addt_gamma_vals <- c(10, 50)
348348supercells_addt_gamma <- lapply(addt_gamma_vals, function(gam) {
349349 recomputeSupercells(
@@ -361,7 +361,7 @@ We should end up with a list containing 2 elements.
361361The 1st element contains supercells generated using gamma = 10,
362362and the 2nd contains supercells generated using gamma = 50.
363363
364- ``` {r}
364+ ``` {r show_supercells_gamma10 }
365365supercells_addt_gamma[[1]]
366366```
367367
@@ -377,7 +377,7 @@ Compared to the previous run where gamma was set to 20, we should get more
377377supercells for gamma = 10, and fewer for gamma = 50.
378378Let's see if that's the case.
379379
380- ``` {r}
380+ ``` {r count_supercells }
381381n_supercells_gamma20 <- nrow(supercells$supercell_expression_matrix)
382382n_supercells_gamma10 <- nrow(
383383 supercells_addt_gamma[[1]]$supercell_expression_matrix
@@ -387,11 +387,11 @@ n_supercells_gamma50 <- nrow(
387387)
388388```
389389
390- ``` {r}
390+ ``` {r gamma10_gt_gamma20 }
391391n_supercells_gamma10 > n_supercells_gamma20
392392```
393393
394- ``` {r}
394+ ``` {r gamma50_lt_gamma20 }
395395n_supercells_gamma50 < n_supercells_gamma20
396396```
397397
@@ -405,7 +405,7 @@ and run `runSuperCellCyto`
405405function on each of them with different ` gam ` parameter value.
406406Something like the following:
407407
408- ``` {r}
408+ ``` {r diff_gamma_per_sample }
409409n_markers <- 10
410410dat <- simCytoData(nmarkers = n_markers)
411411markers_col <- paste0("Marker_", seq_len(n_markers))
@@ -433,7 +433,7 @@ supercells_diff_gam <- lapply(seq_len(length(samples)), function(i) {
433433Subsequently, to extract and combine the ` supercell_expression_matrix ` and
434434` supercell_cell_map ` , we will need to use ` rbind ` :
435435
436- ``` {r}
436+ ``` {r combine_supercell_results }
437437supercell_expression_matrix <- do.call(
438438 "rbind", lapply(
439439 supercells_diff_gam, function(x) x[["supercell_expression_matrix"]]
@@ -447,14 +447,14 @@ supercell_cell_map <- do.call(
447447)
448448```
449449
450- ``` {r}
450+ ``` {r show_combined_expr_matrix }
451451rbind(
452452 head(supercell_expression_matrix, n = 3),
453453 tail(supercell_expression_matrix, n = 3)
454454)
455455```
456456
457- ``` {r}
457+ ``` {r show_combined_cell_map }
458458rbind(head(supercell_cell_map, n = 3), tail(supercell_cell_map, n = 3))
459459```
460460
@@ -499,6 +499,6 @@ load the relevant output saved using the qs package and the relevant data
499499` recomputeSupercells ` function.
500500
501501## Session information
502- ``` {r}
502+ ``` {r session_info }
503503sessionInfo()
504504```
0 commit comments