Skip to content

Commit 224ff2a

Browse files
authored
Merge pull request #108 from LieberInstitute/pseudobulkDGE_compatable
Make registration_pseudobulk compatible with pseudobulkDGE workflow
2 parents 1de5729 + 36d54ec commit 224ff2a

File tree

3 files changed

+99
-25
lines changed

3 files changed

+99
-25
lines changed

R/registration_pseudobulk.R

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@
2121
#' number of cells (for scRNA-seq) or spots (for spatial) that are combined
2222
#' when pseudo-bulking. Pseudo-bulked samples with less than `min_ncells` on
2323
#' `sce_pseudo$ncells` will be dropped.
24+
#' @param filter_expr A `logical(1)` specifying whether to filter pseudobulked
25+
#' counts with `edgeR::filterByExpr`. Defaults to `TRUE`, filtering is recommended for
26+
#' spatail registratrion workflow.
27+
#' @param mito_gene An optional `logical()` vector indicating which genes are
28+
#' mitochondrial, used to calculate pseudo bulked mitochondrial expression rate
29+
#' `expr_chrM` and `pseudo_expr_chrM` .
2430
#'
2531
#' @return A pseudo-bulked [SingleCellExperiment-class][SingleCellExperiment::SingleCellExperiment-class] object. The `logcounts()` assay are `log2-CPM`
2632
#' values calculated with `edgeR::cpm(log = TRUE)`. See
@@ -54,18 +60,22 @@
5460
#' rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce)))
5561
#' rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
5662
#'
57-
#' ## Pseudo-bulk
58-
#' sce_pseudo <- registration_pseudobulk(sce, "Cell_Cycle", "sample_id", c("age"), min_ncells = NULL)
63+
#' ## Pseudo-bulk by Cell Cycle
64+
#' sce_pseudo <- registration_pseudobulk(sce,
65+
#' var_registration = "Cell_Cycle",
66+
#' var_sample_id = "sample_id",
67+
#' covars = c("age"),
68+
#' min_ncells = NULL)
5969
#' colData(sce_pseudo)
6070
registration_pseudobulk <-
61-
function(
62-
sce,
63-
var_registration,
64-
var_sample_id,
65-
covars = NULL,
66-
min_ncells = 10,
67-
pseudobulk_rds_file = NULL
68-
) {
71+
function(sce,
72+
var_registration,
73+
var_sample_id,
74+
covars = NULL,
75+
min_ncells = 10,
76+
pseudobulk_rds_file = NULL,
77+
filter_expr = TRUE,
78+
mito_gene = NULL) {
6979
## Check that inputs are correct
7080
stopifnot(is(sce, "SingleCellExperiment"))
7181
stopifnot(var_registration %in% colnames(colData(sce)))
@@ -79,9 +89,12 @@ registration_pseudobulk <-
7989
stopifnot(!var_registration %in% covars)
8090
stopifnot(!var_sample_id %in% covars)
8191
stopifnot(var_registration != var_sample_id)
92+
93+
## create var_registration col
94+
sce$var_registration <- sce[[var_registration]]
8295

8396
## Check that the values in the registration variable are numeric
84-
if (is.numeric(sce[[var_registration]])) {
97+
if (is.numeric(sce[["var_registration"]])) {
8598
warning(
8699
sprintf(
87100
"var_registration \"%s\" is numeric, convering to categorical vector...",
@@ -92,7 +105,7 @@ registration_pseudobulk <-
92105
}
93106

94107
## check for Non-Syntactic variables - convert with make.names & warn
95-
uniq_var_regis <- unique(sce[[var_registration]])
108+
uniq_var_regis <- unique(sce[["var_registration"]])
96109
syntatic <- grepl(
97110
"^((([[:alpha:]]|[.][._[:alpha:]])[._[:alnum:]]*)|[.])$",
98111
uniq_var_regis
@@ -110,7 +123,7 @@ registration_pseudobulk <-
110123
),
111124
call. = FALSE
112125
)
113-
sce[[var_registration]] <- make.names(sce[[var_registration]])
126+
sce[["var_registration"]] <- make.names(sce[["var_registration"]])
114127
}
115128

116129
## Pseudo-bulk for our current BayesSpace cluster results
@@ -119,7 +132,7 @@ registration_pseudobulk <-
119132
sce_pseudo <- scuttle::aggregateAcrossCells(
120133
sce,
121134
DataFrame(
122-
registration_variable = sce[[var_registration]],
135+
registration_variable = sce[["var_registration"]],
123136
registration_sample_id = sce[[var_sample_id]]
124137
)
125138
)
@@ -129,6 +142,9 @@ registration_pseudobulk <-
129142
"_",
130143
sce_pseudo$registration_variable
131144
)
145+
146+
## rm sce_pseudo$var_registration - redundant with registration variable
147+
sce_pseudo$var_registration <- NULL
132148

133149
## Check that the covariates are present
134150
if (!is.null(covars)) {
@@ -164,16 +180,29 @@ registration_pseudobulk <-
164180
sce_pseudo$registration_variable
165181
)
166182
}
183+
184+
## compute pseudo QC metrics
185+
sce_pseudo$pseudo_sum_umi <- colSums(counts(sce_pseudo))
186+
187+
## if mitochondrial genes are indicated, calculate pseudo mito rate
188+
if(!is.null(mito_gene)){
189+
if(length(mito_gene) == nrow(sce_pseudo)){
190+
sce_pseudo$pseudo_expr_chrM <- colSums(counts(sce_pseudo)[mito_gene, , drop = FALSE])
191+
sce_pseudo$pseudo_expr_chrM_ratio <- sce_pseudo$pseudo_expr_chrM / sce_pseudo$pseudo_sum_umi
192+
} else {
193+
warning("length(mito_gene) != nrow(sce_pseudo) : unable to calc 'pseudo_expr_chrM' metrics")
194+
}
167195

196+
}
197+
168198
## Drop lowly-expressed genes
169-
message(Sys.time(), " drop lowly expressed genes")
170-
keep_expr <-
171-
edgeR::filterByExpr(
172-
sce_pseudo,
173-
group = sce_pseudo$registration_variable
174-
)
175-
sce_pseudo <- sce_pseudo[which(keep_expr), ]
176-
199+
if(filter_expr){
200+
message(Sys.time(), " drop lowly expressed genes")
201+
keep_expr <-
202+
edgeR::filterByExpr(sce_pseudo, group = sce_pseudo$registration_variable)
203+
sce_pseudo <- sce_pseudo[which(keep_expr), ]
204+
}
205+
177206
## Compute the logcounts
178207
message(Sys.time(), " normalize expression")
179208
logcounts(sce_pseudo) <-

man/registration_pseudobulk.Rd

Lines changed: 17 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-registration_pseudobulk.R

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,34 @@ test_that("NA check works", {
1212
)
1313
})
1414

15+
## pseudo qc vars
16+
mito_gene_test <- runif(n = nrow(sce)) < 0.03 ## pick random mito genes
17+
18+
pseudo_test <- registration_pseudobulk(sce,
19+
var_registration = "Cell_Cycle",
20+
var_sample_id = "sample_id",
21+
covars = c("age"),
22+
min_ncells = NULL,
23+
mito_gene = mito_gene_test
24+
)
25+
26+
colData(pseudo_test)
27+
28+
test_that("pseudo_sum_umi in output",{
29+
expect_true(all(c("pseudo_sum_umi", "pseudo_expr_chrM", "pseudo_expr_chrM_ratio") %in% colnames(colData(pseudo_test))))
30+
})
31+
32+
33+
test_that("mito_gene catches work", {
34+
expect_warning(registration_pseudobulk(sce,
35+
var_registration = "Cell_Cycle",
36+
var_sample_id = "sample_id",
37+
covars = c("age"),
38+
min_ncells = NULL,
39+
mito_gene = c(TRUE, FALSE, FALSE)
40+
))
41+
})
42+
1543

1644
#### Syntactic Variable Test ####
1745
set.seed(20220907) ## Ensure reproducibility of example data
@@ -53,3 +81,6 @@ test_that(
5381
min_ncells = NULL
5482
))
5583
)
84+
85+
86+

0 commit comments

Comments
 (0)