From 3a621d6b5cd8387e729b5361de95a545425c904b Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 18 Aug 2025 19:26:29 -0400 Subject: [PATCH 1/3] feat: explore pairwise fdr diffs --- src/pdex/_single_cell.py | 2 ++ test-single.py | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 test-single.py diff --git a/src/pdex/_single_cell.py b/src/pdex/_single_cell.py index dba2aa1..5ce7a7f 100644 --- a/src/pdex/_single_cell.py +++ b/src/pdex/_single_cell.py @@ -574,6 +574,7 @@ def _process_single_target_vectorized( # Statistical tests across all genes simultaneously p_values, statistics = _vectorized_ranksum_test_numba(X_target, X_ref) + pairwise_fdr = false_discovery_control(p_values, method="bh") # Build results for all genes at once using vectorized operations target_results = [ @@ -587,6 +588,7 @@ def _process_single_target_vectorized( "fold_change": fc[i], "p_value": p_values[i], "statistic": statistics[i], + "pairwise_fdr": pairwise_fdr[i] } for i in range(len(gene_names)) ] diff --git a/test-single.py b/test-single.py new file mode 100644 index 0000000..e34784e --- /dev/null +++ b/test-single.py @@ -0,0 +1,60 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "anndata", +# "pdex", +# ] +# +# [tool.uv.sources] +# pdex = { git = "https://github.com/drbh/pdex.git", branch = "pairwise_fdr" } +# /// +import os +import anndata as ad +from pdex._single_cell import ( + parallel_differential_expression_vec_wrapper as parallel_differential_expression, +) + +adata = ad.read_h5ad("../bspc/convert/vcc_data/adata_Training.h5ad") +os.makedirs("de_results", exist_ok=True) + +ctrl_mask = adata.obs["target_gene"] == "non-targeting" +control_cells = adata[ctrl_mask] + +targets = ["LAD1", "TWF2"] + +cells = [control_cells] +for target in targets: + print(f"Selected target: {target}") + target_mask = adata.obs["target_gene"] == target + target_cells = adata[target_mask] + cells.append(target_cells) + + +de_adata = ad.concat(cells) + + +results = parallel_differential_expression( + de_adata, + reference="non-targeting", + groupby_key="target_gene", +) + + +results["diff"] = results["pairwise_fdr"] - results["fdr"] +results = results.sort_values("diff") + +filtered = results[results["pairwise_fdr"] < 0.05] + +print(filtered) +# print the number of each target +for target in targets: + count = filtered[filtered["target"] == target].shape[0] + print(f"Target: {target}, DE genes: {count}") + +print("=" * 30) + +filtered2 = results[results["fdr"] < 0.05] +print(filtered2) +for target in targets: + count = filtered2[filtered2["target"] == target].shape[0] + print(f"Target: {target}, DE genes: {count}") From 22b580edbbebb23aa5601ce4f6e7f9415c2f9209 Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 18 Aug 2025 19:32:03 -0400 Subject: [PATCH 2/3] fix: remove example --- test-single.py | 60 -------------------------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 test-single.py diff --git a/test-single.py b/test-single.py deleted file mode 100644 index e34784e..0000000 --- a/test-single.py +++ /dev/null @@ -1,60 +0,0 @@ -# /// script -# requires-python = ">=3.10" -# dependencies = [ -# "anndata", -# "pdex", -# ] -# -# [tool.uv.sources] -# pdex = { git = "https://github.com/drbh/pdex.git", branch = "pairwise_fdr" } -# /// -import os -import anndata as ad -from pdex._single_cell import ( - parallel_differential_expression_vec_wrapper as parallel_differential_expression, -) - -adata = ad.read_h5ad("../bspc/convert/vcc_data/adata_Training.h5ad") -os.makedirs("de_results", exist_ok=True) - -ctrl_mask = adata.obs["target_gene"] == "non-targeting" -control_cells = adata[ctrl_mask] - -targets = ["LAD1", "TWF2"] - -cells = [control_cells] -for target in targets: - print(f"Selected target: {target}") - target_mask = adata.obs["target_gene"] == target - target_cells = adata[target_mask] - cells.append(target_cells) - - -de_adata = ad.concat(cells) - - -results = parallel_differential_expression( - de_adata, - reference="non-targeting", - groupby_key="target_gene", -) - - -results["diff"] = results["pairwise_fdr"] - results["fdr"] -results = results.sort_values("diff") - -filtered = results[results["pairwise_fdr"] < 0.05] - -print(filtered) -# print the number of each target -for target in targets: - count = filtered[filtered["target"] == target].shape[0] - print(f"Target: {target}, DE genes: {count}") - -print("=" * 30) - -filtered2 = results[results["fdr"] < 0.05] -print(filtered2) -for target in targets: - count = filtered2[filtered2["target"] == target].shape[0] - print(f"Target: {target}, DE genes: {count}") From 5670427984b6d6b2cade9418a312c988a41a3b55 Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 18 Aug 2025 20:27:15 -0400 Subject: [PATCH 3/3] fix: run formatter --- src/pdex/_single_cell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdex/_single_cell.py b/src/pdex/_single_cell.py index 5ce7a7f..55f3881 100644 --- a/src/pdex/_single_cell.py +++ b/src/pdex/_single_cell.py @@ -588,7 +588,7 @@ def _process_single_target_vectorized( "fold_change": fc[i], "p_value": p_values[i], "statistic": statistics[i], - "pairwise_fdr": pairwise_fdr[i] + "pairwise_fdr": pairwise_fdr[i], } for i in range(len(gene_names)) ]