Skip to main content

cyto_cli/workflow/
mod.rs

1use std::{fmt::Display, path::PathBuf, process::Command};
2
3use anyhow::{bail, Result};
4use clap::{Parser, Subcommand};
5use log::{debug, error};
6
7use super::{ArgsCrispr, ArgsGex};
8
/// Version of `geomux` requested via `uv tool install` for CRISPR workflows.
pub const VERSION_GEOMUX: &str = "0.5.5";
/// Version of `cell-filter` requested via `uv tool install` when GEX filtering is enabled.
pub const VERSION_CELL_FILTER: &str = "0.1.2";
/// Version of `pycyto` requested via `uv tool install`.
pub const VERSION_PYCYTO: &str = "0.1.13";
12
// Workflow subcommands exposed on the command line (`gex` and `crispr`).
// The `///` comments on the variants are picked up by clap as help text, so
// rewording them changes what users see in `--help`.
#[derive(Subcommand, Debug)]
pub enum WorkflowCommand {
    /// Executes a gex mapping workflow (map => sort => barcode => sort => umi => sort => count)
    #[clap(name = "gex")]
    GexMapping(GexMappingCommand),

    /// Executes a crispr mapping workflow (map => sort => barcode => sort => umi => sort => count)
    #[clap(name = "crispr")]
    CrisprMapping(CrisprMappingCommand),
}
23impl WorkflowCommand {
24    pub fn validate_outdir(&self) -> Result<()> {
25        match self {
26            WorkflowCommand::GexMapping(cmd) => cmd.gex_args.validate_outdir(),
27            WorkflowCommand::CrisprMapping(cmd) => cmd.crispr_args.validate_outdir(),
28        }
29    }
30
31    pub fn log_path(&self) -> PathBuf {
32        match self {
33            WorkflowCommand::GexMapping(cmd) => cmd.gex_args.log_path(),
34            WorkflowCommand::CrisprMapping(cmd) => cmd.crispr_args.log_path(),
35        }
36    }
37}
38
// Arguments of the `gex` workflow subcommand: the GEX mapping arguments plus
// the shared workflow options, both flattened into a single argument list.
#[derive(Parser, Debug)]
pub struct GexMappingCommand {
    #[clap(flatten)]
    pub gex_args: ArgsGex,

    #[clap(flatten)]
    pub wf_args: ArgsWorkflow,
}
impl GexMappingCommand {
    /// Returns the workflow mode for this command, which is always
    /// [`WorkflowMode::Gex`].
    pub fn mode(&self) -> WorkflowMode {
        WorkflowMode::Gex
    }
}
52
// Arguments of the `crispr` workflow subcommand: the CRISPR mapping arguments,
// the geomux options and the shared workflow options, all flattened into a
// single argument list.
#[derive(Parser, Debug)]
pub struct CrisprMappingCommand {
    #[clap(flatten)]
    pub crispr_args: ArgsCrispr,

    #[clap(flatten)]
    pub geomux_args: ArgsGeomux,

    #[clap(flatten)]
    pub wf_args: ArgsWorkflow,
}
impl CrisprMappingCommand {
    /// Returns the workflow mode for this command, which is always
    /// [`WorkflowMode::Crispr`].
    pub fn mode(&self) -> WorkflowMode {
        WorkflowMode::Crispr
    }
}
69
/// Identifies which of the two workflows is being executed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WorkflowMode {
    /// The `gex` workflow.
    Gex,
    /// The `crispr` workflow.
    Crispr,
}
impl WorkflowMode {
    /// Reports whether this mode filters its output: `true` for
    /// [`WorkflowMode::Gex`], `false` for [`WorkflowMode::Crispr`].
    pub fn should_filter(&self) -> bool {
        matches!(self, Self::Gex)
    }
}
83
// Options shared by the `gex` and `crispr` workflow subcommands (flattened into both).
//
// The `///` comments on the fields are rendered by clap as `--help` text, so
// maintainer-only notes in this struct are written as plain `//` comments.
#[derive(Parser, Debug)]
#[clap(next_help_heading = "Workflow Options")]
pub struct ArgsWorkflow {
    /// Skip barcode correction step
    #[clap(long)]
    pub skip_barcode: bool,

    /// Skip UMI correction step
    #[clap(long)]
    pub skip_umi: bool,

    /// Skip reads/umi saturation step
    #[clap(long)]
    pub skip_reads: bool,

    // Besides disabling filtering, this flag also feeds the tool checks in
    // `validate_requirements`.
    /// Skip `EmptyDrops` filtering step (GEX)
    ///
    /// Only used when format is h5ad
    #[clap(long)]
    pub no_filter: bool,

    /// Keep the unfiltered h5ad file (GEX)
    ///
    /// Only used when format is h5ad
    #[clap(long)]
    pub keep_unfiltered: bool,

    /// Keep the IBU file(s) after counting
    #[clap(long)]
    pub keep_ibu: bool,

    /// Skip CRISPR-barcode assignment step (CRISPR)
    ///
    /// Only used when format is h5ad
    #[clap(long)]
    pub skip_assignment: bool,

    /// Sort in memory instead of using disk
    #[clap(long)]
    pub sort_in_memory: bool,

    // Kept as a raw string here (default "5GiB"); it is not parsed in this struct.
    /// Memory limit for sorting (ignored if `sort_in_memory` is true)
    #[clap(long, default_value = "5GiB")]
    pub memory_limit: String,

    /// Exact barcode matching only
    ///
    /// Default allows barcode correction of 1 unambiguous mismatch from whitelist
    #[clap(long)]
    pub bc_exact: bool,

    // Mutually exclusive with `--skip-barcode` (enforced by clap via `conflicts_with`).
    /// Skip barcode correction second pass step.
    ///
    /// This skips recovery of ambiguous one-offs barcodes by parent abundance.
    #[clap(long, conflicts_with = "skip_barcode")]
    pub skip_bc_second_pass: bool,

    // NOTE(review): this field is a plain `String` rather than `Option<String>`;
    // confirm that omitting `--whitelist` together with `--skip-barcode` really
    // parses as `required_unless_present` suggests it should.
    /// Cell Barcode Whitelist
    #[clap(short, long, required_unless_present = "skip_barcode")]
    pub whitelist: String,

    // Selects the `CountFormat` (`-F` / `--format`, default `h5ad`). There is no
    // `///` comment here, so clap has no description text for this option.
    #[clap(short = 'F', long, default_value = "h5ad")]
    pub format: CountFormat,
}
148impl ArgsWorkflow {
149    pub fn validate_requirements(&self, mode: WorkflowMode) -> Result<()> {
150        if self.format == CountFormat::H5ad || !self.no_filter {
151            debug!("Checking if `uv` exists in $PATH");
152            match Command::new("uv").args(["--version"]).output() {
153                Ok(_) => debug!("Found `uv` in $PATH"),
154                Err(e) => {
155                    error!("Encountered an unexpected error checking for `uv`: {e}");
156                    bail!("Encountered an unexpected error checking for `uv`: {e}");
157                }
158            }
159            transparent_uv_install("pycyto", VERSION_PYCYTO)?;
160        }
161        if mode == WorkflowMode::Gex && !self.no_filter {
162            transparent_uv_install("cell-filter", VERSION_CELL_FILTER)?;
163        }
164        if mode == WorkflowMode::Crispr {
165            transparent_uv_install("geomux", VERSION_GEOMUX)?;
166        }
167        Ok(())
168    }
169
170    /// Check whether the workflow should output mtx files
171    ///
172    /// This is true if the format is mtx or h5ad but mtx is consumed by h5ad
173    pub fn mtx(&self) -> bool {
174        match self.format {
175            CountFormat::H5ad | CountFormat::Mtx => true,
176            CountFormat::Tsv => false,
177        }
178    }
179
180    /// Check whether the workflow should output h5ad files
181    pub fn to_h5ad(&self) -> bool {
182        match self.format {
183            CountFormat::H5ad => true,
184            CountFormat::Mtx | CountFormat::Tsv => false,
185        }
186    }
187}
188
// Output format selectable through `--format`; `H5ad` is the `Default` variant.
// Derives `clap::ValueEnum`, so the accepted command-line values come from the
// variant names.
#[derive(Clone, Copy, Default, Debug, clap::ValueEnum, PartialEq, Eq)]
pub enum CountFormat {
    #[default]
    H5ad,
    Mtx,
    Tsv,
}
196
197fn transparent_uv_install(name: &str, version: &str) -> Result<()> {
198    debug!("Installing `{name}@{version}` if necessary...");
199    // if name == "geomux" || name == "pycyto" {
200    //     warn!("Not installing {name}- using PATH. Remove me before release!");
201    //     // skip for now in testing
202    //     return Ok(());
203    // }
204    match Command::new("uv")
205        .arg("tool")
206        .arg("install")
207        .arg(format!("{name}@{version}"))
208        .output()
209    {
210        Ok(_) => {
211            debug!("Precompiling `{name}`...");
212            match Command::new(name).arg("--help").output() {
213                Ok(_) => {
214                    debug!("Precompiled `{name}`");
215                    Ok(())
216                }
217                Err(e) => {
218                    error!("Encountered an unexpected error precompiling `{name}`: {e}");
219                    bail!("Encountered an unexpected error precompiling `{name}`: {e}");
220                }
221            }
222        }
223        Err(e) => {
224            error!("Encountered an unexpected error installing `{name}`: {e}");
225            bail!("Encountered an unexpected error installing `{name}`: {e}");
226        }
227    }
228}
229
// Geomux options for the `crispr` workflow (flattened into `CrisprMappingCommand`).
// The `///` comments on the fields are rendered by clap as `--help` text, so
// maintainer-only notes in this struct are written as plain `//` comments.
#[derive(Parser, Debug, Clone, Copy)]
#[clap(next_help_heading = "Geomux Options")]
pub struct ArgsGeomux {
    // Private on purpose: read it through `min_umi_cells()`, which supplies the
    // mode-dependent default when the flag is not given.
    /// Minimum number of UMIs required for a cell to be included in geomux testing.
    ///
    /// 5 for geomux
    /// 3 for mixture
    #[clap(long)]
    geomux_min_umi_cells: Option<usize>,
    /// Minimum number of UMIs required for a guide to be included in geomux testing.
    #[clap(long, default_value_t = 5)]
    pub geomux_min_umi_guides: usize,
    // No default here: `None` means the flag was not passed.
    /// Log odds ratio minimum threshold to use for geomux assignments.
    #[clap(long)]
    pub geomux_log_odds_ratio: Option<f64>,
    /// fdr threshold to use for geomux assignments.
    #[clap(long, default_value_t = 0.05)]
    pub geomux_fdr_threshold: f64,
    /// Mode to use for geomux testing.
    #[clap(long, default_value = "geomux")]
    pub geomux_mode: GeomuxMode,
}
252impl ArgsGeomux {
253    pub fn min_umi_cells(&self) -> usize {
254        self.geomux_min_umi_cells.unwrap_or(match self.geomux_mode {
255            GeomuxMode::Geomux => 5,
256            GeomuxMode::Mixture => 3,
257        })
258    }
259}
260
// Testing mode selectable through `--geomux-mode`. Derives `clap::ValueEnum`;
// the `///` comments on the variants are available to clap as help text for
// each value, so rewording them may change `--help` output.
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
pub enum GeomuxMode {
    /// Use the hypergeometric test.
    Geomux,
    /// Use the gaussian mixture model
    Mixture,
}
268impl Display for GeomuxMode {
269    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
270        match self {
271            GeomuxMode::Geomux => write!(f, "geomux"),
272            GeomuxMode::Mixture => write!(f, "mixture"),
273        }
274    }
275}