// cyto_cli/workflow/mod.rs

1use std::{fmt::Display, path::PathBuf, process::Command};
2
3use anyhow::{bail, Result};
4use clap::{Parser, Subcommand};
5use log::{debug, error};
6
7use crate::{ArgsCrispr, ArgsGex};
8
9pub const VERSION_GEOMUX: &str = "0.5.5";
10pub const VERSION_CELL_FILTER: &str = "0.1.2";
11pub const VERSION_PYCYTO: &str = "0.1.14";
12
13#[derive(Subcommand, Debug)]
14pub enum WorkflowCommand {
15    /// Executes a gex mapping workflow (map => sort => umi-correct => count => filter)
16    #[clap(name = "gex")]
17    GexMapping(GexMappingCommand),
18
19    /// Executes a crispr mapping workflow (map => sort => umi-correct => count => assign)
20    #[clap(name = "crispr")]
21    CrisprMapping(CrisprMappingCommand),
22}
23impl WorkflowCommand {
24    pub fn validate_outdir(&self) -> Result<()> {
25        match self {
26            WorkflowCommand::GexMapping(cmd) => cmd.gex_args.validate_outdir(),
27            WorkflowCommand::CrisprMapping(cmd) => cmd.crispr_args.validate_outdir(),
28        }
29    }
30
31    pub fn log_path(&self) -> PathBuf {
32        match self {
33            WorkflowCommand::GexMapping(cmd) => cmd.gex_args.log_path(),
34            WorkflowCommand::CrisprMapping(cmd) => cmd.crispr_args.log_path(),
35        }
36    }
37}
38
39#[derive(Parser, Debug)]
40pub struct GexMappingCommand {
41    #[clap(flatten)]
42    pub gex_args: ArgsGex,
43
44    #[clap(flatten)]
45    pub wf_args: ArgsWorkflow,
46}
47impl GexMappingCommand {
48    pub fn mode(&self) -> WorkflowMode {
49        WorkflowMode::Gex
50    }
51}
52
53#[derive(Parser, Debug)]
54pub struct CrisprMappingCommand {
55    #[clap(flatten)]
56    pub crispr_args: ArgsCrispr,
57
58    #[clap(flatten)]
59    pub geomux_args: ArgsGeomux,
60
61    #[clap(flatten)]
62    pub wf_args: ArgsWorkflow,
63}
64impl CrisprMappingCommand {
65    pub fn mode(&self) -> WorkflowMode {
66        WorkflowMode::Crispr
67    }
68}
69
/// The kind of mapping workflow being executed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkflowMode {
    Gex,
    Crispr,
}

impl WorkflowMode {
    /// Returns `true` for [`WorkflowMode::Gex`] and `false` for
    /// [`WorkflowMode::Crispr`].
    pub fn should_filter(&self) -> bool {
        matches!(self, Self::Gex)
    }
}
83
84#[derive(Parser, Debug)]
85#[clap(next_help_heading = "Workflow Options")]
86pub struct ArgsWorkflow {
87    /// Skip UMI correction step
88    #[clap(long)]
89    pub skip_umi: bool,
90
91    /// Skip reads/umi saturation step
92    #[clap(long)]
93    pub skip_reads: bool,
94
95    /// Skip `EmptyDrops` filtering step (GEX)
96    ///
97    /// Only used when format is h5ad
98    #[clap(long)]
99    pub no_filter: bool,
100
101    /// Keep the unfiltered h5ad file (GEX)
102    ///
103    /// Only used when format is h5ad
104    #[clap(long)]
105    pub keep_unfiltered: bool,
106
107    /// Keep the IBU file(s) after counting
108    #[clap(long)]
109    pub keep_ibu: bool,
110
111    /// Skip CRISPR-barcode assignment step (CRISPR)
112    ///
113    /// Only used when format is h5ad
114    #[clap(long)]
115    pub skip_assignment: bool,
116
117    /// Sort in memory instead of using disk
118    #[clap(long)]
119    pub sort_in_memory: bool,
120
121    /// Memory limit for sorting (ignored if `sort_in_memory` is true)
122    #[clap(long, default_value = "5GiB")]
123    pub memory_limit: String,
124
125    #[clap(short = 'F', long, default_value = "h5ad")]
126    pub format: CountFormat,
127}
128impl ArgsWorkflow {
129    pub fn validate_requirements(&self, mode: WorkflowMode) -> Result<()> {
130        if self.format == CountFormat::H5ad || !self.no_filter {
131            debug!("Checking if `uv` exists in $PATH");
132            match Command::new("uv").args(["--version"]).output() {
133                Ok(_) => debug!("Found `uv` in $PATH"),
134                Err(e) => {
135                    error!("Encountered an unexpected error checking for `uv`: {e}");
136                    bail!("Encountered an unexpected error checking for `uv`: {e}");
137                }
138            }
139            transparent_uv_install("pycyto", VERSION_PYCYTO)?;
140        }
141        if mode == WorkflowMode::Gex && !self.no_filter {
142            transparent_uv_install("cell-filter", VERSION_CELL_FILTER)?;
143        }
144        if mode == WorkflowMode::Crispr {
145            transparent_uv_install("geomux", VERSION_GEOMUX)?;
146        }
147        Ok(())
148    }
149
150    /// Check whether the workflow should output mtx files
151    ///
152    /// This is true if the format is mtx or h5ad but mtx is consumed by h5ad
153    pub fn mtx(&self) -> bool {
154        match self.format {
155            CountFormat::H5ad | CountFormat::Mtx => true,
156            CountFormat::Tsv => false,
157        }
158    }
159
160    /// Check whether the workflow should output h5ad files
161    pub fn to_h5ad(&self) -> bool {
162        match self.format {
163            CountFormat::H5ad => true,
164            CountFormat::Mtx | CountFormat::Tsv => false,
165        }
166    }
167}
168
169#[derive(Clone, Copy, Default, Debug, clap::ValueEnum, PartialEq, Eq)]
170pub enum CountFormat {
171    #[default]
172    H5ad,
173    Mtx,
174    Tsv,
175}
176
177fn transparent_uv_install(name: &str, version: &str) -> Result<()> {
178    debug!("Installing `{name}@{version}` if necessary...");
179    // if name == "geomux" || name == "pycyto" {
180    //     warn!("Not installing {name}- using PATH. Remove me before release!");
181    //     // skip for now in testing
182    //     return Ok(());
183    // }
184    match Command::new("uv")
185        .arg("tool")
186        .arg("install")
187        .arg(format!("{name}@{version}"))
188        .output()
189    {
190        Ok(_) => {
191            debug!("Precompiling `{name}`...");
192            match Command::new(name).arg("--help").output() {
193                Ok(_) => {
194                    debug!("Precompiled `{name}`");
195                    Ok(())
196                }
197                Err(e) => {
198                    error!("Encountered an unexpected error precompiling `{name}`: {e}");
199                    bail!("Encountered an unexpected error precompiling `{name}`: {e}");
200                }
201            }
202        }
203        Err(e) => {
204            error!("Encountered an unexpected error installing `{name}`: {e}");
205            bail!("Encountered an unexpected error installing `{name}`: {e}");
206        }
207    }
208}
209
210#[derive(Parser, Debug, Clone, Copy)]
211#[clap(next_help_heading = "Geomux Options")]
212pub struct ArgsGeomux {
213    /// Minimum number of UMIs required for a cell to be included in geomux testing.
214    ///
215    /// 5 for geomux
216    /// 3 for mixture
217    #[clap(long)]
218    geomux_min_umi_cells: Option<usize>,
219    /// Minimum number of UMIs required for a guide to be included in geomux testing.
220    #[clap(long, default_value_t = 5)]
221    pub geomux_min_umi_guides: usize,
222    /// Log odds ratio minimum threshold to use for geomux assignments.
223    #[clap(long)]
224    pub geomux_log_odds_ratio: Option<f64>,
225    /// fdr threshold to use for geomux assignments.
226    #[clap(long, default_value_t = 0.05)]
227    pub geomux_fdr_threshold: f64,
228    /// Mode to use for geomux testing.
229    #[clap(long, default_value = "geomux")]
230    pub geomux_mode: GeomuxMode,
231}
232impl ArgsGeomux {
233    pub fn min_umi_cells(&self) -> usize {
234        self.geomux_min_umi_cells.unwrap_or(match self.geomux_mode {
235            GeomuxMode::Geomux => 5,
236            GeomuxMode::Mixture => 3,
237        })
238    }
239}
240
241#[derive(Debug, Clone, Copy, clap::ValueEnum)]
242pub enum GeomuxMode {
243    /// Use the hypergeometric test.
244    Geomux,
245    /// Use the gaussian mixture model
246    Mixture,
247}
248impl Display for GeomuxMode {
249    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
250        match self {
251            GeomuxMode::Geomux => write!(f, "geomux"),
252            GeomuxMode::Mixture => write!(f, "mixture"),
253        }
254    }
255}