Skip to main content

rsomics_cell_filter/
lib.rs

1use std::fs::File;
2use std::io::{BufRead, BufReader, BufWriter, Write};
3use std::path::Path;
4
5use rsomics_common::{Result, RsomicsError};
6
/// Thresholds a cell row must satisfy to be kept by [`filter_cells`].
///
/// All three bounds are inclusive: a row passes only when every condition
/// holds at once.
#[derive(Debug, Clone, PartialEq)]
pub struct FilterCriteria {
    /// Lower bound on the gene count (column 1); rows with fewer are dropped.
    pub min_genes: u64,
    /// Lower bound on the UMI count (column 2); rows with fewer are dropped.
    pub min_umis: u64,
    /// Upper bound on the mitochondrial value (column 3); rows above it are dropped.
    pub max_mito_frac: f64,
}
12
/// Row counts reported by [`filter_cells`] after a filtering pass.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilterResult {
    /// Number of data rows evaluated. The header line and rows with fewer
    /// than four tab-separated fields are not included.
    pub total: u64,
    /// Number of evaluated rows that met every criterion and were written out.
    pub passed: u64,
    /// Number of evaluated rows that were rejected (`total - passed`).
    pub failed: u64,
}
18
19pub fn filter_cells(
20    input: &Path,
21    criteria: &FilterCriteria,
22    output: &mut dyn Write,
23) -> Result<FilterResult> {
24    let file = File::open(input)
25        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
26    let reader = BufReader::new(file);
27    let mut out = BufWriter::new(output);
28    let mut lines = reader.lines();
29
30    if let Some(header) = lines.next() {
31        let header = header.map_err(RsomicsError::Io)?;
32        writeln!(out, "{header}").map_err(RsomicsError::Io)?;
33    }
34
35    let mut total = 0u64;
36    let mut passed = 0u64;
37
38    for line in lines {
39        let line = line.map_err(RsomicsError::Io)?;
40        let parts: Vec<&str> = line.split('\t').collect();
41        if parts.len() < 4 {
42            continue;
43        }
44        total += 1;
45
46        let genes: u64 = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0);
47        let umis: u64 = parts.get(2).and_then(|s| s.parse().ok()).unwrap_or(0);
48        let mito: f64 = parts.get(3).and_then(|s| s.parse().ok()).unwrap_or(1.0);
49
50        if genes >= criteria.min_genes
51            && umis >= criteria.min_umis
52            && mito <= criteria.max_mito_frac
53        {
54            writeln!(out, "{line}").map_err(RsomicsError::Io)?;
55            passed += 1;
56        }
57    }
58
59    out.flush().map_err(RsomicsError::Io)?;
60    Ok(FilterResult {
61        total,
62        passed,
63        failed: total - passed,
64    })
65}