rsomics-cell-filter 0.1.0

Filter cells by QC metrics — min genes, min UMIs, max mito fraction from a barcode stats TSV
Documentation
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;

use rsomics_common::{Result, RsomicsError};

/// Thresholds a cell's QC metrics must satisfy to be kept by [`filter_cells`].
///
/// All bounds are inclusive: a cell sitting exactly on a threshold passes.
#[derive(Debug, Clone, PartialEq)]
pub struct FilterCriteria {
    /// Minimum gene count; a cell passes this check when its count is `>=` this value.
    pub min_genes: u64,
    /// Minimum UMI count; a cell passes this check when its count is `>=` this value.
    pub min_umis: u64,
    /// Maximum mitochondrial fraction; a cell passes this check when its fraction is
    /// `<=` this value.
    pub max_mito_frac: f64,
}

/// Row counts reported by [`filter_cells`] after a filtering pass.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct FilterResult {
    /// Number of data rows evaluated. The header line and rows with fewer than four
    /// tab-separated fields are not counted.
    pub total: u64,
    /// Number of evaluated rows that met every criterion and were written to the output.
    pub passed: u64,
    /// Number of evaluated rows that were rejected (`total - passed`).
    pub failed: u64,
}

/// Copies the rows of a tab-separated stats table that satisfy `criteria` to `output`.
///
/// The first line of `input` is treated as a header and written through unchanged.
/// Every following line is split on tabs; lines with fewer than four fields are
/// skipped and do not contribute to any count. For the remaining lines, fields two,
/// three and four are read as the gene count, UMI count and mito fraction. A field
/// that fails to parse falls back to `0`, `0` and `1.0` respectively. A line is
/// written to `output` (terminated with `\n`) when the gene and UMI counts are at
/// least the configured minimums and the mito fraction is at most the configured
/// maximum.
///
/// # Errors
///
/// Returns `RsomicsError::InvalidInput` (carrying the path and the OS error) when
/// `input` cannot be opened, and `RsomicsError::Io` when reading a line, writing a
/// line, or flushing the output fails.
pub fn filter_cells(
    input: &Path,
    criteria: &FilterCriteria,
    output: &mut dyn Write,
) -> Result<FilterResult> {
    let reader = File::open(input)
        .map(BufReader::new)
        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
    let mut out = BufWriter::new(output);
    let mut rows = reader.lines();

    // Pass the header through untouched; an empty input simply has none.
    if let Some(header) = rows.next().transpose().map_err(RsomicsError::Io)? {
        writeln!(out, "{header}").map_err(RsomicsError::Io)?;
    }

    let mut seen = 0u64;
    let mut kept = 0u64;

    for line in rows {
        let line = line.map_err(RsomicsError::Io)?;

        // Column 0 is skipped; a row needs columns 1..=3 to be considered at all.
        let mut columns = line.split('\t').skip(1);
        let (genes_field, umis_field, mito_field) =
            match (columns.next(), columns.next(), columns.next()) {
                (Some(g), Some(u), Some(m)) => (g, u, m),
                _ => continue,
            };
        seen += 1;

        // Unparsable values fall back to the least favourable reading of each metric.
        let gene_count = genes_field.parse::<u64>().unwrap_or(0);
        let umi_count = umis_field.parse::<u64>().unwrap_or(0);
        let mito_frac = mito_field.parse::<f64>().unwrap_or(1.0);

        let meets_criteria = gene_count >= criteria.min_genes
            && umi_count >= criteria.min_umis
            && mito_frac <= criteria.max_mito_frac;
        if !meets_criteria {
            continue;
        }

        writeln!(out, "{line}").map_err(RsomicsError::Io)?;
        kept += 1;
    }

    out.flush().map_err(RsomicsError::Io)?;

    Ok(FilterResult {
        total: seen,
        passed: kept,
        failed: seen - kept,
    })
}