Skip to main content

rsomics_seq_stats/
lib.rs

1use rsomics_common::{Result, RsomicsError};
2use std::io::Write;
3use std::path::Path;
4
5#[allow(clippy::cast_precision_loss)]
6pub fn seq_stats(input: &Path, output: &mut dyn Write) -> Result<()> {
7    let mut reader = needletail::parse_fastx_file(input)
8        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
9
10    let mut lengths: Vec<u64> = Vec::new();
11    let mut gc = 0u64;
12    let mut total_bp = 0u64;
13
14    while let Some(result) = reader.next() {
15        let record = result.map_err(|e| RsomicsError::InvalidInput(format!("read: {e}")))?;
16        let seq = record.seq();
17        let len = seq.len() as u64;
18        lengths.push(len);
19        total_bp += len;
20        gc += seq
21            .iter()
22            .filter(|b| matches!(b, b'G' | b'g' | b'C' | b'c'))
23            .count() as u64;
24    }
25
26    if lengths.is_empty() {
27        return Err(RsomicsError::InvalidInput("no sequences".into()));
28    }
29
30    lengths.sort_unstable();
31    let n = lengths.len();
32    let min = lengths[0];
33    let max = lengths[n - 1];
34    let mean = total_bp as f64 / n as f64;
35
36    let mut cumsum = 0u64;
37    let half = total_bp / 2;
38    let mut n50 = 0u64;
39    for &l in lengths.iter().rev() {
40        cumsum += l;
41        if cumsum >= half {
42            n50 = l;
43            break;
44        }
45    }
46
47    let gc_pct = if total_bp > 0 {
48        gc as f64 / total_bp as f64 * 100.0
49    } else {
50        0.0
51    };
52
53    writeln!(output, "sequences\t{n}").map_err(RsomicsError::Io)?;
54    writeln!(output, "total_bp\t{total_bp}").map_err(RsomicsError::Io)?;
55    writeln!(output, "min_len\t{min}").map_err(RsomicsError::Io)?;
56    writeln!(output, "max_len\t{max}").map_err(RsomicsError::Io)?;
57    writeln!(output, "mean_len\t{mean:.1}").map_err(RsomicsError::Io)?;
58    writeln!(output, "N50\t{n50}").map_err(RsomicsError::Io)?;
59    writeln!(output, "GC%\t{gc_pct:.2}").map_err(RsomicsError::Io)?;
60    Ok(())
61}