Skip to main content

rsomics_fasta_sample/
lib.rs

1use std::io::{BufWriter, Write};
2use std::path::Path;
3
4use needletail::parse_fastx_file;
5use rand::rngs::StdRng;
6use rand::{Rng, SeedableRng};
7use rsomics_common::{Result, RsomicsError};
8
9pub fn sample(input: &Path, fraction: f64, seed: u64, output: &mut dyn Write) -> Result<u64> {
10    if std::fs::metadata(input).is_ok_and(|m| m.len() == 0) {
11        return Ok(0);
12    }
13    let mut reader = parse_fastx_file(input)
14        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
15    let mut out = BufWriter::with_capacity(256 * 1024, output);
16    let mut rng = StdRng::seed_from_u64(seed);
17    let mut count: u64 = 0;
18
19    while let Some(record) = reader.next() {
20        let rec = record.map_err(|e| RsomicsError::InvalidInput(format!("parsing: {e}")))?;
21        if rng.r#gen::<f64>() < fraction {
22            out.write_all(b">").map_err(RsomicsError::Io)?;
23            out.write_all(rec.id()).map_err(RsomicsError::Io)?;
24            out.write_all(b"\n").map_err(RsomicsError::Io)?;
25            out.write_all(&rec.seq()).map_err(RsomicsError::Io)?;
26            out.write_all(b"\n").map_err(RsomicsError::Io)?;
27            count += 1;
28        }
29    }
30
31    out.flush().map_err(RsomicsError::Io)?;
32    Ok(count)
33}