rsomics_fasta_sample/
lib.rs1use std::io::{BufWriter, Write};
2use std::path::Path;
3
4use needletail::parse_fastx_file;
5use rand::rngs::StdRng;
6use rand::{Rng, SeedableRng};
7use rsomics_common::{Result, RsomicsError};
8
9pub fn sample(input: &Path, fraction: f64, seed: u64, output: &mut dyn Write) -> Result<u64> {
10 if std::fs::metadata(input).is_ok_and(|m| m.len() == 0) {
11 return Ok(0);
12 }
13 let mut reader = parse_fastx_file(input)
14 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
15 let mut out = BufWriter::with_capacity(256 * 1024, output);
16 let mut rng = StdRng::seed_from_u64(seed);
17 let mut count: u64 = 0;
18
19 while let Some(record) = reader.next() {
20 let rec = record.map_err(|e| RsomicsError::InvalidInput(format!("parsing: {e}")))?;
21 if rng.r#gen::<f64>() < fraction {
22 out.write_all(b">").map_err(RsomicsError::Io)?;
23 out.write_all(rec.id()).map_err(RsomicsError::Io)?;
24 out.write_all(b"\n").map_err(RsomicsError::Io)?;
25 out.write_all(&rec.seq()).map_err(RsomicsError::Io)?;
26 out.write_all(b"\n").map_err(RsomicsError::Io)?;
27 count += 1;
28 }
29 }
30
31 out.flush().map_err(RsomicsError::Io)?;
32 Ok(count)
33}