// rsomics-fastq-utils 0.1.0
//
// FASTQ utility toolkit: lightweight subcommands for counting, filtering, converting, and
// inspecting FASTQ files.
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;

use rsomics_common::{Result, RsomicsError};

/// Shuffles the records of a FASTQ file and writes them to `output`.
///
/// The whole file is read into memory as four-line records, the records are
/// permuted with a Fisher–Yates shuffle driven by a generator seeded with
/// `seed`, and every record is then written back out one line at a time.
/// Empty lines found where a record header is expected are skipped.
///
/// Returns the number of records written.
///
/// # Errors
///
/// * `RsomicsError::InvalidInput` if `input` cannot be opened, or if the file
///   ends before all four lines of a record have been read.
/// * `RsomicsError::Io` if reading a line or writing to `output` fails.
pub fn shuffle_fastq(input: &Path, output: &mut dyn Write, seed: u64) -> Result<u64> {
    let source = match File::open(input) {
        Ok(handle) => handle,
        Err(e) => {
            let msg = format!("{}: {e}", input.display());
            return Err(RsomicsError::InvalidInput(msg));
        }
    };
    let mut line_iter = BufReader::new(source).lines();

    // Each record is stored as [header, sequence, separator, quality].
    let mut records: Vec<[String; 4]> = Vec::new();
    while let Some(first) = line_iter.next() {
        let header = first.map_err(RsomicsError::Io)?;
        if !header.is_empty() {
            let sequence = next_line(&mut line_iter)?;
            let separator = next_line(&mut line_iter)?;
            let quality = next_line(&mut line_iter)?;
            records.push([header, sequence, separator, quality]);
        }
    }

    // Fisher–Yates: walk the unshuffled prefix from the back, swapping its
    // last element with a randomly chosen element of that prefix.
    let mut rng = SimpleRng(seed);
    let mut remaining = records.len();
    while remaining > 1 {
        let pick = rng.next_usize(remaining);
        remaining -= 1;
        records.swap(remaining, pick);
    }

    let mut sink = BufWriter::with_capacity(256 * 1024, output);
    for line in records.iter().flatten() {
        writeln!(sink, "{line}").map_err(RsomicsError::Io)?;
    }
    // Flush explicitly so write errors surface instead of being lost on drop.
    sink.flush().map_err(RsomicsError::Io)?;

    let total = records.len() as u64;
    Ok(total)
}

/// Pulls the next line from `lines`.
///
/// # Errors
///
/// * `RsomicsError::InvalidInput` ("truncated FASTQ") if the iterator is
///   already exhausted.
/// * `RsomicsError::Io` if the underlying read fails.
fn next_line<B: BufRead>(lines: &mut std::io::Lines<B>) -> Result<String> {
    match lines.next() {
        Some(read) => read.map_err(RsomicsError::Io),
        None => Err(RsomicsError::InvalidInput("truncated FASTQ".into())),
    }
}

/// Minimal deterministic pseudo-random generator (64-bit xorshift with the
/// 13/7/17 shift triple). The wrapped value is the current state; construct
/// it directly from a seed, e.g. `SimpleRng(seed)`.
///
/// Not suitable for cryptographic use.
struct SimpleRng(u64);

impl SimpleRng {
    /// State substituted when the generator is found in the all-zero state.
    ///
    /// Zero is a fixed point of xorshift (`0 ^ (0 << k) == 0`), so a zero
    /// seed would otherwise make every output zero.
    const ZERO_STATE_FALLBACK: u64 = 0x9E37_79B9_7F4A_7C15;

    /// Advances the generator and returns the new 64-bit state.
    ///
    /// Sequences for non-zero seeds are unaffected by the zero-state guard.
    fn next_u64(&mut self) -> u64 {
        if self.0 == 0 {
            self.0 = Self::ZERO_STATE_FALLBACK;
        }
        self.0 ^= self.0 << 13;
        self.0 ^= self.0 >> 7;
        self.0 ^= self.0 << 17;
        self.0
    }

    /// Returns a pseudo-random index in `0..bound`, obtained by reducing the
    /// next 64-bit output modulo `bound`.
    ///
    /// # Panics
    ///
    /// Panics if `bound` is zero (remainder by zero).
    // The remainder is strictly less than `bound`, which is itself a `usize`,
    // so narrowing back to `usize` cannot truncate.
    #[allow(clippy::cast_possible_truncation)]
    fn next_usize(&mut self, bound: usize) -> usize {
        (self.next_u64() % (bound as u64)) as usize
    }
}