// fastars 0.1.0
//
// Ultra-fast QC and trimming for short and long reads.
// Documentation
//! Automatic read mode detection based on read length sampling.

use anyhow::Result;

use crate::cli::ReadMode;
use crate::io::{DirectFastqReader, OwnedRecord};
use std::path::Path;

/// Configuration for automatic read mode detection.
///
/// Controls how many reads [`detect_mode`] samples and the median length
/// above which the input is classified as long-read data.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ModeDetectionConfig {
    /// Maximum number of reads sampled from the start of the input.
    pub sample_size: usize,
    /// Median read length threshold; a median strictly greater than this
    /// value selects long-read mode.
    pub threshold_bp: usize,
}

impl Default for ModeDetectionConfig {
    fn default() -> Self {
        Self {
            sample_size: 100,
            threshold_bp: 500,
        }
    }
}

/// Classify the input as short- or long-read data from a sample of its first reads.
///
/// Reads up to `config.sample_size` records from `input_path`, takes the
/// sequence length of each, and compares the median of those lengths with
/// `config.threshold_bp`. A median strictly greater than the threshold yields
/// [`ReadMode::Long`]; anything else yields [`ReadMode::Short`].
///
/// Returns the chosen mode together with the median length. If no records
/// could be sampled, the result is `(ReadMode::Short, 0)`.
///
/// For an even number of samples the upper of the two middle values is used
/// as the median.
///
/// # Errors
///
/// Propagates any error from opening the reader or from reading a record.
pub fn detect_mode<P: AsRef<Path>>(
    input_path: P,
    config: &ModeDetectionConfig,
) -> Result<(ReadMode, usize)> {
    let limit = config.sample_size;
    let mut reader = DirectFastqReader::new(input_path.as_ref())?;
    let mut sampled: Vec<usize> = Vec::with_capacity(limit);
    // One record buffer is reused for every read.
    let mut scratch = OwnedRecord::with_capacity(256);

    for _ in 0..limit {
        let got_record = reader.read_into(&mut scratch)?;
        if !got_record {
            // Input ended before the sample was full.
            break;
        }
        sampled.push(scratch.seq.len());
    }

    sampled.sort_unstable();

    // The checked lookup doubles as the empty-sample guard: with no samples
    // the middle index does not exist and the short-read default is returned.
    match sampled.get(sampled.len() / 2).copied() {
        None => Ok((ReadMode::Short, 0)),
        Some(median) if median > config.threshold_bp => Ok((ReadMode::Long, median)),
        Some(median) => Ok((ReadMode::Short, median)),
    }
}