// fastars 0.1.0
//
// Ultra-fast QC and trimming for short and long reads
// Documentation
//! Length-based filtering.
//!
//! This module provides length-based read filtering,
//! useful for removing reads that are too short or too long.

/// Configuration for length-based read filtering and per-mate truncation.
///
/// `min_length` and `max_length` describe the inclusive window that
/// [`check_length`] accepts. `max_len_r1` and `max_len_r2` are truncation
/// limits and are not consulted by the filter.
///
/// `PartialEq`/`Eq` are derived so that whole configurations can be compared
/// directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LengthConfig {
    /// Minimum read length to keep (inclusive: a read of exactly this length passes).
    pub min_length: usize,
    /// Maximum read length to keep (inclusive); `None` means no upper limit.
    pub max_length: Option<usize>,
    /// Maximum length for R1 (truncation, not filtering); `None` means unset.
    pub max_len_r1: Option<usize>,
    /// Maximum length for R2 (truncation, not filtering); `None` means unset.
    pub max_len_r2: Option<usize>,
}

impl Default for LengthConfig {
    /// Returns the baseline configuration: a minimum length of 15, no upper
    /// length limit, and no truncation limit for either mate.
    fn default() -> Self {
        // Every optional limit starts out unset.
        let (max_length, max_len_r1, max_len_r2) = (None, None, None);
        let min_length = 15;

        Self {
            min_length,
            max_length,
            max_len_r1,
            max_len_r2,
        }
    }
}

impl LengthConfig {
    /// Creates a length config with the default settings.
    ///
    /// Equivalent to [`LengthConfig::default`].
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates the config for short read mode: minimum length 15, no maximum
    /// length, no truncation limits.
    #[must_use]
    pub fn short_read() -> Self {
        Self::min_only(15)
    }

    /// Creates the config for long read mode: minimum length 200, no maximum
    /// length, no truncation limits.
    #[must_use]
    pub fn long_read() -> Self {
        Self::min_only(200)
    }

    /// Sets the minimum read length to keep and returns the updated config.
    #[must_use]
    pub fn with_min_length(mut self, length: usize) -> Self {
        self.min_length = length;
        self
    }

    /// Sets the maximum read length to keep and returns the updated config.
    #[must_use]
    pub fn with_max_length(mut self, length: usize) -> Self {
        self.max_length = Some(length);
        self
    }

    /// Removes the maximum length limit and returns the updated config.
    #[must_use]
    pub fn with_no_max_length(mut self) -> Self {
        self.max_length = None;
        self
    }

    /// Sets the maximum length for R1 (truncation) and returns the updated config.
    #[must_use]
    pub fn with_max_len_r1(mut self, length: usize) -> Self {
        self.max_len_r1 = Some(length);
        self
    }

    /// Sets the maximum length for R2 (truncation) and returns the updated config.
    #[must_use]
    pub fn with_max_len_r2(mut self, length: usize) -> Self {
        self.max_len_r2 = Some(length);
        self
    }

    /// Builds a config that enforces only `min_length`, leaving every other limit unset.
    ///
    /// Shared by the mode presets so their field lists cannot drift apart.
    fn min_only(min_length: usize) -> Self {
        Self {
            min_length,
            max_length: None,
            max_len_r1: None,
            max_len_r2: None,
        }
    }
}

/// Decide whether a read of length `len` survives the length filter.
///
/// A length is accepted when it is at least `config.min_length` and, if
/// `config.max_length` is set, at most that maximum. Both bounds are inclusive.
///
/// # Arguments
/// * `len` - The read length to check
/// * `config` - Length configuration supplying the bounds
///
/// # Returns
/// `true` if the length lies inside the configured window, `false` otherwise.
///
/// # Example
/// ```
/// use fastars::trim::length::{check_length, LengthConfig};
///
/// let config = LengthConfig::new().with_min_length(50);
/// assert!(!check_length(30, &config)); // Too short
/// assert!(check_length(100, &config)); // OK
/// ```
#[inline]
pub fn check_length(len: usize, config: &LengthConfig) -> bool {
    let long_enough = len >= config.min_length;
    // An absent maximum places no upper bound on the read.
    let short_enough = match config.max_length {
        Some(limit) => len <= limit,
        None => true,
    };
    long_enough && short_enough
}

/// Apply the length filter to a sequence.
///
/// Only the number of bytes in `seq` matters; the result is whatever
/// [`check_length`] returns for that length under `config`.
#[inline]
pub fn check_sequence_length(seq: &[u8], config: &LengthConfig) -> bool {
    let read_len = seq.len();
    check_length(read_len, config)
}

/// Truncate a read to the maximum allowed length.
///
/// Both buffers are clamped to at most `max_len` bytes. Buffers that are
/// already short enough are left untouched, and each buffer is clamped on its
/// own, so neither can exceed `max_len` afterwards even if the two lengths
/// disagree on entry.
///
/// # Arguments
/// * `seq` - The sequence to truncate (modified in place)
/// * `qual` - The quality scores to truncate (modified in place)
/// * `max_len` - The maximum length
///
/// # Example
/// ```
/// use fastars::trim::length::truncate_to_max_len;
///
/// let mut seq = b"ACGTACGTACGT".to_vec();
/// let mut qual = b"IIIIIIIIIIII".to_vec();
/// truncate_to_max_len(&mut seq, &mut qual, 8);
/// assert_eq!(seq, b"ACGTACGT");
/// assert_eq!(qual, b"IIIIIIII");
/// ```
#[inline]
pub fn truncate_to_max_len(seq: &mut Vec<u8>, qual: &mut Vec<u8>, max_len: usize) {
    // `Vec::truncate` does nothing when the vector is already no longer than
    // `max_len`, so no explicit length guard is needed. Guarding on `seq` alone
    // would leave an over-long `qual` in place whenever `seq` is short.
    seq.truncate(max_len);
    qual.truncate(max_len);
}

/// Legacy type alias for [`LengthConfig`], kept for compatibility.
pub type LengthTrimmer = LengthConfig;

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `check_length` over a table of `(length, expected verdict)` pairs.
    fn assert_verdicts(config: &LengthConfig, cases: &[(usize, bool)]) {
        for &(len, expected) in cases {
            assert_eq!(
                check_length(len, config),
                expected,
                "unexpected verdict for length {}",
                len
            );
        }
    }

    #[test]
    fn test_length_config_default() {
        let LengthConfig {
            min_length,
            max_length,
            ..
        } = LengthConfig::default();
        assert_eq!(min_length, 15);
        assert_eq!(max_length, None);
    }

    #[test]
    fn test_length_config_short_read() {
        assert_eq!(LengthConfig::short_read().min_length, 15);
    }

    #[test]
    fn test_length_config_long_read() {
        assert_eq!(LengthConfig::long_read().min_length, 200);
    }

    #[test]
    fn test_check_length_min_only() {
        let config = LengthConfig::new().with_min_length(50);
        assert_verdicts(
            &config,
            &[
                (0, false),
                (49, false),
                (50, true),
                (100, true),
                (1000, true),
            ],
        );
    }

    #[test]
    fn test_check_length_max_only() {
        let config = LengthConfig::new().with_min_length(0).with_max_length(100);
        assert_verdicts(
            &config,
            &[
                (0, true),
                (50, true),
                (100, true),
                (101, false),
                (1000, false),
            ],
        );
    }

    #[test]
    fn test_check_length_both() {
        let config = LengthConfig::new().with_min_length(50).with_max_length(100);
        assert_verdicts(
            &config,
            &[
                (0, false),
                (49, false),
                (50, true),
                (75, true),
                (100, true),
                (101, false),
            ],
        );
    }

    #[test]
    fn test_check_length_edge_cases() {
        // Minimum and maximum coincide, so exactly one length is acceptable.
        let config = LengthConfig::new().with_min_length(50).with_max_length(50);
        assert_verdicts(&config, &[(49, false), (50, true), (51, false)]);
    }

    #[test]
    fn test_check_sequence_length() {
        let config = LengthConfig::new().with_min_length(10);
        let too_short: &[u8] = b"ACGT";
        let long_enough: &[u8] = b"ACGTACGTACGT";
        assert!(!check_sequence_length(too_short, &config));
        assert!(check_sequence_length(long_enough, &config));
    }

    #[test]
    fn test_config_builder() {
        let bounded = LengthConfig::new()
            .with_min_length(100)
            .with_max_length(500);
        assert_eq!((bounded.min_length, bounded.max_length), (100, Some(500)));

        let unbounded = bounded.with_no_max_length();
        assert_eq!(unbounded.max_length, None);
    }

    #[test]
    fn test_zero_min_length() {
        let config = LengthConfig::new().with_min_length(0);
        assert_verdicts(&config, &[(0, true), (1, true)]);
    }

    #[test]
    fn test_truncate_to_max_len() {
        let mut seq = b"ACGTACGTACGT".to_vec();
        let mut qual = b"IIIIIIIIIIII".to_vec();

        // Applied in order: a real truncation, a limit above the current
        // length, then a limit equal to the current length. The buffers must
        // hold the same eight bytes after every step.
        let steps = [
            (8usize, "truncation"),
            (20, "no truncation when already shorter"),
            (8, "exact length"),
        ];
        for &(limit, label) in steps.iter() {
            truncate_to_max_len(&mut seq, &mut qual, limit);
            assert_eq!(seq, b"ACGTACGT", "{}", label);
            assert_eq!(qual, b"IIIIIIII", "{}", label);
        }
    }

    #[test]
    fn test_max_len_config() {
        let config = LengthConfig::new()
            .with_max_len_r1(100)
            .with_max_len_r2(150);
        assert_eq!(
            (config.max_len_r1, config.max_len_r2),
            (Some(100), Some(150))
        );
    }
}