// fastars 0.1.0
//
// Ultra-fast QC and trimming for short and long reads
// Documentation
//! Fixed-position global trimming.
//!
//! This module provides trimming of fixed numbers of bases from the front
//! and tail of reads, regardless of quality scores. This is useful for:
//!
//! - Removing known low-quality positions (e.g., first few bases in some protocols)
//! - Trimming adapter contamination at known positions
//! - Creating uniform read lengths for downstream analysis

use super::TrimResult;

/// Configuration for global (fixed-position) trimming.
///
/// Holds the number of bases to remove from the front and from the tail of
/// each read, with independent settings for R1 and R2 of paired-end data.
/// The `Default` value has every count at zero, i.e. no trimming.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GlobalTrimConfig {
    /// Number of bases to trim from the front of R1.
    pub trim_front1: usize,
    /// Number of bases to trim from the tail of R1.
    pub trim_tail1: usize,
    /// Number of bases to trim from the front of R2.
    pub trim_front2: usize,
    /// Number of bases to trim from the tail of R2.
    pub trim_tail2: usize,
}

impl GlobalTrimConfig {
    /// Create a new global trim config with no trimming.
    ///
    /// Equivalent to `GlobalTrimConfig::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Set `trim_front1` (R1 front trimming).
    ///
    /// Consumes the config and returns the updated value; the result has to
    /// be bound or chained, otherwise the setting is lost.
    #[must_use = "builder methods return the updated config instead of modifying it in place"]
    pub fn with_trim_front1(mut self, bases: usize) -> Self {
        self.trim_front1 = bases;
        self
    }

    /// Set `trim_tail1` (R1 tail trimming).
    ///
    /// Consumes the config and returns the updated value; the result has to
    /// be bound or chained, otherwise the setting is lost.
    #[must_use = "builder methods return the updated config instead of modifying it in place"]
    pub fn with_trim_tail1(mut self, bases: usize) -> Self {
        self.trim_tail1 = bases;
        self
    }

    /// Set `trim_front2` (R2 front trimming).
    ///
    /// Consumes the config and returns the updated value; the result has to
    /// be bound or chained, otherwise the setting is lost.
    #[must_use = "builder methods return the updated config instead of modifying it in place"]
    pub fn with_trim_front2(mut self, bases: usize) -> Self {
        self.trim_front2 = bases;
        self
    }

    /// Set `trim_tail2` (R2 tail trimming).
    ///
    /// Consumes the config and returns the updated value; the result has to
    /// be bound or chained, otherwise the setting is lost.
    #[must_use = "builder methods return the updated config instead of modifying it in place"]
    pub fn with_trim_tail2(mut self, bases: usize) -> Self {
        self.trim_tail2 = bases;
        self
    }

    /// Get the configuration for read 1.
    ///
    /// Returns `(trim_front1, trim_tail1)`.
    #[must_use]
    pub fn for_read1(&self) -> (usize, usize) {
        (self.trim_front1, self.trim_tail1)
    }

    /// Get the configuration for read 2.
    ///
    /// Returns `(trim_front2, trim_tail2)`.
    #[must_use]
    pub fn for_read2(&self) -> (usize, usize) {
        (self.trim_front2, self.trim_tail2)
    }

    /// Check if any trimming is enabled.
    ///
    /// Returns `true` when at least one of the four trim counts (for either
    /// read) is non-zero.
    #[must_use]
    pub fn is_enabled(&self) -> bool {
        self.trim_front1 > 0
            || self.trim_tail1 > 0
            || self.trim_front2 > 0
            || self.trim_tail2 > 0
    }
}

/// Apply fixed-position (global) trimming to a single read.
///
/// Removes a configured number of bases from the front and/or tail of
/// `seq` without looking at quality scores. Whether the R1 or the R2 trim
/// amounts from `config` are used is selected by `is_read2`.
///
/// # Arguments
/// * `seq` - The read sequence
/// * `config` - Global trimming configuration
/// * `is_read2` - `true` to use the R2 settings, `false` to use the R1 settings
///
/// # Returns
/// A `TrimResult` describing the range of `seq` that is kept:
///
/// - `TrimResult::full(seq.len())` when `config` has no trimming enabled
///   for either read;
/// - `TrimResult::empty()` when `seq` is empty, or when the front and tail
///   trims together leave no bases;
/// - otherwise `TrimResult::new(start, end)`, where `start` is the front
///   trim (capped at the read length) and `end` is the read length minus
///   the tail trim (saturating at zero).
///
/// # Example
/// ```
/// use fastars::trim::global::{trim_global, GlobalTrimConfig};
///
/// let seq = b"ACGTACGTACGTACGT";
/// let config = GlobalTrimConfig::new()
///     .with_trim_front1(3)
///     .with_trim_tail1(2);
/// let result = trim_global(seq, &config, false);
/// // Kept: "TACGTACGTAC" (removed 3 from front, 2 from tail)
/// assert_eq!(result.start, 3);
/// assert_eq!(result.end, 14);
/// ```
pub fn trim_global(seq: &[u8], config: &GlobalTrimConfig, is_read2: bool) -> TrimResult {
    let read_len = seq.len();

    // Fast path: nothing configured for either read, keep the whole read.
    if !config.is_enabled() {
        return TrimResult::full(read_len);
    }

    // An empty read has nothing left to keep.
    if seq.is_empty() {
        return TrimResult::empty();
    }

    // Select the (front, tail) pair that belongs to this read.
    let (front, tail) = if is_read2 {
        config.for_read2()
    } else {
        config.for_read1()
    };

    // Clamp both boundaries into 0..=read_len so oversized trims cannot
    // underflow or run past the end of the read.
    let keep_from = read_len.min(front);
    let keep_to = read_len.saturating_sub(tail);

    // The boundaries meet or cross when the trims consume the whole read.
    if keep_from < keep_to {
        TrimResult::new(keep_from, keep_to)
    } else {
        TrimResult::empty()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // ---- GlobalTrimConfig ----

    #[test]
    fn test_global_trim_config_default() {
        let cfg = GlobalTrimConfig::default();
        assert_eq!(
            (cfg.trim_front1, cfg.trim_tail1, cfg.trim_front2, cfg.trim_tail2),
            (0, 0, 0, 0)
        );
        assert!(!cfg.is_enabled());
    }

    #[test]
    fn test_global_trim_config_builder() {
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(5)
            .with_trim_tail1(3)
            .with_trim_front2(7)
            .with_trim_tail2(4);

        assert_eq!(
            (cfg.trim_front1, cfg.trim_tail1, cfg.trim_front2, cfg.trim_tail2),
            (5, 3, 7, 4)
        );
        assert!(cfg.is_enabled());
    }

    #[test]
    fn test_global_trim_config_for_read1() {
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(5)
            .with_trim_tail1(3);
        assert_eq!(cfg.for_read1(), (5, 3));
    }

    #[test]
    fn test_global_trim_config_for_read2() {
        let cfg = GlobalTrimConfig::new()
            .with_trim_front2(7)
            .with_trim_tail2(4);
        assert_eq!(cfg.for_read2(), (7, 4));
    }

    #[test]
    fn test_is_enabled() {
        // Nothing set: disabled.
        assert!(!GlobalTrimConfig::new().is_enabled());
        // A single non-zero R1 setting is enough.
        assert!(GlobalTrimConfig::new().with_trim_front1(1).is_enabled());
        // So is a single non-zero R2 setting.
        assert!(GlobalTrimConfig::new().with_trim_tail2(1).is_enabled());
    }

    // ---- trim_global: regular cases (16-base read) ----

    #[test]
    fn test_trim_global_no_trimming() {
        let read = b"ACGTACGTACGTACGT";
        let trimmed = trim_global(read, &GlobalTrimConfig::new(), false);
        assert_eq!((trimmed.start, trimmed.end), (0, 16));
        assert_eq!(trimmed.len(), 16);
    }

    #[test]
    fn test_trim_global_front_only() {
        let read = b"ACGTACGTACGTACGT";
        let cfg = GlobalTrimConfig::new().with_trim_front1(5);
        let trimmed = trim_global(read, &cfg, false);
        assert_eq!((trimmed.start, trimmed.end), (5, 16));
        assert_eq!(trimmed.len(), 11);
        assert_eq!(trimmed.apply(read), b"CGTACGTACGT");
    }

    #[test]
    fn test_trim_global_tail_only() {
        let read = b"ACGTACGTACGTACGT";
        let cfg = GlobalTrimConfig::new().with_trim_tail1(4);
        let trimmed = trim_global(read, &cfg, false);
        assert_eq!((trimmed.start, trimmed.end), (0, 12));
        assert_eq!(trimmed.len(), 12);
        assert_eq!(trimmed.apply(read), b"ACGTACGTACGT");
    }

    #[test]
    fn test_trim_global_both_ends() {
        let read = b"ACGTACGTACGTACGT";
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(3)
            .with_trim_tail1(2);
        let trimmed = trim_global(read, &cfg, false);
        assert_eq!((trimmed.start, trimmed.end), (3, 14));
        assert_eq!(trimmed.len(), 11);
        assert_eq!(trimmed.apply(read), b"TACGTACGTAC");
    }

    #[test]
    fn test_trim_global_read1_vs_read2() {
        let read = b"ACGTACGTACGTACGT";
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(2)
            .with_trim_tail1(1)
            .with_trim_front2(4)
            .with_trim_tail2(3);

        // `is_read2 == false` picks the R1 pair (2, 1).
        let r1 = trim_global(read, &cfg, false);
        assert_eq!((r1.start, r1.end), (2, 15));
        assert_eq!(r1.len(), 13);

        // `is_read2 == true` picks the R2 pair (4, 3).
        let r2 = trim_global(read, &cfg, true);
        assert_eq!((r2.start, r2.end), (4, 13));
        assert_eq!(r2.len(), 9);
    }

    #[test]
    fn test_trim_global_one_base_remaining() {
        let read = b"ACGTACGT";
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(3)
            .with_trim_tail1(4);
        let trimmed = trim_global(read, &cfg, false);
        // Only index 3 survives.
        assert_eq!(trimmed.len(), 1);
        assert_eq!(trimmed.apply(read), b"T");
    }

    // ---- trim_global: cases that leave nothing ----

    #[test]
    fn test_trim_global_empty_sequence() {
        let read = b"";
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(5)
            .with_trim_tail1(3);
        assert!(trim_global(read, &cfg, false).is_empty());
    }

    #[test]
    fn test_trim_global_trim_more_than_length() {
        // 5 + 5 > 8: front and tail trims overlap.
        let read = b"ACGTACGT";
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(5)
            .with_trim_tail1(5);
        assert!(trim_global(read, &cfg, false).is_empty());
    }

    #[test]
    fn test_trim_global_trim_exact_length() {
        // 4 + 4 == 8: the trims meet exactly in the middle.
        let read = b"ACGTACGT";
        let cfg = GlobalTrimConfig::new()
            .with_trim_front1(4)
            .with_trim_tail1(4);
        assert!(trim_global(read, &cfg, false).is_empty());
    }

    #[test]
    fn test_trim_global_front_exceeds_length() {
        let read = b"ACGT";
        let cfg = GlobalTrimConfig::new().with_trim_front1(10);
        assert!(trim_global(read, &cfg, false).is_empty());
    }

    #[test]
    fn test_trim_global_tail_exceeds_length() {
        let read = b"ACGT";
        let cfg = GlobalTrimConfig::new().with_trim_tail1(10);
        // The end position saturates to 0, so start (0) >= end (0) and
        // nothing is kept.
        assert!(trim_global(read, &cfg, false).is_empty());
    }
}