fastars 0.1.0

Ultra-fast QC and trimming for short and long reads
Documentation
//! Paired-end read merging module.
//!
//! This module provides functionality to merge overlapping paired-end reads
//! into a single longer read. When R1 and the reverse complement of R2 overlap,
//! they can be merged to produce a higher-quality consensus sequence.
//!
//! ## Algorithm Overview
//!
//! 1. Reverse complement R2
//! 2. Find best overlap between R1 and RC(R2)
//! 3. Score overlap considering quality scores
//! 4. If overlap meets criteria, merge sequences
//! 5. For mismatches, choose base with higher quality
//!
//! ## Example
//!
//! ```ignore
//! use fastars::merge::{MergeConfig, MergeResult, ReadMerger};
//!
//! let config = MergeConfig::default();
//! let merger = ReadMerger::new(config);
//!
//! // r1 and r2 are OwnedRecord
//! match merger.merge(&r1, &r2) {
//!     MergeResult::Merged(merged) => { /* single merged read */ }
//!     MergeResult::Unmerged(r1, r2) => { /* keep as pair */ }
//! }
//! ```

pub mod merger;

pub use merger::{MergeResult, MergeStats, OverlapInfo, ReadMerger};

use serde::{Deserialize, Serialize};

/// Configuration for paired-end read merging.
///
/// Construct it with [`MergeConfig::default`], [`MergeConfig::new`],
/// [`MergeConfig::enabled`] or [`MergeConfig::disabled`], then adjust
/// individual settings with the `with_*` builder methods. All fields are
/// public and can also be set directly; call [`MergeConfig::validate`] to
/// check the values afterwards.
///
/// The type derives `Serialize`/`Deserialize`, so it can be round-tripped
/// through any serde format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MergeConfig {
    /// Enable or disable merging (default: false).
    pub enabled: bool,
    /// Minimum overlap length required for merging (default: 30).
    ///
    /// A value of 0 is rejected by [`MergeConfig::validate`].
    pub min_overlap: usize,
    /// Maximum mismatch ratio allowed in the overlap region (default: 0.1).
    /// For example, 0.1 means at most 10% mismatches.
    ///
    /// Expected to lie in `0.0..=1.0`; the builder method clamps into that
    /// range, while direct field assignment does not.
    pub max_mismatch_ratio: f64,
    /// Whether to correct mismatches using quality scores (default: true).
    /// When true, for positions with mismatches, the base with the
    /// higher quality score is chosen.
    pub correct_mismatches: bool,
    /// Minimum quality difference to confidently choose one base over another
    /// in mismatch correction (default: 3).
    pub quality_diff_threshold: u8,
}

impl Default for MergeConfig {
    fn default() -> Self {
        Self {
            enabled: false,
            min_overlap: 30,
            max_mismatch_ratio: 0.1,
            correct_mismatches: true,
            quality_diff_threshold: 3,
        }
    }
}

impl MergeConfig {
    /// Create a new `MergeConfig` with default settings.
    ///
    /// Equivalent to [`MergeConfig::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Create a config with merging enabled and all other settings at
    /// their defaults.
    pub fn enabled() -> Self {
        Self {
            enabled: true,
            ..Self::default()
        }
    }

    /// Create a config with merging disabled and all other settings at
    /// their defaults.
    pub fn disabled() -> Self {
        Self {
            enabled: false,
            ..Self::default()
        }
    }

    /// Enable or disable merging.
    pub fn with_enabled(mut self, enabled: bool) -> Self {
        self.enabled = enabled;
        self
    }

    /// Set minimum overlap length.
    ///
    /// The value is stored as given; 0 is only rejected later by
    /// [`MergeConfig::validate`].
    pub fn with_min_overlap(mut self, min_overlap: usize) -> Self {
        self.min_overlap = min_overlap;
        self
    }

    /// Set maximum mismatch ratio.
    ///
    /// Finite and infinite values are clamped into `0.0..=1.0`. A NaN input
    /// is not clamped (`f64::clamp` returns NaN for a NaN receiver) and is
    /// stored as-is; [`MergeConfig::validate`] reports it as an error.
    pub fn with_max_mismatch_ratio(mut self, ratio: f64) -> Self {
        self.max_mismatch_ratio = ratio.clamp(0.0, 1.0);
        self
    }

    /// Enable or disable mismatch correction.
    pub fn with_correct_mismatches(mut self, correct: bool) -> Self {
        self.correct_mismatches = correct;
        self
    }

    /// Set quality difference threshold for mismatch correction.
    pub fn with_quality_diff_threshold(mut self, threshold: u8) -> Self {
        self.quality_diff_threshold = threshold;
        self
    }

    /// Validate the configuration.
    ///
    /// # Errors
    ///
    /// Returns a static message when:
    /// - `min_overlap` is 0, or
    /// - `max_mismatch_ratio` is outside `0.0..=1.0` or is NaN.
    pub fn validate(&self) -> Result<(), &'static str> {
        if self.min_overlap == 0 {
            return Err("min_overlap must be greater than 0");
        }
        // Negated range test rather than `< 0.0 || > 1.0`: every comparison
        // with NaN is false, so the two-sided form would accept NaN.
        if !(0.0..=1.0).contains(&self.max_mismatch_ratio) {
            return Err("max_mismatch_ratio must be between 0.0 and 1.0");
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every default value is pinned, including the quality threshold.
    #[test]
    fn test_merge_config_default() {
        let config = MergeConfig::default();
        assert!(!config.enabled);
        assert_eq!(config.min_overlap, 30);
        assert!((config.max_mismatch_ratio - 0.1).abs() < f64::EPSILON);
        assert!(config.correct_mismatches);
        assert_eq!(config.quality_diff_threshold, 3);
    }

    #[test]
    fn test_merge_config_enabled() {
        let config = MergeConfig::enabled();
        assert!(config.enabled);
    }

    #[test]
    fn test_merge_config_disabled() {
        let config = MergeConfig::disabled();
        assert!(!config.enabled);
    }

    /// Chained builder calls each update exactly their own field.
    #[test]
    fn test_merge_config_builder() {
        let config = MergeConfig::new()
            .with_enabled(true)
            .with_min_overlap(20)
            .with_max_mismatch_ratio(0.15)
            .with_correct_mismatches(false)
            .with_quality_diff_threshold(5);

        assert!(config.enabled);
        assert_eq!(config.min_overlap, 20);
        assert!((config.max_mismatch_ratio - 0.15).abs() < f64::EPSILON);
        assert!(!config.correct_mismatches);
        assert_eq!(config.quality_diff_threshold, 5);
    }

    /// The builder clamps out-of-range ratios into `0.0..=1.0`.
    #[test]
    fn test_merge_config_clamp_mismatch_ratio() {
        let config = MergeConfig::new().with_max_mismatch_ratio(1.5);
        assert!((config.max_mismatch_ratio - 1.0).abs() < f64::EPSILON);

        let config = MergeConfig::new().with_max_mismatch_ratio(-0.5);
        assert!((config.max_mismatch_ratio - 0.0).abs() < f64::EPSILON);
    }

    /// `validate` accepts the defaults and rejects each invalid field.
    #[test]
    fn test_merge_config_validate() {
        let config = MergeConfig::default();
        assert!(config.validate().is_ok());

        let bad_config = MergeConfig::new().with_min_overlap(0);
        assert!(bad_config.validate().is_err());

        // The builder clamps, so out-of-range ratios can only arrive via
        // direct field assignment; set the field explicitly to reach the
        // ratio branch of `validate`.
        let too_high = MergeConfig {
            max_mismatch_ratio: 1.5,
            ..MergeConfig::default()
        };
        assert!(too_high.validate().is_err());

        let negative = MergeConfig {
            max_mismatch_ratio: -0.1,
            ..MergeConfig::default()
        };
        assert!(negative.validate().is_err());
    }

    /// A JSON round-trip preserves every field, not just a subset.
    #[test]
    fn test_merge_config_serialize() {
        let config = MergeConfig::enabled();
        let json = serde_json::to_string(&config).unwrap();
        let deserialized: MergeConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(config.enabled, deserialized.enabled);
        assert_eq!(config.min_overlap, deserialized.min_overlap);
        assert!(
            (config.max_mismatch_ratio - deserialized.max_mismatch_ratio).abs() < f64::EPSILON
        );
        assert_eq!(config.correct_mismatches, deserialized.correct_mismatches);
        assert_eq!(
            config.quality_diff_threshold,
            deserialized.quality_diff_threshold
        );
    }
}