pub mod corrector;
pub use corrector::{CorrectionBuffers, OverlapCorrector, OverlapRegion};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrectionConfig {
pub enabled: bool,
pub min_overlap: usize,
pub diff_limit: usize,
pub diff_percent_limit: f64,
pub allow_gap_overlap: bool,
pub overlapped_out: Option<std::path::PathBuf>,
}
impl Default for CorrectionConfig {
fn default() -> Self {
Self {
enabled: false,
min_overlap: 30,
diff_limit: 5,
diff_percent_limit: 5.0,
allow_gap_overlap: false,
overlapped_out: None,
}
}
}
impl CorrectionConfig {
pub fn new() -> Self {
Self::default()
}
pub fn enabled(mut self) -> Self {
self.enabled = true;
self
}
pub fn disabled(mut self) -> Self {
self.enabled = false;
self
}
pub fn with_min_overlap(mut self, min_overlap: usize) -> Self {
self.min_overlap = min_overlap;
self
}
pub fn with_diff_limit(mut self, diff_limit: usize) -> Self {
self.diff_limit = diff_limit;
self
}
pub fn with_diff_percent_limit(mut self, diff_percent_limit: f64) -> Self {
self.diff_percent_limit = diff_percent_limit.clamp(0.0, 100.0);
self
}
pub fn with_allow_gap_overlap(mut self, allow: bool) -> Self {
self.allow_gap_overlap = allow;
self
}
pub fn with_overlapped_out(mut self, path: Option<std::path::PathBuf>) -> Self {
self.overlapped_out = path;
self
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CorrectionStats {
pub pairs_processed: u64,
pub pairs_with_overlap: u64,
pub pairs_corrected: u64,
pub bases_corrected: u64,
pub bases_corrected_r1: u64,
pub bases_corrected_r2: u64,
}
impl CorrectionStats {
pub fn new() -> Self {
Self::default()
}
pub fn merge(&mut self, other: &CorrectionStats) {
self.pairs_processed += other.pairs_processed;
self.pairs_with_overlap += other.pairs_with_overlap;
self.pairs_corrected += other.pairs_corrected;
self.bases_corrected += other.bases_corrected;
self.bases_corrected_r1 += other.bases_corrected_r1;
self.bases_corrected_r2 += other.bases_corrected_r2;
}
pub fn overlap_rate(&self) -> f64 {
if self.pairs_processed == 0 {
0.0
} else {
(self.pairs_with_overlap as f64 / self.pairs_processed as f64) * 100.0
}
}
pub fn correction_rate(&self) -> f64 {
if self.pairs_processed == 0 {
0.0
} else {
(self.pairs_corrected as f64 / self.pairs_processed as f64) * 100.0
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_correction_config_default() {
let config = CorrectionConfig::default();
assert!(!config.enabled);
assert_eq!(config.min_overlap, 30);
assert_eq!(config.diff_limit, 5);
assert!((config.diff_percent_limit - 5.0).abs() < f64::EPSILON);
}
#[test]
fn test_correction_config_builder() {
let config = CorrectionConfig::new()
.enabled()
.with_min_overlap(20)
.with_diff_limit(3)
.with_diff_percent_limit(10.0)
.with_allow_gap_overlap(true);
assert!(config.enabled);
assert_eq!(config.min_overlap, 20);
assert_eq!(config.diff_limit, 3);
assert!((config.diff_percent_limit - 10.0).abs() < f64::EPSILON);
assert!(config.allow_gap_overlap);
}
#[test]
fn test_correction_config_diff_percent_clamp() {
let config = CorrectionConfig::new().with_diff_percent_limit(150.0);
assert!((config.diff_percent_limit - 100.0).abs() < f64::EPSILON);
let config = CorrectionConfig::new().with_diff_percent_limit(-10.0);
assert!((config.diff_percent_limit - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_correction_stats_default() {
let stats = CorrectionStats::default();
assert_eq!(stats.pairs_processed, 0);
assert_eq!(stats.pairs_corrected, 0);
assert_eq!(stats.bases_corrected, 0);
}
#[test]
fn test_correction_stats_merge() {
let mut stats1 = CorrectionStats {
pairs_processed: 100,
pairs_with_overlap: 80,
pairs_corrected: 10,
bases_corrected: 50,
bases_corrected_r1: 25,
bases_corrected_r2: 25,
};
let stats2 = CorrectionStats {
pairs_processed: 50,
pairs_with_overlap: 40,
pairs_corrected: 5,
bases_corrected: 25,
bases_corrected_r1: 12,
bases_corrected_r2: 13,
};
stats1.merge(&stats2);
assert_eq!(stats1.pairs_processed, 150);
assert_eq!(stats1.pairs_with_overlap, 120);
assert_eq!(stats1.pairs_corrected, 15);
assert_eq!(stats1.bases_corrected, 75);
assert_eq!(stats1.bases_corrected_r1, 37);
assert_eq!(stats1.bases_corrected_r2, 38);
}
#[test]
fn test_correction_stats_rates() {
let stats = CorrectionStats {
pairs_processed: 100,
pairs_with_overlap: 80,
pairs_corrected: 10,
bases_corrected: 50,
bases_corrected_r1: 25,
bases_corrected_r2: 25,
};
assert!((stats.overlap_rate() - 80.0).abs() < 0.001);
assert!((stats.correction_rate() - 10.0).abs() < 0.001);
}
#[test]
fn test_correction_stats_rates_empty() {
let stats = CorrectionStats::default();
assert_eq!(stats.overlap_rate(), 0.0);
assert_eq!(stats.correction_rate(), 0.0);
}
}