use crate::utils::quality::build_qual_table;
/// Tunable parameters consumed by [`trim_mott`].
#[derive(Debug, Clone)]
pub struct TrimmerConfig {
    /// Value from which each base's quality-table entry is subtracted while
    /// the running score is accumulated.
    pub error_threshold: f64,
    /// Reads no longer than this skip the quality scan entirely; for longer
    /// reads it is also the smallest span the scan is allowed to select.
    pub min_length: usize,
    /// Offset handed to the quality-table builder and subtracted from raw
    /// quality bytes when windows are compared by summed score.
    pub qual_offset: u8,
    /// Number of positions added to the start of the selected span.
    pub trim_left: usize,
    /// Number of positions removed from the end of the selected span.
    pub trim_right: usize,
}
impl Default for TrimmerConfig {
    /// Returns the baseline configuration: a 0.05 error threshold, a minimum
    /// length of 30, a quality offset of 33, and no fixed trimming on either
    /// side.
    fn default() -> Self {
        Self {
            error_threshold: 0.05,
            min_length: 30,
            // NOTE(review): 33 is presumably the Phred+33 ASCII encoding — confirm.
            qual_offset: 33,
            trim_left: 0,
            trim_right: 0,
        }
    }
}
/// Span of a read selected by [`trim_mott`], expressed as indices into the
/// quality slice.
///
/// `trim_mott` reports "nothing kept" as `start == 0 && end == 0`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TrimResult {
    /// Index of the first kept position.
    pub start: usize,
    /// Index one past the last kept position.
    pub end: usize,
}
/// Selects the span of a read to keep, based on its quality string.
///
/// Behaviour, in order:
///
/// 1. An empty `qual` yields the empty span `0..0`.
/// 2. A read whose length does not exceed `config.min_length` is not scanned;
///    the whole read is the candidate span.
/// 3. Otherwise a running score is accumulated over the read: every byte is
///    clamped to `36..=127`, looked up in the table produced by
///    `build_qual_table(config.qual_offset)`, and
///    `config.error_threshold - table[byte]` is added to the score. The
///    best-scoring run is remembered; whenever the score drops below zero it
///    is reset and the next run starts at the following position.
/// 4. If the score never rose above zero, the first `config.min_length`
///    positions are selected.
/// 5. If the selected run is shorter than `config.min_length`, it is replaced
///    by the `config.min_length`-wide window with the highest sum of
///    `byte - config.qual_offset` (leftmost window wins ties).
/// 6. Finally `config.trim_left` is added to the start and
///    `config.trim_right` is subtracted from the end (both saturating). If
///    nothing is left, `0..0` is returned. The span is not re-checked against
///    `config.min_length` after this step.
///
/// # Panics
///
/// The table is indexed with values up to 127, so this may panic if the table
/// returned by `build_qual_table` is shorter than 128 entries — TODO confirm
/// against `build_qual_table`, which is not visible here.
pub fn trim_mott(qual: &[u8], config: &TrimmerConfig) -> TrimResult {
    if qual.is_empty() {
        return TrimResult { start: 0, end: 0 };
    }

    let min_len = config.min_length;
    // Built up front, before the length check, exactly as the scan expects it.
    let error_table = build_qual_table(config.qual_offset);

    let (span_start, span_end) = if qual.len() <= min_len {
        // Too short to scan: keep the read as it is.
        (0, qual.len())
    } else {
        let mut running = 0.0;
        let mut best = 0.0;
        let mut best_span = (0, qual.len());
        let mut run_start = 0;

        for (pos, &raw) in qual.iter().enumerate() {
            // NOTE(review): the lower bound 36 is hard-coded and does not follow
            // `config.qual_offset`; presumably it is offset 33 plus a floor of
            // Q3 — confirm.
            let table_idx = usize::from(raw.clamp(36, 127));
            running += config.error_threshold - error_table[table_idx];

            if running > best {
                best = running;
                best_span = (run_start, pos + 1);
            }
            if running < 0.0 {
                // A negative score can never help a later run; restart after it.
                running = 0.0;
                run_start = pos + 1;
            }
        }

        // `best` only moves away from its initial 0.0 when some run scored
        // positively, so exact comparison is safe here.
        let (mut lo, mut hi) = if best == 0.0 {
            (0, min_len)
        } else {
            best_span
        };

        if hi - lo < min_len && qual.len() >= min_len {
            // The selected run is too short: fall back to the best fixed-width
            // window, ranked by summed offset-adjusted quality.
            let offset = i32::from(config.qual_offset);
            let mut window_sum: i32 = qual[..min_len]
                .iter()
                .map(|&q| i32::from(q) - offset)
                .sum();
            let mut best_sum = window_sum;
            let mut best_at = 0;

            // Slide the window one position at a time: `entering` joins on the
            // right while `leaving` drops off the left.
            for (shift, (&entering, &leaving)) in qual[min_len..].iter().zip(qual).enumerate() {
                window_sum += i32::from(entering) - offset;
                window_sum -= i32::from(leaving) - offset;
                if window_sum > best_sum {
                    best_sum = window_sum;
                    best_at = shift + 1;
                }
            }

            lo = best_at;
            hi = best_at + min_len;
        }

        (lo, hi)
    };

    // Fixed trims are applied relative to the selected span.
    let start = span_start.saturating_add(config.trim_left);
    let end = span_end.saturating_sub(config.trim_right);

    if start < end {
        TrimResult { start, end }
    } else {
        TrimResult { start: 0, end: 0 }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a configuration with the error threshold (0.05) and quality
    /// offset (33) shared by every test, varying only the length and
    /// fixed-trim settings.
    fn config_with(min_length: usize, trim_left: usize, trim_right: usize) -> TrimmerConfig {
        TrimmerConfig {
            error_threshold: 0.05,
            min_length,
            qual_offset: 33,
            trim_left,
            trim_right,
        }
    }

    /// A read with a low-quality stretch in the middle still yields a
    /// non-empty span of at least the minimum length.
    #[test]
    fn test_trim_mott_basic() {
        let qual = b"IIIIIIIIII!!!!!IIIIIIIIII";
        let config = config_with(5, 0, 0);

        let TrimResult { start, end } = trim_mott(qual, &config);

        assert!(start < end);
        assert!(end - start >= config.min_length);
    }

    /// Empty input produces the empty span.
    #[test]
    fn test_trim_mott_empty() {
        let config = TrimmerConfig::default();

        let TrimResult { start, end } = trim_mott(b"", &config);

        assert_eq!(start, 0);
        assert_eq!(end, 0);
    }

    /// Fixed left/right trims are respected on a uniformly high-quality read.
    #[test]
    fn test_trim_mott_with_fixed_trim() {
        let qual = b"IIIIIIIIIIIIIIIIIIII";
        let config = config_with(5, 2, 3);

        let TrimResult { start, end } = trim_mott(qual, &config);

        assert!(start >= 2);
        assert!(end <= qual.len() - 3);
    }

    /// A read shorter than the minimum length is returned untouched, even
    /// when every base is low quality.
    #[test]
    fn test_trim_mott_all_low_quality() {
        let qual = b"!!!!!!!!!!";
        let config = config_with(20, 0, 0);

        let TrimResult { start, end } = trim_mott(qual, &config);

        assert_eq!(start, 0);
        assert_eq!(end, 10);
    }
}