use super::TrimResult;
#[derive(Debug, Clone)]
pub struct QualityTrimConfig {
pub window_size: usize,
pub threshold: u8,
pub cut_front: bool,
pub cut_tail: bool,
pub cut_right: bool,
pub right_window_size: Option<usize>,
pub right_threshold: Option<u8>,
pub tail_window_size: Option<usize>,
pub tail_threshold: Option<u8>,
pub front_window_size: Option<usize>,
pub front_threshold: Option<u8>,
}
impl Default for QualityTrimConfig {
fn default() -> Self {
Self {
window_size: 4,
threshold: 15,
cut_front: false,
cut_tail: true,
cut_right: false,
right_window_size: None,
right_threshold: None,
tail_window_size: None,
tail_threshold: None,
front_window_size: None,
front_threshold: None,
}
}
}
impl QualityTrimConfig {
pub fn new() -> Self {
Self::default()
}
pub fn short_read() -> Self {
Self {
window_size: 4,
threshold: 15,
cut_front: false,
cut_tail: true,
cut_right: false,
right_window_size: None,
right_threshold: None,
tail_window_size: None,
tail_threshold: None,
front_window_size: None,
front_threshold: None,
}
}
pub fn long_read() -> Self {
Self {
window_size: 20,
threshold: 7,
cut_front: false,
cut_tail: true,
cut_right: false,
right_window_size: None,
right_threshold: None,
tail_window_size: None,
tail_threshold: None,
front_window_size: None,
front_threshold: None,
}
}
pub fn with_window_size(mut self, size: usize) -> Self {
self.window_size = size;
self
}
pub fn with_threshold(mut self, threshold: u8) -> Self {
self.threshold = threshold;
self
}
pub fn with_cut_front(mut self, enabled: bool) -> Self {
self.cut_front = enabled;
self
}
pub fn with_cut_tail(mut self, enabled: bool) -> Self {
self.cut_tail = enabled;
self
}
pub fn with_cut_right(mut self, enabled: bool) -> Self {
self.cut_right = enabled;
self
}
pub fn with_right_window_size(mut self, size: usize) -> Self {
self.right_window_size = Some(size);
self
}
pub fn with_right_threshold(mut self, threshold: u8) -> Self {
self.right_threshold = Some(threshold);
self
}
pub fn with_tail_window_size(mut self, size: usize) -> Self {
self.tail_window_size = Some(size);
self
}
pub fn with_tail_threshold(mut self, threshold: u8) -> Self {
self.tail_threshold = Some(threshold);
self
}
pub fn with_front_window_size(mut self, size: usize) -> Self {
self.front_window_size = Some(size);
self
}
pub fn with_front_threshold(mut self, threshold: u8) -> Self {
self.front_threshold = Some(threshold);
self
}
}
pub fn sliding_window_trim(qual: &[u8], config: &QualityTrimConfig) -> TrimResult {
let len = qual.len();
if len == 0 {
return TrimResult::empty();
}
if config.window_size == 0 || config.window_size > len {
return TrimResult::full(len);
}
let mut start = 0;
let mut end = len;
if config.cut_tail {
let window_size = config.tail_window_size.unwrap_or(config.window_size);
let threshold = config.tail_threshold.unwrap_or(config.threshold);
end = trim_tail(qual, window_size, threshold);
}
if config.cut_front && end > 0 {
let window_size = config.front_window_size.unwrap_or(config.window_size);
let threshold = config.front_threshold.unwrap_or(config.threshold);
start = trim_front(qual, end, window_size, threshold);
}
if config.cut_right && end > 0 {
let window_size = config.right_window_size.unwrap_or(config.window_size);
let threshold = config.right_threshold.unwrap_or(config.threshold);
let right_end = trim_right(qual, window_size, threshold);
end = end.min(right_end);
}
if start >= end {
return TrimResult::empty();
}
TrimResult::new(start, end)
}
fn trim_tail(qual: &[u8], window_size: usize, threshold: u8) -> usize {
let len = qual.len();
if len < window_size {
return len;
}
let mut window_sum: u32 = qual[len - window_size..]
.iter()
.map(|&q| phred_score(q) as u32)
.sum();
let threshold_sum = (threshold as u32) * (window_size as u32);
if window_sum >= threshold_sum {
return len;
}
let mut end = len - window_size;
for i in (0..len - window_size).rev() {
window_sum = window_sum + phred_score(qual[i]) as u32
- phred_score(qual[i + window_size]) as u32;
if window_sum >= threshold_sum {
return i + 1;
}
end = i;
}
end
}
fn trim_front(qual: &[u8], end: usize, window_size: usize, threshold: u8) -> usize {
if end < window_size {
return 0;
}
let mut window_sum: u32 = qual[..window_size]
.iter()
.map(|&q| phred_score(q) as u32)
.sum();
let threshold_sum = (threshold as u32) * (window_size as u32);
if window_sum >= threshold_sum {
return 0;
}
for i in 1..=end - window_size {
window_sum = window_sum + phred_score(qual[i + window_size - 1]) as u32
- phred_score(qual[i - 1]) as u32;
if window_sum >= threshold_sum {
return i;
}
}
end
}
fn trim_right(qual: &[u8], window_size: usize, threshold: u8) -> usize {
let len = qual.len();
if len < window_size {
return len;
}
let mut window_sum: u32 = qual[..window_size]
.iter()
.map(|&q| phred_score(q) as u32)
.sum();
let threshold_sum = (threshold as u32) * (window_size as u32);
if window_sum < threshold_sum {
return 0;
}
for i in 1..=len - window_size {
window_sum = window_sum + phred_score(qual[i + window_size - 1]) as u32
- phred_score(qual[i - 1]) as u32;
if window_sum < threshold_sum {
return i;
}
}
len
}
#[inline]
fn phred_score(ascii: u8) -> u8 {
ascii.saturating_sub(33)
}
#[inline]
pub fn mean_quality(qual: &[u8]) -> f64 {
if qual.is_empty() {
return 0.0;
}
let sum: u32 = qual.iter().map(|&q| phred_score(q) as u32).sum();
sum as f64 / qual.len() as f64
}
pub type QualityTrimmer = QualityTrimConfig;
#[derive(Debug, Clone)]
pub struct MaskConfig {
pub window_size: usize,
pub threshold: u8,
}
impl MaskConfig {
pub fn new(window_size: usize, threshold: u8) -> Self {
Self {
window_size,
threshold,
}
}
}
#[derive(Debug, Clone)]
pub struct BreakConfig {
pub window_size: usize,
pub threshold: u8,
}
impl BreakConfig {
pub fn new(window_size: usize, threshold: u8) -> Self {
Self {
window_size,
threshold,
}
}
}
pub fn mask_low_quality(seq: &mut [u8], qual: &[u8], config: &MaskConfig) -> usize {
let len = seq.len();
if len == 0 || len != qual.len() {
return 0;
}
if config.window_size == 0 || config.window_size > len {
return 0;
}
let threshold_sum = (config.threshold as u32) * (config.window_size as u32);
let mut masked_count = 0;
let mut window_sum: u32 = qual[..config.window_size]
.iter()
.map(|&q| phred_score(q) as u32)
.sum();
if window_sum < threshold_sum {
for i in 0..config.window_size {
seq[i] = b'N';
masked_count += 1;
}
}
for i in 1..=len - config.window_size {
window_sum = window_sum + phred_score(qual[i + config.window_size - 1]) as u32
- phred_score(qual[i - 1]) as u32;
if window_sum < threshold_sum {
for j in i..i + config.window_size {
if seq[j] != b'N' {
seq[j] = b'N';
masked_count += 1;
}
}
}
}
masked_count
}
pub fn break_at_low_quality(qual: &[u8], config: &BreakConfig) -> Vec<(usize, usize)> {
let len = qual.len();
if len == 0 {
return Vec::new();
}
if config.window_size == 0 || config.window_size > len {
return vec![(0, len)];
}
let threshold_sum = (config.threshold as u32) * (config.window_size as u32);
let mut low_quality_regions = Vec::new();
let mut window_sum: u32 = qual[..config.window_size]
.iter()
.map(|&q| phred_score(q) as u32)
.sum();
let mut in_low_quality = window_sum < threshold_sum;
let mut low_quality_start = if in_low_quality { 0 } else { usize::MAX };
for i in 1..=len - config.window_size {
window_sum = window_sum + phred_score(qual[i + config.window_size - 1]) as u32
- phred_score(qual[i - 1]) as u32;
let is_low = window_sum < threshold_sum;
if is_low && !in_low_quality {
low_quality_start = i;
in_low_quality = true;
} else if !is_low && in_low_quality {
low_quality_regions.push((low_quality_start, i + config.window_size - 1));
in_low_quality = false;
}
}
if in_low_quality {
low_quality_regions.push((low_quality_start, len));
}
if low_quality_regions.is_empty() {
return vec![(0, len)];
}
let mut segments = Vec::new();
let mut current_pos = 0;
for (lq_start, lq_end) in low_quality_regions {
if current_pos < lq_start {
segments.push((current_pos, lq_start));
}
current_pos = lq_end;
}
if current_pos < len {
segments.push((current_pos, len));
}
segments
}
#[cfg(test)]
mod tests {
use super::*;
fn make_qual(scores: &[u8]) -> Vec<u8> {
scores.iter().map(|&s| s + 33).collect()
}
#[test]
fn test_quality_trim_config_default() {
let config = QualityTrimConfig::default();
assert_eq!(config.window_size, 4);
assert_eq!(config.threshold, 15);
assert!(!config.cut_front);
assert!(config.cut_tail);
}
#[test]
fn test_quality_trim_config_short_read() {
let config = QualityTrimConfig::short_read();
assert_eq!(config.window_size, 4);
assert_eq!(config.threshold, 15);
}
#[test]
fn test_quality_trim_config_long_read() {
let config = QualityTrimConfig::long_read();
assert_eq!(config.window_size, 20);
assert_eq!(config.threshold, 7);
}
#[test]
fn test_sliding_window_empty() {
let config = QualityTrimConfig::default();
let result = sliding_window_trim(&[], &config);
assert!(result.is_empty());
}
#[test]
fn test_sliding_window_all_high_quality() {
let qual = make_qual(&[30, 30, 30, 30, 30, 30, 30, 30]);
let config = QualityTrimConfig::default();
let result = sliding_window_trim(&qual, &config);
assert_eq!(result.start, 0);
assert_eq!(result.end, 8);
}
#[test]
fn test_sliding_window_all_low_quality() {
let qual = make_qual(&[2, 2, 2, 2, 2, 2, 2, 2]);
let config = QualityTrimConfig::default();
let result = sliding_window_trim(&qual, &config);
assert!(result.is_empty() || result.len() < qual.len());
}
#[test]
fn test_sliding_window_tail_trim() {
let qual = make_qual(&[30, 30, 30, 30, 30, 30, 5, 5, 5, 5]);
let config = QualityTrimConfig::default().with_cut_tail(true).with_cut_front(false);
let result = sliding_window_trim(&qual, &config);
assert_eq!(result.start, 0);
assert!(result.end < qual.len());
}
#[test]
fn test_sliding_window_front_trim() {
let qual = make_qual(&[5, 5, 5, 5, 30, 30, 30, 30, 30, 30]);
let config = QualityTrimConfig::default().with_cut_front(true).with_cut_tail(false);
let result = sliding_window_trim(&qual, &config);
assert!(result.start > 0);
assert_eq!(result.end, 10);
}
#[test]
fn test_sliding_window_both_ends() {
let qual = make_qual(&[5, 5, 5, 30, 30, 30, 30, 5, 5, 5]);
let config = QualityTrimConfig::default().with_cut_front(true).with_cut_tail(true);
let result = sliding_window_trim(&qual, &config);
assert!(result.len() < qual.len());
}
#[test]
fn test_sliding_window_shorter_than_window() {
let qual = make_qual(&[30, 30]);
let config = QualityTrimConfig::default().with_window_size(4);
let result = sliding_window_trim(&qual, &config);
assert_eq!(result.start, 0);
assert_eq!(result.end, 2);
}
#[test]
fn test_mean_quality() {
let qual = make_qual(&[30, 30, 30, 30]);
let mean = mean_quality(&qual);
assert!((mean - 30.0).abs() < 0.01);
}
#[test]
fn test_mean_quality_empty() {
let mean = mean_quality(&[]);
assert_eq!(mean, 0.0);
}
#[test]
fn test_phred_score() {
assert_eq!(phred_score(b'!'), 0); assert_eq!(phred_score(b'I'), 40); assert_eq!(phred_score(b'~'), 93); }
#[test]
fn test_config_builder() {
let config = QualityTrimConfig::new()
.with_window_size(10)
.with_threshold(20)
.with_cut_front(true)
.with_cut_tail(false);
assert_eq!(config.window_size, 10);
assert_eq!(config.threshold, 20);
assert!(config.cut_front);
assert!(!config.cut_tail);
}
}