pub mod adapter;
pub mod global;
pub mod length;
pub mod long_read;
pub mod overlap;
pub mod quality;
pub mod tail;
pub use adapter::{detect_adapter, trim_adapter, trim_adapter_indexed, trim_adapter_targeted, AdapterConfig, AdapterIndices, AdapterKmerIndex, ILLUMINA_TRUSEQ_R1, ILLUMINA_TRUSEQ_R2, NEXTERA_R1, NEXTERA_R2};
pub use overlap::{analyze_overlap, trim_by_overlap, OverlapConfig, OverlapResult};
pub use global::{trim_global, GlobalTrimConfig};
pub use length::{check_length, LengthConfig};
pub use long_read::{split_on_adapter, split_on_low_quality, LongReadConfig};
pub use quality::{sliding_window_trim, QualityTrimConfig};
pub use tail::{trim_poly_tail, TailConfig};
/// Returns the new exclusive end of the window `[start, end)` over `seq`
/// after dropping every `N` that sits at the right edge of the window.
///
/// Only the uppercase byte `b'N'` is matched. Trimming stops as soon as the
/// window would become empty, so for `start < end` the result is at least
/// `start`; when `end <= start` the value of `end` is returned unchanged.
#[inline]
fn trim_trailing_n(seq: &[u8], start: usize, end: usize) -> usize {
    // Walk the window right-to-left looking for the last base that is not
    // `N`; the trimmed end sits one past it. With no such base (all `N`, or
    // an empty/inverted window) the end collapses onto the smaller bound.
    (start..end)
        .rev()
        .find(|&idx| seq[idx] != b'N')
        .map_or(start.min(end), |idx| idx + 1)
}
/// Returns the new start of the window `[start, end)` over `seq` after
/// dropping every `N` that sits at the left edge of the window.
///
/// Only the uppercase byte `b'N'` is matched. Trimming stops as soon as the
/// window would become empty, so for `start < end` the result is at most
/// `end`; when `start >= end` the value of `start` is returned unchanged.
#[inline]
fn trim_leading_n(seq: &[u8], start: usize, end: usize) -> usize {
    // The first base that is not `N` becomes the new start. With no such
    // base (all `N`, or an empty/inverted window) the start collapses onto
    // the larger bound.
    (start..end)
        .find(|&idx| seq[idx] != b'N')
        .unwrap_or(start.max(end))
}
/// Half-open window `[start, end)` over a read, recording what survives
/// trimming.
///
/// A window whose `end` is not greater than its `start` is treated as empty;
/// such "inverted" windows can be produced by [`TrimResult::combine`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TrimResult {
    /// Index of the first retained element.
    pub start: usize,
    /// Index one past the last retained element.
    pub end: usize,
}

impl TrimResult {
    /// Creates a window covering `[start, end)`. The bounds are stored as
    /// given; no ordering check is performed.
    #[inline]
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Creates a window that keeps all `len` elements, i.e. `[0, len)`.
    #[inline]
    pub fn full(len: usize) -> Self {
        Self { start: 0, end: len }
    }

    /// Creates the empty window `[0, 0)`.
    #[inline]
    pub fn empty() -> Self {
        Self { start: 0, end: 0 }
    }

    /// Number of retained elements; `0` when `end <= start`.
    #[inline]
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the window retains nothing (`end <= start`).
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.end <= self.start
    }

    /// Returns the part of `slice` selected by this window.
    ///
    /// `end` is clamped to `slice.len()`. Any window that selects nothing —
    /// empty, inverted (`start > end`), or starting at or past the end of
    /// `slice` — yields an empty slice instead of panicking.
    #[inline]
    pub fn apply<'a, T>(&self, slice: &'a [T]) -> &'a [T] {
        let end = self.end.min(slice.len());
        // Compare against the clamped end so an inverted window (which
        // `is_empty` already reports as empty) never reaches the range index
        // below with `start > end`.
        if self.start >= end {
            return &slice[..0];
        }
        &slice[self.start..end]
    }

    /// Composes this window with `other`, where `other` is expressed relative
    /// to the start of this window, and returns the result in the coordinates
    /// of this window's parent.
    ///
    /// The returned end never extends past this window's own extent. The
    /// returned start is *not* clamped, so a nested window that begins past
    /// its own end (or past this window) produces an empty, possibly inverted
    /// result; check it with [`TrimResult::is_empty`].
    #[inline]
    pub fn combine(&self, other: &TrimResult) -> TrimResult {
        // Saturate rather than overflow on absurdly large offsets; a
        // saturated start is always reported as empty.
        let new_start = self.start.saturating_add(other.start);
        // `other.end` is capped at `self.len()`, so this sum cannot exceed
        // `max(self.start, self.end)` and therefore cannot overflow.
        let new_end = self.start + other.end.min(self.len());
        TrimResult::new(new_start, new_end)
    }
}
/// Selects which set of built-in trimming defaults to use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Mode {
    /// Short-read defaults; this is the variant returned by `Mode::default()`.
    #[default]
    Short,
    /// Long-read defaults.
    Long,
}

impl Mode {
    /// Single table of defaults for each mode, laid out as
    /// `(window size, quality threshold, minimum length)`.
    #[inline]
    fn presets(self) -> (usize, u8, usize) {
        match self {
            Mode::Short => (4, 15, 15),
            Mode::Long => (20, 7, 200),
        }
    }

    /// Default window size for this mode: `4` for short, `20` for long.
    #[inline]
    pub fn default_window_size(&self) -> usize {
        let (window_size, _, _) = self.presets();
        window_size
    }

    /// Default quality threshold for this mode: `15` for short, `7` for long.
    #[inline]
    pub fn default_quality_threshold(&self) -> u8 {
        let (_, threshold, _) = self.presets();
        threshold
    }

    /// Default minimum length for this mode: `15` for short, `200` for long.
    #[inline]
    pub fn default_min_length(&self) -> usize {
        let (_, _, min_length) = self.presets();
        min_length
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // `new` stores both bounds verbatim; `len` is their difference.
    #[test]
    fn test_trim_result_new() {
        let window = TrimResult::new(5, 10);
        assert_eq!((window.start, window.end), (5, 10));
        assert_eq!(window.len(), 5);
    }

    // `full(n)` spans `[0, n)`.
    #[test]
    fn test_trim_result_full() {
        let window = TrimResult::full(100);
        assert_eq!((window.start, window.end), (0, 100));
        assert_eq!(window.len(), 100);
    }

    // `empty()` reports itself as empty with zero length.
    #[test]
    fn test_trim_result_empty() {
        let window = TrimResult::empty();
        assert_eq!(window.len(), 0);
        assert!(window.is_empty());
    }

    // `apply` slices the input by the window bounds.
    #[test]
    fn test_trim_result_apply() {
        let data = b"ACGTACGTACGT";
        let kept = TrimResult::new(2, 8).apply(data);
        assert_eq!(kept, b"GTACGT");
    }

    // A window that lies entirely past the input selects nothing.
    #[test]
    fn test_trim_result_apply_bounds() {
        let data = b"ACGT";
        let kept = TrimResult::new(10, 20).apply(data);
        assert!(kept.is_empty());
    }

    // The inner window is offset by the outer window's start.
    #[test]
    fn test_trim_result_combine() {
        let outer = TrimResult::new(5, 15);
        let inner = TrimResult::new(2, 8);
        let combined = outer.combine(&inner);
        assert_eq!((combined.start, combined.end), (7, 13));
    }

    // Each mode exposes its own window size, threshold and minimum length.
    #[test]
    fn test_mode_defaults() {
        let (short, long) = (Mode::Short, Mode::Long);
        assert_eq!(short.default_window_size(), 4);
        assert_eq!(long.default_window_size(), 20);
        assert_eq!(short.default_quality_threshold(), 15);
        assert_eq!(long.default_quality_threshold(), 7);
        assert_eq!(short.default_min_length(), 15);
        assert_eq!(long.default_min_length(), 200);
    }

    // `Short` is the `Default` variant.
    #[test]
    fn test_mode_default_is_short() {
        let mode = Mode::default();
        assert_eq!(mode, Mode::Short);
    }
}
/// Bundle of per-stage settings consumed by the single-read trimming pipeline.
#[derive(Debug, Clone, Default)]
pub struct TrimConfig {
    /// Settings handed to `trim_global`.
    pub global: GlobalTrimConfig,
    /// Settings handed to `sliding_window_trim`; its `cut_tail` / `cut_front`
    /// flags also decide whether trailing / leading `N` bases are stripped.
    pub quality: QualityTrimConfig,
    /// Settings handed to `trim_adapter`.
    pub adapter: AdapterConfig,
    /// Settings handed to `trim_poly_tail`.
    pub tail: TailConfig,
    /// Settings handed to `check_length`.
    pub length: LengthConfig,
}

impl TrimConfig {
    /// Same as `TrimConfig::default()`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Preset assembled from the `short_read()` quality and length configs,
    /// `AdapterConfig::truseq()`, `TailConfig::poly_g()` and the default
    /// global config.
    pub fn short_read() -> Self {
        let global = GlobalTrimConfig::default();
        let quality = QualityTrimConfig::short_read();
        let adapter = AdapterConfig::truseq();
        let tail = TailConfig::poly_g();
        let length = LengthConfig::short_read();
        Self {
            global,
            quality,
            adapter,
            tail,
            length,
        }
    }

    /// Preset assembled from the `long_read()` quality and length configs,
    /// `AdapterConfig::disabled()`, and the default global and tail configs.
    pub fn long_read() -> Self {
        let global = GlobalTrimConfig::default();
        let quality = QualityTrimConfig::long_read();
        let adapter = AdapterConfig::disabled();
        let tail = TailConfig::default();
        let length = LengthConfig::long_read();
        Self {
            global,
            quality,
            adapter,
            tail,
            length,
        }
    }

    /// Preset with quality `cut_front` / `cut_tail` switched off, adapters
    /// disabled, the tail minimum length set to `usize::MAX` and the length
    /// filter minimum set to `0`. The global config is left at its default.
    pub fn disabled() -> Self {
        let global = GlobalTrimConfig::default();
        let quality = QualityTrimConfig::default()
            .with_cut_front(false)
            .with_cut_tail(false);
        let adapter = AdapterConfig::disabled();
        let tail = TailConfig::new().with_min_length(usize::MAX);
        let length = LengthConfig::new().with_min_length(0);
        Self {
            global,
            quality,
            adapter,
            tail,
            length,
        }
    }

    /// Returns `self` with the global settings replaced by `config`.
    pub fn with_global(self, config: GlobalTrimConfig) -> Self {
        Self {
            global: config,
            ..self
        }
    }

    /// Returns `self` with the quality settings replaced by `config`.
    pub fn with_quality(self, config: QualityTrimConfig) -> Self {
        Self {
            quality: config,
            ..self
        }
    }

    /// Returns `self` with the adapter settings replaced by `config`.
    pub fn with_adapter(self, config: AdapterConfig) -> Self {
        Self {
            adapter: config,
            ..self
        }
    }

    /// Returns `self` with the tail settings replaced by `config`.
    pub fn with_tail(self, config: TailConfig) -> Self {
        Self {
            tail: config,
            ..self
        }
    }

    /// Returns `self` with the length settings replaced by `config`.
    pub fn with_length(self, config: LengthConfig) -> Self {
        Self {
            length: config,
            ..self
        }
    }

    /// Runs the pipeline on one read, passing `is_read2 = false`.
    pub fn apply(&self, seq: &[u8], qual: &[u8]) -> TrimResult {
        self.apply_with_read_type(seq, qual, false)
    }

    /// Runs the trimming stages in order and returns the surviving window in
    /// coordinates of the original `seq`.
    ///
    /// Stages: `trim_global`, `sliding_window_trim` (on `qual`), trailing /
    /// leading `N` stripping (gated by `quality.cut_tail` / `cut_front`),
    /// `trim_adapter`, `trim_poly_tail`. The pipeline stops early and returns
    /// the current window as soon as a stage leaves nothing. `is_read2` is
    /// only forwarded to `trim_global`. An empty `seq` yields
    /// `TrimResult::empty()`.
    pub fn apply_with_read_type(&self, seq: &[u8], qual: &[u8], is_read2: bool) -> TrimResult {
        if seq.is_empty() {
            return TrimResult::empty();
        }

        // Stage 1: `trim_global` over the whole read.
        let window = TrimResult::full(seq.len()).combine(&trim_global(seq, &self.global, is_read2));
        if window.is_empty() {
            return window;
        }

        // Stage 2: `sliding_window_trim` over the qualities that are still in
        // the window.
        let window = window.combine(&sliding_window_trim(window.apply(qual), &self.quality));
        if window.is_empty() {
            return window;
        }

        // Stage 3: strip `N` bases from the edges, reusing the quality
        // config's cut flags as the on/off switches.
        let mut window = window;
        if self.quality.cut_tail {
            window.end = trim_trailing_n(seq, window.start, window.end);
            if window.is_empty() {
                return window;
            }
        }
        if self.quality.cut_front {
            window.start = trim_leading_n(seq, window.start, window.end);
            if window.is_empty() {
                return window;
            }
        }

        // Stage 4: `trim_adapter` on the bases that are still in the window.
        let window = window.combine(&trim_adapter(window.apply(seq), &self.adapter));
        if window.is_empty() {
            return window;
        }

        // Stage 5: `trim_poly_tail`; its result is returned without a
        // further emptiness check.
        window.combine(&trim_poly_tail(window.apply(seq), &self.tail))
    }

    /// Delegates to `check_length` with this config's length settings.
    #[inline]
    pub fn check_length(&self, len: usize) -> bool {
        check_length(len, &self.length)
    }
}
/// Trimmer that pairs a [`TrimConfig`] with adapter indices built once from
/// the adapter settings.
#[derive(Debug, Clone)]
pub struct OptimizedTrimmer {
    /// Per-stage trimming settings.
    pub config: TrimConfig,
    /// Overlap settings. NOTE(review): not read by any method in this impl —
    /// confirm whether `apply_paired` is meant to consult it.
    pub overlap_config: OverlapConfig,
    /// Result of `AdapterIndices::from_config(&config.adapter)` at
    /// construction time; used by the single-read path.
    pub adapter_indices: AdapterIndices,
}

impl OptimizedTrimmer {
    /// Builds a trimmer from `config`, deriving the adapter indices from
    /// `config.adapter` and using the default overlap settings.
    pub fn new(config: TrimConfig) -> Self {
        Self {
            // Evaluated first, while `config` is still only borrowed.
            adapter_indices: AdapterIndices::from_config(&config.adapter),
            overlap_config: OverlapConfig::default(),
            config,
        }
    }

    /// Returns `self` with the overlap settings replaced by `overlap_config`.
    pub fn with_overlap_config(self, overlap_config: OverlapConfig) -> Self {
        Self {
            overlap_config,
            ..self
        }
    }

    /// Trims one read, passing `is_read2 = false`.
    #[inline]
    pub fn apply(&self, seq: &[u8], qual: &[u8]) -> TrimResult {
        self.apply_single_optimized(seq, qual, false)
    }

    /// Trims one read; `is_read2` is forwarded to the single-read pipeline.
    #[inline]
    pub fn apply_with_read_type(&self, seq: &[u8], qual: &[u8], is_read2: bool) -> TrimResult {
        self.apply_single_optimized(seq, qual, is_read2)
    }

    /// Single-read pipeline. Same stage order as
    /// `TrimConfig::apply_with_read_type`, except that the adapter stage calls
    /// `trim_adapter_indexed` with the stored `adapter_indices`.
    ///
    /// Returns the surviving window in coordinates of the original `seq`,
    /// stopping early as soon as a stage leaves nothing.
    fn apply_single_optimized(&self, seq: &[u8], qual: &[u8], is_read2: bool) -> TrimResult {
        if seq.is_empty() {
            return TrimResult::empty();
        }
        let cfg = &self.config;

        // Stage 1: `trim_global` over the whole read.
        let window = TrimResult::full(seq.len()).combine(&trim_global(seq, &cfg.global, is_read2));
        if window.is_empty() {
            return window;
        }

        // Stage 2: `sliding_window_trim` over the remaining qualities.
        let window = window.combine(&sliding_window_trim(window.apply(qual), &cfg.quality));
        if window.is_empty() {
            return window;
        }

        // Stage 3: edge `N` stripping, gated by the quality cut flags.
        let mut window = window;
        if cfg.quality.cut_tail {
            window.end = trim_trailing_n(seq, window.start, window.end);
            if window.is_empty() {
                return window;
            }
        }
        if cfg.quality.cut_front {
            window.start = trim_leading_n(seq, window.start, window.end);
            if window.is_empty() {
                return window;
            }
        }

        // Stage 4: adapter trimming through the prebuilt indices.
        let window = window.combine(&trim_adapter_indexed(
            window.apply(seq),
            &self.adapter_indices,
            &cfg.adapter,
        ));
        if window.is_empty() {
            return window;
        }

        // Stage 5: `trim_poly_tail`; returned without a further check.
        window.combine(&trim_poly_tail(window.apply(seq), &cfg.tail))
    }

    /// Trims both mates of a pair, stage by stage, and returns
    /// `(read 1 window, read 2 window)`.
    ///
    /// If either input sequence is empty both windows are empty. After each
    /// stage, if either mate has nothing left, the pair is returned
    /// immediately with whatever trimming has been applied so far.
    ///
    /// NOTE(review): the adapter stage here calls `trim_adapter`, not
    /// `trim_adapter_indexed` as the single-read path does, and
    /// `overlap_config` is not consulted — confirm both are intentional.
    pub fn apply_paired(
        &self,
        r1_seq: &[u8],
        r1_qual: &[u8],
        r2_seq: &[u8],
        r2_qual: &[u8],
    ) -> (TrimResult, TrimResult) {
        if r1_seq.is_empty() || r2_seq.is_empty() {
            return (TrimResult::empty(), TrimResult::empty());
        }
        let cfg = &self.config;
        let either_empty = |a: &TrimResult, b: &TrimResult| a.is_empty() || b.is_empty();

        // Stage 1: `trim_global`, with read 2 flagged as such.
        let r1 = TrimResult::full(r1_seq.len()).combine(&trim_global(r1_seq, &cfg.global, false));
        let r2 = TrimResult::full(r2_seq.len()).combine(&trim_global(r2_seq, &cfg.global, true));
        if either_empty(&r1, &r2) {
            return (r1, r2);
        }

        // Stage 2: `sliding_window_trim` on each mate's remaining qualities.
        let r1 = r1.combine(&sliding_window_trim(r1.apply(r1_qual), &cfg.quality));
        let r2 = r2.combine(&sliding_window_trim(r2.apply(r2_qual), &cfg.quality));
        if either_empty(&r1, &r2) {
            return (r1, r2);
        }

        // Stage 3: edge `N` stripping, gated by the quality cut flags.
        let (mut r1, mut r2) = (r1, r2);
        if cfg.quality.cut_tail {
            r1.end = trim_trailing_n(r1_seq, r1.start, r1.end);
            r2.end = trim_trailing_n(r2_seq, r2.start, r2.end);
            if either_empty(&r1, &r2) {
                return (r1, r2);
            }
        }
        if cfg.quality.cut_front {
            r1.start = trim_leading_n(r1_seq, r1.start, r1.end);
            r2.start = trim_leading_n(r2_seq, r2.start, r2.end);
            if either_empty(&r1, &r2) {
                return (r1, r2);
            }
        }

        // Stage 4: `trim_adapter` on each mate with the shared adapter config.
        let r1 = r1.combine(&trim_adapter(r1.apply(r1_seq), &cfg.adapter));
        let r2 = r2.combine(&trim_adapter(r2.apply(r2_seq), &cfg.adapter));
        if either_empty(&r1, &r2) {
            return (r1, r2);
        }

        // Stage 5: `trim_poly_tail`; returned without a further check.
        let r1 = r1.combine(&trim_poly_tail(r1.apply(r1_seq), &cfg.tail));
        let r2 = r2.combine(&trim_poly_tail(r2.apply(r2_seq), &cfg.tail));
        (r1, r2)
    }

    /// Delegates to `TrimConfig::check_length` on the stored config.
    #[inline]
    pub fn check_length(&self, len: usize) -> bool {
        self.config.check_length(len)
    }
}
#[cfg(test)]
mod trim_config_tests {
    use super::*;

    // Shifts raw scores by 33 to build the quality bytes used by these tests.
    fn make_qual(scores: &[u8]) -> Vec<u8> {
        scores.iter().map(|&s| s + 33).collect()
    }

    // Config with tail-side quality cutting on, adapters disabled and the
    // tail minimum length set to `usize::MAX`.
    fn quality_only_config() -> TrimConfig {
        TrimConfig::new()
            .with_quality(QualityTrimConfig::default().with_cut_tail(true))
            .with_adapter(AdapterConfig::disabled())
            .with_tail(TailConfig::new().with_min_length(usize::MAX))
    }

    #[test]
    fn test_trim_config_default() {
        let cfg = TrimConfig::default();
        assert!(cfg.quality.cut_tail);
        assert!(cfg.adapter.adapter_r1.is_some());
    }

    #[test]
    fn test_trim_config_short_read() {
        let cfg = TrimConfig::short_read();
        assert_eq!(cfg.quality.window_size, 4);
        assert_eq!(cfg.quality.threshold, 15);
        assert!(cfg.adapter.adapter_r1.is_some());
    }

    #[test]
    fn test_trim_config_long_read() {
        let cfg = TrimConfig::long_read();
        assert_eq!(cfg.quality.window_size, 20);
        assert_eq!(cfg.quality.threshold, 7);
        assert!(cfg.adapter.adapter_r1.is_none());
        assert_eq!(cfg.length.min_length, 200);
    }

    #[test]
    fn test_trim_config_disabled() {
        let cfg = TrimConfig::disabled();
        assert!(cfg.adapter.adapter_r1.is_none());
        assert_eq!(cfg.length.min_length, 0);
    }

    // Eight high scores followed by eight low ones: the end must move left.
    #[test]
    fn test_trim_config_apply_quality() {
        let seq = b"ACGTACGTACGTACGT";
        let scores = [[30u8; 8], [5u8; 8]].concat();
        let qual = make_qual(&scores);
        let result = quality_only_config().apply(seq, &qual);
        assert!(result.end < seq.len());
    }

    #[test]
    fn test_trim_config_apply_empty() {
        let result = TrimConfig::default().apply(&[], &[]);
        assert!(result.is_empty());
    }

    #[test]
    fn test_trim_config_check_length() {
        let cfg = TrimConfig::new().with_length(LengthConfig::new().with_min_length(50));
        assert!(!cfg.check_length(30));
        assert!(cfg.check_length(50));
        assert!(cfg.check_length(100));
    }

    #[test]
    fn test_trim_config_builder() {
        let cfg = TrimConfig::new()
            .with_quality(QualityTrimConfig::long_read())
            .with_adapter(AdapterConfig::nextera())
            .with_tail(TailConfig::poly_a())
            .with_length(LengthConfig::long_read());
        assert_eq!(cfg.quality.window_size, 20);
        assert_eq!(cfg.length.min_length, 200);
    }

    // Each row is `(sequence, window start, expected end)`; the window end is
    // always the full sequence length.
    #[test]
    fn test_trim_trailing_n() {
        let cases: &[(&[u8], usize, usize)] = &[
            (b"ACGTACGTNNN", 0, 8),
            (b"ACGTACGT", 0, 8),
            (b"NNNNNNNN", 0, 0),
            (b"ACGTN", 0, 4),
            (b"ACNGT", 0, 5),
            (b"NNACGTNNN", 2, 6),
        ];
        for &(seq, start, expected) in cases.iter() {
            assert_eq!(trim_trailing_n(seq, start, seq.len()), expected);
        }
    }

    // Each row is `(sequence, expected start)` for a window over the whole
    // sequence.
    #[test]
    fn test_trim_leading_n() {
        let cases: &[(&[u8], usize)] = &[
            (b"NNNACGTACGT", 3),
            (b"ACGTACGT", 0),
            (b"NNNNNNNN", 8),
            (b"NACGT", 1),
            (b"ACNGT", 0),
        ];
        for &(seq, expected) in cases.iter() {
            assert_eq!(trim_leading_n(seq, 0, seq.len()), expected);
        }
    }

    // Uniformly high scores: only the trailing `N` run should be removed.
    #[test]
    fn test_trim_config_removes_trailing_n() {
        let seq = b"ACGTACGTACGTNNN";
        let qual = make_qual(&[30u8; 15]);
        let result = quality_only_config().apply(seq, &qual);
        let trimmed = result.apply(seq);
        assert!(!trimmed.ends_with(b"N"));
        assert_eq!(trimmed, b"ACGTACGTACGT");
    }
}