use anyhow::Result;
use clap::Args;
use crate::algorithms::trimmer::{trim_mott, TrimmerConfig};
use crate::core::{SeqReader, SeqRecord, SeqWriter};
#[derive(Args, Debug)]
pub struct TrimfqArgs {
#[arg(value_name = "in.fq")]
pub input: String,
#[arg(short = 'q', default_value = "0.05")]
pub error_threshold: f64,
#[arg(short = 'l', default_value = "30")]
pub min_length: usize,
#[arg(short = 'b', default_value = "0")]
pub trim_left: usize,
#[arg(short = 'e', default_value = "0")]
pub trim_right: usize,
#[arg(short = 'L', default_value = "0")]
pub max_length: usize,
}
pub fn run(args: &TrimfqArgs) -> Result<()> {
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let mut writer = SeqWriter::to_stdout();
let mut record = SeqRecord::new(Vec::new(), Vec::new());
let use_fixed_trim = args.trim_left > 0 || args.trim_right > 0 || args.max_length > 0;
while reader.read_next(&mut record)? {
if !record.is_fastq() {
writer.write_record(&record)?;
continue;
}
let (start, end) = if use_fixed_trim {
fixed_trim(&record, args.trim_left, args.trim_right, args.max_length)
} else {
mott_trim(&record, args.error_threshold, args.min_length)?
};
if start >= end {
continue;
}
let trimmed_record = create_trimmed_record(&record, start, end);
writer.write_record(&trimmed_record)?;
}
writer.flush()?;
Ok(())
}
fn fixed_trim(
record: &SeqRecord,
trim_left: usize,
trim_right: usize,
max_length: usize,
) -> (usize, usize) {
let seq_len = record.seq.len();
let mut start = trim_left;
let mut end = seq_len.saturating_sub(trim_right);
if start >= end {
start = 0;
end = 0;
}
if max_length > 0 && end - start > max_length {
end = start + max_length;
}
(start, end)
}
fn mott_trim(
record: &SeqRecord,
error_threshold: f64,
min_length: usize,
) -> Result<(usize, usize)> {
if let Some(qual) = &record.qual {
if qual.len() <= min_length {
return Ok((0, record.seq.len()));
}
let config = TrimmerConfig {
error_threshold,
min_length,
qual_offset: 33,
trim_left: 0,
trim_right: 0,
};
let trim_result = trim_mott(qual, &config);
Ok((trim_result.start, trim_result.end))
} else {
Ok((0, record.seq.len()))
}
}
fn create_trimmed_record(record: &SeqRecord, start: usize, end: usize) -> SeqRecord {
let mut trimmed = SeqRecord::new(Vec::new(), Vec::new());
trimmed.name = record.name.clone();
trimmed.comment = record.comment.clone();
trimmed.seq = record.seq[start..end].to_vec();
if let Some(qual) = &record.qual {
trimmed.qual = Some(qual[start..end].to_vec());
}
trimmed
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_fixed_trim() {
let record = SeqRecord {
name: b"test".to_vec(),
comment: None,
seq: b"ACGTACGTACGT".to_vec(),
qual: Some(b"IIIIIIIIIIII".to_vec()),
};
let (start, end) = fixed_trim(&record, 2, 0, 0);
assert_eq!(start, 2);
assert_eq!(end, 12);
let (start, end) = fixed_trim(&record, 0, 2, 0);
assert_eq!(start, 0);
assert_eq!(end, 10);
let (start, end) = fixed_trim(&record, 2, 2, 0);
assert_eq!(start, 2);
assert_eq!(end, 10);
let (start, end) = fixed_trim(&record, 0, 0, 5);
assert_eq!(start, 0);
assert_eq!(end, 5);
}
#[test]
fn test_create_trimmed_record() {
let record = SeqRecord {
name: b"test".to_vec(),
comment: Some(b"comment".to_vec()),
seq: b"ACGTACGT".to_vec(),
qual: Some(b"IIIIIIII".to_vec()),
};
let trimmed = create_trimmed_record(&record, 2, 6);
assert_eq!(trimmed.name, b"test");
assert_eq!(trimmed.comment, Some(b"comment".to_vec()));
assert_eq!(trimmed.seq, b"GTAC");
assert_eq!(trimmed.qual, Some(b"IIII".to_vec()));
}
}