use crate::trim::adapter::reverse_complement;
#[derive(Debug, Clone, Copy, Default)]
pub struct OverlapResult {
pub overlapped: bool,
pub offset: i32,
pub overlap_len: usize,
pub diff: usize,
}
impl OverlapResult {
#[inline]
pub fn trim_pos_r1(&self, r1_len: usize) -> Option<usize> {
if !self.overlapped || self.offset >= 0 {
return None;
}
let trim_pos = (self.overlap_len as i32 + (-self.offset).max(0)) as usize;
if trim_pos < r1_len {
Some(trim_pos.min(r1_len))
} else {
None
}
}
#[inline]
pub fn trim_pos_r2(&self, r2_len: usize) -> Option<usize> {
if !self.overlapped || self.offset >= 0 {
return None;
}
let trim_pos = self.overlap_len;
if trim_pos < r2_len {
Some(trim_pos.min(r2_len))
} else {
None
}
}
}
#[derive(Debug, Clone)]
pub struct OverlapConfig {
pub diff_limit: usize,
pub overlap_require: usize,
pub diff_percent_limit: f64,
}
impl Default for OverlapConfig {
fn default() -> Self {
Self {
diff_limit: 5,
overlap_require: 30,
diff_percent_limit: 0.2,
}
}
}
pub fn analyze_overlap(r1_seq: &[u8], r2_seq: &[u8], config: &OverlapConfig) -> OverlapResult {
let rc_r2 = reverse_complement(r2_seq);
let len1 = r1_seq.len();
let len2 = rc_r2.len();
let complete_compare_require = 50;
let overlap_require = config.overlap_require;
let diff_limit = config.diff_limit;
let diff_percent_limit = config.diff_percent_limit;
let mut offset: i32 = 0;
while (offset as usize) < len1.saturating_sub(overlap_require) {
let overlap_len = (len1 - offset as usize).min(len2);
let overlap_diff_limit = diff_limit.min((overlap_len as f64 * diff_percent_limit) as usize);
let mut diff = 0;
let mut i = 0;
while i < overlap_len {
if r1_seq[offset as usize + i] != rc_r2[i] {
diff += 1;
if diff > overlap_diff_limit && i < complete_compare_require {
break;
}
}
i += 1;
}
if diff <= overlap_diff_limit || (diff > overlap_diff_limit && i > complete_compare_require) {
return OverlapResult {
overlapped: true,
offset,
overlap_len,
diff,
};
}
offset += 1;
}
offset = 0;
while offset > -((len2.saturating_sub(overlap_require)) as i32) {
let overlap_len = len1.min(len2 - (-offset) as usize);
let overlap_diff_limit = diff_limit.min((overlap_len as f64 * diff_percent_limit) as usize);
let mut diff = 0;
let mut i = 0;
while i < overlap_len {
if r1_seq[i] != rc_r2[(-offset) as usize + i] {
diff += 1;
if diff > overlap_diff_limit && i < complete_compare_require {
break;
}
}
i += 1;
}
if diff <= overlap_diff_limit || (diff > overlap_diff_limit && i > complete_compare_require) {
return OverlapResult {
overlapped: true,
offset,
overlap_len,
diff,
};
}
offset -= 1;
}
OverlapResult::default()
}
#[inline]
pub fn trim_by_overlap(
r1_seq: &[u8],
r2_seq: &[u8],
config: &OverlapConfig,
) -> (usize, usize) {
let ov = analyze_overlap(r1_seq, r2_seq, config);
if ov.overlapped && ov.offset < 0 {
let r1_trim = ov.overlap_len.min(r1_seq.len());
let r2_trim = ov.overlap_len.min(r2_seq.len());
(r1_trim, r2_trim)
} else {
(r1_seq.len(), r2_seq.len())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_overlap_no_adapter() {
let r1 = b"ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT";
let r2 = b"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN";
let config = OverlapConfig::default();
let result = analyze_overlap(r1, r2, &config);
assert!(!result.overlapped || result.offset >= 0);
}
#[test]
fn test_overlap_with_adapter() {
let insert = b"ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT"; let adapter_r1 = b"AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"; let adapter_r2 = b"AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT";
let mut r1 = insert.to_vec();
r1.extend_from_slice(adapter_r1);
let rc_insert = reverse_complement(insert);
let mut r2 = rc_insert;
r2.extend_from_slice(adapter_r2);
let config = OverlapConfig {
diff_limit: 5,
overlap_require: 30,
diff_percent_limit: 0.2,
};
let result = analyze_overlap(&r1, &r2, &config);
assert!(result.overlapped, "should find overlap between R1 and R2");
assert!(result.offset < 0, "offset should be negative when adapter present, got {}", result.offset);
}
#[test]
fn test_trim_by_overlap() {
let config = OverlapConfig::default();
let r1 = b"ACGTACGT";
let r2 = b"NNNNNNNN";
let (t1, t2) = trim_by_overlap(r1, r2, &config);
assert_eq!(t1, r1.len());
assert_eq!(t2, r2.len());
}
#[test]
fn test_overlap_config_default() {
let config = OverlapConfig::default();
assert_eq!(config.diff_limit, 5);
assert_eq!(config.overlap_require, 30);
assert!((config.diff_percent_limit - 0.2).abs() < 0.001);
}
}