#[derive(Debug, Clone)]
pub struct XDropoffConfig {
pub match_score: f64,
pub mismatch_penalty: f64,
pub xdrop: f64,
}
impl Default for XDropoffConfig {
fn default() -> Self {
Self {
match_score: 1.0,
mismatch_penalty: 1.0,
xdrop: 10.0,
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct XDropoffResult {
pub start: usize,
pub end: usize,
pub score: f64,
}
pub fn xdropoff_search<F>(
seq: &[u8],
start_pos: usize,
is_match: F,
config: &XDropoffConfig,
) -> XDropoffResult
where
F: Fn(u8) -> bool,
{
let mut score = 0.0;
let mut max_score = 0.0;
let mut max_pos = start_pos;
let start = start_pos;
for (i, &base) in seq[start_pos..].iter().enumerate() {
let pos = start_pos + i;
if is_match(base) {
score += config.match_score;
} else {
score -= config.mismatch_penalty;
}
if score > max_score {
max_score = score;
max_pos = pos + 1;
}
if max_score - score > config.xdrop {
break;
}
}
XDropoffResult {
start,
end: max_pos,
score: max_score,
}
}
pub fn find_gc_regions(
seq: &[u8],
target_gc_frac: f64,
xdrop: f64,
min_length: usize,
) -> Vec<(usize, usize, f64)> {
let q = (1.0 - target_gc_frac) / target_gc_frac;
let config = XDropoffConfig {
match_score: q,
mismatch_penalty: 1.0,
xdrop,
};
let mut regions = Vec::new();
let mut pos = 0;
while pos < seq.len() {
let result = xdropoff_search(
seq,
pos,
|base| base == b'G' || base == b'g' || base == b'C' || base == b'c',
&config,
);
if result.end > result.start && result.end - result.start >= min_length {
regions.push((result.start, result.end, result.score));
}
pos = if result.end > result.start {
result.end
} else {
pos + 1
};
}
regions
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_xdropoff_search_basic() {
let seq = b"AAAAGGGGGAAAA";
let config = XDropoffConfig::default();
let result = xdropoff_search(seq, 0, |base| base == b'G', &config);
assert!(result.end > result.start);
assert!(result.score > 0.0);
}
#[test]
fn test_xdropoff_search_no_match() {
let seq = b"AAAAAAAA";
let config = XDropoffConfig::default();
let result = xdropoff_search(seq, 0, |base| base == b'G', &config);
assert_eq!(result.score, 0.0);
}
#[test]
fn test_xdropoff_search_with_start_pos() {
let seq = b"AAAAGGGGGAAAA";
let config = XDropoffConfig::default();
let result = xdropoff_search(seq, 5, |base| base == b'G', &config);
assert_eq!(result.start, 5);
assert!(result.end > 5);
}
#[test]
fn test_find_gc_regions_basic() {
let seq = b"ATATATATGCGCGCGCGCATATATATAT";
let regions = find_gc_regions(seq, 0.5, 5.0, 5);
assert!(!regions.is_empty());
for (start, end, score) in regions {
assert!(end > start);
assert!(end - start >= 5); assert!(score > 0.0);
let region = &seq[start..end];
let gc_count = region
.iter()
.filter(|&&b| b == b'G' || b == b'C' || b == b'g' || b == b'c')
.count();
let gc_frac = gc_count as f64 / region.len() as f64;
assert!(gc_frac > 0.3, "GC含量: {}", gc_frac);
}
}
#[test]
fn test_find_gc_regions_no_gc() {
let seq = b"ATATATAT";
let regions = find_gc_regions(seq, 0.5, 5.0, 5);
assert!(regions.is_empty());
}
#[test]
fn test_find_gc_regions_min_length_filter() {
let seq = b"ATGCAT";
let regions = find_gc_regions(seq, 0.5, 5.0, 10);
assert!(regions.is_empty());
}
#[test]
fn test_xdropoff_config() {
let seq = b"AAAAGGGGGAAAA";
let config_high = XDropoffConfig {
match_score: 1.0,
mismatch_penalty: 1.0,
xdrop: 100.0,
};
let config_low = XDropoffConfig {
match_score: 1.0,
mismatch_penalty: 1.0,
xdrop: 1.0,
};
let result_high = xdropoff_search(seq, 0, |base| base == b'G', &config_high);
let result_low = xdropoff_search(seq, 0, |base| base == b'G', &config_low);
assert!(result_high.end - result_high.start >= result_low.end - result_low.start);
}
}