use crate::error::{SeqError, SeqResult};
#[derive(Debug, Clone, Copy)]
pub struct NussinovConfig {
pub min_loop: usize,
pub allow_wobble: bool,
}
impl Default for NussinovConfig {
fn default() -> Self {
Self {
min_loop: 3,
allow_wobble: false,
}
}
}
impl NussinovConfig {
pub fn new(min_loop: usize, allow_wobble: bool) -> Self {
Self {
min_loop,
allow_wobble,
}
}
}
#[derive(Debug, Clone)]
pub struct NussinovResult {
pub pairs: usize,
pub structure: String,
pub pair_list: Vec<(usize, usize)>,
}
#[inline]
fn canonical_base(byte: u8) -> u8 {
match byte {
b'A' | b'a' => b'A',
b'C' | b'c' => b'C',
b'G' | b'g' => b'G',
b'U' | b'u' | b'T' | b't' => b'U',
_ => 0,
}
}
pub fn can_pair(a: u8, b: u8, allow_wobble: bool) -> bool {
let x = canonical_base(a);
let y = canonical_base(b);
if x == 0 || y == 0 {
return false;
}
match (x, y) {
(b'A', b'U') | (b'U', b'A') => true,
(b'G', b'C') | (b'C', b'G') => true,
(b'G', b'U') | (b'U', b'G') => allow_wobble,
_ => false,
}
}
pub fn nussinov_fold(seq: &[u8], config: &NussinovConfig) -> SeqResult<NussinovResult> {
let n = seq.len();
if n == 0 {
return Err(SeqError::EmptyInput);
}
let mut dp = vec![0usize; n * n];
let min_loop = config.min_loop;
let allow_wobble = config.allow_wobble;
for span in 1..n {
for i in 0..(n - span) {
let j = i + span;
let mut best = dp[(i + 1) * n + j];
for k in (i + 1)..=j {
if k - i - 1 < min_loop {
continue;
}
if !can_pair(seq[i], seq[k], allow_wobble) {
continue;
}
let inner = if i + 1 < k {
dp[(i + 1) * n + (k - 1)]
} else {
0
};
let outer = if k < j { dp[(k + 1) * n + j] } else { 0 };
let candidate = inner + outer + 1;
if candidate > best {
best = candidate;
}
}
dp[i * n + j] = best;
}
}
let pairs = dp[n - 1];
let mut structure = vec![b'.'; n];
let mut pair_list: Vec<(usize, usize)> = Vec::new();
let mut stack: Vec<(usize, usize)> = vec![(0, n - 1)];
while let Some((i, j)) = stack.pop() {
if i >= j {
continue;
}
let unpaired = dp[(i + 1) * n + j];
if dp[i * n + j] == unpaired {
stack.push((i + 1, j));
continue;
}
let mut matched = false;
for k in (i + 1)..=j {
if k - i - 1 < min_loop {
continue;
}
if !can_pair(seq[i], seq[k], allow_wobble) {
continue;
}
let inner = if i + 1 < k {
dp[(i + 1) * n + (k - 1)]
} else {
0
};
let outer = if k < j { dp[(k + 1) * n + j] } else { 0 };
if dp[i * n + j] == inner + outer + 1 {
structure[i] = b'(';
structure[k] = b')';
pair_list.push((i, k));
stack.push((i + 1, k - 1));
stack.push((k + 1, j));
matched = true;
break;
}
}
if !matched {
stack.push((i + 1, j));
}
}
pair_list.sort_unstable();
let structure = String::from_utf8(structure)
.map_err(|e| SeqError::InvalidConfiguration(format!("non-utf8 structure: {e}")))?;
Ok(NussinovResult {
pairs,
structure,
pair_list,
})
}
#[cfg(test)]
mod tests {
use super::*;
fn is_balanced(structure: &str) -> bool {
let mut depth: i64 = 0;
for ch in structure.chars() {
match ch {
'(' => depth += 1,
')' => {
depth -= 1;
if depth < 0 {
return false;
}
}
'.' => {}
_ => return false,
}
}
depth == 0
}
#[test]
fn can_pair_watson_crick() {
assert!(can_pair(b'A', b'U', false));
assert!(can_pair(b'U', b'A', false));
assert!(can_pair(b'G', b'C', false));
assert!(can_pair(b'C', b'G', false));
assert!(can_pair(b'a', b'u', false));
assert!(can_pair(b'A', b'T', false));
assert!(can_pair(b't', b'a', false));
assert!(!can_pair(b'A', b'G', false));
assert!(!can_pair(b'A', b'C', false));
assert!(!can_pair(b'N', b'U', false));
}
#[test]
fn can_pair_wobble_toggle() {
assert!(!can_pair(b'G', b'U', false));
assert!(!can_pair(b'U', b'G', false));
assert!(can_pair(b'G', b'U', true));
assert!(can_pair(b'U', b'G', true));
assert!(!can_pair(b'A', b'A', true));
}
#[test]
fn empty_sequence_errors() {
let cfg = NussinovConfig::default();
let err = nussinov_fold(b"", &cfg);
assert!(matches!(err, Err(SeqError::EmptyInput)));
}
#[test]
fn gggaaaccc_full_stem() {
let cfg = NussinovConfig::default();
let r = nussinov_fold(b"GGGAAACCC", &cfg).expect("fold ok");
assert_eq!(r.pairs, 3, "expected 3 pairs");
assert_eq!(r.structure, "(((...)))", "exact dot-bracket");
assert!(is_balanced(&r.structure), "structure must be nested");
assert_eq!(r.pair_list.len(), r.pairs);
assert_eq!(r.structure.matches('(').count(), r.pairs);
assert_eq!(r.structure.matches(')').count(), r.pairs);
}
#[test]
fn structures_are_nested() {
let cfg = NussinovConfig::default();
for seq in [
&b"GGGAAACCC"[..],
&b"GGGGGAAACCCCC"[..],
&b"AAAA"[..],
&b"GCGCGCAAAUGCGCGC"[..],
&b"AUGCAUGCAUGC"[..],
] {
let r = nussinov_fold(seq, &cfg).expect("fold ok");
assert_eq!(r.structure.len(), seq.len());
assert!(
is_balanced(&r.structure),
"not nested for {:?}: {}",
core::str::from_utf8(seq),
r.structure
);
for &(i, k) in &r.pair_list {
assert!(i < k);
assert!(k - i > cfg.min_loop, "pair ({i},{k}) violates min_loop");
}
}
}
#[test]
fn min_loop_constraint_respected() {
let cfg = NussinovConfig::default();
let r = nussinov_fold(b"GGGGCCCC", &cfg).expect("fold ok");
for &(i, k) in &r.pair_list {
assert!(i < k);
assert!(k - i > 3, "pair ({i},{k}) loop length {} < 3", k - i - 1);
}
let r4 = nussinov_fold(b"GCGC", &cfg).expect("fold ok");
assert_eq!(r4.pairs, 0, "no admissible pair in GCGC");
assert_eq!(r4.structure, "....");
let r2 = nussinov_fold(b"GC", &cfg).expect("fold ok");
assert_eq!(r2.pairs, 0);
assert_eq!(r2.structure, "..");
}
#[test]
fn aaaa_no_pairs() {
let cfg = NussinovConfig::default();
let r = nussinov_fold(b"AAAA", &cfg).expect("fold ok");
assert_eq!(r.pairs, 0);
assert_eq!(r.structure, "....");
assert!(r.pair_list.is_empty());
}
#[test]
fn full_five_pair_stem() {
let cfg = NussinovConfig::default();
let r = nussinov_fold(b"GGGGGAAACCCCC", &cfg).expect("fold ok");
assert_eq!(r.pairs, 5, "expected a full 5-pair stem");
assert_eq!(r.structure, "(((((...)))))");
assert!(is_balanced(&r.structure));
let innermost = r
.pair_list
.iter()
.max_by_key(|&&(i, _)| i)
.copied()
.expect("at least one pair");
assert!(innermost.0 < innermost.1);
assert!(innermost.1 - innermost.0 > 3, "innermost loop too short");
}
#[test]
fn traceback_consistency() {
let cfg = NussinovConfig::default();
for seq in [
&b"GGGAAACCC"[..],
&b"GGGGGAAACCCCC"[..],
&b"GCGCGCAAAUGCGCGC"[..],
] {
let r = nussinov_fold(seq, &cfg).expect("fold ok");
assert_eq!(r.pair_list.len(), r.pairs, "pair_list len == pairs");
assert_eq!(
r.structure.matches('(').count(),
r.pairs,
"open bracket count == pairs"
);
assert_eq!(
r.structure.matches(')').count(),
r.pairs,
"close bracket count == pairs"
);
}
}
#[test]
fn wobble_increases_pairs() {
let no_wobble = NussinovConfig::new(3, false);
let wobble = NussinovConfig::new(3, true);
let seq = b"GGGUUU";
let r_no = nussinov_fold(seq, &no_wobble).expect("fold ok");
let r_yes = nussinov_fold(seq, &wobble).expect("fold ok");
assert_eq!(r_no.pairs, 0, "no WC/AU/GC pairs without wobble");
assert!(
r_yes.pairs > r_no.pairs,
"wobble must add pairs: {} > {}",
r_yes.pairs,
r_no.pairs
);
assert!(is_balanced(&r_yes.structure));
for &(i, k) in &r_yes.pair_list {
assert!(i < k);
assert!(k - i > 3);
}
}
#[test]
fn config_defaults() {
let cfg = NussinovConfig::default();
assert_eq!(cfg.min_loop, 3);
assert!(!cfg.allow_wobble);
let cfg2 = NussinovConfig::new(5, true);
assert_eq!(cfg2.min_loop, 5);
assert!(cfg2.allow_wobble);
}
}