use serde::Serialize;
/// Kind of residue pattern a [`DomainHit`] was matched against.
///
/// The per-variant patterns below describe what [`scan_domains`] tests for;
/// `x` stands for any single residue and matching is done on the
/// ASCII-uppercased sequence.
///
/// The enum is `#[non_exhaustive]`, so `match`es in other crates need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub enum DomainType {
/// `C-x(2,4)-C-x(12)-H-x(2,3)-H`.
ZincFingerC2H2,
/// `L-x(6)-L-x(6)-L-x(6)-L` (22 residues).
LeuZipper,
/// `D-x-[DNS]-x-[DENSTG]-x-[DE]` (7 residues).
EfHand,
/// `G-x(4)-G-K-[ST]` (8 residues).
WalkerA,
/// The literal tripeptide `RGD`.
RgdMotif,
/// The literal tetrapeptide `DEAD`.
DeadBox,
/// Not emitted by `scan_domains` in this file.
CatalyticTriad,
/// `[KR]-[KR]-x-[KR]` (4 residues).
Nls,
/// `KDEL` as the last four residues of the sequence.
KdelSignal,
}
/// One pattern match reported by [`scan_domains`].
///
/// Field descriptions reflect how `scan_domains` populates the struct; the
/// fields are public, so other constructors are free to do otherwise.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct DomainHit {
/// Zero-based index of the first matched residue, counted in `char`s of
/// the input (not bytes).
pub start: usize,
/// Zero-based index of the last matched residue; inclusive.
pub end: usize,
/// Which pattern produced this hit.
pub domain_type: DomainType,
/// The matched residues `start..=end`, ASCII-uppercased.
pub fragment: String,
}
#[must_use]
pub fn scan_domains(sequence: &str) -> Option<Vec<DomainHit>> {
if sequence.is_empty() {
return None;
}
let chars: Vec<char> = sequence.chars().map(|c| c.to_ascii_uppercase()).collect();
let n = chars.len();
let mut hits = Vec::new();
for i in 0..n.saturating_sub(2) {
if chars[i] == 'R' && chars[i + 1] == 'G' && chars[i + 2] == 'D' {
hits.push(DomainHit {
start: i,
end: i + 2,
domain_type: DomainType::RgdMotif,
fragment: chars[i..=i + 2].iter().collect(),
});
}
}
for i in 0..n.saturating_sub(3) {
if chars[i] == 'D' && chars[i + 1] == 'E' && chars[i + 2] == 'A' && chars[i + 3] == 'D' {
hits.push(DomainHit {
start: i,
end: i + 3,
domain_type: DomainType::DeadBox,
fragment: chars[i..=i + 3].iter().collect(),
});
}
}
if n >= 4 {
let tail: String = chars[n - 4..].iter().collect();
if tail == "KDEL" {
hits.push(DomainHit {
start: n - 4,
end: n - 1,
domain_type: DomainType::KdelSignal,
fragment: tail,
});
}
}
for i in 0..n.saturating_sub(3) {
if (chars[i] == 'K' || chars[i] == 'R')
&& (chars[i + 1] == 'K' || chars[i + 1] == 'R')
&& (chars[i + 3] == 'K' || chars[i + 3] == 'R')
{
hits.push(DomainHit {
start: i,
end: i + 3,
domain_type: DomainType::Nls,
fragment: chars[i..=i + 3].iter().collect(),
});
}
}
for i in 0..n.saturating_sub(7) {
if chars[i] == 'G'
&& chars[i + 5] == 'G'
&& chars[i + 6] == 'K'
&& (chars[i + 7] == 'S' || chars[i + 7] == 'T')
{
hits.push(DomainHit {
start: i,
end: i + 7,
domain_type: DomainType::WalkerA,
fragment: chars[i..=i + 7].iter().collect(),
});
}
}
for i in 0..n.saturating_sub(6) {
if chars[i] == 'D'
&& matches!(chars[i + 2], 'D' | 'N' | 'S')
&& matches!(chars[i + 4], 'D' | 'E' | 'N' | 'S' | 'T' | 'G')
&& matches!(chars[i + 6], 'D' | 'E')
{
hits.push(DomainHit {
start: i,
end: i + 6,
domain_type: DomainType::EfHand,
fragment: chars[i..=i + 6].iter().collect(),
});
}
}
for i in 0..n.saturating_sub(21) {
if chars[i] == 'L' && chars[i + 7] == 'L' && chars[i + 14] == 'L' && chars[i + 21] == 'L' {
hits.push(DomainHit {
start: i,
end: i + 21,
domain_type: DomainType::LeuZipper,
fragment: chars[i..=i + 21].iter().collect(),
});
}
}
for i in 0..n.saturating_sub(22) {
if chars[i] != 'C' {
continue;
}
for gap1 in 2..=4 {
let c2 = i + 1 + gap1;
if c2 >= n || chars[c2] != 'C' {
continue;
}
let h1 = c2 + 13;
if h1 >= n || chars[h1] != 'H' {
continue;
}
for gap2 in 3..=4 {
let h2 = h1 + gap2;
if h2 < n && chars[h2] == 'H' {
hits.push(DomainHit {
start: i,
end: h2,
domain_type: DomainType::ZincFingerC2H2,
fragment: chars[i..=h2].iter().collect(),
});
}
}
}
}
hits.sort_by_key(|h| h.start);
Some(hits)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `seq` and keeps only the hits of `kind`, flattened to
    /// `(start, end, fragment)` in the order the scanner returned them.
    fn scan_for(seq: &str, kind: DomainType) -> Vec<(usize, usize, String)> {
        scan_domains(seq)
            .unwrap()
            .into_iter()
            .filter(|hit| hit.domain_type == kind)
            .map(|hit| (hit.start, hit.end, hit.fragment))
            .collect()
    }

    #[test]
    fn test_rgd_motif() {
        let found = scan_for("AARGDA", DomainType::RgdMotif);
        assert_eq!(found.len(), 1);
        let (start, _end, fragment) = &found[0];
        assert_eq!(*start, 2);
        assert_eq!(fragment, "RGD");
    }

    #[test]
    fn test_dead_box() {
        let found = scan_for("AADEADA", DomainType::DeadBox);
        assert_eq!(found.len(), 1);
        let (_start, _end, fragment) = &found[0];
        assert_eq!(fragment, "DEAD");
    }

    #[test]
    fn test_kdel_signal() {
        let found = scan_for("AAAAKDEL", DomainType::KdelSignal);
        assert_eq!(found.len(), 1);
        let (start, _end, _fragment) = &found[0];
        assert_eq!(*start, 4);
    }

    #[test]
    fn test_kdel_not_internal() {
        // KDEL at the front of the sequence must not be reported.
        let found = scan_for("KDELAAAA", DomainType::KdelSignal);
        assert!(found.is_empty());
    }

    #[test]
    fn test_nls() {
        let found = scan_for("AKKAK", DomainType::Nls);
        assert_eq!(found.len(), 1);
        let (_start, _end, fragment) = &found[0];
        assert_eq!(fragment, "KKAK");
    }

    #[test]
    fn test_walker_a() {
        let found = scan_for("GAAAAGKS", DomainType::WalkerA);
        assert_eq!(found.len(), 1);
        let (_start, _end, fragment) = &found[0];
        assert_eq!(fragment, "GAAAAGKS");
    }

    #[test]
    fn test_ef_hand() {
        let found = scan_for("DADADED", DomainType::EfHand);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_leucine_zipper() {
        let seq = "LAAAAAALAAAAAALAAAAAAL";
        let hits = scan_domains(seq).unwrap();
        let zippers: Vec<_> = hits
            .iter()
            .filter(|hit| matches!(hit.domain_type, DomainType::LeuZipper))
            .collect();
        assert_eq!(zippers.len(), 1, "seq len={}, hits={hits:?}", seq.len());
        let zipper = zippers[0];
        assert_eq!(zipper.start, 0);
        assert_eq!(zipper.end, 21);
    }

    #[test]
    fn test_zinc_finger_c2h2() {
        // C-x2-C, twelve spacer residues, H-x3-H, then three trailing residues.
        let spacer = "A".repeat(12);
        let seq = ["CAAC", spacer.as_str(), "H", "AAA", "H", "AAA"].concat();
        assert_eq!(seq.chars().nth(16), Some('H'));
        assert_eq!(seq.chars().nth(20), Some('H'));

        let hits = scan_domains(&seq).unwrap();
        let fingers: Vec<_> = hits
            .iter()
            .filter(|hit| matches!(hit.domain_type, DomainType::ZincFingerC2H2))
            .collect();
        assert_eq!(fingers.len(), 1, "seq len={}, hits={hits:?}", seq.len());
        let finger = fingers[0];
        assert_eq!(finger.start, 0);
        assert_eq!(finger.end, 20);
    }

    #[test]
    fn test_empty() {
        assert!(scan_domains("").is_none());
    }

    #[test]
    fn test_lowercase() {
        let upper_count = scan_domains("AARGDA").unwrap().len();
        let lower_count = scan_domains("aargda").unwrap().len();
        assert_eq!(upper_count, lower_count);
    }

    #[test]
    fn test_no_domains() {
        let hits = scan_domains("AAAA").unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn test_sorted_by_position() {
        let hits = scan_domains("RGDAADEAD").unwrap();
        assert!(hits.windows(2).all(|pair| pair[0].start <= pair[1].start));
    }
}