use crate::types::HitStatus;
use gapsmith_align::Hit;
use std::collections::HashSet;
#[derive(Debug, Clone)]
pub struct ClassifyOptions<'a> {
pub bitcutoff: f32,
pub identcutoff: f32,
pub ident_exception: f32,
pub exception_ecs: &'a HashSet<String>,
}
pub fn classify_reaction(
hits: &[Hit],
ec: &str,
has_seq_data: bool,
spont: bool,
opts: &ClassifyOptions<'_>,
) -> (HitStatus, bool, Option<usize>) {
let exception = ec_is_exception(ec, opts.exception_ecs);
if hits.is_empty() {
let status = if !has_seq_data && spont {
HitStatus::Spontaneous
} else if has_seq_data {
HitStatus::NoBlast
} else {
HitStatus::NoSeqData
};
return (status, exception, None);
}
let (idx, best) = hits
.iter()
.enumerate()
.max_by(|a, b| a.1.bitscore.partial_cmp(&b.1.bitscore).unwrap_or(std::cmp::Ordering::Equal))
.unwrap();
let pass_main = best.bitscore >= opts.bitcutoff && best.pident >= opts.identcutoff;
let pass_exc = if exception { best.pident >= opts.ident_exception } else { true };
let status = if pass_main && pass_exc {
HitStatus::GoodBlast
} else {
HitStatus::BadBlast
};
(status, exception, Some(idx))
}
pub fn ec_is_exception(ec: &str, exception_set: &HashSet<String>) -> bool {
for token in ec.split('/').map(str::trim).filter(|s| !s.is_empty()) {
if exception_set.contains(token) {
return true;
}
}
false
}
pub fn classify_hits(
scores: &[(f32, f32)],
ec: &str,
has_seq_data: bool,
spont: bool,
opts: &ClassifyOptions<'_>,
) -> HitStatus {
let exception = ec_is_exception(ec, opts.exception_ecs);
if scores.is_empty() {
if !has_seq_data && spont {
return HitStatus::Spontaneous;
}
if has_seq_data {
return HitStatus::NoBlast;
}
return HitStatus::NoSeqData;
}
let (b, p) = scores
.iter()
.cloned()
.fold((f32::MIN, f32::MIN), |(ab, ap), (nb, np)| (ab.max(nb), ap.max(np)));
if b >= opts.bitcutoff && p >= opts.identcutoff {
if exception && p < opts.ident_exception {
HitStatus::BadBlast
} else {
HitStatus::GoodBlast
}
} else {
HitStatus::BadBlast
}
}
#[cfg(test)]
mod tests {
use super::*;
fn opts(exc: &HashSet<String>) -> ClassifyOptions<'_> {
ClassifyOptions {
bitcutoff: 200.0,
identcutoff: 30.0,
ident_exception: 70.0,
exception_ecs: exc,
}
}
fn hit(bit: f32, pid: f32) -> Hit {
Hit {
qseqid: "q".into(),
pident: pid,
evalue: 0.0,
bitscore: bit,
qcov: 100.0,
stitle: "t".into(),
sstart: 1,
send: 100,
}
}
#[test]
fn good_blast_above_cutoffs() {
let e = HashSet::new();
let (s, _, _) = classify_reaction(&[hit(250.0, 50.0)], "1.1.1.1", true, false, &opts(&e));
assert_eq!(s, HitStatus::GoodBlast);
}
#[test]
fn bad_blast_below_bitscore() {
let e = HashSet::new();
let (s, _, _) = classify_reaction(&[hit(100.0, 50.0)], "1.1.1.1", true, false, &opts(&e));
assert_eq!(s, HitStatus::BadBlast);
}
#[test]
fn exception_requires_higher_identity() {
let mut e = HashSet::new();
e.insert("7.1.1.9".into());
let (s, exc, _) =
classify_reaction(&[hit(250.0, 50.0)], "7.1.1.9", true, false, &opts(&e));
assert_eq!(s, HitStatus::BadBlast);
assert!(exc);
let (s, _, _) = classify_reaction(&[hit(250.0, 80.0)], "7.1.1.9", true, false, &opts(&e));
assert_eq!(s, HitStatus::GoodBlast);
}
#[test]
fn no_blast_when_seq_data_but_no_hit() {
let e = HashSet::new();
let (s, _, _) = classify_reaction(&[], "1.1.1.1", true, false, &opts(&e));
assert_eq!(s, HitStatus::NoBlast);
}
#[test]
fn no_seq_data_when_nothing() {
let e = HashSet::new();
let (s, _, _) = classify_reaction(&[], "1.1.1.1", false, false, &opts(&e));
assert_eq!(s, HitStatus::NoSeqData);
}
#[test]
fn spontaneous_when_marked_and_no_data() {
let e = HashSet::new();
let (s, _, _) = classify_reaction(&[], "", false, true, &opts(&e));
assert_eq!(s, HitStatus::Spontaneous);
}
}