use crate::{ClassifiedString, StringCategory};
/// A pluggable classifier that inspects a string and reports matching categories.
///
/// Implementors must be `Send + Sync`. Values of type
/// `&'static dyn StringClassifier` are collected through the `inventory` crate
/// and invoked by [`classify_strings`].
pub trait StringClassifier: Send + Sync {
/// Returns a string identifying this classifier.
///
/// NOTE(review): presumably a short human-readable label; nothing in this
/// section reads it, so confirm the expected format against callers.
fn name(&self) -> &str;
/// Classifies `input`, returning zero or more `(category, f32)` pairs.
///
/// An empty vector means nothing was reported for this input.
/// NOTE(review): the `f32` looks like a confidence/score value; its range is
/// not established in this section — confirm against implementations.
fn classify(&self, input: &str) -> Vec<(StringCategory, f32)>;
}
// Declares `&'static dyn StringClassifier` as a type collected by the
// `inventory` crate, which is what lets `classify_strings` walk the collected
// classifiers with `inventory::iter`. The registration sites themselves
// (presumably `inventory::submit!` calls) are not visible in this section.
inventory::collect!(&'static dyn StringClassifier);
pub fn classify_strings(strings: &mut [ClassifiedString]) {
for s in strings.iter_mut() {
for classifier in inventory::iter::<&'static dyn StringClassifier> {
let matches = classifier.classify(&s.value);
s.categories.extend(matches);
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::StringEncoding;

    /// Builds an ASCII `ClassifiedString` at offset 0 with no categories yet.
    fn make_string(value: &str) -> ClassifiedString {
        ClassifiedString {
            value: value.to_string(),
            physical_offset: 0,
            encoding: StringEncoding::Ascii,
            categories: Vec::new(),
        }
    }

    /// Relies on the classifiers collected via `inventory` in this build:
    /// the URL and IP inputs must pick up at least one category each, while
    /// the plain sentence must pick up none.
    #[test]
    fn classify_populates_categories_via_inventory() {
        let inputs = [
            "https://evil.com/malware.exe",
            "192.168.1.100",
            "just plain text",
        ];
        let mut strings: Vec<ClassifiedString> =
            inputs.iter().map(|text| make_string(text)).collect();

        classify_strings(&mut strings);

        let url_is_tagged = !strings[0].categories.is_empty();
        assert!(url_is_tagged, "URL string should have categories");

        let ip_is_tagged = !strings[1].categories.is_empty();
        assert!(ip_is_tagged, "IP string should have categories");

        let plain_is_untagged = strings[2].categories.is_empty();
        assert!(plain_is_untagged, "plain text should have no categories");
    }

    /// Classifying an empty slice must leave it empty and must not panic.
    #[test]
    fn classify_empty_slice_is_noop() {
        let mut strings = Vec::<ClassifiedString>::new();
        classify_strings(&mut strings);
        assert!(strings.is_empty());
    }

    /// A string that already carries a category must keep it and gain more.
    #[test]
    fn classify_preserves_existing_categories() {
        let mut seeded = make_string("https://example.com");
        seeded.physical_offset = 0x100;
        seeded.categories.push((crate::StringCategory::Url, 0.5));
        let mut strings = vec![seeded];

        classify_strings(&mut strings);

        let total = strings[0].categories.len();
        assert!(
            total >= 2,
            "should preserve existing + add new: got {}",
            total
        );
    }
}