Skip to main content

memf_strings/
classify.rs

1//! Classifier pipeline orchestration.
2
3use crate::{ClassifiedString, StringCategory};
4
5/// A classifier that examines a string and returns matching categories.
6pub trait StringClassifier: Send + Sync {
7    /// Human-readable name for this classifier.
8    fn name(&self) -> &str;
9
10    /// Classify a string. Returns a list of (category, confidence) pairs.
11    fn classify(&self, input: &str) -> Vec<(StringCategory, f32)>;
12}
13
14inventory::collect!(&'static dyn StringClassifier);
15
16/// Run all registered classifiers on a list of strings, populating their categories.
17pub fn classify_strings(strings: &mut [ClassifiedString]) {
18    for s in strings.iter_mut() {
19        for classifier in inventory::iter::<&'static dyn StringClassifier> {
20            let matches = classifier.classify(&s.value);
21            s.categories.extend(matches);
22        }
23    }
24}
25
#[cfg(test)]
mod tests {
    use super::*;
    use crate::StringEncoding;

    /// Builds an uncategorized ASCII string fixture located at offset 0.
    fn make_string(value: &str) -> ClassifiedString {
        ClassifiedString {
            value: value.to_owned(),
            physical_offset: 0,
            encoding: StringEncoding::Ascii,
            categories: Vec::new(),
        }
    }

    /// End-to-end check that classifiers registered through `inventory` are
    /// picked up by `classify_strings`.
    #[test]
    fn classify_populates_categories_via_inventory() {
        // Relies on the RegexClassifier being registered via
        // `inventory::submit!`; it is expected to recognize URLs and IPs.
        let mut strings = [
            make_string("https://evil.com/malware.exe"),
            make_string("192.168.1.100"),
            make_string("just plain text"),
        ];

        classify_strings(&mut strings);

        let [url, ip, plain] = &strings;

        // The URL must pick up at least one category.
        assert!(
            !url.categories.is_empty(),
            "URL string should have categories"
        );
        // So must the IP address.
        assert!(
            !ip.categories.is_empty(),
            "IP string should have categories"
        );
        // Ordinary prose must stay untouched.
        assert!(
            plain.categories.is_empty(),
            "plain text should have no categories"
        );
    }

    /// Classifying nothing must neither panic nor add entries.
    #[test]
    fn classify_empty_slice_is_noop() {
        let mut strings = Vec::<ClassifiedString>::new();
        classify_strings(&mut strings);
        assert!(strings.is_empty());
    }

    /// Categories assigned before classification must survive it.
    #[test]
    fn classify_preserves_existing_categories() {
        let mut seeded = make_string("https://example.com");
        seeded.physical_offset = 0x100;
        seeded.categories.push((StringCategory::Url, 0.5));
        let mut strings = vec![seeded];

        classify_strings(&mut strings);

        // Expect the seeded category plus at least one classifier match.
        let count = strings[0].categories.len();
        assert!(
            count >= 2,
            "should preserve existing + add new: got {}",
            count
        );
    }
}