Skip to main content

memf_strings/
yara_classifier.rs

//! YARA-X rule-based string classifier.
//!
//! Scans strings against compiled YARA rules and reports each matching rule
//! as `StringCategory::YaraMatch(rule_name)`.
5
6use crate::classify::StringClassifier;
7use crate::{Error, StringCategory};
8use std::path::Path;
9
10/// A classifier that matches strings against YARA-X rules.
11pub struct YaraClassifier {
12    rules: yara_x::Rules,
13}
14
15impl YaraClassifier {
16    /// Compile YARA rules from source text.
17    pub fn from_source(source: &str) -> crate::Result<Self> {
18        let rules = yara_x::compile(source).map_err(|e| Error::Yara(e.to_string()))?;
19        Ok(Self { rules })
20    }
21
22    /// Load and compile all `.yar` / `.yara` files from a directory.
23    pub fn from_rules_dir(dir: &Path) -> crate::Result<Self> {
24        let mut compiler = yara_x::Compiler::new();
25        let mut found = false;
26
27        if dir.is_dir() {
28            for entry in std::fs::read_dir(dir)? {
29                let entry = entry?;
30                let path = entry.path();
31                if let Some(ext) = path.extension() {
32                    if ext == "yar" || ext == "yara" {
33                        let source = std::fs::read_to_string(&path)?;
34                        compiler
35                            .add_source(source.as_str())
36                            .map_err(|e| Error::Yara(e.to_string()))?;
37                        found = true;
38                    }
39                }
40            }
41        }
42
43        if !found {
44            return Err(Error::Yara(format!(
45                "no .yar/.yara files found in {}",
46                dir.display()
47            )));
48        }
49
50        let rules = compiler.build();
51        Ok(Self { rules })
52    }
53
54    /// Scan a single string against the compiled rules.
55    pub fn scan_string(&self, input: &str) -> Vec<(StringCategory, f32)> {
56        let mut scanner = yara_x::Scanner::new(&self.rules);
57        match scanner.scan(input.as_bytes()) {
58            Ok(scan_results) => scan_results
59                .matching_rules()
60                .map(|rule| {
61                    (
62                        StringCategory::YaraMatch(rule.identifier().to_string()),
63                        0.85,
64                    )
65                })
66                .collect(),
67            Err(_) => Vec::new(),
68        }
69    }
70}
71
72impl StringClassifier for YaraClassifier {
73    fn name(&self) -> &str {
74        "yara"
75    }
76
77    fn classify(&self, input: &str) -> Vec<(StringCategory, f32)> {
78        self.scan_string(input)
79    }
80}
81
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch directory under the system temp dir, unique to this test
    /// process and label, removed again when the guard is dropped.
    ///
    /// Using the process id in the name keeps concurrently running test
    /// processes from touching each other's files, and the `Drop` impl cleans
    /// up even when an assertion in the test panics.
    struct ScratchDir(std::path::PathBuf);

    impl ScratchDir {
        /// Creates an empty directory named after `label` and the current pid.
        fn new(label: &str) -> Self {
            let path = std::env::temp_dir().join(format!(
                "memf_test_yara_{label}_{}",
                std::process::id()
            ));
            // A recycled pid may have left an old directory behind; start clean.
            std::fs::remove_dir_all(&path).ok();
            std::fs::create_dir_all(&path).unwrap();
            Self(path)
        }

        /// Location of the scratch directory.
        fn path(&self) -> &std::path::Path {
            &self.0
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best-effort cleanup; a leftover temp dir must not fail the test.
            std::fs::remove_dir_all(&self.0).ok();
        }
    }

    #[test]
    fn match_simple_rule() {
        let source = r"
rule test_url {
    strings:
        $url = /https?:\/\/[^\s]+/
    condition:
        $url
}
";
        let classifier = YaraClassifier::from_source(source).unwrap();
        let matches = classifier.scan_string("https://malware.example.com/payload");
        assert_eq!(matches.len(), 1);
        assert!(matches!(matches[0].0, StringCategory::YaraMatch(ref name) if name == "test_url"));
    }

    #[test]
    fn no_match() {
        let source = r#"
rule test_never {
    strings:
        $never = "THIS_WILL_NEVER_MATCH_ANYTHING_12345"
    condition:
        $never
}
"#;
        let classifier = YaraClassifier::from_source(source).unwrap();
        let matches = classifier.scan_string("hello world");
        assert!(matches.is_empty());
    }

    #[test]
    fn multiple_rules() {
        let source = r#"
rule has_ip {
    strings:
        $ip = /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/
    condition:
        $ip
}

rule has_http {
    strings:
        $http = "http"
    condition:
        $http
}
"#;
        let classifier = YaraClassifier::from_source(source).unwrap();
        let matches = classifier.scan_string("http://10.0.0.1/shell");
        assert_eq!(matches.len(), 2);
    }

    #[test]
    fn invalid_rule_source_errors() {
        let result = YaraClassifier::from_source("not valid yara");
        assert!(result.is_err());
    }

    #[test]
    fn from_rules_dir_with_yar_files() {
        let dir = ScratchDir::new("rules_dir");
        let rule_path = dir.path().join("test_rule.yar");
        std::fs::write(
            &rule_path,
            r#"
rule detect_hello {
    strings:
        $hello = "HELLO_MARKER"
    condition:
        $hello
}
"#,
        )
        .unwrap();

        let classifier = YaraClassifier::from_rules_dir(dir.path()).unwrap();
        let matches = classifier.scan_string("HELLO_MARKER is here");
        assert_eq!(matches.len(), 1);
        assert!(
            matches!(matches[0].0, StringCategory::YaraMatch(ref name) if name == "detect_hello")
        );
    }

    #[test]
    fn from_rules_dir_empty_directory() {
        // The scratch directory is freshly created, so it holds no rule files.
        let dir = ScratchDir::new("empty_dir");

        match YaraClassifier::from_rules_dir(dir.path()) {
            Err(e) => {
                let err_msg = format!("{e}");
                assert!(err_msg.contains("no .yar/.yara files found"));
            }
            Ok(_) => panic!("expected error for empty directory"),
        }
    }

    #[test]
    fn classifier_name() {
        let source = r#"
rule dummy {
    strings:
        $x = "dummy"
    condition:
        $x
}
"#;
        let classifier = YaraClassifier::from_source(source).unwrap();
        assert_eq!(classifier.name(), "yara");
    }
}