threatflux_string_analysis/
categorizer.rs1use crate::types::AnalysisResult;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7
/// Heap-allocated predicate that decides whether a string belongs to a rule.
///
/// The closure must be `Send + Sync` so rules can be shared across threads;
/// the `'static` bound is what `Box<dyn ...>` implies by default and is only
/// spelled out here for clarity.
type MatcherFn = Box<dyn Fn(&str) -> bool + Send + Sync + 'static>;
10
11static IPV4_REGEX: Lazy<Regex> =
13 Lazy::new(|| Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
14
15static IPV6_REGEX: Lazy<Regex> =
16 Lazy::new(|| Regex::new(r"^([0-9a-fA-F]{1,4}:){1,7}[0-9a-fA-F]{1,4}$|^::1$|^::$").unwrap());
17
18static EMAIL_REGEX: Lazy<Regex> =
19 Lazy::new(|| Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap());
20
21#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
23pub struct StringCategory {
24 pub name: String,
26 pub parent: Option<String>,
28 pub description: String,
30}
31
32pub struct CategoryRule {
34 pub name: String,
36 pub matcher: MatcherFn,
38 pub category: StringCategory,
40 pub priority: i32,
42}
43
44pub trait Categorizer: Send + Sync {
46 fn categorize(&self, value: &str) -> Vec<StringCategory>;
48
49 fn add_rule(&mut self, rule: CategoryRule) -> AnalysisResult<()>;
51
52 fn remove_rule(&mut self, name: &str) -> AnalysisResult<()>;
54
55 fn get_categories(&self) -> Vec<StringCategory>;
57}
58
59pub struct DefaultCategorizer {
61 rules: Vec<CategoryRule>,
62}
63
64impl DefaultCategorizer {
65 pub fn new() -> Self {
67 let mut categorizer = Self { rules: Vec::new() };
68
69 categorizer.add_default_rules();
71
72 categorizer
73 }
74
75 #[allow(dead_code)]
77 pub fn empty() -> Self {
78 Self { rules: Vec::new() }
79 }
80
81 fn add_default_rules(&mut self) {
82 self.rules.push(CategoryRule {
84 name: "url_rule".to_string(),
85 matcher: Box::new(|s| {
86 s.starts_with("http://") || s.starts_with("https://") || s.starts_with("ftp://")
87 }),
88 category: StringCategory {
89 name: "url".to_string(),
90 parent: Some("network".to_string()),
91 description: "URL or web address".to_string(),
92 },
93 priority: 100,
94 });
95
96 self.rules.push(CategoryRule {
98 name: "path_rule".to_string(),
99 matcher: Box::new(|s| {
100 (s.contains('/') || s.contains('\\'))
101 && (s.starts_with("/") || s.starts_with("\\") || s.contains(":\\"))
102 }),
103 category: StringCategory {
104 name: "path".to_string(),
105 parent: Some("filesystem".to_string()),
106 description: "File system path".to_string(),
107 },
108 priority: 90,
109 });
110
111 self.rules.push(CategoryRule {
113 name: "registry_rule".to_string(),
114 matcher: Box::new(|s| s.starts_with("HKEY_") || s.contains("\\SOFTWARE\\")),
115 category: StringCategory {
116 name: "registry".to_string(),
117 parent: Some("windows".to_string()),
118 description: "Windows registry key".to_string(),
119 },
120 priority: 95,
121 });
122
123 self.rules.push(CategoryRule {
125 name: "library_rule".to_string(),
126 matcher: Box::new(|s| {
127 s.ends_with(".dll") || s.ends_with(".so") || s.ends_with(".dylib") ||
128 s.contains(".so.") || (s.ends_with(".dll") || s.contains("kernel32") || s.contains("ntdll"))
130 }),
131 category: StringCategory {
132 name: "library".to_string(),
133 parent: Some("binary".to_string()),
134 description: "Shared library or DLL".to_string(),
135 },
136 priority: 85,
137 });
138
139 self.rules.push(CategoryRule {
141 name: "command_rule".to_string(),
142 matcher: Box::new(|s| {
143 s.contains("cmd")
144 || s.contains("powershell")
145 || s.contains("bash")
146 || s.contains("/bin/")
147 }),
148 category: StringCategory {
149 name: "command".to_string(),
150 parent: Some("execution".to_string()),
151 description: "Command or shell-related string".to_string(),
152 },
153 priority: 80,
154 });
155
156 self.rules.push(CategoryRule {
158 name: "ip_rule".to_string(),
159 matcher: Box::new(|s| IPV4_REGEX.is_match(s) || IPV6_REGEX.is_match(s)),
160 category: StringCategory {
161 name: "ip_address".to_string(),
162 parent: Some("network".to_string()),
163 description: "IP address (IPv4 or IPv6)".to_string(),
164 },
165 priority: 95,
166 });
167
168 self.rules.push(CategoryRule {
170 name: "email_rule".to_string(),
171 matcher: Box::new(|s| s.contains('@') && s.contains('.') && EMAIL_REGEX.is_match(s)),
172 category: StringCategory {
173 name: "email".to_string(),
174 parent: Some("contact".to_string()),
175 description: "Email address".to_string(),
176 },
177 priority: 85,
178 });
179
180 self.rules.push(CategoryRule {
182 name: "api_call_rule".to_string(),
183 matcher: Box::new(|s| {
184 s.contains("CreateProcess") || s.contains("VirtualAlloc") || s.contains("WriteProcessMemory") ||
186 s.contains("GetProcAddress") || s.contains("LoadLibrary") || s.contains("OpenProcess") ||
187 s == "malloc" || s == "calloc" || s == "realloc" || s == "free" ||
189 s == "fork" || s == "exec" || s == "open" || s == "read" || s == "write" ||
190 s.ends_with("A") && s.len() > 5 && s.chars().any(|c| c.is_uppercase()) }),
193 category: StringCategory {
194 name: "api_call".to_string(),
195 parent: Some("system".to_string()),
196 description: "System API call".to_string(),
197 },
198 priority: 90,
199 });
200
201 self.rules.sort_by(|a, b| b.priority.cmp(&a.priority));
203 }
204}
205
206impl Categorizer for DefaultCategorizer {
207 fn categorize(&self, value: &str) -> Vec<StringCategory> {
208 let mut categories = Vec::new();
209
210 for rule in &self.rules {
211 if (rule.matcher)(value) {
212 categories.push(rule.category.clone());
213 }
214 }
215
216 if categories.is_empty() {
218 categories.push(StringCategory {
219 name: "generic".to_string(),
220 parent: None,
221 description: "Generic string".to_string(),
222 });
223 }
224
225 categories
226 }
227
228 fn add_rule(&mut self, rule: CategoryRule) -> AnalysisResult<()> {
229 self.rules.push(rule);
230 self.rules.sort_by(|a, b| b.priority.cmp(&a.priority));
231 Ok(())
232 }
233
234 fn remove_rule(&mut self, name: &str) -> AnalysisResult<()> {
235 self.rules.retain(|r| r.name != name);
236 Ok(())
237 }
238
239 fn get_categories(&self) -> Vec<StringCategory> {
240 let mut categories = Vec::new();
241 let mut seen = std::collections::HashSet::new();
242
243 for rule in &self.rules {
244 if seen.insert(rule.category.name.clone()) {
245 categories.push(rule.category.clone());
246 }
247 }
248
249 categories
250 }
251}
252
253impl Default for DefaultCategorizer {
254 fn default() -> Self {
255 Self::new()
256 }
257}