threatflux_string_analysis/
analyzer.rs1use crate::patterns::Pattern;
4use crate::types::{AnalysisResult, StringMetadata};
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct SuspiciousIndicator {
11 pub pattern_name: String,
13 pub description: String,
15 pub severity: u8,
17 pub matched_text: Option<String>,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct StringAnalysis {
24 pub entropy: f64,
26 pub categories: HashSet<String>,
28 pub suspicious_indicators: Vec<SuspiciousIndicator>,
30 pub metadata: StringMetadata,
32 pub is_suspicious: bool,
34}
35
36pub trait StringAnalyzer: Send + Sync {
38 fn analyze(&self, value: &str) -> StringAnalysis;
40
41 fn is_suspicious(&self, value: &str) -> bool {
43 self.analyze(value).is_suspicious
44 }
45
46 fn calculate_entropy(&self, value: &str) -> f64;
48
49 fn get_patterns(&self) -> &[Pattern];
51
52 fn add_pattern(&mut self, pattern: Pattern) -> AnalysisResult<()>;
54}
55
56pub struct DefaultStringAnalyzer {
58 patterns: Vec<Pattern>,
59 entropy_threshold: f64,
60}
61
62impl DefaultStringAnalyzer {
63 pub fn new() -> Self {
65 Self {
66 patterns: Vec::new(),
67 entropy_threshold: 4.5,
68 }
69 }
70
71 #[allow(dead_code)]
73 pub fn with_entropy_threshold(mut self, threshold: f64) -> Self {
74 self.entropy_threshold = threshold;
75 self
76 }
77
78 pub fn with_patterns(mut self, patterns: Vec<Pattern>) -> Self {
80 self.patterns = patterns;
81 self
82 }
83}
84
85impl StringAnalyzer for DefaultStringAnalyzer {
86 fn analyze(&self, value: &str) -> StringAnalysis {
87 let entropy = self.calculate_entropy(value);
88 let mut suspicious_indicators = Vec::new();
89 let mut categories = HashSet::new();
90
91 for pattern in &self.patterns {
93 if pattern.regex.is_match(value) {
94 if pattern.is_suspicious {
95 suspicious_indicators.push(SuspiciousIndicator {
96 pattern_name: pattern.name.clone(),
97 description: pattern.description.clone(),
98 severity: pattern.severity,
99 matched_text: pattern.regex.find(value).map(|m| m.as_str().to_string()),
100 });
101 }
102 categories.insert(pattern.category.clone());
103 }
104 }
105
106 let high_entropy = entropy > self.entropy_threshold && value.len() > 10;
108 if high_entropy {
109 suspicious_indicators.push(SuspiciousIndicator {
110 pattern_name: "high_entropy".to_string(),
111 description: format!(
112 "High entropy ({:.2}) indicates possible encoding/encryption",
113 entropy
114 ),
115 severity: 6,
116 matched_text: None,
117 });
118 }
119
120 let has_non_printable = value
122 .chars()
123 .any(|c| c.is_control() && c != '\n' && c != '\r' && c != '\t');
124 if has_non_printable {
125 suspicious_indicators.push(SuspiciousIndicator {
126 pattern_name: "non_printable_chars".to_string(),
127 description: "Contains non-printable characters".to_string(),
128 severity: 5,
129 matched_text: None,
130 });
131 }
132
133 let is_suspicious = !suspicious_indicators.is_empty();
134
135 StringAnalysis {
136 entropy,
137 categories,
138 suspicious_indicators,
139 metadata: HashMap::new(),
140 is_suspicious,
141 }
142 }
143
144 fn calculate_entropy(&self, s: &str) -> f64 {
145 if s.is_empty() {
146 return 0.0;
147 }
148
149 let mut byte_counts = [0u32; 256];
152 let bytes = s.as_bytes();
153
154 for &byte in bytes {
156 byte_counts[byte as usize] += 1;
157 }
158
159 let len = bytes.len() as f64;
160 let mut entropy = 0.0;
161
162 for &count in &byte_counts {
164 if count > 0 {
165 let probability = count as f64 / len;
166 entropy -= probability * probability.log2();
167 }
168 }
169
170 entropy
171 }
172
173 fn get_patterns(&self) -> &[Pattern] {
174 &self.patterns
175 }
176
177 fn add_pattern(&mut self, pattern: Pattern) -> AnalysisResult<()> {
178 self.patterns.push(pattern);
179 Ok(())
180 }
181}
182
183impl Default for DefaultStringAnalyzer {
184 fn default() -> Self {
185 Self::new()
186 }
187}