threatflux_string_analysis/
analyzer.rs

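//! String analysis: suspicious-indicator types, the `StringAnalyzer` trait, and a
//! default implementation based on regex patterns, byte entropy and control characters.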
2
3use crate::patterns::Pattern;
4use crate::types::{AnalysisResult, StringMetadata};
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7
8/// Represents a suspicious indicator found in a string
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct SuspiciousIndicator {
11    /// The pattern that matched
12    pub pattern_name: String,
13    /// Description of why this is suspicious
14    pub description: String,
15    /// Severity level (0-10)
16    pub severity: u8,
17    /// The specific match within the string
18    pub matched_text: Option<String>,
19}
20
21/// Result of analyzing a string
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct StringAnalysis {
24    /// Shannon entropy of the string
25    pub entropy: f64,
26    /// Categories the string belongs to
27    pub categories: HashSet<String>,
28    /// Suspicious indicators found
29    pub suspicious_indicators: Vec<SuspiciousIndicator>,
30    /// Additional metadata
31    pub metadata: StringMetadata,
32    /// Whether the string is considered suspicious overall
33    pub is_suspicious: bool,
34}
35
36/// Trait for analyzing strings
37pub trait StringAnalyzer: Send + Sync {
38    /// Analyze a string and return analysis results
39    fn analyze(&self, value: &str) -> StringAnalysis;
40
41    /// Check if a string is suspicious
42    fn is_suspicious(&self, value: &str) -> bool {
43        self.analyze(value).is_suspicious
44    }
45
46    /// Calculate entropy of a string
47    fn calculate_entropy(&self, value: &str) -> f64;
48
49    /// Get the patterns used by this analyzer
50    fn get_patterns(&self) -> &[Pattern];
51
52    /// Add a custom pattern
53    fn add_pattern(&mut self, pattern: Pattern) -> AnalysisResult<()>;
54}
55
56/// Default implementation of StringAnalyzer
57pub struct DefaultStringAnalyzer {
58    patterns: Vec<Pattern>,
59    entropy_threshold: f64,
60}
61
62impl DefaultStringAnalyzer {
63    /// Create a new analyzer with default settings
64    pub fn new() -> Self {
65        Self {
66            patterns: Vec::new(),
67            entropy_threshold: 4.5,
68        }
69    }
70
71    /// Set the entropy threshold for suspicious detection
72    #[allow(dead_code)]
73    pub fn with_entropy_threshold(mut self, threshold: f64) -> Self {
74        self.entropy_threshold = threshold;
75        self
76    }
77
78    /// Add patterns to the analyzer
79    pub fn with_patterns(mut self, patterns: Vec<Pattern>) -> Self {
80        self.patterns = patterns;
81        self
82    }
83}
84
85impl StringAnalyzer for DefaultStringAnalyzer {
86    fn analyze(&self, value: &str) -> StringAnalysis {
87        let entropy = self.calculate_entropy(value);
88        let mut suspicious_indicators = Vec::new();
89        let mut categories = HashSet::new();
90
91        // Check against patterns
92        for pattern in &self.patterns {
93            if pattern.regex.is_match(value) {
94                if pattern.is_suspicious {
95                    suspicious_indicators.push(SuspiciousIndicator {
96                        pattern_name: pattern.name.clone(),
97                        description: pattern.description.clone(),
98                        severity: pattern.severity,
99                        matched_text: pattern.regex.find(value).map(|m| m.as_str().to_string()),
100                    });
101                }
102                categories.insert(pattern.category.clone());
103            }
104        }
105
106        // Check entropy
107        let high_entropy = entropy > self.entropy_threshold && value.len() > 10;
108        if high_entropy {
109            suspicious_indicators.push(SuspiciousIndicator {
110                pattern_name: "high_entropy".to_string(),
111                description: format!(
112                    "High entropy ({:.2}) indicates possible encoding/encryption",
113                    entropy
114                ),
115                severity: 6,
116                matched_text: None,
117            });
118        }
119
120        // Check for non-printable characters
121        let has_non_printable = value
122            .chars()
123            .any(|c| c.is_control() && c != '\n' && c != '\r' && c != '\t');
124        if has_non_printable {
125            suspicious_indicators.push(SuspiciousIndicator {
126                pattern_name: "non_printable_chars".to_string(),
127                description: "Contains non-printable characters".to_string(),
128                severity: 5,
129                matched_text: None,
130            });
131        }
132
133        let is_suspicious = !suspicious_indicators.is_empty();
134
135        StringAnalysis {
136            entropy,
137            categories,
138            suspicious_indicators,
139            metadata: HashMap::new(),
140            is_suspicious,
141        }
142    }
143
144    fn calculate_entropy(&self, s: &str) -> f64 {
145        if s.is_empty() {
146            return 0.0;
147        }
148
149        // Use a fixed-size array for byte-based entropy calculation which is more efficient
150        // for ASCII strings and provides similar results
151        let mut byte_counts = [0u32; 256];
152        let bytes = s.as_bytes();
153
154        // Count byte frequencies
155        for &byte in bytes {
156            byte_counts[byte as usize] += 1;
157        }
158
159        let len = bytes.len() as f64;
160        let mut entropy = 0.0;
161
162        // Calculate entropy based on byte frequencies
163        for &count in &byte_counts {
164            if count > 0 {
165                let probability = count as f64 / len;
166                entropy -= probability * probability.log2();
167            }
168        }
169
170        entropy
171    }
172
173    fn get_patterns(&self) -> &[Pattern] {
174        &self.patterns
175    }
176
177    fn add_pattern(&mut self, pattern: Pattern) -> AnalysisResult<()> {
178        self.patterns.push(pattern);
179        Ok(())
180    }
181}
182
183impl Default for DefaultStringAnalyzer {
184    fn default() -> Self {
185        Self::new()
186    }
187}