1use serde::{Serialize, Deserialize};
4
5#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct AnalysisResult {
8 pub test_name: String,
9 pub statistic: f64,
10 pub p_value: f64,
11 pub is_suspicious: bool,
12 pub description: String,
13}
14
/// Runs simple statistical checks over payload bytes, packet sizes and packet timing.
#[derive(Debug, Clone)]
pub struct StatisticalAnalyzer {
    /// Significance level (α) the analyzer is configured with.
    ///
    /// NOTE(review): none of the analyses visible in this file read this field; their
    /// thresholds are fixed constants.
    pub significance_level: f64,
}
20
21impl StatisticalAnalyzer {
22 pub fn new() -> Self {
23 Self {
24 significance_level: 0.05,
25 }
26 }
27
28 pub fn with_significance(mut self, level: f64) -> Self {
29 self.significance_level = level;
30 self
31 }
32
33 pub fn byte_entropy(&self, data: &[u8]) -> f64 {
35 if data.is_empty() {
36 return 0.0;
37 }
38
39 let mut counts = [0u64; 256];
40 for &byte in data {
41 counts[byte as usize] += 1;
42 }
43
44 let len = data.len() as f64;
45 let mut entropy = 0.0;
46
47 for &count in &counts {
48 if count > 0 {
49 let p = count as f64 / len;
50 entropy -= p * p.log2();
51 }
52 }
53
54 entropy
55 }
56
57 pub fn entropy_analysis(&self, data: &[u8]) -> AnalysisResult {
59 let entropy = self.byte_entropy(data);
60 let is_suspicious = entropy < 6.0; AnalysisResult {
66 test_name: "Byte Entropy".into(),
67 statistic: entropy,
68 p_value: if is_suspicious { 0.01 } else { 0.5 },
69 is_suspicious,
70 description: format!(
71 "Entropy: {:.3} bits/byte (max 8.0). {}",
72 entropy,
73 if is_suspicious {
74 "Low entropy may indicate unencrypted/structured data."
75 } else {
76 "High entropy consistent with encryption."
77 }
78 ),
79 }
80 }
81
82 pub fn chi_squared_uniformity(&self, data: &[u8]) -> AnalysisResult {
84 if data.is_empty() {
85 return AnalysisResult {
86 test_name: "Chi-Squared".into(),
87 statistic: 0.0,
88 p_value: 1.0,
89 is_suspicious: false,
90 description: "No data".into(),
91 };
92 }
93
94 let mut counts = [0u64; 256];
95 for &byte in data {
96 counts[byte as usize] += 1;
97 }
98
99 let expected = data.len() as f64 / 256.0;
100 let chi_sq: f64 = counts
101 .iter()
102 .map(|&c| {
103 let diff = c as f64 - expected;
104 diff * diff / expected
105 })
106 .sum();
107
108 let is_suspicious = chi_sq > 293.0;
111
112 AnalysisResult {
113 test_name: "Chi-Squared Uniformity".into(),
114 statistic: chi_sq,
115 p_value: if is_suspicious { 0.01 } else { 0.5 },
116 is_suspicious,
117 description: format!(
118 "Chi-squared: {:.1} (critical: 293.0 at α=0.05). {}",
119 chi_sq,
120 if is_suspicious {
121 "Non-uniform distribution detected."
122 } else {
123 "Distribution consistent with random/encrypted data."
124 }
125 ),
126 }
127 }
128
129 pub fn pattern_detection(&self, packet_sizes: &[usize]) -> AnalysisResult {
131 if packet_sizes.len() < 4 {
132 return AnalysisResult {
133 test_name: "Pattern Detection".into(),
134 statistic: 0.0,
135 p_value: 1.0,
136 is_suspicious: false,
137 description: "Insufficient data".into(),
138 };
139 }
140
141 let mut unique: std::collections::HashSet<usize> = std::collections::HashSet::new();
143 for &s in packet_sizes {
144 unique.insert(s);
145 }
146
147 let unique_ratio = unique.len() as f64 / packet_sizes.len() as f64;
148
149 let all_same = unique.len() == 1;
151
152 let has_pattern = detect_repeating_pattern(packet_sizes);
154
155 let is_suspicious = has_pattern && !all_same;
156
157 AnalysisResult {
158 test_name: "Pattern Detection".into(),
159 statistic: unique_ratio,
160 p_value: if is_suspicious { 0.01 } else { 0.5 },
161 is_suspicious,
162 description: format!(
163 "Unique ratio: {:.2}, all_same: {}, repeating: {}. {}",
164 unique_ratio,
165 all_same,
166 has_pattern,
167 if all_same {
168 "Constant-size padding detected (good for privacy)."
169 } else if is_suspicious {
170 "Repeating pattern may enable fingerprinting."
171 } else {
172 "No obvious pattern detected."
173 }
174 ),
175 }
176 }
177
178 pub fn timing_regularity(&self, inter_packet_delays_us: &[u64]) -> AnalysisResult {
180 if inter_packet_delays_us.is_empty() {
181 return AnalysisResult {
182 test_name: "Timing Regularity".into(),
183 statistic: 0.0,
184 p_value: 1.0,
185 is_suspicious: false,
186 description: "No data".into(),
187 };
188 }
189
190 let mean = inter_packet_delays_us.iter().sum::<u64>() as f64
191 / inter_packet_delays_us.len() as f64;
192
193 let variance: f64 = inter_packet_delays_us
194 .iter()
195 .map(|&d| {
196 let diff = d as f64 - mean;
197 diff * diff
198 })
199 .sum::<f64>()
200 / inter_packet_delays_us.len() as f64;
201
202 let cv = if mean > 0.0 {
203 variance.sqrt() / mean
204 } else {
205 0.0
206 };
207
208 let is_suspicious = cv < 0.05 || cv > 2.0;
211
212 AnalysisResult {
213 test_name: "Timing Regularity".into(),
214 statistic: cv,
215 p_value: if is_suspicious { 0.01 } else { 0.5 },
216 is_suspicious,
217 description: format!(
218 "CV: {:.4} (mean delay: {:.0}µs). {}",
219 cv,
220 mean,
221 if cv < 0.05 {
222 "Very regular timing (constant-rate shaping detected)."
223 } else if cv > 2.0 {
224 "Highly bursty traffic (may enable fingerprinting)."
225 } else {
226 "Normal timing variation."
227 }
228 ),
229 }
230 }
231
232 pub fn full_analysis(
234 &self,
235 payload: &[u8],
236 packet_sizes: &[usize],
237 delays_us: &[u64],
238 ) -> Vec<AnalysisResult> {
239 vec![
240 self.entropy_analysis(payload),
241 self.chi_squared_uniformity(payload),
242 self.pattern_detection(packet_sizes),
243 self.timing_regularity(delays_us),
244 ]
245 }
246
247 pub fn suspicion_score(
249 &self,
250 payload: &[u8],
251 packet_sizes: &[usize],
252 delays_us: &[u64],
253 ) -> f64 {
254 let results = self.full_analysis(payload, packet_sizes, delays_us);
255 let suspicious_count = results.iter().filter(|r| r.is_suspicious).count();
256 suspicious_count as f64 / results.len() as f64
257 }
258}
259
260impl Default for StatisticalAnalyzer {
261 fn default() -> Self {
262 Self::new()
263 }
264}
265
/// Reports whether `data` consists of a shorter prefix repeated to fill the whole slice.
///
/// Every prefix of length `1..=data.len() / 2` is tried as the repeating unit. A prefix
/// matches when each later element equals the prefix element at the same index modulo the
/// prefix length, so the final repetition may be cut short (`[1, 2, 1, 2, 1]` matches).
/// A constant slice matches with a unit of length one. Slices with fewer than four elements
/// never match.
fn detect_repeating_pattern(data: &[usize]) -> bool {
    if data.len() < 4 {
        return false;
    }

    (1..=data.len() / 2).any(|period| {
        // Pair each element with the cycled prefix; the first `period` pairs are the prefix
        // compared with itself, so skip them.
        data.iter()
            .zip(data[..period].iter().cycle())
            .skip(period)
            .all(|(value, expected)| value == expected)
    })
}
289
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): `crypto::hash_data` is not defined in this file; the tests below assume it
    // returns a digest exposing `as_bytes()` whose bytes look uniformly random. Confirm that
    // `crypto` is in scope for this module.

    /// Concatenated digests should look like high-entropy data.
    #[test]
    fn test_entropy_random_data() {
        let analyzer = StatisticalAnalyzer::new();
        let mut data = Vec::new();
        for i in 0u16..1000 {
            let hash = crypto::hash_data(&i.to_le_bytes());
            data.extend_from_slice(hash.as_bytes());
        }
        let entropy = analyzer.byte_entropy(&data);
        assert!(entropy > 7.0, "Random data should have high entropy: {}", entropy);
    }

    /// A payload built from three distinct byte values must be flagged as low entropy.
    #[test]
    fn test_entropy_structured_data() {
        let analyzer = StatisticalAnalyzer::new();
        let data = b"AAAAAABBBBBBCCCCCC".repeat(100);
        let result = analyzer.entropy_analysis(&data);
        assert!(result.is_suspicious, "Structured data should be flagged");
        assert!(result.statistic < 4.0);
    }

    /// Concatenated digests should pass the uniformity test.
    #[test]
    fn test_chi_squared_uniform() {
        let analyzer = StatisticalAnalyzer::new();
        let mut data = Vec::new();
        for i in 0u32..500 {
            let hash = crypto::hash_data(&i.to_le_bytes());
            data.extend_from_slice(hash.as_bytes());
        }
        let result = analyzer.chi_squared_uniformity(&data);
        assert!(!result.is_suspicious, "Uniform-like data should pass: chi²={}", result.statistic);
    }

    /// Alternating sizes are flagged; constant sizes are treated as padding and are not.
    #[test]
    fn test_pattern_detection() {
        let analyzer = StatisticalAnalyzer::new();

        let sizes = vec![100, 200, 100, 200, 100, 200, 100, 200];
        let result = analyzer.pattern_detection(&sizes);
        assert!(result.is_suspicious, "Should detect repeating pattern");

        let constant = vec![256, 256, 256, 256, 256];
        let result2 = analyzer.pattern_detection(&constant);
        assert!(!result2.is_suspicious, "Constant size is not suspicious (padding)");
    }

    /// Identical delays are flagged as too regular; moderately varied delays are not.
    #[test]
    fn test_timing_regularity() {
        let analyzer = StatisticalAnalyzer::new();

        let regular = vec![1000, 1000, 1000, 1000, 1000];
        let result = analyzer.timing_regularity(&regular);
        assert!(result.is_suspicious, "Perfectly regular timing should be detected");

        let varied = vec![800, 1200, 950, 1100, 1050, 900];
        let result2 = analyzer.timing_regularity(&varied);
        assert!(!result2.is_suspicious, "Normal variation should not flag");
    }
}