1use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::f64::consts::PI;
10
11use crate::math;
12
/// Autocorrelation coefficient at a single lag.
#[derive(Debug, Clone, Serialize)]
pub struct LagCorrelation {
    /// Lag in samples.
    pub lag: usize,
    /// Variance-normalized autocorrelation at this lag.
    pub correlation: f64,
}

/// Output of `autocorrelation_profile`.
#[derive(Debug, Clone, Serialize)]
pub struct AutocorrResult {
    /// One entry per evaluated lag (1..=max_lag, capped at n/2).
    pub lags: Vec<LagCorrelation>,
    /// Largest absolute correlation observed over all lags.
    pub max_abs_correlation: f64,
    /// Lag at which the largest absolute correlation occurred.
    pub max_abs_lag: usize,
    /// Significance threshold, 2 / sqrt(n).
    pub threshold: f64,
    /// Number of lags whose |correlation| exceeded `threshold`.
    pub violations: usize,
}

/// One frequency bin of the power spectrum.
#[derive(Debug, Clone, Serialize)]
pub struct SpectralBin {
    /// Normalized frequency in cycles per sample (at most 0.5).
    pub frequency: f64,
    /// Power in this bin.
    pub power: f64,
}

/// Output of `spectral_analysis`.
#[derive(Debug, Clone, Serialize)]
pub struct SpectralResult {
    /// Up to ten strongest bins, sorted by descending power.
    pub peaks: Vec<SpectralBin>,
    /// Spectral flatness (geometric / arithmetic mean power); 1.0 = flat.
    pub flatness: f64,
    /// Frequency of the strongest bin (0.0 when no bins were computed).
    pub dominant_frequency: f64,
    /// Sum of power over all computed bins.
    pub total_power: f64,
}
56
/// Output of `bit_bias`: per-bit-position bias statistics.
#[derive(Debug, Clone, Serialize)]
pub struct BitBiasResult {
    /// Probability of a set bit for each of the 8 bit positions (LSB first).
    pub bit_probabilities: [f64; 8],
    /// Mean absolute deviation of the bit probabilities from 0.5.
    pub overall_bias: f64,
    /// Chi-squared statistic of the set-bit counts against n/2 per position.
    pub chi_squared: f64,
    /// p-value of the chi-squared statistic (8 degrees of freedom).
    pub p_value: f64,
    /// True when any bit position deviates from 0.5 by more than 1%.
    pub has_significant_bias: bool,
}

/// Output of `distribution_stats`: moments and uniform goodness-of-fit.
#[derive(Debug, Clone, Serialize)]
pub struct DistributionResult {
    pub mean: f64,
    pub variance: f64,
    pub std_dev: f64,
    pub skewness: f64,
    pub kurtosis: f64,
    /// Byte-value histogram, 256 bins.
    pub histogram: Vec<u64>,
    /// Kolmogorov-Smirnov distance against the uniform CDF on 0..=255.
    pub ks_statistic: f64,
    /// Asymptotic KS p-value (clamped to [0, 1]).
    pub ks_p_value: f64,
}

/// Output of `anderson_darling`: uniformity test result.
#[derive(Debug, Clone, Serialize)]
pub struct AndersonDarlingResult {
    /// Adjusted A-squared statistic.
    pub statistic: f64,
    /// Approximate p-value of the statistic.
    pub p_value: f64,
    /// True when p_value > 0.05.
    pub is_uniform: bool,
    /// Reference (significance level, critical value) pairs.
    pub critical_values: Vec<(f64, f64)>,
    /// False when the input was too short to test (< 2 bytes).
    pub is_valid: bool,
}

/// Output of `approximate_entropy`: ApEn and its intermediate phi values.
#[derive(Debug, Clone, Serialize)]
pub struct ApproxEntropyResult {
    /// ApEn = phi(m) - phi(m+1); NaN when invalid.
    pub apen: f64,
    /// Embedding dimension used.
    pub m: usize,
    /// Tolerance used for template matching.
    pub r: f64,
    pub phi_m: f64,
    pub phi_m_plus_1: f64,
    /// Number of points actually analyzed after down-sampling.
    pub actual_samples: usize,
    /// False for degenerate inputs (constant data, too few samples, bad r).
    pub is_valid: bool,
}
108
/// Output of `stationarity_test`: windowed ANOVA-style drift check.
#[derive(Debug, Clone, Serialize)]
pub struct StationarityResult {
    /// True when the F-statistic is below the fixed critical value.
    pub is_stationary: bool,
    /// Between-window vs. within-window variance ratio.
    pub f_statistic: f64,
    /// Mean of each analysis window.
    pub window_means: Vec<f64>,
    /// Standard deviation of each analysis window.
    pub window_std_devs: Vec<f64>,
    /// Number of windows analyzed (0 when the input was too short).
    pub n_windows: usize,
}

/// Output of `runs_analysis`: run-length statistics over identical bytes.
#[derive(Debug, Clone, Serialize)]
pub struct RunsResult {
    /// Longest observed run of identical consecutive bytes.
    pub longest_run: usize,
    /// Expected longest run for i.i.d. uniform bytes (~log_256(n)).
    pub expected_longest_run: f64,
    /// Observed number of runs.
    pub total_runs: usize,
    /// Expected number of runs for i.i.d. uniform bytes.
    pub expected_runs: f64,
}

/// Output of `permutation_entropy` (Bandt-Pompe ordinal patterns).
#[derive(Debug, Clone, Serialize)]
pub struct PermutationEntropyResult {
    /// Shannon entropy (nats) of the ordinal-pattern distribution.
    pub permutation_entropy: f64,
    /// Entropy divided by the maximum ln(order!).
    pub normalized_entropy: f64,
    pub order: usize,
    pub delay: usize,
    /// Number of distinct patterns observed.
    pub n_patterns: usize,
    /// Up to 20 most frequent (pattern, count) pairs.
    pub pattern_counts: Vec<(Vec<usize>, usize)>,
    pub is_valid: bool,
}

/// One measurement of entropy estimates at a given sample size.
/// NOTE(review): producer not visible in this file — field semantics
/// inferred from names; confirm against the collection code.
#[derive(Debug, Clone, Serialize)]
pub struct EntropyPoint {
    pub sample_size: usize,
    pub shannon_h: f64,
    pub min_entropy: f64,
    pub collection_time_ms: u64,
}

/// Series of entropy measurements across sample sizes.
#[derive(Debug, Clone, Serialize)]
pub struct ScalingResult {
    pub points: Vec<EntropyPoint>,
}

/// Series of throughput measurements across sample sizes.
#[derive(Debug, Clone, Serialize)]
pub struct ThroughputResult {
    pub measurements: Vec<ThroughputPoint>,
}

/// One throughput measurement.
/// NOTE(review): producer not visible in this file — confirm units at the
/// collection site.
#[derive(Debug, Clone, Serialize)]
pub struct ThroughputPoint {
    pub sample_size: usize,
    pub bytes_per_second: f64,
    pub collection_time_ms: u64,
}
176
/// Pearson correlation between one pair of named sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossCorrPair {
    pub source_a: String,
    pub source_b: String,
    /// Pearson coefficient over the common-length prefix.
    pub correlation: f64,
    /// True when |correlation| exceeded the flagging threshold.
    pub flagged: bool,
}

/// Output of `cross_correlation_matrix`: all pairwise correlations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossCorrMatrix {
    /// One entry per pair with at least 100 overlapping bytes.
    pub pairs: Vec<CrossCorrPair>,
    /// Number of flagged pairs.
    pub flagged_count: usize,
}

/// Output of `full_analysis`: the complete test battery for one source.
#[derive(Debug, Clone, Serialize)]
pub struct SourceAnalysis {
    pub source_name: String,
    pub sample_size: usize,
    pub shannon_entropy: f64,
    pub min_entropy: f64,
    pub autocorrelation: AutocorrResult,
    pub spectral: SpectralResult,
    pub bit_bias: BitBiasResult,
    pub distribution: DistributionResult,
    pub stationarity: StationarityResult,
    pub runs: RunsResult,
}
210
211pub fn autocorrelation_profile(data: &[u8], max_lag: usize) -> AutocorrResult {
217 let n = data.len();
218 if n == 0 || max_lag == 0 {
219 return AutocorrResult {
220 lags: Vec::new(),
221 max_abs_correlation: 0.0,
222 max_abs_lag: 0,
223 threshold: 0.0,
224 violations: 0,
225 };
226 }
227
228 let max_lag = max_lag.min(n / 2);
229 if max_lag == 0 {
230 return AutocorrResult {
231 lags: Vec::new(),
232 max_abs_correlation: 0.0,
233 max_abs_lag: 0,
234 threshold: 2.0 / (n as f64).sqrt(),
235 violations: 0,
236 };
237 }
238 let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
239 let mean: f64 = arr.iter().sum::<f64>() / n as f64;
240 let var: f64 = arr.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
241
242 let threshold = 2.0 / (n as f64).sqrt();
243 let mut lags = Vec::with_capacity(max_lag);
244 let mut max_abs = 0.0f64;
245 let mut max_abs_lag = 1;
246 let mut violations = 0;
247
248 for lag in 1..=max_lag {
249 let corr = if var < 1e-10 {
250 0.0
251 } else {
252 let mut sum = 0.0;
253 let count = n - lag;
254 for i in 0..count {
255 sum += (arr[i] - mean) * (arr[i + lag] - mean);
256 }
257 sum / (count as f64 * var)
258 };
259
260 if corr.abs() > max_abs {
261 max_abs = corr.abs();
262 max_abs_lag = lag;
263 }
264 if corr.abs() > threshold {
265 violations += 1;
266 }
267
268 lags.push(LagCorrelation {
269 lag,
270 correlation: corr,
271 });
272 }
273
274 AutocorrResult {
275 lags,
276 max_abs_correlation: max_abs,
277 max_abs_lag,
278 threshold,
279 violations,
280 }
281}
282
283pub fn spectral_analysis(data: &[u8]) -> SpectralResult {
285 let n = data.len().min(4096); if n < 2 {
287 return SpectralResult {
288 peaks: Vec::new(),
289 flatness: 0.0,
290 dominant_frequency: 0.0,
291 total_power: 0.0,
292 };
293 }
294
295 let arr: Vec<f64> = data[..n].iter().map(|&b| b as f64 - 127.5).collect();
296
297 let n_freq = n / 2;
299 let mut power_spectrum: Vec<f64> = Vec::with_capacity(n_freq);
300
301 for k in 1..=n_freq {
302 let mut re = 0.0;
303 let mut im = 0.0;
304 let freq = 2.0 * PI * k as f64 / n as f64;
305 for (j, &x) in arr.iter().enumerate() {
306 re += x * (freq * j as f64).cos();
307 im -= x * (freq * j as f64).sin();
308 }
309 power_spectrum.push((re * re + im * im) / n as f64);
310 }
311
312 let total_power: f64 = power_spectrum.iter().sum();
313
314 let arith_mean = total_power / n_freq as f64;
316 let log_sum: f64 = power_spectrum
317 .iter()
318 .map(|&p| if p > 1e-20 { p.ln() } else { -46.0 }) .sum();
320 let geo_mean = (log_sum / n_freq as f64).exp();
321 let flatness = if arith_mean > 1e-20 {
322 (geo_mean / arith_mean).clamp(0.0, 1.0)
323 } else {
324 0.0
325 };
326
327 let mut indexed: Vec<(usize, f64)> = power_spectrum.iter().copied().enumerate().collect();
329 indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
330 let peaks: Vec<SpectralBin> = indexed
331 .iter()
332 .take(10)
333 .map(|&(i, p)| SpectralBin {
334 frequency: (i + 1) as f64 / n as f64,
335 power: p,
336 })
337 .collect();
338
339 let dominant_frequency = peaks.first().map(|p| p.frequency).unwrap_or(0.0);
340
341 SpectralResult {
342 peaks,
343 flatness,
344 dominant_frequency,
345 total_power,
346 }
347}
348
349pub fn bit_bias(data: &[u8]) -> BitBiasResult {
351 if data.is_empty() {
352 return BitBiasResult {
353 bit_probabilities: [0.0; 8],
354 overall_bias: 0.0,
355 chi_squared: 0.0,
356 p_value: 1.0,
357 has_significant_bias: false,
358 };
359 }
360
361 let n = data.len() as f64;
362 let mut counts = [0u64; 8];
363
364 for &byte in data {
365 for (bit, count) in counts.iter_mut().enumerate() {
366 if byte & (1 << bit) != 0 {
367 *count += 1;
368 }
369 }
370 }
371
372 let bit_probs: [f64; 8] = {
373 let mut arr = [0.0; 8];
374 for (i, &c) in counts.iter().enumerate() {
375 arr[i] = c as f64 / n;
376 }
377 arr
378 };
379
380 let overall_bias: f64 = bit_probs.iter().map(|&p| (p - 0.5).abs()).sum::<f64>() / 8.0;
381
382 let expected = n / 2.0;
384 let chi_squared: f64 = counts
385 .iter()
386 .map(|&c| {
387 let diff = c as f64 - expected;
388 diff * diff / expected
389 })
390 .sum();
391
392 let p_value = math::chi_squared_p_value(chi_squared, 8);
395
396 let has_significant_bias = bit_probs.iter().any(|&p| (p - 0.5).abs() > 0.01);
397
398 BitBiasResult {
399 bit_probabilities: bit_probs,
400 overall_bias,
401 chi_squared,
402 p_value,
403 has_significant_bias,
404 }
405}
406
407pub fn distribution_stats(data: &[u8]) -> DistributionResult {
409 if data.is_empty() {
410 return DistributionResult {
411 mean: 0.0,
412 variance: 0.0,
413 std_dev: 0.0,
414 skewness: 0.0,
415 kurtosis: 0.0,
416 histogram: vec![0u64; 256],
417 ks_statistic: 0.0,
418 ks_p_value: 1.0,
419 };
420 }
421
422 let n = data.len() as f64;
423 let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
424
425 let mean = arr.iter().sum::<f64>() / n;
426 let variance = arr.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
427 let std_dev = variance.sqrt();
428
429 let skewness = if std_dev > 1e-10 {
430 arr.iter()
431 .map(|&x| ((x - mean) / std_dev).powi(3))
432 .sum::<f64>()
433 / n
434 } else {
435 0.0
436 };
437
438 let kurtosis = if std_dev > 1e-10 {
439 arr.iter()
440 .map(|&x| ((x - mean) / std_dev).powi(4))
441 .sum::<f64>()
442 / n
443 - 3.0 } else {
445 0.0
446 };
447
448 let mut histogram = vec![0u64; 256];
450 for &b in data {
451 histogram[b as usize] += 1;
452 }
453
454 let mut sorted: Vec<f64> = arr.clone();
456 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
457 let mut ks_stat = 0.0f64;
458 for (i, &x) in sorted.iter().enumerate() {
459 let empirical = (i + 1) as f64 / n;
460 let theoretical = (x + 0.5) / 256.0; let diff = (empirical - theoretical).abs();
462 if diff > ks_stat {
463 ks_stat = diff;
464 }
465 }
466 let sqrt_n = n.sqrt();
468 let lambda = (sqrt_n + 0.12 + 0.11 / sqrt_n) * ks_stat;
469 let ks_p = (-2.0 * lambda * lambda).exp().clamp(0.0, 1.0) * 2.0;
470
471 DistributionResult {
472 mean,
473 variance,
474 std_dev,
475 skewness,
476 kurtosis,
477 histogram,
478 ks_statistic: ks_stat,
479 ks_p_value: ks_p.min(1.0),
480 }
481}
482
483pub fn approximate_entropy(data: &[u8], m: usize, r: f64) -> ApproxEntropyResult {
484 const MAX_SAMPLES: usize = 5000;
485
486 let mut sampled: Vec<f64> = data.iter().map(|&b| b as f64).collect();
487 if sampled.len() > MAX_SAMPLES {
488 let step = (sampled.len() / MAX_SAMPLES).max(1);
489 sampled = sampled
490 .iter()
491 .step_by(step)
492 .take(MAX_SAMPLES)
493 .copied()
494 .collect();
495 }
496
497 let actual_samples = sampled.len();
498 if m == 0 || actual_samples <= m + 1 || !r.is_finite() || r <= 0.0 {
499 return ApproxEntropyResult {
500 apen: f64::NAN,
501 m,
502 r,
503 phi_m: f64::NAN,
504 phi_m_plus_1: f64::NAN,
505 actual_samples,
506 is_valid: false,
507 };
508 }
509
510 let mean = sampled.iter().sum::<f64>() / actual_samples as f64;
511 let variance = sampled
512 .iter()
513 .map(|x| {
514 let d = x - mean;
515 d * d
516 })
517 .sum::<f64>()
518 / (actual_samples as f64 - 1.0);
519 let std_dev = variance.sqrt();
520 if std_dev == 0.0 {
521 return ApproxEntropyResult {
522 apen: f64::NAN,
523 m,
524 r,
525 phi_m: f64::NAN,
526 phi_m_plus_1: f64::NAN,
527 actual_samples,
528 is_valid: false,
529 };
530 }
531
532 fn phi(sampled: &[f64], m: usize, r: f64) -> Option<f64> {
533 if sampled.len() < m {
534 return None;
535 }
536
537 let n_templates = sampled.len() - m + 1;
538 if n_templates == 0 {
539 return None;
540 }
541
542 let mut sum_ln = 0.0;
543 for i in 0..n_templates {
544 let mut count = 0usize;
545 for j in 0..n_templates {
546 let mut max_dist = 0.0;
547 for k in 0..m {
548 let dist = (sampled[i + k] - sampled[j + k]).abs();
549 if dist > max_dist {
550 max_dist = dist;
551 }
552 if max_dist >= r {
553 break;
554 }
555 }
556 if max_dist < r {
557 count += 1;
558 }
559 }
560
561 if count == 0 {
562 return None;
563 }
564
565 let c_i = count as f64 / n_templates as f64;
566 if c_i <= 0.0 {
567 return None;
568 }
569 sum_ln += c_i.ln();
570 }
571
572 Some(sum_ln / n_templates as f64)
573 }
574
575 let Some(phi_m) = phi(&sampled, m, r) else {
576 return ApproxEntropyResult {
577 apen: f64::NAN,
578 m,
579 r,
580 phi_m: f64::NAN,
581 phi_m_plus_1: f64::NAN,
582 actual_samples,
583 is_valid: false,
584 };
585 };
586
587 let Some(phi_m_plus_1) = phi(&sampled, m + 1, r) else {
588 return ApproxEntropyResult {
589 apen: f64::NAN,
590 m,
591 r,
592 phi_m,
593 phi_m_plus_1: f64::NAN,
594 actual_samples,
595 is_valid: false,
596 };
597 };
598
599 let apen = phi_m - phi_m_plus_1;
600 ApproxEntropyResult {
601 apen,
602 m,
603 r,
604 phi_m,
605 phi_m_plus_1,
606 actual_samples,
607 is_valid: apen.is_finite(),
608 }
609}
610
611pub fn approximate_entropy_default(data: &[u8]) -> ApproxEntropyResult {
612 const DEFAULT_M: usize = 2;
613 const MAX_SAMPLES: usize = 5000;
614
615 let mut sampled: Vec<f64> = data.iter().map(|&b| b as f64).collect();
616 if sampled.len() > MAX_SAMPLES {
617 let step = (sampled.len() / MAX_SAMPLES).max(1);
618 sampled = sampled
619 .iter()
620 .step_by(step)
621 .take(MAX_SAMPLES)
622 .copied()
623 .collect();
624 }
625
626 if sampled.len() <= DEFAULT_M + 1 {
627 return ApproxEntropyResult {
628 apen: f64::NAN,
629 m: DEFAULT_M,
630 r: f64::NAN,
631 phi_m: f64::NAN,
632 phi_m_plus_1: f64::NAN,
633 actual_samples: sampled.len(),
634 is_valid: false,
635 };
636 }
637
638 let mean = sampled.iter().sum::<f64>() / sampled.len() as f64;
639 let variance = sampled
640 .iter()
641 .map(|x| {
642 let d = x - mean;
643 d * d
644 })
645 .sum::<f64>()
646 / (sampled.len() as f64 - 1.0);
647 let std_dev = variance.sqrt();
648 if std_dev == 0.0 {
649 return ApproxEntropyResult {
650 apen: f64::NAN,
651 m: DEFAULT_M,
652 r: 0.0,
653 phi_m: f64::NAN,
654 phi_m_plus_1: f64::NAN,
655 actual_samples: sampled.len(),
656 is_valid: false,
657 };
658 }
659
660 approximate_entropy(data, DEFAULT_M, 0.2 * std_dev)
661}
662
663pub fn anderson_darling(data: &[u8]) -> AndersonDarlingResult {
664 const MAX_SAMPLES: usize = 2048;
665
666 let critical_values = vec![
667 (0.25, 0.787),
668 (0.10, 1.248),
669 (0.05, 1.610),
670 (0.025, 2.092),
671 (0.01, 2.880),
672 ];
673
674 if data.len() < 2 {
675 return AndersonDarlingResult {
676 statistic: 0.0,
677 p_value: 0.0,
678 is_uniform: false,
679 critical_values,
680 is_valid: false,
681 };
682 }
683
684 let sample = if data.len() > MAX_SAMPLES {
685 &data[..MAX_SAMPLES]
686 } else {
687 data
688 };
689
690 let n = sample.len();
691 let n_f = n as f64;
692
693 let mut sorted = sample.to_vec();
694 sorted.sort_unstable();
695
696 let u: Vec<f64> = sorted.iter().map(|&x| (x as f64 + 0.5) / 256.0).collect();
697
698 let mut sum = 0.0;
699 for i in 0..n {
700 let coeff = (2 * i + 1) as f64;
701 let left = u[i].ln();
702 let right = (1.0 - u[n - 1 - i]).ln();
703 sum += coeff * (left + right);
704 }
705
706 let a2 = -n_f - (sum / n_f);
707 let statistic = a2 * (1.0 + 4.0 / n_f - 25.0 / (n_f * n_f));
708
709 let p_value = if statistic > 10.0 {
710 0.0
711 } else if statistic >= 0.6 {
712 (1.2937 - 5.709 * statistic + 0.0186 * statistic * statistic).exp()
713 } else if statistic >= 0.34 {
714 (0.9177 - 4.279 * statistic - 1.38 * statistic * statistic).exp()
715 } else if statistic >= 0.2 {
716 1.0 - (-8.318 + 42.796 * statistic - 59.938 * statistic * statistic).exp()
717 } else {
718 1.0 - (-13.436 + 101.14 * statistic - 223.73 * statistic * statistic).exp()
719 }
720 .clamp(0.0, 1.0);
721
722 AndersonDarlingResult {
723 statistic,
724 p_value,
725 is_uniform: p_value > 0.05,
726 critical_values,
727 is_valid: true,
728 }
729}
730
731pub fn stationarity_test(data: &[u8]) -> StationarityResult {
733 let n_windows = 10usize;
734 let window_size = data.len() / n_windows;
735 if window_size < 10 {
736 return StationarityResult {
737 is_stationary: true,
738 f_statistic: 0.0,
739 window_means: vec![],
740 window_std_devs: vec![],
741 n_windows: 0,
742 };
743 }
744
745 let mut window_means = Vec::with_capacity(n_windows);
746 let mut window_std_devs = Vec::with_capacity(n_windows);
747
748 for w in 0..n_windows {
749 let start = w * window_size;
750 let end = start + window_size;
751 let window = &data[start..end];
752 let arr: Vec<f64> = window.iter().map(|&b| b as f64).collect();
753 let mean = arr.iter().sum::<f64>() / arr.len() as f64;
754 let var = arr.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / arr.len() as f64;
755 window_means.push(mean);
756 window_std_devs.push(var.sqrt());
757 }
758
759 let grand_mean: f64 = window_means.iter().sum::<f64>() / n_windows as f64;
761 let between_var: f64 = window_means
762 .iter()
763 .map(|&m| (m - grand_mean).powi(2))
764 .sum::<f64>()
765 / (n_windows - 1) as f64
766 * window_size as f64;
767
768 let within_var: f64 = window_std_devs.iter().map(|&s| s * s).sum::<f64>() / n_windows as f64;
769
770 let f_stat = if within_var > 1e-10 {
771 between_var / within_var
772 } else {
773 0.0
774 };
775
776 let is_stationary = f_stat < 1.88;
780
781 StationarityResult {
782 is_stationary,
783 f_statistic: f_stat,
784 window_means,
785 window_std_devs,
786 n_windows,
787 }
788}
789
790pub fn runs_analysis(data: &[u8]) -> RunsResult {
792 if data.is_empty() {
793 return RunsResult {
794 longest_run: 0,
795 expected_longest_run: 0.0,
796 total_runs: 0,
797 expected_runs: 0.0,
798 };
799 }
800
801 let mut longest = 1usize;
802 let mut current = 1usize;
803 let mut total_runs = 1usize;
804
805 for i in 1..data.len() {
806 if data[i] == data[i - 1] {
807 current += 1;
808 if current > longest {
809 longest = current;
810 }
811 } else {
812 total_runs += 1;
813 current = 1;
814 }
815 }
816
817 let n = data.len() as f64;
818 let expected_longest = (n.ln() / 256.0_f64.ln()).max(1.0);
820 let expected_runs = n * (1.0 - 1.0 / 256.0) + 1.0;
822
823 RunsResult {
824 longest_run: longest,
825 expected_longest_run: expected_longest,
826 total_runs,
827 expected_runs,
828 }
829}
830
831pub fn permutation_entropy(data: &[u8], order: usize, delay: usize) -> PermutationEntropyResult {
832 if order < 2 || delay == 0 {
833 return PermutationEntropyResult {
834 permutation_entropy: 0.0,
835 normalized_entropy: 0.0,
836 order,
837 delay,
838 n_patterns: 0,
839 pattern_counts: Vec::new(),
840 is_valid: false,
841 };
842 }
843
844 let required_len = (order - 1).saturating_mul(delay).saturating_add(2);
845 if data.len() < required_len {
846 return PermutationEntropyResult {
847 permutation_entropy: 0.0,
848 normalized_entropy: 0.0,
849 order,
850 delay,
851 n_patterns: 0,
852 pattern_counts: Vec::new(),
853 is_valid: false,
854 };
855 }
856
857 let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
858 let n_windows = data.len() - (order - 1) * delay;
859
860 let mut counts: HashMap<Vec<usize>, usize> = HashMap::new();
861 for i in 0..n_windows {
862 let mut window = Vec::with_capacity(order);
863 for j in 0..order {
864 window.push(arr[i + j * delay]);
865 }
866
867 let mut pattern: Vec<usize> = (0..order).collect();
868 pattern.sort_by(|&a, &b| {
869 window[a]
870 .partial_cmp(&window[b])
871 .unwrap_or(std::cmp::Ordering::Equal)
872 });
873 *counts.entry(pattern).or_insert(0) += 1;
874 }
875
876 let total_patterns = n_windows as f64;
877 let mut entropy = 0.0;
878 for &count in counts.values() {
879 let p = count as f64 / total_patterns;
880 if p > 0.0 {
881 entropy -= p * p.ln();
882 }
883 }
884
885 let max_entropy = (2..=order).map(|v| (v as f64).ln()).sum::<f64>();
886 let normalized_entropy = if max_entropy > 0.0 {
887 entropy / max_entropy
888 } else {
889 0.0
890 };
891
892 let n_patterns = counts.len();
893 let mut pattern_counts: Vec<(Vec<usize>, usize)> = counts.into_iter().collect();
894 pattern_counts.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
895 pattern_counts.truncate(20);
896
897 PermutationEntropyResult {
898 permutation_entropy: entropy,
899 normalized_entropy,
900 order,
901 delay,
902 n_patterns,
903 pattern_counts,
904 is_valid: true,
905 }
906}
907
908pub fn permutation_entropy_default(data: &[u8]) -> PermutationEntropyResult {
909 permutation_entropy(data, 3, 1)
910}
911
912pub fn cross_correlation_matrix(sources_data: &[(String, Vec<u8>)]) -> CrossCorrMatrix {
914 let mut pairs = Vec::new();
915 let mut flagged_count = 0;
916
917 for i in 0..sources_data.len() {
918 for j in (i + 1)..sources_data.len() {
919 let (ref name_a, ref data_a) = sources_data[i];
920 let (ref name_b, ref data_b) = sources_data[j];
921 let min_len = data_a.len().min(data_b.len());
922 if min_len < 100 {
923 continue;
924 }
925 let corr = pearson_correlation(&data_a[..min_len], &data_b[..min_len]);
926 let flagged = corr.abs() > 0.3;
927 if flagged {
928 flagged_count += 1;
929 }
930 pairs.push(CrossCorrPair {
931 source_a: name_a.clone(),
932 source_b: name_b.clone(),
933 correlation: corr,
934 flagged,
935 });
936 }
937 }
938
939 CrossCorrMatrix {
940 pairs,
941 flagged_count,
942 }
943}
944
/// Runs the complete statistical battery over one entropy source's sample
/// and bundles every result into a single report.
///
/// Autocorrelation is profiled up to lag 100; Shannon and min-entropy come
/// from the conditioning module's quick estimators.
pub fn full_analysis(source_name: &str, data: &[u8]) -> SourceAnalysis {
    // Imported locally to keep the dependency scoped to this function.
    use crate::conditioning::{quick_min_entropy, quick_shannon};
    SourceAnalysis {
        source_name: source_name.to_string(),
        sample_size: data.len(),
        shannon_entropy: quick_shannon(data),
        min_entropy: quick_min_entropy(data),
        autocorrelation: autocorrelation_profile(data, 100),
        spectral: spectral_analysis(data),
        bit_bias: bit_bias(data),
        distribution: distribution_stats(data),
        stationarity: stationarity_test(data),
        runs: runs_analysis(data),
    }
}
961
/// Pearson correlation coefficient between two byte slices.
///
/// Slices of unequal length are compared over their common prefix (the
/// previous version indexed `b` by `a`'s length and panicked when `b` was
/// shorter). Returns 0.0 for empty input or when either slice has
/// (near-)zero variance, where the coefficient is undefined.
pub fn pearson_correlation(a: &[u8], b: &[u8]) -> f64 {
    let len = a.len().min(b.len());
    if len == 0 {
        return 0.0;
    }
    let n = len as f64;

    let mean_a = a[..len].iter().map(|&x| x as f64).sum::<f64>() / n;
    let mean_b = b[..len].iter().map(|&x| x as f64).sum::<f64>() / n;

    let mut cov = 0.0;
    let mut var_a = 0.0;
    let mut var_b = 0.0;
    for (&xa, &xb) in a[..len].iter().zip(b[..len].iter()) {
        let da = xa as f64 - mean_a;
        let db = xb as f64 - mean_b;
        cov += da * db;
        var_a += da * da;
        var_b += db * db;
    }

    let denom = (var_a * var_b).sqrt();
    // Zero variance (constant input) makes the coefficient undefined.
    if denom < 1e-10 { 0.0 } else { cov / denom }
}
988
/// Unit tests: each statistic is exercised against deterministic
/// pseudo-random data (seeded LCG) and against pathological inputs
/// (constant, alternating, monotone, empty).
#[cfg(test)]
mod tests {
    use super::*;

    // Deterministic 64-bit LCG; the high bits of the state become the
    // output byte, so the same seed always yields the same sequence.
    fn random_data_seeded(n: usize, seed: u64) -> Vec<u8> {
        let mut data = Vec::with_capacity(n);
        let mut state: u64 = seed;
        for _ in 0..n {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    // Convenience wrapper with a fixed seed.
    fn random_data(n: usize) -> Vec<u8> {
        random_data_seeded(n, 0xdeadbeef)
    }

    #[test]
    fn test_autocorrelation_random() {
        let data = random_data(10000);
        let result = autocorrelation_profile(&data, 50);
        assert_eq!(result.lags.len(), 50);
        assert!(result.max_abs_correlation < 0.1);
    }

    #[test]
    fn test_autocorrelation_correlated() {
        // Alternating high/low bytes: strong correlation at every lag.
        let mut data = vec![0u8; 1000];
        for (i, byte) in data.iter_mut().enumerate().take(1000) {
            *byte = if i % 2 == 0 { 200 } else { 50 };
        }
        let result = autocorrelation_profile(&data, 10);
        assert!(result.max_abs_correlation > 0.5);
    }

    #[test]
    fn test_spectral_analysis() {
        let data = random_data(1024);
        let result = spectral_analysis(&data);
        assert!(result.flatness > 0.0);
        assert!(!result.peaks.is_empty());
    }

    #[test]
    fn test_bit_bias_random() {
        let data = random_data(10000);
        let result = bit_bias(&data);
        for &p in &result.bit_probabilities {
            assert!((p - 0.5).abs() < 0.05);
        }
    }

    #[test]
    fn test_bit_bias_all_ones() {
        let data = vec![0xFF; 1000];
        let result = bit_bias(&data);
        for &p in &result.bit_probabilities {
            assert!((p - 1.0).abs() < 0.001);
        }
        assert!(result.has_significant_bias);
    }

    #[test]
    fn test_distribution_stats() {
        let data = random_data(10000);
        let result = distribution_stats(&data);
        // Uniform bytes should have a mean near 127.5.
        assert!((result.mean - 127.5).abs() < 10.0);
        assert!(result.variance > 0.0);
    }

    #[test]
    fn test_anderson_darling_random_uniform() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = anderson_darling(&data);
        assert!(result.is_valid);
        assert!(result.is_uniform);
        assert!(result.p_value > 0.05);
    }

    #[test]
    fn test_anderson_darling_constant_rejects_uniform() {
        let data = vec![42u8; 1000];
        let result = anderson_darling(&data);
        assert!(result.is_valid);
        assert!(!result.is_uniform);
        assert!(result.statistic > 100.0);
    }

    #[test]
    fn test_anderson_darling_biased_zero_data() {
        let data: Vec<u8> = (0..1000).map(|_| 0u8).collect();
        let result = anderson_darling(&data);
        assert!(result.is_valid);
        assert!(!result.is_uniform);
    }

    #[test]
    fn test_anderson_darling_empty_invalid() {
        let result = anderson_darling(&[]);
        assert!(!result.is_valid);
    }

    #[test]
    fn test_stationarity_stationary() {
        let data = random_data(10000);
        let result = stationarity_test(&data);
        assert!(result.is_stationary);
        assert_eq!(result.n_windows, 10);
    }

    #[test]
    fn test_runs_analysis() {
        let data = random_data(10000);
        let result = runs_analysis(&data);
        assert!(result.total_runs > 0);
        assert!(result.longest_run >= 1);
    }

    #[test]
    fn test_cross_correlation() {
        // Two independent streams should show negligible correlation.
        let a = random_data_seeded(1000, 0xdeadbeef);
        let b = random_data_seeded(1000, 0xcafebabe12345678);
        let result = cross_correlation_matrix(&[("a".to_string(), a), ("b".to_string(), b)]);
        assert_eq!(result.pairs.len(), 1);
        assert!(result.pairs[0].correlation.abs() < 0.3);
    }

    #[test]
    fn test_approximate_entropy_random_high() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = approximate_entropy_default(&data);
        assert!(result.is_valid);
        assert!(result.apen > 0.5);
    }

    #[test]
    fn test_approximate_entropy_periodic_lower_than_random() {
        let random = random_data_seeded(5000, 0xdeadbeef);
        let periodic: Vec<u8> = (0..5000).map(|i| (i % 4) as u8).collect();

        let random_result = approximate_entropy_default(&random);
        let periodic_result = approximate_entropy_default(&periodic);

        assert!(periodic_result.is_valid);
        assert!(random_result.is_valid);
        assert!(periodic_result.apen < random_result.apen);
    }

    #[test]
    fn test_approximate_entropy_constant_invalid() {
        let data = vec![42u8; 5000];
        let result = approximate_entropy_default(&data);
        assert!(!result.is_valid);
    }

    #[test]
    fn test_approximate_entropy_empty_invalid() {
        let result = approximate_entropy_default(&[]);
        assert!(!result.is_valid);
    }

    #[test]
    fn test_permutation_entropy_random_high() {
        let data = random_data_seeded(5000, 0xdeadbeef);
        let result = permutation_entropy(&data, 3, 1);
        assert!(result.is_valid);
        assert!(result.normalized_entropy > 0.95);
    }

    #[test]
    fn test_permutation_entropy_monotone_low() {
        // Mostly increasing data concentrates on the ascending pattern.
        let data: Vec<u8> = (0..1000).map(|i| (i % 256) as u8).collect();
        let result = permutation_entropy(&data, 3, 1);
        assert!(result.is_valid);
        assert!(result.normalized_entropy < 0.3);
    }

    #[test]
    fn test_permutation_entropy_constant_zero() {
        let data = vec![42u8; 1000];
        let result = permutation_entropy_default(&data);
        assert!(result.is_valid);
        assert!(result.normalized_entropy.abs() < 1e-12);
        assert_eq!(result.n_patterns, 1);
    }

    #[test]
    fn test_permutation_entropy_empty_invalid() {
        let result = permutation_entropy_default(&[]);
        assert!(!result.is_valid);
        assert_eq!(result.n_patterns, 0);
    }

    #[test]
    fn test_full_analysis() {
        let data = random_data(1000);
        let result = full_analysis("test_source", &data);
        assert_eq!(result.source_name, "test_source");
        assert_eq!(result.sample_size, 1000);
    }
}