//! Entropy conditioning and min-entropy estimation utilities.

use sha2::{Digest, Sha256};
use std::collections::HashMap;

/// How raw entropy samples are post-processed before use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum ConditioningMode {
    /// Pass samples through unchanged, truncated to the requested length.
    Raw,
    /// Von Neumann debiasing over adjacent bit pairs.
    VonNeumann,
    /// SHA-256 based conditioning (the default).
    #[default]
    Sha256,
}

impl std::fmt::Display for ConditioningMode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Raw => write!(f, "raw"),
            Self::VonNeumann => write!(f, "von_neumann"),
            Self::Sha256 => write!(f, "sha256"),
        }
    }
}

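/// Condition `raw` entropy into at most `n_output` bytes using `mode`.
///
/// `Raw` and `VonNeumann` may return fewer than `n_output` bytes when the
/// input runs short; `Sha256` always returns exactly `n_output` bytes for
/// non-empty input.
///
/// # Examples
///
/// A minimal usage sketch (marked `ignore` since the crate path is not shown
/// here):
///
/// ```ignore
/// let raw = [0xABu8; 128];
/// let out = condition(&raw, 32, ConditioningMode::Sha256);
/// assert_eq!(out.len(), 32);
/// ```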
pub fn condition(raw: &[u8], n_output: usize, mode: ConditioningMode) -> Vec<u8> {
    // Dispatch on the requested conditioning mode.
    match mode {
        ConditioningMode::Raw => {
            let mut out = raw.to_vec();
            out.truncate(n_output);
            out
        }
        ConditioningMode::VonNeumann => {
            let debiased = von_neumann_debias(raw);
            let mut out = debiased;
            out.truncate(n_output);
            out
        }
        ConditioningMode::Sha256 => sha256_condition_bytes(raw, n_output),
    }
}

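/// Condition `raw` into exactly `n_output` bytes with chained SHA-256.
///
/// Returns an empty vector for empty input. This acts as an expander as well
/// as a compressor: requesting more output than input re-hashes the same
/// bytes under fresh counters, so the result carries no more entropy than
/// `raw` supplied.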
pub fn sha256_condition_bytes(raw: &[u8], n_output: usize) -> Vec<u8> {
    if raw.is_empty() {
        return Vec::new();
    }
    // Chain SHA-256 over 64-byte chunks: each digest becomes the state for
    // the next block, and a running counter keeps wrapped-around passes over
    // the same input distinct.
    let mut output = Vec::with_capacity(n_output);
    let mut state = [0u8; 32];
    let mut offset = 0;
    let mut counter: u64 = 0;
    while output.len() < n_output {
        let end = (offset + 64).min(raw.len());
        let chunk = &raw[offset..end];
        let mut h = Sha256::new();
        h.update(state);
        h.update(chunk);
        h.update(counter.to_le_bytes());
        state = h.finalize().into();
        output.extend_from_slice(&state);
        offset += 64;
        counter += 1;
        if offset >= raw.len() {
            offset = 0;
        }
    }
    output.truncate(n_output);
    output
}

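/// One step of stateful SHA-256 conditioning.
///
/// Hashes `state || sample || counter || timestamp || extra` and returns
/// `(new_state, output)`. Both tuple fields are the same 32-byte digest, and
/// the timestamp makes repeated calls non-deterministic.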
pub fn sha256_condition(
    state: &[u8; 32],
    sample: &[u8],
    counter: u64,
    extra: &[u8],
) -> ([u8; 32], [u8; 32]) {
    let mut h = Sha256::new();
    h.update(state);
    h.update(sample);
    h.update(counter.to_le_bytes());

    // Fold in a wall-clock timestamp; note this makes repeated calls with
    // identical inputs produce different digests.
    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    h.update(ts.as_nanos().to_le_bytes());

    h.update(extra);

    // The new chaining state and the output block are the same digest.
    let digest: [u8; 32] = h.finalize().into();
    (digest, digest)
}

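/// Von Neumann debiasing over adjacent bit pairs.
///
/// Emits the first bit of every unequal pair and discards equal pairs. This
/// removes bias from independent bits, at the cost of at least half of the
/// input bits (three quarters in expectation for unbiased input); constant
/// input produces no output at all.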
pub fn von_neumann_debias(data: &[u8]) -> Vec<u8> {
    let mut bits = Vec::new();
    for byte in data {
        // Walk adjacent bit pairs, most significant first: emit the first
        // bit when the pair differs, discard the pair when it matches.
        for i in (0..8).step_by(2) {
            let b1 = (byte >> (7 - i)) & 1;
            let b2 = (byte >> (6 - i)) & 1;
            if b1 != b2 {
                bits.push(b1);
            }
        }
    }

    // Repack the surviving bits MSB-first; leftover bits (fewer than 8) are
    // dropped.
    let mut result = Vec::with_capacity(bits.len() / 8);
    for chunk in bits.chunks_exact(8) {
        let mut byte = 0u8;
        for (i, &bit) in chunk.iter().enumerate() {
            byte |= bit << (7 - i);
        }
        result.push(byte);
    }
    result
}

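/// Fold `data` in half with XOR, halving its length.
///
/// Inputs shorter than two bytes are returned unchanged; for odd lengths the
/// final byte is dropped.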
pub fn xor_fold(data: &[u8]) -> Vec<u8> {
    if data.len() < 2 {
        return data.to_vec();
    }
    // XOR the first half against the second; an odd trailing byte is dropped.
    let half = data.len() / 2;
    (0..half).map(|i| data[i] ^ data[half + i]).collect()
}

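/// Plug-in min-entropy of the byte distribution, in bits per byte.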
pub fn min_entropy(data: &[u8]) -> f64 {
    // Plug-in estimate: H_inf = -log2(p_max), where p_max is the observed
    // frequency of the most common byte value.
    if data.is_empty() {
        return 0.0;
    }
    let mut counts = [0u64; 256];
    for &b in data {
        counts[b as usize] += 1;
    }
    let n = data.len() as f64;
    let p_max = counts.iter().map(|&c| c as f64 / n).fold(0.0f64, f64::max);
    if p_max <= 0.0 {
        return 0.0;
    }
    -p_max.log2()
}

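/// Most-common-value (MCV) min-entropy estimate, in the style of
/// NIST SP 800-90B.
///
/// Returns `(entropy, p_upper)`, where `p_upper` is a 99% upper confidence
/// bound on the probability of the most common byte value.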
pub fn mcv_estimate(data: &[u8]) -> (f64, f64) {
    if data.is_empty() {
        return (0.0, 1.0);
    }
    let mut counts = [0u64; 256];
    for &b in data {
        counts[b as usize] += 1;
    }
    let n = data.len() as f64;
    let max_count = *counts.iter().max().unwrap() as f64;
    let p_hat = max_count / n;

    // 99% upper confidence bound on the most-common-value probability.
    let z = 2.576;
    let p_u = (p_hat + z * (p_hat * (1.0 - p_hat) / n).sqrt()).min(1.0);

    let h = if p_u >= 1.0 {
        0.0
    } else {
        (-p_u.log2()).max(0.0)
    };
    (h, p_u)
}

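/// Diagnostic collision-based estimate (bits per byte, capped at 8.0).
///
/// A heuristic built on the spacing of adjacent equal bytes, not the
/// SP 800-90B collision estimator; treat it as a sanity check only.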
pub fn collision_estimate(data: &[u8]) -> f64 {
    if data.len() < 3 {
        return 0.0;
    }

    // Record distances between successive adjacent-byte collisions.
    let mut distances = Vec::new();
    let mut last_collision: Option<usize> = None;

    for i in 0..data.len() - 1 {
        if data[i] == data[i + 1] {
            if let Some(prev) = last_collision {
                distances.push((i - prev) as f64);
            }
            last_collision = Some(i);
        }
    }

    if distances.is_empty() {
        // Too few collisions for distance statistics; fall back to the raw
        // collision rate.
        let mut collision_count = 0usize;
        for i in 0..data.len() - 1 {
            if data[i] == data[i + 1] {
                collision_count += 1;
            }
        }
        if collision_count == 0 {
            return 8.0;
        }
        let q_hat = collision_count as f64 / (data.len() - 1) as f64;
        let p_max = q_hat.sqrt().min(1.0);
        return if p_max <= 0.0 {
            8.0
        } else {
            (-p_max.log2()).min(8.0)
        };
    }

    let mean_dist = distances.iter().sum::<f64>() / distances.len() as f64;

    let n_collisions = distances.len() as f64;
    let variance = distances
        .iter()
        .map(|d| (d - mean_dist).powi(2))
        .sum::<f64>()
        / (n_collisions - 1.0).max(1.0);
    let std_err = (variance / n_collisions).sqrt();

    // 99% lower confidence bound on the mean collision distance.
    let z = 2.576;
    let mean_lower = (mean_dist - z * std_err).max(1.0);

    let p_max = (1.0 / mean_lower).sqrt().min(1.0);

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}

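/// Diagnostic first-order Markov estimate (bits per byte, capped at 8.0).
///
/// Bounds min-entropy by the single most probable initial symbol or
/// transition observed in the data.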
pub fn markov_estimate(data: &[u8]) -> f64 {
    if data.len() < 2 {
        return 0.0;
    }

    let n = data.len() as f64;

    // Marginal (initial-state) counts.
    let mut init_counts = [0u64; 256];
    for &b in data {
        init_counts[b as usize] += 1;
    }

    // First-order transition counts, indexed by (from, to).
    let mut transitions = vec![0u64; 256 * 256];
    for w in data.windows(2) {
        transitions[w[0] as usize * 256 + w[1] as usize] += 1;
    }

    let mut row_sums = [0u64; 256];
    for (from, row_sum) in row_sums.iter_mut().enumerate() {
        let base = from * 256;
        *row_sum = transitions[base..base + 256].iter().sum();
    }

    // Take the single most probable event: either the most likely initial
    // symbol or the most likely transition out of any predecessor.
    let mut p_max = 0.0f64;
    for s in 0..256usize {
        let p_init_s = init_counts[s] as f64 / n;
        p_max = p_max.max(p_init_s);

        for pred in 0..256usize {
            if row_sums[pred] > 0 {
                let p_trans = transitions[pred * 256 + s] as f64 / row_sums[pred] as f64;
                p_max = p_max.max(p_trans);
            }
        }
    }

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}

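/// Diagnostic Maurer-inspired compression estimate (bits per byte).
///
/// Averages the log2 distance since each symbol's previous occurrence and
/// maps a 99% lower confidence bound of that statistic into [0, 8].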
pub fn compression_estimate(data: &[u8]) -> f64 {
    if data.len() < 100 {
        return 0.0;
    }

    // Maurer-inspired universal statistic over byte symbols.
    let l = 8.0f64; // bits per symbol
    let q = 256.min(data.len() / 4); // initialization segment length
    let k = data.len() - q; // test segment length
    if k == 0 {
        return 0.0;
    }

    // Seed the dictionary with last-seen positions from the first q samples.
    let mut last_pos = [0usize; 256];
    for (i, &b) in data[..q].iter().enumerate() {
        last_pos[b as usize] = i + 1; // 1-based positions; 0 means unseen
    }

    let mut sum = 0.0f64;
    let mut count = 0u64;
    for (i, &b) in data[q..].iter().enumerate() {
        let pos = q + i + 1;
        let prev = last_pos[b as usize];
        if prev > 0 {
            let distance = pos - prev;
            sum += (distance as f64).log2();
            count += 1;
        }
        last_pos[b as usize] = pos;
    }

    if count == 0 {
        return l;
    }

    let f_n = sum / count as f64;

    // Second pass for the sample variance of the log distances.
    let mut var_sum = 0.0f64;
    let mut last_pos2 = [0usize; 256];
    for (i, &b) in data[..q].iter().enumerate() {
        last_pos2[b as usize] = i + 1;
    }
    for (i, &b) in data[q..].iter().enumerate() {
        let pos = q + i + 1;
        let prev = last_pos2[b as usize];
        if prev > 0 {
            let distance = pos - prev;
            let log_d = (distance as f64).log2();
            var_sum += (log_d - f_n).powi(2);
        }
        last_pos2[b as usize] = pos;
    }
    let variance = var_sum / (count as f64 - 1.0).max(1.0);
    let std_err = (variance / count as f64).sqrt();

    // 99% lower confidence bound on the statistic.
    let z = 2.576;
    let f_lower = (f_n - z * std_err).max(0.0);

    // Heuristic mapping of the statistic into [0, l] bits per byte.
    (f_lower * f_lower / l).min(l)
}

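/// Diagnostic t-tuple estimate: scans tuple lengths 1..=3 for the most
/// common tuple and reports the worst per-byte entropy found.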
pub fn t_tuple_estimate(data: &[u8]) -> f64 {
    if data.len() < 20 {
        return 0.0;
    }

    let mut min_h = 8.0f64;

    for t in 1..=3usize {
        if data.len() < t + 1 {
            break;
        }
        let mut counts: HashMap<&[u8], u64> = HashMap::new();
        for window in data.windows(t) {
            *counts.entry(window).or_insert(0) += 1;
        }
        let n = (data.len() - t + 1) as f64;
        let max_count = *counts.values().max().unwrap_or(&0) as f64;
        let p_max = max_count / n;

        if p_max > 0.0 {
            // Normalize tuple entropy to bits per byte.
            let h = -p_max.log2() / t as f64;
            min_h = min_h.min(h);
        }
    }

    min_h.min(8.0)
}

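/// Run the full battery of estimators over `data`.
///
/// The MCV estimate is the primary `min_entropy` figure; the remaining
/// estimators are diagnostics, summarized by `heuristic_floor`.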
pub fn min_entropy_estimate(data: &[u8]) -> MinEntropyReport {
    let shannon = quick_shannon(data);
    let (mcv_h, mcv_p_upper) = mcv_estimate(data);
    let collision_h = collision_estimate(data);
    let markov_h = markov_estimate(data);
    let compression_h = compression_estimate(data);
    let t_tuple_h = t_tuple_estimate(data);

    let heuristic_floor = collision_h.min(markov_h).min(compression_h).min(t_tuple_h);

    MinEntropyReport {
        shannon_entropy: shannon,
        min_entropy: mcv_h,
        heuristic_floor,
        mcv_estimate: mcv_h,
        mcv_p_upper,
        collision_estimate: collision_h,
        markov_estimate: markov_h,
        compression_estimate: compression_h,
        t_tuple_estimate: t_tuple_h,
        samples: data.len(),
    }
}

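/// Results of a full [`min_entropy_estimate`] run.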
#[derive(Debug, Clone)]
pub struct MinEntropyReport {
    /// Shannon entropy in bits/byte (upper bound on min-entropy).
    pub shannon_entropy: f64,
    /// Primary min-entropy figure (the MCV estimate).
    pub min_entropy: f64,
    /// Minimum across the diagnostic estimators.
    pub heuristic_floor: f64,
    /// Most-common-value estimate in bits/byte.
    pub mcv_estimate: f64,
    /// Upper confidence bound on the most common value's probability.
    pub mcv_p_upper: f64,
    /// Collision-based diagnostic estimate.
    pub collision_estimate: f64,
    /// First-order Markov diagnostic estimate.
    pub markov_estimate: f64,
    /// Maurer-inspired compression diagnostic estimate.
    pub compression_estimate: f64,
    /// t-tuple diagnostic estimate.
    pub t_tuple_estimate: f64,
    /// Number of input bytes analyzed.
    pub samples: usize,
}

impl std::fmt::Display for MinEntropyReport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Min-Entropy Analysis ({} samples)", self.samples)?;
        writeln!(
            f,
            " Shannon H: {:.3} bits/byte (upper bound)",
            self.shannon_entropy
        )?;
        writeln!(
            f,
            " Min-Entropy H∞: {:.3} bits/byte (primary, MCV)",
            self.min_entropy
        )?;
        writeln!(
            f,
            " Heuristic floor: {:.3} bits/byte (diagnostic minimum)",
            self.heuristic_floor
        )?;
        writeln!(f, " ─────────────────────────────────")?;
        writeln!(
            f,
            " MCV: {:.3} (p_upper={:.4})",
            self.mcv_estimate, self.mcv_p_upper
        )?;
        writeln!(f, " Collision (diag): {:.3}", self.collision_estimate)?;
        writeln!(f, " Markov (diag): {:.3}", self.markov_estimate)?;
        writeln!(
            f,
            " Compression (diag): {:.3} (Maurer-inspired)",
            self.compression_estimate
        )?;
        writeln!(f, " t-Tuple (diag): {:.3}", self.t_tuple_estimate)?;
        Ok(())
    }
}

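/// Shorthand for the primary (MCV) min-entropy estimate.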
pub fn quick_min_entropy(data: &[u8]) -> f64 {
    mcv_estimate(data).0
}

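/// Plug-in Shannon entropy of the byte distribution, in bits per byte.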
pub fn quick_shannon(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut counts = [0u64; 256];
    for &b in data {
        counts[b as usize] += 1;
    }
    let n = data.len() as f64;
    let mut h = 0.0;
    for &c in &counts {
        if c > 0 {
            let p = c as f64 / n;
            h -= p * p.log2();
        }
    }
    h
}

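/// Letter-grade a min-entropy figure: A >= 6.0, B >= 4.0, C >= 2.0,
/// D >= 1.0, F below that.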
pub fn grade_min_entropy(min_entropy: f64) -> char {
    if min_entropy >= 6.0 {
        'A'
    } else if min_entropy >= 4.0 {
        'B'
    } else if min_entropy >= 2.0 {
        'C'
    } else if min_entropy >= 1.0 {
        'D'
    } else {
        'F'
    }
}

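/// Fast quality heuristic combining Shannon entropy, zlib compressibility,
/// and distinct-byte coverage into a 0-100 score and letter grade.
///
/// Inputs shorter than 16 bytes are graded 'F' outright.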
pub fn quick_quality(data: &[u8]) -> QualityReport {
    if data.len() < 16 {
        return QualityReport {
            samples: data.len(),
            unique_values: 0,
            shannon_entropy: 0.0,
            compression_ratio: 0.0,
            quality_score: 0.0,
            grade: 'F',
        };
    }

    let shannon = quick_shannon(data);

    // Compression ratio as a cheap randomness proxy: random data should be
    // nearly incompressible (ratio close to 1.0).
    use flate2::Compression;
    use flate2::write::ZlibEncoder;
    use std::io::Write;
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
    encoder.write_all(data).unwrap_or_default();
    let compressed = encoder.finish().unwrap_or_default();
    let comp_ratio = compressed.len() as f64 / data.len() as f64;

    // Count distinct byte values.
    let mut seen = [false; 256];
    for &b in data {
        seen[b as usize] = true;
    }
    let unique = seen.iter().filter(|&&s| s).count();

    // Weighted score: 60% entropy efficiency, 20% incompressibility,
    // 20% byte coverage.
    let eff = shannon / 8.0;
    let score = eff * 60.0 + comp_ratio.min(1.0) * 20.0 + (unique as f64 / 256.0).min(1.0) * 20.0;
    let grade = if score >= 80.0 {
        'A'
    } else if score >= 60.0 {
        'B'
    } else if score >= 40.0 {
        'C'
    } else if score >= 20.0 {
        'D'
    } else {
        'F'
    };

    QualityReport {
        samples: data.len(),
        unique_values: unique,
        shannon_entropy: shannon,
        compression_ratio: comp_ratio,
        quality_score: score,
        grade,
    }
}

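/// Summary produced by [`quick_quality`].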
#[derive(Debug, Clone)]
pub struct QualityReport {
    pub samples: usize,
    pub unique_values: usize,
    pub shannon_entropy: f64,
    pub compression_ratio: f64,
    pub quality_score: f64,
    pub grade: char,
}

#[cfg(test)]
mod tests {
    use super::*;

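    // --- conditioning ---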
    #[test]
    fn test_condition_raw_passthrough() {
        let data = vec![1, 2, 3, 4, 5];
        let out = condition(&data, 3, ConditioningMode::Raw);
        assert_eq!(out, vec![1, 2, 3]);
    }

    #[test]
    fn test_condition_raw_exact_length() {
        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
        let out = condition(&data, 100, ConditioningMode::Raw);
        assert_eq!(out, data);
    }

    #[test]
    fn test_condition_raw_truncates() {
        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
        let out = condition(&data, 50, ConditioningMode::Raw);
        assert_eq!(out.len(), 50);
        assert_eq!(out, &data[..50]);
    }

    #[test]
    fn test_condition_sha256_produces_exact_length() {
        let data = vec![42u8; 100];
        for len in [1, 16, 32, 64, 100, 256] {
            let out = condition(&data, len, ConditioningMode::Sha256);
            assert_eq!(out.len(), len, "SHA256 should produce exactly {len} bytes");
        }
    }

    #[test]
    fn test_sha256_deterministic() {
        let data = vec![42u8; 100];
        let out1 = sha256_condition_bytes(&data, 64);
        let out2 = sha256_condition_bytes(&data, 64);
        assert_eq!(
            out1, out2,
            "SHA256 conditioning should be deterministic for same input"
        );
    }

    #[test]
    fn test_sha256_different_inputs_differ() {
        let data1 = vec![1u8; 100];
        let data2 = vec![2u8; 100];
        let out1 = sha256_condition_bytes(&data1, 32);
        let out2 = sha256_condition_bytes(&data2, 32);
        assert_ne!(out1, out2);
    }

    #[test]
    fn test_sha256_empty_input() {
        let out = sha256_condition_bytes(&[], 32);
        assert!(out.is_empty(), "Empty input should produce no output");
    }

    #[test]
    fn test_von_neumann_reduces_size() {
        let input = vec![0b10101010u8; 128];
        let output = von_neumann_debias(&input);
        assert!(output.len() < input.len());
    }

    #[test]
    fn test_von_neumann_known_output() {
        let input = vec![0b10101010u8; 2];
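        // Each byte splits into four (1, 0) pairs, each emitting a 1 bit, so
        // two input bytes yield exactly eight 1 bits -> one 0xFF byte.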
        let output = von_neumann_debias(&input);
        assert_eq!(output.len(), 1);
        assert_eq!(output[0], 0b11111111);
    }

    #[test]
    fn test_von_neumann_alternating_01() {
        let input = vec![0b01010101u8; 2];
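        // Four (0, 1) pairs per byte, each emitting a 0 bit -> one 0x00 byte.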
        let output = von_neumann_debias(&input);
        assert_eq!(output.len(), 1);
        assert_eq!(output[0], 0b00000000);
    }

    #[test]
    fn test_von_neumann_all_same_discards() {
        let input = vec![0xFF; 100];
        let output = von_neumann_debias(&input);
        assert!(output.is_empty(), "All-ones should produce no output");
    }

    #[test]
    fn test_von_neumann_all_zeros_discards() {
        let input = vec![0x00; 100];
        let output = von_neumann_debias(&input);
        assert!(output.is_empty(), "All-zeros should produce no output");
    }

    #[test]
    fn test_condition_modes_differ() {
        let data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let raw = condition(&data, 64, ConditioningMode::Raw);
        let sha = condition(&data, 64, ConditioningMode::Sha256);
        assert_ne!(raw, sha);
    }

    #[test]
    fn test_conditioning_mode_display() {
        assert_eq!(ConditioningMode::Raw.to_string(), "raw");
        assert_eq!(ConditioningMode::VonNeumann.to_string(), "von_neumann");
        assert_eq!(ConditioningMode::Sha256.to_string(), "sha256");
    }

    #[test]
    fn test_conditioning_mode_default() {
        assert_eq!(ConditioningMode::default(), ConditioningMode::Sha256);
    }

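    // --- xor folding ---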
    #[test]
    fn test_xor_fold_basic() {
        let data = vec![0xFF, 0x00, 0xAA, 0x55];
        let folded = xor_fold(&data);
        assert_eq!(folded.len(), 2);
        assert_eq!(folded[0], 0xFF ^ 0xAA);
        assert_eq!(folded[1], 0x55);
    }

    #[test]
    fn test_xor_fold_single_byte() {
        let data = vec![42];
        let folded = xor_fold(&data);
        assert_eq!(folded, vec![42]);
    }

    #[test]
    fn test_xor_fold_empty() {
        let folded = xor_fold(&[]);
        assert!(folded.is_empty());
    }

    #[test]
    fn test_xor_fold_odd_length() {
        // Five bytes fold as halves [1, 2] and [3, 4]; the final byte drops.
        let data = vec![1, 2, 3, 4, 5];
        let folded = xor_fold(&data);
        assert_eq!(folded.len(), 2);
        assert_eq!(folded[0], 1 ^ 3);
        assert_eq!(folded[1], 2 ^ 4);
    }

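    // --- Shannon entropy ---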
    #[test]
    fn test_shannon_empty() {
        assert_eq!(quick_shannon(&[]), 0.0);
    }

    #[test]
    fn test_shannon_single_byte() {
        assert_eq!(quick_shannon(&[42]), 0.0);
    }

    #[test]
    fn test_shannon_all_same() {
        let data = vec![0u8; 1000];
        assert_eq!(quick_shannon(&data), 0.0);
    }

    #[test]
    fn test_shannon_two_values_equal() {
        let mut data = vec![0u8; 500];
        data.extend(vec![1u8; 500]);
        let h = quick_shannon(&data);
        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
    }

    #[test]
    fn test_shannon_uniform_256() {
        let data: Vec<u8> = (0..=255).collect();
        let h = quick_shannon(&data);
        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
    }

    #[test]
    fn test_shannon_uniform_large() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = quick_shannon(&data);
        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
    }

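    // --- min-entropy (plug-in) ---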
    #[test]
    fn test_min_entropy_empty() {
        assert_eq!(min_entropy(&[]), 0.0);
    }

    #[test]
    fn test_min_entropy_all_same() {
        let data = vec![42u8; 1000];
        let h = min_entropy(&data);
        assert!(h < 0.01, "All-same should have ~0 min-entropy, got {h}");
    }

    #[test]
    fn test_min_entropy_uniform() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = min_entropy(&data);
        assert!(
            (h - 8.0).abs() < 0.1,
            "Uniform should have ~8.0 min-entropy, got {h}"
        );
    }

    #[test]
    fn test_min_entropy_two_values() {
        let mut data = vec![0u8; 500];
        data.extend(vec![1u8; 500]);
        let h = min_entropy(&data);
        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
    }

    #[test]
    fn test_min_entropy_biased() {
        // 90% zeros, 10% ones: H_inf = -log2(0.9).
        let mut data = vec![0u8; 900];
        data.extend(vec![1u8; 100]);
        let h = min_entropy(&data);
        let expected = -(0.9f64.log2());
        assert!(
            (h - expected).abs() < 0.02,
            "Expected ~{expected:.3}, got {h}"
        );
    }

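    // --- MCV estimator ---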
    #[test]
    fn test_mcv_empty() {
        let (h, p) = mcv_estimate(&[]);
        assert_eq!(h, 0.0);
        assert_eq!(p, 1.0);
    }

    #[test]
    fn test_mcv_all_same() {
        let data = vec![42u8; 1000];
        let (h, p_upper) = mcv_estimate(&data);
        assert!(h < 0.1, "All-same should have ~0 MCV entropy, got {h}");
        assert!((p_upper - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_mcv_uniform() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let (h, _p_upper) = mcv_estimate(&data);
        assert!(h > 7.0, "Uniform should have high MCV entropy, got {h}");
    }

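    // --- collision estimator ---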
    #[test]
    fn test_collision_too_short() {
        assert_eq!(collision_estimate(&[1, 2]), 0.0);
    }

    #[test]
    fn test_collision_all_same() {
        let data = vec![0u8; 1000];
        let h = collision_estimate(&data);
        assert!(
            h < 1.0,
            "All-same should have very low collision entropy, got {h}"
        );
    }

    #[test]
    fn test_collision_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = collision_estimate(&data);
        assert!(
            h > 3.0,
            "Uniform should have reasonable collision entropy, got {h}"
        );
    }

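    // --- Markov estimator ---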
    #[test]
    fn test_markov_too_short() {
        assert_eq!(markov_estimate(&[42]), 0.0);
    }

    #[test]
    fn test_markov_all_same() {
        let data = vec![0u8; 1000];
        let h = markov_estimate(&data);
        assert!(h < 1.0, "All-same should have low Markov entropy, got {h}");
    }

    #[test]
    fn test_markov_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
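        // LCG-generated bytes rather than a 0..=255 ramp: after byte k a
        // ramp always emits k + 1, so its transitions are fully predictable
        // and the Markov estimator would (correctly) report ~0 bits.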
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let h = markov_estimate(&data);
        assert!(
            h > 0.1,
            "Pseudo-random should have Markov entropy > 0.1, got {h}"
        );
    }

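    // --- compression estimator ---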
    #[test]
    fn test_compression_too_short() {
        assert_eq!(compression_estimate(&[1; 50]), 0.0);
    }

    #[test]
    fn test_compression_all_same() {
        let data = vec![0u8; 1000];
        let h = compression_estimate(&data);
        assert!(
            h < 2.0,
            "All-same should have low compression entropy, got {h}"
        );
    }

    #[test]
    fn test_compression_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = compression_estimate(&data);
        assert!(
            h > 4.0,
            "Uniform should have reasonable compression entropy, got {h}"
        );
    }

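    // --- t-tuple estimator ---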
    #[test]
    fn test_t_tuple_too_short() {
        assert_eq!(t_tuple_estimate(&[1; 10]), 0.0);
    }

    #[test]
    fn test_t_tuple_all_same() {
        let data = vec![0u8; 1000];
        let h = t_tuple_estimate(&data);
        assert!(h < 0.1, "All-same should have ~0 t-tuple entropy, got {h}");
    }

    #[test]
    fn test_t_tuple_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
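        // Same LCG fill as the Markov test, so repeated tuples stay rare.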
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let h = t_tuple_estimate(&data);
        assert!(
            h > 2.5,
            "Pseudo-random should have t-tuple entropy > 2.5, got {h}"
        );
    }

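    // --- combined report ---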
    #[test]
    fn test_min_entropy_estimate_all_same() {
        let data = vec![0u8; 1000];
        let report = min_entropy_estimate(&data);
        assert!(
            report.min_entropy < 1.0,
            "All-same combined estimate: {}",
            report.min_entropy
        );
        assert!(report.shannon_entropy < 0.01);
        assert_eq!(report.samples, 1000);
    }

    #[test]
    fn test_min_entropy_estimate_uniform() {
        let mut data = Vec::with_capacity(256 * 100);
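        // Same LCG fill as the other estimator tests.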
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let report = min_entropy_estimate(&data);
        assert!(
            report.min_entropy > 6.0,
            "Primary min-entropy should be high for uniform marginals: {}",
            report.min_entropy
        );
        assert!(
            report.shannon_entropy > 7.9,
            "Shannon should be near 8.0 for uniform marginals: {}",
            report.shannon_entropy
        );
        assert!(
            report.mcv_estimate > 6.0,
            "MCV should be high for uniform data: {}",
            report.mcv_estimate
        );
        assert!(
            report.heuristic_floor <= report.min_entropy + 1e-9,
            "heuristic floor should not exceed primary min-entropy"
        );
    }

    #[test]
    fn test_min_entropy_report_display() {
        let data = vec![0u8; 1000];
        let report = min_entropy_estimate(&data);
        let s = format!("{report}");
        assert!(s.contains("Min-Entropy Analysis"));
        assert!(s.contains("1000 samples"));
    }

    #[test]
    fn test_quick_min_entropy_uses_mcv() {
        let data: Vec<u8> = (0..=255).collect();
        let quick = quick_min_entropy(&data);
        let (mcv_h, _) = mcv_estimate(&data);
        assert!(
            (quick - mcv_h).abs() < f64::EPSILON,
            "quick_min_entropy ({quick}) should equal MCV estimate ({mcv_h})"
        );
    }

    #[test]
    fn test_quick_min_entropy_leq_shannon() {
        let data: Vec<u8> = (0..=255).cycle().take(2560).collect();
        let quick = quick_min_entropy(&data);
        let shannon = quick_shannon(&data);
        assert!(
            quick <= shannon + 0.01,
            "H∞ ({quick}) should be <= Shannon ({shannon})"
        );
    }

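    // --- quality report ---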
    #[test]
    fn test_quality_too_short() {
        let q = quick_quality(&[1, 2, 3]);
        assert_eq!(q.grade, 'F');
        assert_eq!(q.quality_score, 0.0);
    }

    #[test]
    fn test_quality_all_same() {
        let data = vec![0u8; 1000];
        let q = quick_quality(&data);
        assert!(
            q.grade == 'F' || q.grade == 'D',
            "All-same should grade poorly, got {}",
            q.grade
        );
        assert_eq!(q.unique_values, 1);
        assert!(q.shannon_entropy < 0.01);
    }

    #[test]
    fn test_quality_uniform() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let q = quick_quality(&data);
        assert!(
            q.grade == 'A' || q.grade == 'B',
            "Uniform should grade well, got {}",
            q.grade
        );
        assert_eq!(q.unique_values, 256);
        assert!(q.shannon_entropy > 7.9);
    }

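    // --- grading ---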
    #[test]
    fn test_grade_boundaries() {
        assert_eq!(grade_min_entropy(8.0), 'A');
        assert_eq!(grade_min_entropy(6.0), 'A');
        assert_eq!(grade_min_entropy(5.99), 'B');
        assert_eq!(grade_min_entropy(4.0), 'B');
        assert_eq!(grade_min_entropy(3.99), 'C');
        assert_eq!(grade_min_entropy(2.0), 'C');
        assert_eq!(grade_min_entropy(1.99), 'D');
        assert_eq!(grade_min_entropy(1.0), 'D');
        assert_eq!(grade_min_entropy(0.99), 'F');
        assert_eq!(grade_min_entropy(0.0), 'F');
    }

    #[test]
    fn test_grade_negative() {
        assert_eq!(grade_min_entropy(-1.0), 'F');
    }
}