1use serde::Serialize;
27use sha2::{Digest, Sha256};
28use std::collections::HashMap;
29
/// Post-processing applied to raw entropy samples before output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum ConditioningMode {
    /// No conditioning: raw bytes pass through (truncated to the requested length).
    Raw,
    /// Von Neumann debiasing of bit pairs; removes bias but discards data.
    VonNeumann,
    /// SHA-256-based conditioning (the default): cryptographic whitening.
    #[default]
    Sha256,
}
41
42impl std::fmt::Display for ConditioningMode {
43 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44 match self {
45 Self::Raw => write!(f, "raw"),
46 Self::VonNeumann => write!(f, "von_neumann"),
47 Self::Sha256 => write!(f, "sha256"),
48 }
49 }
50}
51
52pub fn condition(raw: &[u8], n_output: usize, mode: ConditioningMode) -> Vec<u8> {
66 match mode {
67 ConditioningMode::Raw => {
68 let mut out = raw.to_vec();
69 out.truncate(n_output);
70 out
71 }
72 ConditioningMode::VonNeumann => {
73 let debiased = von_neumann_debias(raw);
74 let mut out = debiased;
75 out.truncate(n_output);
76 out
77 }
78 ConditioningMode::Sha256 => sha256_condition_bytes(raw, n_output),
79 }
80}
81
/// Derives `n_output` pseudorandom bytes from `raw` via chained SHA-256.
///
/// Walks `raw` in 64-byte chunks; each step hashes (rolling state, chunk,
/// counter) to yield 32 output bytes, then evolves the state with a second,
/// domain-separated hash. The chunk offset wraps around, so a short input
/// can still fill any requested output length. Deterministic for a given
/// input; an empty input yields an empty vector.
pub fn sha256_condition_bytes(raw: &[u8], n_output: usize) -> Vec<u8> {
    if raw.is_empty() {
        return Vec::new();
    }
    let mut output = Vec::with_capacity(n_output);
    let mut state = [0u8; 32];
    let mut offset = 0;
    let mut counter: u64 = 0;
    while output.len() < n_output {
        // Next input chunk: up to 64 bytes starting at `offset`.
        let end = (offset + 64).min(raw.len());
        let chunk = &raw[offset..end];
        let mut h = Sha256::new();
        h.update(state);
        h.update(chunk);
        // The counter guarantees distinct digests even when the same chunk
        // is revisited after the offset wraps.
        h.update(counter.to_le_bytes());
        let digest: [u8; 32] = h.finalize().into();
        output.extend_from_slice(&digest);

        // Evolve the rolling state under a domain-separation tag so the
        // state chain can never collide with the output stream.
        let mut sh = Sha256::new();
        sh.update(digest);
        sh.update(b"openentropy_state");
        state = sh.finalize().into();

        offset += 64;
        counter += 1;
        if offset >= raw.len() {
            offset = 0; // wrap so short inputs can fill long outputs
        }
    }
    output.truncate(n_output);
    output
}
124
/// One step of streaming SHA-256 conditioning.
///
/// Hashes (state, sample, counter, current wall-clock nanoseconds, extra)
/// into a 32-byte output block, then derives the next state from that
/// output under a domain-separation tag. Returns `(new_state, output)`.
///
/// NOTE(review): mixing in `SystemTime::now()` makes every call
/// non-deterministic even for identical inputs — unlike
/// `sha256_condition_bytes`, which is reproducible. Confirm this is
/// intentional for callers that need deterministic conditioning.
pub fn sha256_condition(
    state: &[u8; 32],
    sample: &[u8],
    counter: u64,
    extra: &[u8],
) -> ([u8; 32], [u8; 32]) {
    let mut h = Sha256::new();
    h.update(state);
    h.update(sample);
    h.update(counter.to_le_bytes());

    // Fold in a timestamp; if the clock errors, this contributes zeros
    // rather than failing.
    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    h.update(ts.as_nanos().to_le_bytes());

    h.update(extra);

    let output: [u8; 32] = h.finalize().into();

    // Next state is derived from the output under a distinct domain tag.
    let mut sh = Sha256::new();
    sh.update(output);
    sh.update(b"openentropy_state");
    let new_state: [u8; 32] = sh.finalize().into();

    (new_state, output)
}
158
/// Von Neumann debiasing over consecutive bit pairs (MSB-first).
///
/// A `10` pair emits a 1 bit, `01` emits a 0 bit, and `00`/`11` pairs are
/// discarded. Surviving bits are repacked MSB-first into bytes; a trailing
/// partial byte is dropped. Output length is therefore data-dependent and
/// at most 1/4 of the input.
pub fn von_neumann_debias(data: &[u8]) -> Vec<u8> {
    let mut kept_bits: Vec<u8> = Vec::new();
    for &byte in data {
        for pair in 0..4 {
            let hi = (byte >> (7 - 2 * pair)) & 1;
            let lo = (byte >> (6 - 2 * pair)) & 1;
            if hi != lo {
                kept_bits.push(hi);
            }
        }
    }

    // Repack surviving bits into full bytes; chunks_exact drops a partial tail.
    kept_bits
        .chunks_exact(8)
        .map(|bits| {
            bits.iter()
                .enumerate()
                .fold(0u8, |acc, (pos, &bit)| acc | (bit << (7 - pos)))
        })
        .collect()
}
190
/// Folds `data` in half with XOR: `out[i] = data[i] ^ data[half + i]`.
///
/// For odd lengths the leftover final byte is XORed into the last output
/// byte. Inputs shorter than two bytes are returned unchanged.
pub fn xor_fold(data: &[u8]) -> Vec<u8> {
    if data.len() < 2 {
        return data.to_vec();
    }
    let half = data.len() / 2;
    let (front, back) = (&data[..half], &data[half..half * 2]);
    let mut folded: Vec<u8> = front.iter().zip(back).map(|(a, b)| a ^ b).collect();
    if data.len() % 2 == 1 {
        // `half >= 1` here, so `folded` is non-empty.
        if let Some(last) = folded.last_mut() {
            *last ^= data[data.len() - 1];
        }
    }
    folded
}
209
/// Naive min-entropy in bits/byte: `-log2` of the empirical probability of
/// the most frequent byte value. No confidence adjustment is applied
/// (see `mcv_estimate` for the bounded variant). Empty input yields 0.0.
pub fn min_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut histogram = [0u64; 256];
    for &b in data {
        histogram[b as usize] += 1;
    }
    let total = data.len() as f64;
    // Non-empty input guarantees max count >= 1, so p_max > 0.
    let max_count = histogram.iter().copied().max().unwrap_or(0);
    let p_max = max_count as f64 / total;
    if p_max > 0.0 { -p_max.log2() } else { 0.0 }
}
243
/// Most Common Value (MCV) min-entropy estimator.
///
/// Returns `(entropy_bits_per_byte, p_upper)`, where `p_upper` is a 99%
/// one-sided normal upper confidence bound on the most-common-value
/// probability and the entropy is `-log2(p_upper)` clamped to be
/// non-negative. Empty input yields `(0.0, 1.0)`.
pub fn mcv_estimate(data: &[u8]) -> (f64, f64) {
    if data.is_empty() {
        return (0.0, 1.0);
    }
    let mut histogram = [0u64; 256];
    for &b in data {
        histogram[b as usize] += 1;
    }
    let total = data.len() as f64;
    let p_hat = *histogram.iter().max().unwrap() as f64 / total;

    // z = 2.576 gives a 99% one-sided bound; cap the probability at 1.
    let z = 2.576;
    let p_u = (p_hat + z * (p_hat * (1.0 - p_hat) / total).sqrt()).min(1.0);

    let h = if p_u >= 1.0 { 0.0 } else { (-p_u.log2()).max(0.0) };
    (h, p_u)
}
271
/// Collision-based min-entropy diagnostic (bits/byte).
///
/// Scans adjacent equal-byte "collisions": the mean gap between successive
/// collisions is lower-bounded at 99% confidence and converted to a
/// worst-case symbol probability `p_max = sqrt(1 / mean_lower)`. With one
/// or zero gaps available, falls back to the raw collision rate
/// (`p_max = sqrt(q_hat)`). Returns 8.0 when no collisions occur, and 0.0
/// for inputs shorter than 3 bytes.
pub fn collision_estimate(data: &[u8]) -> f64 {
    if data.len() < 3 {
        return 0.0;
    }

    // Single pass: count collisions and record the gaps between successive
    // collision positions. (Previously a second full scan recounted
    // collisions when no gaps were found; counting here avoids that.)
    let mut distances = Vec::new();
    let mut last_collision: Option<usize> = None;
    let mut collision_count = 0usize;

    for i in 0..data.len() - 1 {
        if data[i] == data[i + 1] {
            collision_count += 1;
            if let Some(prev) = last_collision {
                distances.push((i - prev) as f64);
            }
            last_collision = Some(i);
        }
    }

    if distances.is_empty() {
        // Zero or one collision: no gap statistics, use the raw rate.
        if collision_count == 0 {
            return 8.0; // no adjacent repeats at all: maximal estimate
        }
        let q_hat = collision_count as f64 / (data.len() - 1) as f64;
        let p_max = q_hat.sqrt().min(1.0);
        return if p_max <= 0.0 {
            8.0
        } else {
            (-p_max.log2()).min(8.0)
        };
    }

    let mean_dist = distances.iter().sum::<f64>() / distances.len() as f64;

    // Sample variance of the gaps (guard the n=1 divisor).
    let n_collisions = distances.len() as f64;
    let variance = distances
        .iter()
        .map(|d| (d - mean_dist).powi(2))
        .sum::<f64>()
        / (n_collisions - 1.0).max(1.0);
    let std_err = (variance / n_collisions).sqrt();

    // 99% lower confidence bound on the mean gap, floored at 1.
    let z = 2.576;
    let mean_lower = (mean_dist - z * std_err).max(1.0);

    let p_max = (1.0 / mean_lower).sqrt().min(1.0);

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}
355
/// First-order Markov min-entropy diagnostic (bits/byte).
///
/// `p_max` is the largest of all marginal byte probabilities and all
/// conditional transition probabilities observed over adjacent byte
/// pairs; the result is `-log2(p_max)` clamped to [0, 8]. Inputs shorter
/// than two bytes yield 0.0.
pub fn markov_estimate(data: &[u8]) -> f64 {
    if data.len() < 2 {
        return 0.0;
    }

    let total = data.len() as f64;

    // Marginal byte frequencies.
    let mut init_counts = [0u64; 256];
    for &b in data {
        init_counts[b as usize] += 1;
    }

    // 256x256 transition counts from adjacent byte pairs.
    let mut transitions = vec![0u64; 256 * 256];
    for pair in data.windows(2) {
        transitions[pair[0] as usize * 256 + pair[1] as usize] += 1;
    }

    // Total outgoing transitions per source byte (row sums).
    let mut row_sums = [0u64; 256];
    for (from, row_sum) in row_sums.iter_mut().enumerate() {
        let base = from * 256;
        *row_sum = transitions[base..base + 256].iter().sum();
    }

    let mut p_max = 0.0f64;
    for target in 0..256usize {
        p_max = p_max.max(init_counts[target] as f64 / total);

        for source in 0..256usize {
            if row_sums[source] > 0 {
                let p_cond =
                    transitions[source * 256 + target] as f64 / row_sums[source] as f64;
                p_max = p_max.max(p_cond);
            }
        }
    }

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}
425
/// Maurer-inspired compression estimate of min-entropy (bits/byte).
///
/// The first `q = min(256, n/4)` bytes warm a last-seen-position table;
/// over the remaining test portion the mean of `log2(gap)` between
/// repeated byte values is computed, lower-bounded at 99% confidence,
/// squared, and scaled into [0, 8] bits. Returns 0.0 for inputs shorter
/// than 100 bytes and 8.0 when the test portion contains no repeats.
pub fn compression_estimate(data: &[u8]) -> f64 {
    if data.len() < 100 {
        return 0.0;
    }

    let l = 8.0f64; // bits per symbol (bytes)
    let q = 256.min(data.len() / 4); // warm-up portion
    let k = data.len() - q; // test portion length
    if k == 0 {
        return 0.0;
    }

    // Warm-up: record the (1-based) position of each byte's last occurrence.
    let mut last_pos = [0usize; 256];
    for (i, &b) in data[..q].iter().enumerate() {
        last_pos[b as usize] = i + 1;
    }

    // Single test pass collecting log2 of every gap to the previous
    // occurrence. (Previously the variance was computed by re-running this
    // entire scan a second time with a duplicate position table; storing
    // the log-gaps gives identical results in one pass.)
    let mut log_gaps = Vec::with_capacity(k);
    for (i, &b) in data[q..].iter().enumerate() {
        let pos = q + i + 1; // 1-based absolute position
        let prev = last_pos[b as usize];
        if prev > 0 {
            log_gaps.push(((pos - prev) as f64).log2());
        }
        last_pos[b as usize] = pos;
    }

    if log_gaps.is_empty() {
        return l; // no byte repeated in the test portion: maximal estimate
    }

    let count = log_gaps.len() as f64;
    let f_n = log_gaps.iter().sum::<f64>() / count;

    // Sample variance of the log-gaps (guard the n=1 divisor).
    let var_sum: f64 = log_gaps.iter().map(|g| (g - f_n).powi(2)).sum();
    let variance = var_sum / (count - 1.0).max(1.0);
    let std_err = (variance / count).sqrt();

    // 99% lower confidence bound on the mean log-gap.
    let z = 2.576;
    let f_lower = (f_n - z * std_err).max(0.0);

    (f_lower * f_lower / l).min(l)
}
515
/// t-tuple min-entropy diagnostic (bits/byte).
///
/// For tuple widths t = 1..=3, finds the most common t-byte window,
/// converts its frequency to per-byte entropy (`-log2(p_max) / t`), and
/// returns the minimum across widths, capped at 8 bits. Inputs shorter
/// than 20 bytes yield 0.0.
pub fn t_tuple_estimate(data: &[u8]) -> f64 {
    if data.len() < 20 {
        return 0.0;
    }

    let mut min_h = 8.0f64;

    for t in 1..=3usize {
        if data.len() < t + 1 {
            break;
        }
        // Frequency of each distinct t-byte window.
        let mut counts: HashMap<&[u8], u64> = HashMap::new();
        for tuple in data.windows(t) {
            *counts.entry(tuple).or_insert(0) += 1;
        }
        let n_tuples = (data.len() - t + 1) as f64;
        let p_max = counts.values().copied().max().unwrap_or(0) as f64 / n_tuples;

        if p_max > 0.0 {
            // Normalize to per-byte entropy before taking the minimum.
            min_h = min_h.min(-p_max.log2() / t as f64);
        }
    }

    min_h.min(8.0)
}
548
549pub fn min_entropy_estimate(data: &[u8]) -> MinEntropyReport {
555 let shannon = quick_shannon(data);
556 let (mcv_h, mcv_p_upper) = mcv_estimate(data);
557 let collision_h = collision_estimate(data);
558 let markov_h = markov_estimate(data);
559 let compression_h = compression_estimate(data);
560 let t_tuple_h = t_tuple_estimate(data);
561
562 let heuristic_floor = collision_h.min(markov_h).min(compression_h).min(t_tuple_h);
563
564 MinEntropyReport {
565 shannon_entropy: shannon,
566 min_entropy: mcv_h,
567 heuristic_floor,
568 mcv_estimate: mcv_h,
569 mcv_p_upper,
570 collision_estimate: collision_h,
571 markov_estimate: markov_h,
572 compression_estimate: compression_h,
573 t_tuple_estimate: t_tuple_h,
574 samples: data.len(),
575 }
576}
577
/// Results of the full min-entropy estimator suite.
/// All entropy figures are in bits per byte.
#[derive(Debug, Clone, Serialize)]
pub struct MinEntropyReport {
    /// Shannon entropy — an upper bound on min-entropy.
    pub shannon_entropy: f64,
    /// Primary min-entropy figure (same value as `mcv_estimate`).
    pub min_entropy: f64,
    /// Minimum across the diagnostic estimators below.
    pub heuristic_floor: f64,
    /// Most Common Value estimate.
    pub mcv_estimate: f64,
    /// Upper confidence bound on the most-common-value probability.
    pub mcv_p_upper: f64,
    /// Collision-based diagnostic estimate.
    pub collision_estimate: f64,
    /// First-order Markov diagnostic estimate.
    pub markov_estimate: f64,
    /// Compression (Maurer-inspired) diagnostic estimate.
    pub compression_estimate: f64,
    /// t-tuple diagnostic estimate.
    pub t_tuple_estimate: f64,
    /// Number of input bytes analyzed.
    pub samples: usize,
}
602
impl std::fmt::Display for MinEntropyReport {
    /// Multi-line human-readable summary: the headline figures first,
    /// then each diagnostic estimator below a divider.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Min-Entropy Analysis ({} samples)", self.samples)?;
        writeln!(
            f,
            "  Shannon H:        {:.3} bits/byte (upper bound)",
            self.shannon_entropy
        )?;
        writeln!(
            f,
            "  Min-Entropy H∞:   {:.3} bits/byte (primary, MCV)",
            self.min_entropy
        )?;
        writeln!(
            f,
            "  Heuristic floor:  {:.3} bits/byte (diagnostic minimum)",
            self.heuristic_floor
        )?;
        writeln!(f, "  ─────────────────────────────────")?;
        writeln!(
            f,
            "  MCV:              {:.3} (p_upper={:.4})",
            self.mcv_estimate, self.mcv_p_upper
        )?;
        writeln!(f, "  Collision (diag): {:.3}", self.collision_estimate)?;
        writeln!(f, "  Markov (diag):    {:.3}", self.markov_estimate)?;
        writeln!(
            f,
            "  Compression (diag): {:.3} (Maurer-inspired)",
            self.compression_estimate
        )?;
        writeln!(f, "  t-Tuple (diag):   {:.3}", self.t_tuple_estimate)?;
        Ok(())
    }
}
638
639pub fn quick_min_entropy(data: &[u8]) -> f64 {
647 mcv_estimate(data).0
648}
649
/// Shannon entropy in bits/byte over the empirical byte distribution.
/// Upper-bounds min-entropy; returns 0.0 for empty input.
pub fn quick_shannon(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut histogram = [0u64; 256];
    for &b in data {
        histogram[b as usize] += 1;
    }
    let total = data.len() as f64;
    // Sum -p*log2(p) over byte values that actually occur.
    histogram
        .iter()
        .filter(|&&c| c > 0)
        .map(|&c| {
            let p = c as f64 / total;
            -p * p.log2()
        })
        .sum()
}
669
/// Lag-1 autocorrelation of the byte sequence.
///
/// Covariance of adjacent samples divided by `n * variance`. Returns 0.0
/// for fewer than two samples or for (near-)constant data, where the
/// variance would be numerically zero.
pub fn quick_autocorrelation_lag1(data: &[u8]) -> f64 {
    if data.len() < 2 {
        return 0.0;
    }
    let n = data.len() as f64;
    let values: Vec<f64> = data.iter().map(|&b| f64::from(b)).collect();
    let mean = values.iter().sum::<f64>() / n;
    let var = values.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
    if var < 1e-10 {
        return 0.0; // constant input: correlation is undefined, report 0
    }
    let cov: f64 = values
        .windows(2)
        .map(|w| (w[0] - mean) * (w[1] - mean))
        .sum();
    cov / (n * var)
}
700
/// Maps a min-entropy figure (bits/byte) to a letter grade:
/// A ≥ 6.0, B ≥ 4.0, C ≥ 2.0, D ≥ 1.0, otherwise F.
pub fn grade_min_entropy(min_entropy: f64) -> char {
    match min_entropy {
        h if h >= 6.0 => 'A',
        h if h >= 4.0 => 'B',
        h if h >= 2.0 => 'C',
        h if h >= 1.0 => 'D',
        _ => 'F',
    }
}
727
728pub fn quick_quality(data: &[u8]) -> QualityReport {
730 if data.len() < 16 {
731 return QualityReport {
732 samples: data.len(),
733 unique_values: 0,
734 shannon_entropy: 0.0,
735 compression_ratio: 0.0,
736 quality_score: 0.0,
737 grade: 'F',
738 };
739 }
740
741 let shannon = quick_shannon(data);
742
743 use flate2::Compression;
747 use flate2::write::ZlibEncoder;
748 use std::io::Write;
749 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
750 encoder.write_all(data).unwrap_or_default();
751 let compressed = encoder.finish().unwrap_or_default();
752 let comp_ratio = compressed.len() as f64 / data.len() as f64;
753
754 let mut seen = [false; 256];
756 for &b in data {
757 seen[b as usize] = true;
758 }
759 let unique = seen.iter().filter(|&&s| s).count();
760
761 let eff = shannon / 8.0;
762 let score = eff * 60.0 + comp_ratio.min(1.0) * 20.0 + (unique as f64 / 256.0).min(1.0) * 20.0;
763 let grade = if score >= 80.0 {
764 'A'
765 } else if score >= 60.0 {
766 'B'
767 } else if score >= 40.0 {
768 'C'
769 } else if score >= 20.0 {
770 'D'
771 } else {
772 'F'
773 };
774
775 QualityReport {
776 samples: data.len(),
777 unique_values: unique,
778 shannon_entropy: shannon,
779 compression_ratio: comp_ratio,
780 quality_score: score,
781 grade,
782 }
783}
784
/// Output of `quick_quality`: component metrics plus the composite grade.
#[derive(Debug, Clone)]
pub struct QualityReport {
    // Number of input bytes analyzed.
    pub samples: usize,
    // Count of distinct byte values observed (0..=256).
    pub unique_values: usize,
    // Shannon entropy in bits/byte.
    pub shannon_entropy: f64,
    // zlib compressed size / input size; near 1.0 means incompressible.
    pub compression_ratio: f64,
    // Composite score out of 100.
    pub quality_score: f64,
    // Letter grade 'A'..'F' derived from the score.
    pub grade: char,
}
794
#[cfg(test)]
mod tests {
    use super::*;

    // ---- condition() / conditioning modes ----

    #[test]
    fn test_condition_raw_passthrough() {
        let data = vec![1, 2, 3, 4, 5];
        let out = condition(&data, 3, ConditioningMode::Raw);
        assert_eq!(out, vec![1, 2, 3]);
    }

    #[test]
    fn test_condition_raw_exact_length() {
        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
        let out = condition(&data, 100, ConditioningMode::Raw);
        assert_eq!(out, data);
    }

    #[test]
    fn test_condition_raw_truncates() {
        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
        let out = condition(&data, 50, ConditioningMode::Raw);
        assert_eq!(out.len(), 50);
        assert_eq!(out, &data[..50]);
    }

    #[test]
    fn test_condition_sha256_produces_exact_length() {
        // SHA-256 mode can both expand and truncate relative to the input.
        let data = vec![42u8; 100];
        for len in [1, 16, 32, 64, 100, 256] {
            let out = condition(&data, len, ConditioningMode::Sha256);
            assert_eq!(out.len(), len, "SHA256 should produce exactly {len} bytes");
        }
    }

    #[test]
    fn test_sha256_deterministic() {
        let data = vec![42u8; 100];
        let out1 = sha256_condition_bytes(&data, 64);
        let out2 = sha256_condition_bytes(&data, 64);
        assert_eq!(
            out1, out2,
            "SHA256 conditioning should be deterministic for same input"
        );
    }

    #[test]
    fn test_sha256_different_inputs_differ() {
        let data1 = vec![1u8; 100];
        let data2 = vec![2u8; 100];
        let out1 = sha256_condition_bytes(&data1, 32);
        let out2 = sha256_condition_bytes(&data2, 32);
        assert_ne!(out1, out2);
    }

    #[test]
    fn test_sha256_empty_input() {
        let out = sha256_condition_bytes(&[], 32);
        assert!(out.is_empty(), "Empty input should produce no output");
    }

    // ---- von_neumann_debias() ----

    #[test]
    fn test_von_neumann_reduces_size() {
        let input = vec![0b10101010u8; 128];
        let output = von_neumann_debias(&input);
        assert!(output.len() < input.len());
    }

    #[test]
    fn test_von_neumann_known_output() {
        // Each 10 pair emits a 1 bit: 8 pairs -> one 0xFF byte.
        let input = vec![0b10101010u8; 2];
        let output = von_neumann_debias(&input);
        assert_eq!(output.len(), 1);
        assert_eq!(output[0], 0b11111111);
    }

    #[test]
    fn test_von_neumann_alternating_01() {
        // Each 01 pair emits a 0 bit: 8 pairs -> one 0x00 byte.
        let input = vec![0b01010101u8; 2];
        let output = von_neumann_debias(&input);
        assert_eq!(output.len(), 1);
        assert_eq!(output[0], 0b00000000);
    }

    #[test]
    fn test_von_neumann_all_same_discards() {
        let input = vec![0xFF; 100];
        let output = von_neumann_debias(&input);
        assert!(output.is_empty(), "All-ones should produce no output");
    }

    #[test]
    fn test_von_neumann_all_zeros_discards() {
        let input = vec![0x00; 100];
        let output = von_neumann_debias(&input);
        assert!(output.is_empty(), "All-zeros should produce no output");
    }

    #[test]
    fn test_condition_modes_differ() {
        let data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let raw = condition(&data, 64, ConditioningMode::Raw);
        let sha = condition(&data, 64, ConditioningMode::Sha256);
        assert_ne!(raw, sha);
    }

    #[test]
    fn test_conditioning_mode_display() {
        assert_eq!(ConditioningMode::Raw.to_string(), "raw");
        assert_eq!(ConditioningMode::VonNeumann.to_string(), "von_neumann");
        assert_eq!(ConditioningMode::Sha256.to_string(), "sha256");
    }

    #[test]
    fn test_conditioning_mode_default() {
        assert_eq!(ConditioningMode::default(), ConditioningMode::Sha256);
    }

    // ---- xor_fold() ----

    #[test]
    fn test_xor_fold_basic() {
        let data = vec![0xFF, 0x00, 0xAA, 0x55];
        let folded = xor_fold(&data);
        assert_eq!(folded.len(), 2);
        assert_eq!(folded[0], 0xFF ^ 0xAA);
        assert_eq!(folded[1], 0x55);
    }

    #[test]
    fn test_xor_fold_single_byte() {
        let data = vec![42];
        let folded = xor_fold(&data);
        assert_eq!(folded, vec![42]);
    }

    #[test]
    fn test_xor_fold_empty() {
        let folded = xor_fold(&[]);
        assert!(folded.is_empty());
    }

    #[test]
    fn test_xor_fold_odd_length() {
        // Odd length: leftover last byte is folded into the final result byte.
        let data = vec![1, 2, 3, 4, 5];
        let folded = xor_fold(&data);
        assert_eq!(folded.len(), 2);
        assert_eq!(folded[0], 1 ^ 3);
        assert_eq!(folded[1], (2 ^ 4) ^ 5);
    }

    // ---- quick_shannon() ----

    #[test]
    fn test_shannon_empty() {
        assert_eq!(quick_shannon(&[]), 0.0);
    }

    #[test]
    fn test_shannon_single_byte() {
        assert_eq!(quick_shannon(&[42]), 0.0);
    }

    #[test]
    fn test_shannon_all_same() {
        let data = vec![0u8; 1000];
        assert_eq!(quick_shannon(&data), 0.0);
    }

    #[test]
    fn test_shannon_two_values_equal() {
        // Two equiprobable symbols: exactly 1 bit of entropy.
        let mut data = vec![0u8; 500];
        data.extend(vec![1u8; 500]);
        let h = quick_shannon(&data);
        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
    }

    #[test]
    fn test_shannon_uniform_256() {
        // All 256 values once each: maximal 8 bits/byte.
        let data: Vec<u8> = (0..=255).collect();
        let h = quick_shannon(&data);
        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
    }

    #[test]
    fn test_shannon_uniform_large() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = quick_shannon(&data);
        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
    }

    // ---- min_entropy() ----

    #[test]
    fn test_min_entropy_empty() {
        assert_eq!(min_entropy(&[]), 0.0);
    }

    #[test]
    fn test_min_entropy_all_same() {
        let data = vec![42u8; 1000];
        let h = min_entropy(&data);
        assert!(h < 0.01, "All-same should have ~0 min-entropy, got {h}");
    }

    #[test]
    fn test_min_entropy_uniform() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = min_entropy(&data);
        assert!(
            (h - 8.0).abs() < 0.1,
            "Uniform should have ~8.0 min-entropy, got {h}"
        );
    }

    #[test]
    fn test_min_entropy_two_values() {
        let mut data = vec![0u8; 500];
        data.extend(vec![1u8; 500]);
        let h = min_entropy(&data);
        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
    }

    #[test]
    fn test_min_entropy_biased() {
        // 90/10 split: min-entropy is -log2(0.9), not the Shannon value.
        let mut data = vec![0u8; 900];
        data.extend(vec![1u8; 100]);
        let h = min_entropy(&data);
        let expected = -(0.9f64.log2());
        assert!(
            (h - expected).abs() < 0.02,
            "Expected ~{expected:.3}, got {h}"
        );
    }

    // ---- mcv_estimate() ----

    #[test]
    fn test_mcv_empty() {
        let (h, p) = mcv_estimate(&[]);
        assert_eq!(h, 0.0);
        assert_eq!(p, 1.0);
    }

    #[test]
    fn test_mcv_all_same() {
        let data = vec![42u8; 1000];
        let (h, p_upper) = mcv_estimate(&data);
        assert!(h < 0.1, "All-same should have ~0 MCV entropy, got {h}");
        assert!((p_upper - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_mcv_uniform() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let (h, _p_upper) = mcv_estimate(&data);
        assert!(h > 7.0, "Uniform should have high MCV entropy, got {h}");
    }

    // ---- collision_estimate() ----

    #[test]
    fn test_collision_too_short() {
        assert_eq!(collision_estimate(&[1, 2]), 0.0);
    }

    #[test]
    fn test_collision_all_same() {
        let data = vec![0u8; 1000];
        let h = collision_estimate(&data);
        assert!(
            h < 1.0,
            "All-same should have very low collision entropy, got {h}"
        );
    }

    #[test]
    fn test_collision_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = collision_estimate(&data);
        assert!(
            h > 3.0,
            "Uniform should have reasonable collision entropy, got {h}"
        );
    }

    // ---- markov_estimate() ----

    #[test]
    fn test_markov_too_short() {
        assert_eq!(markov_estimate(&[42]), 0.0);
    }

    #[test]
    fn test_markov_all_same() {
        let data = vec![0u8; 1000];
        let h = markov_estimate(&data);
        assert!(h < 1.0, "All-same should have low Markov entropy, got {h}");
    }

    #[test]
    fn test_markov_uniform_large() {
        // LCG-style pseudo-random bytes (a strict cyclic pattern would have
        // deterministic transitions and score 0 under a Markov estimator).
        let mut data = Vec::with_capacity(256 * 100);
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let h = markov_estimate(&data);
        assert!(
            h > 0.1,
            "Pseudo-random should have Markov entropy > 0.1, got {h}"
        );
    }

    // ---- compression_estimate() ----

    #[test]
    fn test_compression_too_short() {
        assert_eq!(compression_estimate(&[1; 50]), 0.0);
    }

    #[test]
    fn test_compression_all_same() {
        let data = vec![0u8; 1000];
        let h = compression_estimate(&data);
        assert!(
            h < 2.0,
            "All-same should have low compression entropy, got {h}"
        );
    }

    #[test]
    fn test_compression_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = compression_estimate(&data);
        assert!(
            h > 4.0,
            "Uniform should have reasonable compression entropy, got {h}"
        );
    }

    // ---- t_tuple_estimate() ----

    #[test]
    fn test_t_tuple_too_short() {
        assert_eq!(t_tuple_estimate(&[1; 10]), 0.0);
    }

    #[test]
    fn test_t_tuple_all_same() {
        let data = vec![0u8; 1000];
        let h = t_tuple_estimate(&data);
        assert!(h < 0.1, "All-same should have ~0 t-tuple entropy, got {h}");
    }

    #[test]
    fn test_t_tuple_uniform_large() {
        // Pseudo-random bytes so repeated tuples are rare across all widths.
        let mut data = Vec::with_capacity(256 * 100);
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let h = t_tuple_estimate(&data);
        assert!(
            h > 2.5,
            "Pseudo-random should have t-tuple entropy > 2.5, got {h}"
        );
    }

    // ---- min_entropy_estimate() / MinEntropyReport ----

    #[test]
    fn test_min_entropy_estimate_all_same() {
        let data = vec![0u8; 1000];
        let report = min_entropy_estimate(&data);
        assert!(
            report.min_entropy < 1.0,
            "All-same combined estimate: {}",
            report.min_entropy
        );
        assert!(report.shannon_entropy < 0.01);
        assert_eq!(report.samples, 1000);
    }

    #[test]
    fn test_min_entropy_estimate_uniform() {
        let mut data = Vec::with_capacity(256 * 100);
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let report = min_entropy_estimate(&data);
        assert!(
            report.min_entropy > 6.0,
            "Primary min-entropy should be high for uniform marginals: {}",
            report.min_entropy
        );
        assert!(
            report.shannon_entropy > 7.9,
            "Shannon should be near 8.0 for uniform marginals: {}",
            report.shannon_entropy
        );
        assert!(
            report.mcv_estimate > 6.0,
            "MCV should be high for uniform data: {}",
            report.mcv_estimate
        );
        assert!(
            report.heuristic_floor <= report.min_entropy + 1e-9,
            "heuristic floor should not exceed primary min-entropy"
        );
    }

    #[test]
    fn test_min_entropy_report_display() {
        let data = vec![0u8; 1000];
        let report = min_entropy_estimate(&data);
        let s = format!("{report}");
        assert!(s.contains("Min-Entropy Analysis"));
        assert!(s.contains("1000 samples"));
    }

    #[test]
    fn test_quick_min_entropy_uses_mcv() {
        let data: Vec<u8> = (0..=255).collect();
        let quick = quick_min_entropy(&data);
        let (mcv_h, _) = mcv_estimate(&data);
        assert!(
            (quick - mcv_h).abs() < f64::EPSILON,
            "quick_min_entropy ({quick}) should equal MCV estimate ({mcv_h})"
        );
    }

    #[test]
    fn test_quick_min_entropy_leq_shannon() {
        // Min-entropy never exceeds Shannon entropy.
        let data: Vec<u8> = (0..=255).cycle().take(2560).collect();
        let quick = quick_min_entropy(&data);
        let shannon = quick_shannon(&data);
        assert!(
            quick <= shannon + 0.01,
            "H∞ ({quick}) should be <= Shannon ({shannon})"
        );
    }

    // ---- quick_quality() ----

    #[test]
    fn test_quality_too_short() {
        let q = quick_quality(&[1, 2, 3]);
        assert_eq!(q.grade, 'F');
        assert_eq!(q.quality_score, 0.0);
    }

    #[test]
    fn test_quality_all_same() {
        let data = vec![0u8; 1000];
        let q = quick_quality(&data);
        assert!(
            q.grade == 'F' || q.grade == 'D',
            "All-same should grade poorly, got {}",
            q.grade
        );
        assert_eq!(q.unique_values, 1);
        assert!(q.shannon_entropy < 0.01);
    }

    #[test]
    fn test_quality_uniform() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let q = quick_quality(&data);
        assert!(
            q.grade == 'A' || q.grade == 'B',
            "Uniform should grade well, got {}",
            q.grade
        );
        assert_eq!(q.unique_values, 256);
        assert!(q.shannon_entropy > 7.9);
    }

    // ---- grade_min_entropy() ----

    #[test]
    fn test_grade_boundaries() {
        assert_eq!(grade_min_entropy(8.0), 'A');
        assert_eq!(grade_min_entropy(6.0), 'A');
        assert_eq!(grade_min_entropy(5.99), 'B');
        assert_eq!(grade_min_entropy(4.0), 'B');
        assert_eq!(grade_min_entropy(3.99), 'C');
        assert_eq!(grade_min_entropy(2.0), 'C');
        assert_eq!(grade_min_entropy(1.99), 'D');
        assert_eq!(grade_min_entropy(1.0), 'D');
        assert_eq!(grade_min_entropy(0.99), 'F');
        assert_eq!(grade_min_entropy(0.0), 'F');
    }

    #[test]
    fn test_grade_negative() {
        assert_eq!(grade_min_entropy(-1.0), 'F');
    }
}