//! Entropy conditioning and min-entropy estimation for raw noise-source samples.

use sha2::{Digest, Sha256};
use std::collections::HashMap;

/// How raw samples are post-processed before being handed to consumers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum ConditioningMode {
    /// Pass the raw bytes through unchanged (truncated to the requested length).
    Raw,
    /// Von Neumann debiasing over adjacent bit pairs.
    VonNeumann,
    /// SHA-256-based conditioning (the default).
    #[default]
    Sha256,
}

impl std::fmt::Display for ConditioningMode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Raw => write!(f, "raw"),
            Self::VonNeumann => write!(f, "von_neumann"),
            Self::Sha256 => write!(f, "sha256"),
        }
    }
}

/// Conditions `raw` samples into the requested output length. `Raw` and
/// `VonNeumann` may return fewer than `n_output` bytes; `Sha256` always
/// returns exactly `n_output` bytes.
pub fn condition(raw: &[u8], n_output: usize, mode: ConditioningMode) -> Vec<u8> {
    match mode {
        ConditioningMode::Raw => {
            let mut out = raw.to_vec();
            out.truncate(n_output);
            out
        }
        ConditioningMode::VonNeumann => {
            let debiased = von_neumann_debias(raw);
            let mut out = debiased;
            out.truncate(n_output);
            out
        }
        ConditioningMode::Sha256 => sha256_condition_bytes(raw, n_output),
    }
}

/// Expands `raw` into exactly `n_output` bytes by repeatedly hashing 64-byte
/// chunks of the input, chaining the previous digest and a block counter into
/// each round. Returns all zeros if `raw` is empty.
pub fn sha256_condition_bytes(raw: &[u8], n_output: usize) -> Vec<u8> {
    if raw.is_empty() {
        return vec![0u8; n_output];
    }
    let mut output = Vec::with_capacity(n_output);
    let mut state = [0u8; 32];
    let mut offset = 0;
    let mut counter: u64 = 0;
    while output.len() < n_output {
        let end = (offset + 64).min(raw.len());
        let chunk = &raw[offset..end];
        let mut h = Sha256::new();
        h.update(state);
        h.update(chunk);
        h.update(counter.to_le_bytes());
        state = h.finalize().into();
        output.extend_from_slice(&state);
        offset += 64;
        counter += 1;
        // Wrap around so short inputs can still fill a long output.
        if offset >= raw.len() {
            offset = 0;
        }
    }
    output.truncate(n_output);
    output
}

/// Single conditioning step: hashes the previous `state`, a `sample`, a
/// monotonically increasing `counter`, the current wall-clock timestamp, and
/// any caller-supplied `extra` bytes. The resulting digest is returned as both
/// the next state and the output block.
pub fn sha256_condition(
    state: &[u8; 32],
    sample: &[u8],
    counter: u64,
    extra: &[u8],
) -> ([u8; 32], [u8; 32]) {
    let mut h = Sha256::new();
    h.update(state);
    h.update(sample);
    h.update(counter.to_le_bytes());

    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default();
    h.update(ts.as_nanos().to_le_bytes());

    h.update(extra);

    let digest: [u8; 32] = h.finalize().into();
    (digest, digest)
}

/// Von Neumann debiasing: scans non-overlapping bit pairs, emits the first bit
/// of each `01`/`10` pair, and discards `00`/`11` pairs. Output bits are packed
/// MSB-first; any trailing partial byte is dropped.
pub fn von_neumann_debias(data: &[u8]) -> Vec<u8> {
    let mut bits = Vec::new();
    for byte in data {
        for i in (0..8).step_by(2) {
            let b1 = (byte >> (7 - i)) & 1;
            let b2 = (byte >> (6 - i)) & 1;
            if b1 != b2 {
                bits.push(b1);
            }
        }
    }

    let mut result = Vec::with_capacity(bits.len() / 8);
    for chunk in bits.chunks_exact(8) {
        let mut byte = 0u8;
        for (i, &bit) in chunk.iter().enumerate() {
            byte |= bit << (7 - i);
        }
        result.push(byte);
    }
    result
}

/// XOR-folds the first half of `data` onto the second half, halving its length.
/// Inputs shorter than two bytes are returned unchanged; an odd trailing byte
/// is dropped.
pub fn xor_fold(data: &[u8]) -> Vec<u8> {
    if data.len() < 2 {
        return data.to_vec();
    }
    let half = data.len() / 2;
    (0..half).map(|i| data[i] ^ data[half + i]).collect()
}

/// Plug-in min-entropy of the byte histogram: `H_min = -log2(p_max)` bits per
/// byte, where `p_max` is the frequency of the most common value.
pub fn min_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut counts = [0u64; 256];
    for &b in data {
        counts[b as usize] += 1;
    }
    let n = data.len() as f64;
    let p_max = counts.iter().map(|&c| c as f64 / n).fold(0.0f64, f64::max);
    if p_max <= 0.0 {
        return 0.0;
    }
    -p_max.log2()
}

/// Most-common-value estimate: returns `(H, p_upper)`, where `p_upper` is a
/// 99% confidence upper bound on the probability of the most common byte and
/// `H = -log2(p_upper)`.
pub fn mcv_estimate(data: &[u8]) -> (f64, f64) {
    if data.is_empty() {
        return (0.0, 1.0);
    }
    let mut counts = [0u64; 256];
    for &b in data {
        counts[b as usize] += 1;
    }
    let n = data.len() as f64;
    let max_count = *counts.iter().max().unwrap() as f64;
    let p_hat = max_count / n;

    // Upper confidence bound on p_hat (z = 2.576 for 99% confidence).
    let z = 2.576;
    let p_u = (p_hat + z * (p_hat * (1.0 - p_hat) / n).sqrt()).min(1.0);

    let h = if p_u >= 1.0 {
        0.0
    } else {
        (-p_u.log2()).max(0.0)
    };
    (h, p_u)
}

/// Collision-style estimate: measures the mean distance between repeated byte
/// values and converts a lower confidence bound on that distance into a bound
/// on the most likely symbol probability. Returns bits per byte, capped at 8.
pub fn collision_estimate(data: &[u8]) -> f64 {
    if data.len() < 3 {
        return 0.0;
    }

    // Distances from each starting position to the next occurrence of the same byte.
    let mut distances = Vec::new();
    let mut i = 0;
    while i < data.len() - 1 {
        let mut j = i + 1;
        while j < data.len() && data[j] != data[i] {
            j += 1;
        }
        if j < data.len() {
            distances.push((j - i) as f64);
            i = j + 1;
        } else {
            break;
        }
    }

    if distances.is_empty() {
        // No repeated value observed: report the 8-bit maximum.
        return 8.0;
    }

    let mean_dist = distances.iter().sum::<f64>() / distances.len() as f64;

    let n_collisions = distances.len() as f64;
    let variance = distances
        .iter()
        .map(|d| (d - mean_dist).powi(2))
        .sum::<f64>()
        / (n_collisions - 1.0).max(1.0);
    let std_err = (variance / n_collisions).sqrt();

    // Lower 99% confidence bound on the mean collision distance.
    let z = 2.576;
    let mean_lower = (mean_dist - z * std_err).max(1.0);

    let p_max = (1.0 / mean_lower).sqrt().min(1.0);

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}

/// First-order Markov estimate over 16 bins: bounds the probability of the most
/// likely state given the initial and transition probabilities. Returns bits
/// per byte, capped at 8.
pub fn markov_estimate(data: &[u8]) -> f64 {
    if data.len() < 2 {
        return 0.0;
    }

    // Coarse 16-bin quantization keeps the transition matrix well populated.
    let bins = 16u8;
    let bin_of = |b: u8| -> usize { (b as usize * bins as usize) / 256 };

    let mut transitions = vec![vec![0u64; bins as usize]; bins as usize];

    for w in data.windows(2) {
        let from = bin_of(w[0]);
        let to = bin_of(w[1]);
        transitions[from][to] += 1;
    }

    let n = data.len() as f64;

    // Initial (marginal) bin probabilities.
    let p_init: Vec<f64> = {
        let mut counts = vec![0u64; bins as usize];
        for &b in data {
            counts[bin_of(b)] += 1;
        }
        counts.iter().map(|&c| c as f64 / n).collect()
    };

    // Row-normalized transition probabilities.
    let mut p_trans = vec![vec![0.0f64; bins as usize]; bins as usize];
    for (i, row) in transitions.iter().enumerate() {
        let row_sum: u64 = row.iter().sum();
        if row_sum > 0 {
            for (j, &count) in row.iter().enumerate() {
                p_trans[i][j] = count as f64 / row_sum as f64;
            }
        }
    }

    // Probability of the most likely state, over initial and transition estimates.
    let mut p_max = 0.0f64;
    for s in 0..bins as usize {
        p_max = p_max.max(p_init[s]);
        for row in p_trans.iter().take(bins as usize) {
            p_max = p_max.max(row[s]);
        }
    }

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}

/// Maurer-style compression estimate: averages log2 distances to the previous
/// occurrence of each byte over a test segment, after priming the position
/// table on an initialization segment. Returns bits per byte, capped at 8.
pub fn compression_estimate(data: &[u8]) -> f64 {
    if data.len() < 100 {
        return 0.0;
    }

    let l = 8; // bits per symbol (bytes)
    let q = 256.min(data.len() / 4); // initialization segment length
    let k = data.len() - q; // test segment length
    if k == 0 {
        return 0.0;
    }

    // Prime the last-seen position table on the initialization segment.
    let mut last_pos = [0usize; 256];
    for (i, &b) in data[..q].iter().enumerate() {
        last_pos[b as usize] = i + 1; // positions are 1-based; 0 means "unseen"
    }

    // Accumulate log2 distances over the test segment.
    let mut sum = 0.0f64;
    let mut count = 0u64;
    for (i, &b) in data[q..].iter().enumerate() {
        let pos = q + i + 1;
        let prev = last_pos[b as usize];
        if prev > 0 {
            let distance = pos - prev;
            sum += (distance as f64).log2();
            count += 1;
        }
        last_pos[b as usize] = pos;
    }

    if count == 0 {
        // Nothing repeated within the test segment: assume full-width entropy.
        return l as f64;
    }

    let f_n = sum / count as f64;

    // Second pass to compute the variance of the log distances.
    let mut var_sum = 0.0f64;
    let mut last_pos2 = [0usize; 256];
    for (i, &b) in data[..q].iter().enumerate() {
        last_pos2[b as usize] = i + 1;
    }
    for (i, &b) in data[q..].iter().enumerate() {
        let pos = q + i + 1;
        let prev = last_pos2[b as usize];
        if prev > 0 {
            let distance = pos - prev;
            let log_d = (distance as f64).log2();
            var_sum += (log_d - f_n).powi(2);
        }
        last_pos2[b as usize] = pos;
    }
    let variance = var_sum / (count as f64 - 1.0).max(1.0);
    let std_err = (variance / count as f64).sqrt();

    // Lower 99% confidence bound on the mean log distance.
    let z = 2.576;
    let f_lower = (f_n - z * std_err).max(0.0);

    f_lower.min(l as f64)
}

/// t-Tuple estimate: for tuple lengths t = 1..=3, finds the most common
/// t-length window and converts its frequency into a per-byte bound
/// `-log2(p_max) / t`. Returns the smallest bound, capped at 8 bits per byte.
pub fn t_tuple_estimate(data: &[u8]) -> f64 {
    if data.len() < 20 {
        return 0.0;
    }

    let mut min_h = 8.0f64;

    for t in 1..=3usize {
        if data.len() < t + 1 {
            break;
        }
        let mut counts: HashMap<&[u8], u64> = HashMap::new();
        for window in data.windows(t) {
            *counts.entry(window).or_insert(0) += 1;
        }
        let n = (data.len() - t + 1) as f64;
        let max_count = *counts.values().max().unwrap_or(&0) as f64;
        let p_max = max_count / n;

        if p_max > 0.0 {
            let h = -p_max.log2() / t as f64;
            min_h = min_h.min(h);
        }
    }

    min_h.min(8.0)
}

/// Runs all estimators over `data` and combines them by taking the minimum,
/// following the conservative "worst estimator wins" convention.
pub fn min_entropy_estimate(data: &[u8]) -> MinEntropyReport {
    let shannon = quick_shannon(data);
    let (mcv_h, mcv_p_upper) = mcv_estimate(data);
    let collision_h = collision_estimate(data);
    let markov_h = markov_estimate(data);
    let compression_h = compression_estimate(data);
    let t_tuple_h = t_tuple_estimate(data);

    // The combined estimate is the most pessimistic individual estimate.
    let combined = mcv_h
        .min(collision_h)
        .min(markov_h)
        .min(compression_h)
        .min(t_tuple_h);

    MinEntropyReport {
        shannon_entropy: shannon,
        min_entropy: combined,
        mcv_estimate: mcv_h,
        mcv_p_upper,
        collision_estimate: collision_h,
        markov_estimate: markov_h,
        compression_estimate: compression_h,
        t_tuple_estimate: t_tuple_h,
        samples: data.len(),
    }
}

/// Per-estimator results from [`min_entropy_estimate`], all in bits per byte.
#[derive(Debug, Clone)]
pub struct MinEntropyReport {
    /// Shannon entropy of the byte histogram.
    pub shannon_entropy: f64,
    /// Combined (minimum) min-entropy estimate.
    pub min_entropy: f64,
    /// Most-common-value estimate.
    pub mcv_estimate: f64,
    /// Upper confidence bound on the most common value's probability.
    pub mcv_p_upper: f64,
    /// Collision estimate.
    pub collision_estimate: f64,
    /// Markov estimate.
    pub markov_estimate: f64,
    /// Compression estimate.
    pub compression_estimate: f64,
    /// t-Tuple estimate.
    pub t_tuple_estimate: f64,
    /// Number of input bytes analyzed.
    pub samples: usize,
}

impl std::fmt::Display for MinEntropyReport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Min-Entropy Analysis ({} samples)", self.samples)?;
        writeln!(f, " Shannon H: {:.3} bits/byte", self.shannon_entropy)?;
        writeln!(f, " Min-Entropy H∞: {:.3} bits/byte", self.min_entropy)?;
        writeln!(f, " ─────────────────────────────")?;
        writeln!(
            f,
            " MCV: {:.3} (p_upper={:.4})",
            self.mcv_estimate, self.mcv_p_upper
        )?;
        writeln!(f, " Collision: {:.3}", self.collision_estimate)?;
        writeln!(f, " Markov: {:.3}", self.markov_estimate)?;
        writeln!(f, " Compression: {:.3}", self.compression_estimate)?;
        writeln!(f, " t-Tuple: {:.3}", self.t_tuple_estimate)?;
        Ok(())
    }
}

/// Convenience wrapper returning only the combined min-entropy estimate.
pub fn quick_min_entropy(data: &[u8]) -> f64 {
    min_entropy_estimate(data).min_entropy
}

/// Shannon entropy of the byte histogram, in bits per byte.
pub fn quick_shannon(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut counts = [0u64; 256];
    for &b in data {
        counts[b as usize] += 1;
    }
    let n = data.len() as f64;
    let mut h = 0.0;
    for &c in &counts {
        if c > 0 {
            let p = c as f64 / n;
            h -= p * p.log2();
        }
    }
    h
}

/// Maps a min-entropy estimate (bits per byte) onto a letter grade:
/// A ≥ 6.0, B ≥ 4.0, C ≥ 2.0, D ≥ 1.0, otherwise F.
pub fn grade_min_entropy(min_entropy: f64) -> char {
    if min_entropy >= 6.0 {
        'A'
    } else if min_entropy >= 4.0 {
        'B'
    } else if min_entropy >= 2.0 {
        'C'
    } else if min_entropy >= 1.0 {
        'D'
    } else {
        'F'
    }
}

/// Quick quality heuristic combining Shannon efficiency, zlib incompressibility,
/// and the fraction of distinct byte values into a 0-100 score and letter grade.
/// Inputs shorter than 16 bytes always grade 'F'.
pub fn quick_quality(data: &[u8]) -> QualityReport {
    if data.len() < 16 {
        return QualityReport {
            samples: data.len(),
            unique_values: 0,
            shannon_entropy: 0.0,
            compression_ratio: 0.0,
            quality_score: 0.0,
            grade: 'F',
        };
    }

    let shannon = quick_shannon(data);

    // Compression ratio: close to 1.0 for incompressible (random-looking) data.
    use flate2::Compression;
    use flate2::write::ZlibEncoder;
    use std::io::Write;
    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
    encoder.write_all(data).unwrap_or_default();
    let compressed = encoder.finish().unwrap_or_default();
    let comp_ratio = compressed.len() as f64 / data.len() as f64;

    // Count distinct byte values.
    let mut seen = [false; 256];
    for &b in data {
        seen[b as usize] = true;
    }
    let unique = seen.iter().filter(|&&s| s).count();

    // Weighted score: 60% Shannon efficiency, 20% incompressibility, 20% coverage.
    let eff = shannon / 8.0;
    let score = eff * 60.0 + comp_ratio.min(1.0) * 20.0 + (unique as f64 / 256.0).min(1.0) * 20.0;
    let grade = if score >= 80.0 {
        'A'
    } else if score >= 60.0 {
        'B'
    } else if score >= 40.0 {
        'C'
    } else if score >= 20.0 {
        'D'
    } else {
        'F'
    };

    QualityReport {
        samples: data.len(),
        unique_values: unique,
        shannon_entropy: shannon,
        compression_ratio: comp_ratio,
        quality_score: score,
        grade,
    }
}

#[derive(Debug, Clone)]
pub struct QualityReport {
    pub samples: usize,
    pub unique_values: usize,
    pub shannon_entropy: f64,
    pub compression_ratio: f64,
    pub quality_score: f64,
    pub grade: char,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_condition_raw_passthrough() {
        let data = vec![1, 2, 3, 4, 5];
        let out = condition(&data, 3, ConditioningMode::Raw);
        assert_eq!(out, vec![1, 2, 3]);
    }

    #[test]
    fn test_condition_raw_exact_length() {
        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
        let out = condition(&data, 100, ConditioningMode::Raw);
        assert_eq!(out, data);
    }

    #[test]
    fn test_condition_raw_truncates() {
        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
        let out = condition(&data, 50, ConditioningMode::Raw);
        assert_eq!(out.len(), 50);
        assert_eq!(out, &data[..50]);
    }

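    // Added illustrative check (not in the original suite): condition() with
    // ConditioningMode::VonNeumann should debias first and then truncate. The
    // alternating pattern 0b1010_1010 yields only "10" pairs, so the debiased
    // output is all ones.
    #[test]
    fn test_condition_von_neumann_truncates() {
        let data = vec![0b1010_1010u8; 128];
        let out = condition(&data, 16, ConditioningMode::VonNeumann);
        assert_eq!(out.len(), 16);
        assert!(out.iter().all(|&b| b == 0xFF));
    }
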
    #[test]
    fn test_condition_sha256_produces_exact_length() {
        let data = vec![42u8; 100];
        for len in [1, 16, 32, 64, 100, 256] {
            let out = condition(&data, len, ConditioningMode::Sha256);
            assert_eq!(out.len(), len, "SHA256 should produce exactly {len} bytes");
        }
    }

    #[test]
    fn test_sha256_deterministic() {
        let data = vec![42u8; 100];
        let out1 = sha256_condition_bytes(&data, 64);
        let out2 = sha256_condition_bytes(&data, 64);
        assert_eq!(
            out1, out2,
            "SHA256 conditioning should be deterministic for same input"
        );
    }

    #[test]
    fn test_sha256_different_inputs_differ() {
        let data1 = vec![1u8; 100];
        let data2 = vec![2u8; 100];
        let out1 = sha256_condition_bytes(&data1, 32);
        let out2 = sha256_condition_bytes(&data2, 32);
        assert_ne!(out1, out2);
    }

    #[test]
    fn test_sha256_empty_input() {
        let out = sha256_condition_bytes(&[], 32);
        assert_eq!(out.len(), 32);
        assert_eq!(out, vec![0u8; 32], "Empty input should produce zero bytes");
    }

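    // Added illustrative check (hedged): the stateful sha256_condition step
    // returns the same 32-byte digest as both the next state and the output
    // block, so callers can chain it directly.
    #[test]
    fn test_sha256_condition_state_equals_output() {
        let state = [0u8; 32];
        let (next_state, out) = sha256_condition(&state, b"sample", 0, b"");
        assert_eq!(next_state, out);
    }
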
    #[test]
    fn test_von_neumann_reduces_size() {
        let input = vec![0b10101010u8; 128];
        let output = von_neumann_debias(&input);
        assert!(output.len() < input.len());
    }

    #[test]
    fn test_von_neumann_known_output() {
        // 0b10101010 is four "10" pairs, each emitting a 1 bit: two bytes -> 0xFF.
        let input = vec![0b10101010u8; 2];
        let output = von_neumann_debias(&input);
        assert_eq!(output.len(), 1);
        assert_eq!(output[0], 0b11111111);
    }

    #[test]
    fn test_von_neumann_alternating_01() {
        // 0b01010101 is four "01" pairs, each emitting a 0 bit: two bytes -> 0x00.
        let input = vec![0b01010101u8; 2];
        let output = von_neumann_debias(&input);
        assert_eq!(output.len(), 1);
        assert_eq!(output[0], 0b00000000);
    }

    #[test]
    fn test_von_neumann_all_same_discards() {
        let input = vec![0xFF; 100];
        let output = von_neumann_debias(&input);
        assert!(output.is_empty(), "All-ones should produce no output");
    }

    #[test]
    fn test_von_neumann_all_zeros_discards() {
        let input = vec![0x00; 100];
        let output = von_neumann_debias(&input);
        assert!(output.is_empty(), "All-zeros should produce no output");
    }

    #[test]
    fn test_condition_modes_differ() {
        let data: Vec<u8> = (0..256).map(|i| i as u8).collect();
        let raw = condition(&data, 64, ConditioningMode::Raw);
        let sha = condition(&data, 64, ConditioningMode::Sha256);
        assert_ne!(raw, sha);
    }

    #[test]
    fn test_conditioning_mode_display() {
        assert_eq!(ConditioningMode::Raw.to_string(), "raw");
        assert_eq!(ConditioningMode::VonNeumann.to_string(), "von_neumann");
        assert_eq!(ConditioningMode::Sha256.to_string(), "sha256");
    }

    #[test]
    fn test_conditioning_mode_default() {
        assert_eq!(ConditioningMode::default(), ConditioningMode::Sha256);
    }

    #[test]
    fn test_xor_fold_basic() {
        let data = vec![0xFF, 0x00, 0xAA, 0x55];
        let folded = xor_fold(&data);
        assert_eq!(folded.len(), 2);
        assert_eq!(folded[0], 0xFF ^ 0xAA);
        assert_eq!(folded[1], 0x00 ^ 0x55);
    }

    #[test]
    fn test_xor_fold_single_byte() {
        let data = vec![42];
        let folded = xor_fold(&data);
        assert_eq!(folded, vec![42]);
    }

    #[test]
    fn test_xor_fold_empty() {
        let folded = xor_fold(&[]);
        assert!(folded.is_empty());
    }

    #[test]
    fn test_xor_fold_odd_length() {
        // The odd trailing byte is dropped.
        let data = vec![1, 2, 3, 4, 5];
        let folded = xor_fold(&data);
        assert_eq!(folded.len(), 2);
        assert_eq!(folded[0], 1 ^ 3);
        assert_eq!(folded[1], 2 ^ 4);
    }

    #[test]
    fn test_shannon_empty() {
        assert_eq!(quick_shannon(&[]), 0.0);
    }

    #[test]
    fn test_shannon_single_byte() {
        assert_eq!(quick_shannon(&[42]), 0.0);
    }

    #[test]
    fn test_shannon_all_same() {
        let data = vec![0u8; 1000];
        assert_eq!(quick_shannon(&data), 0.0);
    }

    #[test]
    fn test_shannon_two_values_equal() {
        let mut data = vec![0u8; 500];
        data.extend(vec![1u8; 500]);
        let h = quick_shannon(&data);
        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
    }

    #[test]
    fn test_shannon_uniform_256() {
        let data: Vec<u8> = (0..=255).collect();
        let h = quick_shannon(&data);
        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
    }

    #[test]
    fn test_shannon_uniform_large() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = quick_shannon(&data);
        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
    }

    #[test]
    fn test_min_entropy_empty() {
        assert_eq!(min_entropy(&[]), 0.0);
    }

    #[test]
    fn test_min_entropy_all_same() {
        let data = vec![42u8; 1000];
        let h = min_entropy(&data);
        assert!(h < 0.01, "All-same should have ~0 min-entropy, got {h}");
    }

    #[test]
    fn test_min_entropy_uniform() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = min_entropy(&data);
        assert!(
            (h - 8.0).abs() < 0.1,
            "Uniform should have ~8.0 min-entropy, got {h}"
        );
    }

    #[test]
    fn test_min_entropy_two_values() {
        let mut data = vec![0u8; 500];
        data.extend(vec![1u8; 500]);
        let h = min_entropy(&data);
        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
    }

    #[test]
    fn test_min_entropy_biased() {
        // 90% zeros: expect -log2(0.9) ≈ 0.152 bits/byte.
        let mut data = vec![0u8; 900];
        data.extend(vec![1u8; 100]);
        let h = min_entropy(&data);
        let expected = -(0.9f64.log2());
        assert!(
            (h - expected).abs() < 0.02,
            "Expected ~{expected:.3}, got {h}"
        );
    }

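    // Added sanity check (hedged): for any histogram, the plug-in min-entropy
    // -log2(p_max) can never exceed the Shannon entropy of the same data.
    #[test]
    fn test_min_entropy_never_exceeds_shannon() {
        let mut data = vec![7u8; 600];
        data.extend(vec![42u8; 300]);
        data.extend(vec![99u8; 100]);
        assert!(min_entropy(&data) <= quick_shannon(&data) + 1e-9);
    }
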
    #[test]
    fn test_mcv_empty() {
        let (h, p) = mcv_estimate(&[]);
        assert_eq!(h, 0.0);
        assert_eq!(p, 1.0);
    }

    #[test]
    fn test_mcv_all_same() {
        let data = vec![42u8; 1000];
        let (h, p_upper) = mcv_estimate(&data);
        assert!(h < 0.1, "All-same should have ~0 MCV entropy, got {h}");
        assert!((p_upper - 1.0).abs() < 0.01);
    }

    #[test]
    fn test_mcv_uniform() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let (h, _p_upper) = mcv_estimate(&data);
        assert!(h > 7.0, "Uniform should have high MCV entropy, got {h}");
    }

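    // Added worked example (hedged): for 900 zeros and 100 ones, p_hat = 0.9 and
    // the upper bound is p_u = 0.9 + 2.576 * sqrt(0.9 * 0.1 / 1000) ≈ 0.924,
    // giving H ≈ -log2(0.924) ≈ 0.113 bits/byte.
    #[test]
    fn test_mcv_biased_worked_example() {
        let mut data = vec![0u8; 900];
        data.extend(vec![1u8; 100]);
        let (h, p_upper) = mcv_estimate(&data);
        assert!((p_upper - 0.924).abs() < 0.01, "p_upper ≈ 0.924, got {p_upper}");
        assert!((h - 0.113).abs() < 0.02, "H ≈ 0.113, got {h}");
    }
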
    #[test]
    fn test_collision_too_short() {
        assert_eq!(collision_estimate(&[1, 2]), 0.0);
    }

    #[test]
    fn test_collision_all_same() {
        let data = vec![0u8; 1000];
        let h = collision_estimate(&data);
        assert!(
            h < 1.0,
            "All-same should have very low collision entropy, got {h}"
        );
    }

    #[test]
    fn test_collision_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = collision_estimate(&data);
        assert!(
            h > 3.0,
            "Uniform should have reasonable collision entropy, got {h}"
        );
    }

    #[test]
    fn test_markov_too_short() {
        assert_eq!(markov_estimate(&[42]), 0.0);
    }

    #[test]
    fn test_markov_all_same() {
        let data = vec![0u8; 1000];
        let h = markov_estimate(&data);
        assert!(h < 1.0, "All-same should have low Markov entropy, got {h}");
    }

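    // Added worked example (hedged): a strictly alternating 0x00/0xFF stream has
    // ~1 bit/byte of Shannon entropy but fully deterministic transitions, so the
    // Markov estimate collapses to ~0.
    #[test]
    fn test_markov_alternating_pattern() {
        let data: Vec<u8> = (0..1000).map(|i| if i % 2 == 0 { 0x00 } else { 0xFF }).collect();
        assert!((quick_shannon(&data) - 1.0).abs() < 0.01);
        assert!(markov_estimate(&data) < 0.01);
    }
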
    #[test]
    fn test_markov_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        // Deterministic pseudo-random bytes from a simple LCG.
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let h = markov_estimate(&data);
        assert!(
            h > 0.5,
            "Pseudo-random should have Markov entropy > 0.5, got {h}"
        );
    }

    #[test]
    fn test_compression_too_short() {
        assert_eq!(compression_estimate(&[1; 50]), 0.0);
    }

    #[test]
    fn test_compression_all_same() {
        let data = vec![0u8; 1000];
        let h = compression_estimate(&data);
        assert!(
            h < 2.0,
            "All-same should have low compression entropy, got {h}"
        );
    }

    #[test]
    fn test_compression_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for _ in 0..100 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let h = compression_estimate(&data);
        assert!(
            h > 4.0,
            "Uniform should have reasonable compression entropy, got {h}"
        );
    }

    #[test]
    fn test_t_tuple_too_short() {
        assert_eq!(t_tuple_estimate(&[1; 10]), 0.0);
    }

    #[test]
    fn test_t_tuple_all_same() {
        let data = vec![0u8; 1000];
        let h = t_tuple_estimate(&data);
        assert!(h < 0.1, "All-same should have ~0 t-tuple entropy, got {h}");
    }

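    // Added worked example (hedged): for the 256 distinct bytes 0..=255 every
    // 3-byte window is unique, so p_max = 1/254 and the t = 3 term dominates:
    // H ≈ log2(254) / 3 ≈ 2.66 bits/byte.
    #[test]
    fn test_t_tuple_distinct_bytes_worked_example() {
        let data: Vec<u8> = (0..=255).collect();
        let h = t_tuple_estimate(&data);
        assert!((h - 2.66).abs() < 0.05, "Expected ~2.66, got {h}");
    }
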
    #[test]
    fn test_t_tuple_uniform_large() {
        let mut data = Vec::with_capacity(256 * 100);
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let h = t_tuple_estimate(&data);
        assert!(
            h > 2.5,
            "Pseudo-random should have t-tuple entropy > 2.5, got {h}"
        );
    }

    #[test]
    fn test_min_entropy_estimate_all_same() {
        let data = vec![0u8; 1000];
        let report = min_entropy_estimate(&data);
        assert!(
            report.min_entropy < 1.0,
            "All-same combined estimate: {}",
            report.min_entropy
        );
        assert!(report.shannon_entropy < 0.01);
        assert_eq!(report.samples, 1000);
    }

    #[test]
    fn test_min_entropy_estimate_uniform() {
        let mut data = Vec::with_capacity(256 * 100);
        for i in 0..(256 * 100) {
            let v = ((i as u64)
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407)
                >> 56) as u8;
            data.push(v);
        }
        let report = min_entropy_estimate(&data);
        assert!(
            report.min_entropy > 0.5,
            "Combined estimate should be > 0.5: {}",
            report.min_entropy
        );
        assert!(
            report.shannon_entropy > 7.9,
            "Shannon should be near 8.0 for uniform marginals: {}",
            report.shannon_entropy
        );
    }

    #[test]
    fn test_min_entropy_report_display() {
        let data = vec![0u8; 1000];
        let report = min_entropy_estimate(&data);
        let s = format!("{report}");
        assert!(s.contains("Min-Entropy Analysis"));
        assert!(s.contains("1000 samples"));
    }

    #[test]
    fn test_quick_min_entropy_matches_report() {
        let data: Vec<u8> = (0..=255).collect();
        let quick = quick_min_entropy(&data);
        let report = min_entropy_estimate(&data);
        assert!((quick - report.min_entropy).abs() < f64::EPSILON);
    }

    #[test]
    fn test_quality_too_short() {
        let q = quick_quality(&[1, 2, 3]);
        assert_eq!(q.grade, 'F');
        assert_eq!(q.quality_score, 0.0);
    }

    #[test]
    fn test_quality_all_same() {
        let data = vec![0u8; 1000];
        let q = quick_quality(&data);
        assert!(
            q.grade == 'F' || q.grade == 'D',
            "All-same should grade poorly, got {}",
            q.grade
        );
        assert_eq!(q.unique_values, 1);
        assert!(q.shannon_entropy < 0.01);
    }

    #[test]
    fn test_quality_uniform() {
        let mut data = Vec::with_capacity(256 * 40);
        for _ in 0..40 {
            for b in 0..=255u8 {
                data.push(b);
            }
        }
        let q = quick_quality(&data);
        assert!(
            q.grade == 'A' || q.grade == 'B',
            "Uniform should grade well, got {}",
            q.grade
        );
        assert_eq!(q.unique_values, 256);
        assert!(q.shannon_entropy > 7.9);
    }

    #[test]
    fn test_grade_boundaries() {
        assert_eq!(grade_min_entropy(8.0), 'A');
        assert_eq!(grade_min_entropy(6.0), 'A');
        assert_eq!(grade_min_entropy(5.99), 'B');
        assert_eq!(grade_min_entropy(4.0), 'B');
        assert_eq!(grade_min_entropy(3.99), 'C');
        assert_eq!(grade_min_entropy(2.0), 'C');
        assert_eq!(grade_min_entropy(1.99), 'D');
        assert_eq!(grade_min_entropy(1.0), 'D');
        assert_eq!(grade_min_entropy(0.99), 'F');
        assert_eq!(grade_min_entropy(0.0), 'F');
    }

    #[test]
    fn test_grade_negative() {
        assert_eq!(grade_min_entropy(-1.0), 'F');
    }
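
    // Added end-to-end sketch (hedged): estimate the min-entropy of a raw
    // sample, grade it, then condition it down to a fixed-length output.
    #[test]
    fn test_estimate_then_condition_pipeline() {
        let raw: Vec<u8> = (0..4096u32)
            .map(|i| (i.wrapping_mul(2654435761) >> 24) as u8)
            .collect();
        let report = min_entropy_estimate(&raw);
        let grade = grade_min_entropy(report.min_entropy);
        assert!("ABCDF".contains(grade));
        let out = condition(&raw, 32, ConditioningMode::Sha256);
        assert_eq!(out.len(), 32);
    }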
}