// openentropy_tests/lib.rs
//! NIST SP 800-22 inspired randomness test battery.
//!
//! Provides 31 statistical tests for evaluating the quality of random byte sequences.
//! Each test returns a [`TestResult`] with a p-value (where applicable), a pass/fail
//! determination, and a letter grade (A through F).
use std::collections::HashMap;
use std::f64::consts::PI;
use std::io::Write;

use flate2::Compression;
use flate2::write::ZlibEncoder;
use rustfft::{FftPlanner, num_complex::Complex};
use statrs::distribution::{ChiSquared, ContinuousCDF, DiscreteCDF, Normal, Poisson};
use statrs::function::erf::erfc;
15
// ═══════════════════════════════════════════════════════════════════════════════
// Core types
// ═══════════════════════════════════════════════════════════════════════════════
19
/// Outcome of one statistical randomness test.
#[derive(Debug, Clone)]
pub struct TestResult {
    pub name: String,
    pub passed: bool,
    pub p_value: Option<f64>,
    pub statistic: f64,
    pub details: String,
    pub grade: char,
}

impl TestResult {
    /// Map a p-value onto a letter grade.
    ///
    /// - A: p >= 0.1
    /// - B: p >= 0.01
    /// - C: p >= 0.001
    /// - D: p >= 0.0001
    /// - F: anything lower, or no p-value at all
    pub fn grade_from_p(p: Option<f64>) -> char {
        match p {
            None => 'F',
            Some(v) => {
                if v >= 0.1 {
                    'A'
                } else if v >= 0.01 {
                    'B'
                } else if v >= 0.001 {
                    'C'
                } else if v >= 0.0001 {
                    'D'
                } else {
                    'F'
                }
            }
        }
    }

    /// Decide pass/fail by comparing a p-value against `threshold`
    /// (callers in this crate use 0.01). A missing p-value fails.
    pub fn pass_from_p(p: Option<f64>, threshold: f64) -> bool {
        p.map_or(false, |v| v >= threshold)
    }
}
57
// ═══════════════════════════════════════════════════════════════════════════════
// Helpers
// ═══════════════════════════════════════════════════════════════════════════════
61
/// Expand a byte slice into individual bits, most significant bit of each
/// byte first.
fn to_bits(data: &[u8]) -> Vec<u8> {
    let mut bits = Vec::with_capacity(data.len() * 8);
    bits.extend(
        data.iter()
            .flat_map(|&byte| (0..8).rev().map(move |shift| (byte >> shift) & 1)),
    );
    bits
}
72
73/// Return a failing `TestResult` when data is too short.
74fn insufficient(name: &str, needed: usize, got: usize) -> TestResult {
75    TestResult {
76        name: name.to_string(),
77        passed: false,
78        p_value: None,
79        statistic: 0.0,
80        details: format!("Insufficient data: need {needed}, got {got}"),
81        grade: 'F',
82    }
83}
84
// ═══════════════════════════════════════════════════════════════════════════════
// 1. FREQUENCY TESTS
// ═══════════════════════════════════════════════════════════════════════════════
88
89/// Test 1: Monobit frequency -- proportion of 1s vs 0s should be ~50%.
90pub fn monobit_frequency(data: &[u8]) -> TestResult {
91    let name = "Monobit Frequency";
92    let bits = to_bits(data);
93    let n = bits.len();
94    if n < 100 {
95        return insufficient(name, 100, n);
96    }
97    let s: i64 = bits
98        .iter()
99        .map(|&b| if b == 1 { 1i64 } else { -1i64 })
100        .sum();
101    let s_obs = (s as f64).abs() / (n as f64).sqrt();
102    let p = erfc(s_obs / 2.0_f64.sqrt());
103    TestResult {
104        name: name.to_string(),
105        passed: TestResult::pass_from_p(Some(p), 0.01),
106        p_value: Some(p),
107        statistic: s_obs,
108        details: format!("S={s}, n={n}"),
109        grade: TestResult::grade_from_p(Some(p)),
110    }
111}
112
113/// Test 2: Block frequency -- frequency within 128-bit blocks. Chi-squared test.
114pub fn block_frequency(data: &[u8]) -> TestResult {
115    let name = "Block Frequency";
116    let block_size: usize = 128;
117    let bits = to_bits(data);
118    let n = bits.len();
119    let num_blocks = n / block_size;
120    if num_blocks < 10 {
121        return insufficient(name, block_size * 10, n);
122    }
123    let mut chi2 = 0.0;
124    for i in 0..num_blocks {
125        let start = i * block_size;
126        let ones: usize = bits[start..start + block_size]
127            .iter()
128            .map(|&b| b as usize)
129            .sum();
130        let proportion = ones as f64 / block_size as f64;
131        chi2 += (proportion - 0.5) * (proportion - 0.5);
132    }
133    chi2 *= 4.0 * block_size as f64;
134    let dist = ChiSquared::new(num_blocks as f64).unwrap();
135    let p = dist.sf(chi2);
136    TestResult {
137        name: name.to_string(),
138        passed: TestResult::pass_from_p(Some(p), 0.01),
139        p_value: Some(p),
140        statistic: chi2,
141        details: format!("blocks={num_blocks}, M={block_size}"),
142        grade: TestResult::grade_from_p(Some(p)),
143    }
144}
145
146/// Test 3: Byte frequency -- chi-squared on byte value distribution (256 bins).
147pub fn byte_frequency(data: &[u8]) -> TestResult {
148    let name = "Byte Frequency";
149    let n = data.len();
150    if n < 256 {
151        return insufficient(name, 256, n);
152    }
153    let mut hist = [0u64; 256];
154    for &b in data {
155        hist[b as usize] += 1;
156    }
157    let expected = n as f64 / 256.0;
158    let chi2: f64 = hist
159        .iter()
160        .map(|&c| {
161            let diff = c as f64 - expected;
162            diff * diff / expected
163        })
164        .sum();
165    let dist = ChiSquared::new(255.0).unwrap();
166    let p = dist.sf(chi2);
167    TestResult {
168        name: name.to_string(),
169        passed: TestResult::pass_from_p(Some(p), 0.01),
170        p_value: Some(p),
171        statistic: chi2,
172        details: format!("n={n}, expected_per_bin={expected:.1}"),
173        grade: TestResult::grade_from_p(Some(p)),
174    }
175}
176
// ═══════════════════════════════════════════════════════════════════════════════
// 2. RUNS TESTS
// ═══════════════════════════════════════════════════════════════════════════════
180
181/// Test 4: Runs test -- number of uninterrupted runs of 0s or 1s.
182pub fn runs_test(data: &[u8]) -> TestResult {
183    let name = "Runs Test";
184    let bits = to_bits(data);
185    let n = bits.len();
186    if n < 100 {
187        return insufficient(name, 100, n);
188    }
189    let ones: usize = bits.iter().map(|&b| b as usize).sum();
190    let prop = ones as f64 / n as f64;
191    if (prop - 0.5).abs() >= 2.0 / (n as f64).sqrt() {
192        return TestResult {
193            name: name.to_string(),
194            passed: false,
195            p_value: Some(0.0),
196            statistic: 0.0,
197            details: format!("Pre-test failed: proportion={prop:.4}"),
198            grade: 'F',
199        };
200    }
201    let mut runs: usize = 1;
202    for i in 1..n {
203        if bits[i] != bits[i - 1] {
204            runs += 1;
205        }
206    }
207    let expected = 2.0 * n as f64 * prop * (1.0 - prop) + 1.0;
208    let std = 2.0 * (2.0 * n as f64).sqrt() * prop * (1.0 - prop);
209    if std < 1e-10 {
210        return TestResult {
211            name: name.to_string(),
212            passed: false,
213            p_value: Some(0.0),
214            statistic: 0.0,
215            details: "Zero variance".to_string(),
216            grade: 'F',
217        };
218    }
219    let z = (runs as f64 - expected).abs() / std;
220    let p = erfc(z / 2.0_f64.sqrt());
221    TestResult {
222        name: name.to_string(),
223        passed: TestResult::pass_from_p(Some(p), 0.01),
224        p_value: Some(p),
225        statistic: z,
226        details: format!("runs={runs}, expected={expected:.0}"),
227        grade: TestResult::grade_from_p(Some(p)),
228    }
229}
230
231/// Test 5: Longest run of ones -- within 8-bit blocks, chi-squared against theoretical probs.
232pub fn longest_run_of_ones(data: &[u8]) -> TestResult {
233    let name = "Longest Run of Ones";
234    let bits = to_bits(data);
235    let n = bits.len();
236    if n < 128 {
237        return insufficient(name, 128, n);
238    }
239    let block_size = 8;
240    let num_blocks = n / block_size;
241
242    // For each block, find the longest run of 1s
243    let mut observed = [0u64; 4]; // NIST bins for M=8: {≤1, 2, 3, ≥4}
244    for i in 0..num_blocks {
245        let start = i * block_size;
246        let block = &bits[start..start + block_size];
247        let mut max_run = 0u32;
248        let mut current_run = 0u32;
249        for &bit in block {
250            if bit == 1 {
251                current_run += 1;
252                if current_run > max_run {
253                    max_run = current_run;
254                }
255            } else {
256                current_run = 0;
257            }
258        }
259        match max_run {
260            0 | 1 => observed[0] += 1,
261            2 => observed[1] += 1,
262            3 => observed[2] += 1,
263            _ => observed[3] += 1,
264        }
265    }
266
267    // Theoretical probabilities for M=8
268    let probs = [0.2148, 0.3672, 0.2305, 0.1875];
269    let mut chi2 = 0.0;
270    for i in 0..4 {
271        let expected = probs[i] * num_blocks as f64;
272        if expected > 0.0 {
273            let diff = observed[i] as f64 - expected;
274            chi2 += diff * diff / expected;
275        }
276    }
277    let dist = ChiSquared::new(3.0).unwrap();
278    let p = dist.sf(chi2);
279    TestResult {
280        name: name.to_string(),
281        passed: TestResult::pass_from_p(Some(p), 0.01),
282        p_value: Some(p),
283        statistic: chi2,
284        details: format!("blocks={num_blocks}, M={block_size}"),
285        grade: TestResult::grade_from_p(Some(p)),
286    }
287}
288
// ═══════════════════════════════════════════════════════════════════════════════
// 3. SERIAL TESTS
// ═══════════════════════════════════════════════════════════════════════════════
292
/// Helper: psi-squared statistic for the serial test -- counts all n
/// overlapping m-bit patterns (wrapping cyclically) and returns
/// (2^m / n) * sum(count^2) - n. Returns 0.0 for m = 0.
fn psi_sq(bits: &[u8], n: usize, m: usize) -> f64 {
    if m < 1 {
        return 0.0;
    }
    let num_patterns = 1usize << m;
    let mut counts = vec![0u64; num_patterns];
    for start in 0..n {
        // Build the m-bit pattern beginning at `start`, wrapping around.
        let pattern = (0..m).fold(0usize, |acc, offset| {
            (acc << 1) | bits[(start + offset) % n] as usize
        });
        counts[pattern] += 1;
    }
    let sum_sq: f64 = counts.iter().map(|&c| (c as f64) * (c as f64)).sum();
    sum_sq * (num_patterns as f64) / (n as f64) - n as f64
}
310
311/// Test 6: Serial test -- frequency of overlapping m-bit patterns (m=4).
312pub fn serial_test(data: &[u8]) -> TestResult {
313    let name = "Serial Test";
314    let m = 4usize;
315    let mut bits = to_bits(data);
316    let mut n = bits.len();
317    if n > 20000 {
318        bits.truncate(20000);
319        n = 20000;
320    }
321    if n < (1 << m) + 10 {
322        return insufficient(name, (1 << m) + 10, n);
323    }
324
325    let psi_m = psi_sq(&bits, n, m);
326    let psi_m1 = psi_sq(&bits, n, m - 1);
327    let psi_m2 = if m >= 2 { psi_sq(&bits, n, m - 2) } else { 0.0 };
328    let delta1 = psi_m - psi_m1;
329    let delta2 = psi_m - 2.0 * psi_m1 + psi_m2;
330
331    let df1 = (1u64 << (m - 1)) as f64;
332    let df2 = (1u64 << (m - 2)) as f64;
333    let dist1 = ChiSquared::new(df1).unwrap();
334    let dist2 = ChiSquared::new(df2).unwrap();
335    let p1 = dist1.sf(delta1);
336    let p2 = dist2.sf(delta2.max(0.0));
337
338    // Use the more conservative (lower) p-value of the two serial statistics.
339    let p = p1.min(p2);
340    TestResult {
341        name: name.to_string(),
342        passed: TestResult::pass_from_p(Some(p), 0.01),
343        p_value: Some(p),
344        statistic: delta1,
345        details: format!("m={m}, n_bits={n}, p1={p1:.4}, p2={p2:.4}"),
346        grade: TestResult::grade_from_p(Some(p)),
347    }
348}
349
350/// Test 7: Approximate entropy -- compare m and m+1 bit pattern frequencies (m=3).
351pub fn approximate_entropy(data: &[u8]) -> TestResult {
352    let name = "Approximate Entropy";
353    let m = 3usize;
354    let mut bits = to_bits(data);
355    let mut n = bits.len();
356    if n > 20000 {
357        bits.truncate(20000);
358        n = 20000;
359    }
360    if n < 64 {
361        return insufficient(name, 64, n);
362    }
363
364    let phi = |block_len: usize| -> f64 {
365        let num_patterns = 1usize << block_len;
366        let mut counts = vec![0u64; num_patterns];
367        for i in 0..n {
368            let mut val = 0usize;
369            for j in 0..block_len {
370                val = (val << 1) | bits[(i + j) % n] as usize;
371            }
372            counts[val] += 1;
373        }
374        let mut sum = 0.0;
375        for &c in &counts {
376            if c > 0 {
377                let p = c as f64 / n as f64;
378                sum += p * p.log2();
379            }
380        }
381        sum
382    };
383
384    let phi_m = phi(m);
385    let phi_m1 = phi(m + 1);
386    let apen = phi_m - phi_m1;
387    // NIST formula (natural log): chi2 = 2*n*(ln(2) - ApEn_ln).
388    // Since phi uses log2, ApEn_log2 = ApEn_ln / ln(2), so:
389    // chi2 = 2*n*ln(2)*(1 - ApEn_log2)
390    let chi2 = 2.0 * n as f64 * 2.0_f64.ln() * (1.0 - apen);
391
392    let df = (1u64 << m) as f64;
393    let dist = ChiSquared::new(df).unwrap();
394    let p = dist.sf(chi2);
395    TestResult {
396        name: name.to_string(),
397        passed: TestResult::pass_from_p(Some(p), 0.01),
398        p_value: Some(p),
399        statistic: chi2,
400        details: format!("ApEn={apen:.6}, m={m}"),
401        grade: TestResult::grade_from_p(Some(p)),
402    }
403}
404
// ═══════════════════════════════════════════════════════════════════════════════
// 4. SPECTRAL TESTS
// ═══════════════════════════════════════════════════════════════════════════════
408
409/// Test 8: DFT spectral -- detect periodic features via FFT.
410pub fn dft_spectral(data: &[u8]) -> TestResult {
411    let name = "DFT Spectral";
412    let bits = to_bits(data);
413    let n = bits.len();
414    if n < 64 {
415        return insufficient(name, 64, n);
416    }
417
418    let mut buffer: Vec<Complex<f64>> = bits
419        .iter()
420        .map(|&b| Complex {
421            re: if b == 1 { 1.0 } else { -1.0 },
422            im: 0.0,
423        })
424        .collect();
425
426    let mut planner = FftPlanner::new();
427    let fft = planner.plan_fft_forward(n);
428    fft.process(&mut buffer);
429
430    let half = n / 2;
431    let magnitudes: Vec<f64> = buffer[..half].iter().map(|c| c.norm()).collect();
432
433    let threshold = (2.995732274 * n as f64).sqrt();
434    let n0 = 0.95 * half as f64;
435    let n1 = magnitudes.iter().filter(|&&m| m < threshold).count() as f64;
436    let d = (n1 - n0) / (n as f64 * 0.95 * 0.05 / 4.0).sqrt();
437    let p = erfc(d.abs() / 2.0_f64.sqrt());
438    TestResult {
439        name: name.to_string(),
440        passed: TestResult::pass_from_p(Some(p), 0.01),
441        p_value: Some(p),
442        statistic: d,
443        details: format!("peaks_below_threshold={}/{half}", n1 as u64),
444        grade: TestResult::grade_from_p(Some(p)),
445    }
446}
447
448/// Test 9: Spectral flatness -- geometric/arithmetic mean ratio of power spectrum.
449pub fn spectral_flatness(data: &[u8]) -> TestResult {
450    let name = "Spectral Flatness";
451    let n = data.len();
452    if n < 64 {
453        return insufficient(name, 64, n);
454    }
455
456    let mean_val: f64 = data.iter().map(|&b| b as f64).sum::<f64>() / n as f64;
457    let mut buffer: Vec<Complex<f64>> = data
458        .iter()
459        .map(|&b| Complex {
460            re: b as f64 - mean_val,
461            im: 0.0,
462        })
463        .collect();
464
465    let mut planner = FftPlanner::new();
466    let fft = planner.plan_fft_forward(n);
467    fft.process(&mut buffer);
468
469    // Power spectrum, skip DC bin (index 0)
470    let half = n / 2;
471    if half < 2 {
472        return insufficient(name, 64, n);
473    }
474    let power: Vec<f64> = buffer[1..half]
475        .iter()
476        .map(|c| c.norm_sqr() + 1e-15)
477        .collect();
478
479    if power.is_empty() {
480        return insufficient(name, 64, n);
481    }
482
483    let log_sum: f64 = power.iter().map(|&p| p.ln()).sum();
484    let geo_mean = (log_sum / power.len() as f64).exp();
485    let arith_mean: f64 = power.iter().sum::<f64>() / power.len() as f64;
486    let flatness = geo_mean / arith_mean;
487
488    let passed = flatness > 0.5;
489    let grade = if flatness > 0.8 {
490        'A'
491    } else if flatness > 0.6 {
492        'B'
493    } else if flatness > 0.4 {
494        'C'
495    } else if flatness > 0.2 {
496        'D'
497    } else {
498        'F'
499    };
500    TestResult {
501        name: name.to_string(),
502        passed,
503        p_value: None,
504        statistic: flatness,
505        details: format!("flatness={flatness:.4} (1.0=white noise)"),
506        grade,
507    }
508}
509
// ═══════════════════════════════════════════════════════════════════════════════
// 5. ENTROPY TESTS
// ═══════════════════════════════════════════════════════════════════════════════
513
514/// Test 10: Shannon entropy -- bits per byte (max 8.0).
515pub fn shannon_entropy(data: &[u8]) -> TestResult {
516    let name = "Shannon Entropy";
517    let n = data.len();
518    if n < 16 {
519        return insufficient(name, 16, n);
520    }
521    let mut hist = [0u64; 256];
522    for &b in data {
523        hist[b as usize] += 1;
524    }
525    let mut h = 0.0;
526    for &c in &hist {
527        if c > 0 {
528            let p = c as f64 / n as f64;
529            h -= p * p.log2();
530        }
531    }
532    let ratio = h / 8.0;
533    let grade = if ratio > 0.95 {
534        'A'
535    } else if ratio > 0.85 {
536        'B'
537    } else if ratio > 0.7 {
538        'C'
539    } else if ratio > 0.5 {
540        'D'
541    } else {
542        'F'
543    };
544    TestResult {
545        name: name.to_string(),
546        passed: ratio > 0.85,
547        p_value: None,
548        statistic: h,
549        details: format!("{h:.4} / 8.0 bits ({:.1}%)", ratio * 100.0),
550        grade,
551    }
552}
553
554/// Test 11: Min-entropy (NIST SP 800-90B): -log2(p_max).
555pub fn min_entropy(data: &[u8]) -> TestResult {
556    let name = "Min-Entropy";
557    let n = data.len();
558    if n < 16 {
559        return insufficient(name, 16, n);
560    }
561    let mut hist = [0u64; 256];
562    for &b in data {
563        hist[b as usize] += 1;
564    }
565    let p_max = *hist.iter().max().unwrap() as f64 / n as f64;
566    let h_min = -(p_max + 1e-15).log2();
567    let ratio = h_min / 8.0;
568    let grade = if ratio > 0.9 {
569        'A'
570    } else if ratio > 0.75 {
571        'B'
572    } else if ratio > 0.5 {
573        'C'
574    } else if ratio > 0.25 {
575        'D'
576    } else {
577        'F'
578    };
579    TestResult {
580        name: name.to_string(),
581        passed: ratio > 0.7,
582        p_value: None,
583        statistic: h_min,
584        details: format!("{h_min:.4} / 8.0 bits ({:.1}%)", ratio * 100.0),
585        grade,
586    }
587}
588
589/// Test 12: Permutation entropy -- complexity of ordinal patterns (order=4).
590pub fn permutation_entropy(data: &[u8]) -> TestResult {
591    let name = "Permutation Entropy";
592    let order = 4usize;
593    let n = data.len();
594    if n < order + 10 {
595        return insufficient(name, order + 10, n);
596    }
597    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
598
599    let mut patterns: HashMap<Vec<usize>, u64> = HashMap::new();
600    for i in 0..n - order {
601        let window = &arr[i..i + order];
602        let mut indices: Vec<usize> = (0..order).collect();
603        indices.sort_by(|&a, &b| {
604            window[a]
605                .partial_cmp(&window[b])
606                .unwrap_or(std::cmp::Ordering::Equal)
607                .then(a.cmp(&b))
608        });
609        *patterns.entry(indices).or_insert(0) += 1;
610    }
611
612    let total: u64 = patterns.values().sum();
613    let mut h = 0.0;
614    for &c in patterns.values() {
615        let p = c as f64 / total as f64;
616        h -= p * p.log2();
617    }
618    // factorial(4) = 24
619    let h_max = 24.0_f64.log2();
620    let normalized = if h_max > 0.0 { h / h_max } else { 0.0 };
621    let grade = if normalized > 0.95 {
622        'A'
623    } else if normalized > 0.85 {
624        'B'
625    } else if normalized > 0.7 {
626        'C'
627    } else if normalized > 0.5 {
628        'D'
629    } else {
630        'F'
631    };
632    TestResult {
633        name: name.to_string(),
634        passed: normalized > 0.85,
635        p_value: None,
636        statistic: normalized,
637        details: format!("PE={h:.4}/{h_max:.4} = {normalized:.4}"),
638        grade,
639    }
640}
641
642/// Test 13: Compression ratio -- zlib compression ratio (random ~ 1.0+).
643pub fn compression_ratio(data: &[u8]) -> TestResult {
644    let name = "Compression Ratio";
645    let n = data.len();
646    if n < 32 {
647        return insufficient(name, 32, n);
648    }
649    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
650    if encoder.write_all(data).is_err() {
651        return TestResult {
652            name: name.to_string(),
653            passed: false,
654            p_value: None,
655            statistic: 0.0,
656            details: "zlib write failed".to_string(),
657            grade: 'F',
658        };
659    }
660    let compressed = match encoder.finish() {
661        Ok(c) => c,
662        Err(_) => {
663            return TestResult {
664                name: name.to_string(),
665                passed: false,
666                p_value: None,
667                statistic: 0.0,
668                details: "zlib finish failed".to_string(),
669                grade: 'F',
670            };
671        }
672    };
673    let ratio = compressed.len() as f64 / n as f64;
674    let grade = if ratio > 0.95 {
675        'A'
676    } else if ratio > 0.85 {
677        'B'
678    } else if ratio > 0.7 {
679        'C'
680    } else if ratio > 0.5 {
681        'D'
682    } else {
683        'F'
684    };
685    TestResult {
686        name: name.to_string(),
687        passed: ratio > 0.85,
688        p_value: None,
689        statistic: ratio,
690        details: format!("{}/{n} = {ratio:.4}", compressed.len()),
691        grade,
692    }
693}
694
695/// Test 14: Kolmogorov complexity -- compression at levels 1 and 9, compute complexity and spread.
696pub fn kolmogorov_complexity(data: &[u8]) -> TestResult {
697    let name = "Kolmogorov Complexity";
698    let n = data.len();
699    if n < 32 {
700        return insufficient(name, 32, n);
701    }
702
703    let compress_at = |level: u32| -> Option<usize> {
704        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
705        encoder.write_all(data).ok()?;
706        Some(encoder.finish().ok()?.len())
707    };
708
709    let (c1, c9) = match (compress_at(1), compress_at(9)) {
710        (Some(a), Some(b)) => (a, b),
711        _ => {
712            return TestResult {
713                name: name.to_string(),
714                passed: false,
715                p_value: None,
716                statistic: 0.0,
717                details: "zlib compression failed".to_string(),
718                grade: 'F',
719            };
720        }
721    };
722    let complexity = c9 as f64 / n as f64;
723    let spread = (c1 as f64 - c9 as f64) / n as f64;
724    let grade = if complexity > 0.95 {
725        'A'
726    } else if complexity > 0.85 {
727        'B'
728    } else if complexity > 0.7 {
729        'C'
730    } else if complexity > 0.5 {
731        'D'
732    } else {
733        'F'
734    };
735    TestResult {
736        name: name.to_string(),
737        passed: complexity > 0.85,
738        p_value: None,
739        statistic: complexity,
740        details: format!("K~={complexity:.4}, spread={spread:.4}"),
741        grade,
742    }
743}
744
// ═══════════════════════════════════════════════════════════════════════════════
// 6. CORRELATION TESTS
// ═══════════════════════════════════════════════════════════════════════════════
748
749/// Test 15: Autocorrelation -- at lags 1-50. Count violations of 2/sqrt(n) threshold.
750pub fn autocorrelation(data: &[u8]) -> TestResult {
751    let name = "Autocorrelation";
752    let max_lag = 50usize;
753    let n = data.len();
754    if n < max_lag + 10 {
755        return insufficient(name, max_lag + 10, n);
756    }
757    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
758    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
759    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
760    if var < 1e-10 {
761        return TestResult {
762            name: name.to_string(),
763            passed: false,
764            p_value: None,
765            statistic: 1.0,
766            details: "Zero variance".to_string(),
767            grade: 'F',
768        };
769    }
770    let threshold = 2.0 / (n as f64).sqrt();
771    let mut max_corr = 0.0f64;
772    let mut violations = 0u64;
773    for lag in 1..=max_lag.min(n - 1) {
774        let mut sum = 0.0;
775        let count = n - lag;
776        for i in 0..count {
777            sum += (arr[i] - mean) * (arr[i + lag] - mean);
778        }
779        let c = sum / (count as f64 * var);
780        if c.abs() > max_corr {
781            max_corr = c.abs();
782        }
783        if c.abs() > threshold {
784            violations += 1;
785        }
786    }
787    let expected_violations = 0.05 * max_lag as f64;
788    let lambda = expected_violations.max(1.0);
789    let p = if violations > 0 {
790        let poisson = Poisson::new(lambda).unwrap();
791        poisson.sf(violations - 1)
792    } else {
793        1.0
794    };
795    TestResult {
796        name: name.to_string(),
797        passed: TestResult::pass_from_p(Some(p), 0.01),
798        p_value: Some(p),
799        statistic: max_corr,
800        details: format!("violations={violations}/{max_lag}, max|r|={max_corr:.4}"),
801        grade: TestResult::grade_from_p(Some(p)),
802    }
803}
804
805/// Test 16: Serial correlation -- adjacent value correlation. Z-test.
806pub fn serial_correlation(data: &[u8]) -> TestResult {
807    let name = "Serial Correlation";
808    let n = data.len();
809    if n < 20 {
810        return insufficient(name, 20, n);
811    }
812    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
813    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
814    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
815    if var < 1e-10 {
816        return TestResult {
817            name: name.to_string(),
818            passed: false,
819            p_value: None,
820            statistic: 1.0,
821            details: "Zero variance".to_string(),
822            grade: 'F',
823        };
824    }
825    let mut sum = 0.0;
826    for i in 0..n - 1 {
827        sum += (arr[i] - mean) * (arr[i + 1] - mean);
828    }
829    let r = sum / ((n - 1) as f64 * var);
830    let z = r * (n as f64).sqrt();
831    let norm = Normal::standard();
832    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
833    TestResult {
834        name: name.to_string(),
835        passed: TestResult::pass_from_p(Some(p), 0.01),
836        p_value: Some(p),
837        statistic: r.abs(),
838        details: format!("r={r:.6}, z={z:.4}"),
839        grade: TestResult::grade_from_p(Some(p)),
840    }
841}
842
843/// Test 17: Lag-N correlation -- correlation at lags [1, 2, 4, 8, 16, 32].
844pub fn lag_n_correlation(data: &[u8]) -> TestResult {
845    let name = "Lag-N Correlation";
846    let lags: &[usize] = &[1, 2, 4, 8, 16, 32];
847    let n = data.len();
848    let max_lag = *lags.iter().max().unwrap();
849    if n < max_lag + 10 {
850        return insufficient(name, max_lag + 10, n);
851    }
852    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
853    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
854    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
855    if var < 1e-10 {
856        return TestResult {
857            name: name.to_string(),
858            passed: false,
859            p_value: None,
860            statistic: 1.0,
861            details: "Zero variance".to_string(),
862            grade: 'F',
863        };
864    }
865    let threshold = 2.0 / (n as f64).sqrt();
866    let mut max_corr = 0.0f64;
867    let mut details_parts = Vec::new();
868    for &lag in lags {
869        if lag >= n {
870            continue;
871        }
872        let mut sum = 0.0;
873        let count = n - lag;
874        for i in 0..count {
875            sum += (arr[i] - mean) * (arr[i + lag] - mean);
876        }
877        let c = sum / (count as f64 * var);
878        if c.abs() > max_corr {
879            max_corr = c.abs();
880        }
881        details_parts.push(format!("lag{lag}={c:.4}"));
882    }
883    let passed = max_corr < threshold;
884    let grade = if max_corr < threshold * 0.5 {
885        'A'
886    } else if max_corr < threshold {
887        'B'
888    } else if max_corr < threshold * 2.0 {
889        'C'
890    } else if max_corr < threshold * 4.0 {
891        'D'
892    } else {
893        'F'
894    };
895    TestResult {
896        name: name.to_string(),
897        passed,
898        p_value: None,
899        statistic: max_corr,
900        details: details_parts.join(", "),
901        grade,
902    }
903}
904
905/// Test 18: Cross-correlation -- even vs odd byte independence. Pearson r.
906pub fn cross_correlation(data: &[u8]) -> TestResult {
907    let name = "Cross-Correlation";
908    let n = data.len();
909    if n < 100 {
910        return insufficient(name, 100, n);
911    }
912    let even: Vec<f64> = data.iter().step_by(2).map(|&b| b as f64).collect();
913    let odd: Vec<f64> = data.iter().skip(1).step_by(2).map(|&b| b as f64).collect();
914    let min_len = even.len().min(odd.len());
915    if min_len < 2 {
916        return insufficient(name, 100, n);
917    }
918    let even = &even[..min_len];
919    let odd = &odd[..min_len];
920
921    let mean_e: f64 = even.iter().sum::<f64>() / min_len as f64;
922    let mean_o: f64 = odd.iter().sum::<f64>() / min_len as f64;
923    let mut cov = 0.0;
924    let mut var_e = 0.0;
925    let mut var_o = 0.0;
926    for i in 0..min_len {
927        let de = even[i] - mean_e;
928        let do_ = odd[i] - mean_o;
929        cov += de * do_;
930        var_e += de * de;
931        var_o += do_ * do_;
932    }
933    let denom = (var_e * var_o).sqrt();
934    if denom < 1e-10 {
935        return TestResult {
936            name: name.to_string(),
937            passed: false,
938            p_value: None,
939            statistic: 0.0,
940            details: "Zero variance in one or both halves".to_string(),
941            grade: 'F',
942        };
943    }
944    let r = cov / denom;
945
946    // For large n, t ~ N(0,1)
947    let t = r * ((min_len as f64 - 2.0) / (1.0 - r * r).max(1e-15)).sqrt();
948    let norm = Normal::standard();
949    let p = 2.0 * (1.0 - norm.cdf(t.abs()));
950    TestResult {
951        name: name.to_string(),
952        passed: TestResult::pass_from_p(Some(p), 0.01),
953        p_value: Some(p),
954        statistic: r.abs(),
955        details: format!("r={r:.6} (even vs odd bytes)"),
956        grade: TestResult::grade_from_p(Some(p)),
957    }
958}
959
// ═══════════════════════════════════════════════════════════════════════════════
// 7. DISTRIBUTION TESTS
// ═══════════════════════════════════════════════════════════════════════════════
963
964/// Test 19: Kolmogorov-Smirnov test vs uniform distribution.
965pub fn ks_test(data: &[u8]) -> TestResult {
966    let name = "Kolmogorov-Smirnov";
967    let n = data.len();
968    if n < 50 {
969        return insufficient(name, 50, n);
970    }
971    // Map discrete bytes to continuous [0,1] with continuity correction
972    // (matching the Anderson-Darling test mapping)
973    let mut normalized: Vec<f64> = data.iter().map(|&b| (b as f64 + 0.5) / 256.0).collect();
974    normalized.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
975
976    // KS statistic: max |F_n(x) - F(x)|
977    let mut d_max = 0.0f64;
978    let nf = n as f64;
979    for (i, &x) in normalized.iter().enumerate() {
980        let f_n_plus = (i + 1) as f64 / nf;
981        let f_n_minus = i as f64 / nf;
982        let f_x = x.clamp(0.0, 1.0);
983        let d1 = (f_n_plus - f_x).abs();
984        let d2 = (f_n_minus - f_x).abs();
985        d_max = d_max.max(d1).max(d2);
986    }
987
988    // Asymptotic KS p-value (Kolmogorov distribution)
989    let sqrt_n = nf.sqrt();
990    let lambda = (sqrt_n + 0.12 + 0.11 / sqrt_n) * d_max;
991    let mut p = 0.0;
992    for k in 1..=100i32 {
993        let sign = if k % 2 == 0 { -1.0 } else { 1.0 };
994        p += sign * (-2.0 * (k as f64 * lambda).powi(2)).exp();
995    }
996    p = (2.0 * p).clamp(0.0, 1.0);
997
998    TestResult {
999        name: name.to_string(),
1000        passed: TestResult::pass_from_p(Some(p), 0.01),
1001        p_value: Some(p),
1002        statistic: d_max,
1003        details: format!("D={d_max:.6}, n={n}"),
1004        grade: TestResult::grade_from_p(Some(p)),
1005    }
1006}
1007
1008/// Test 20: Anderson-Darling -- A-squared statistic for uniform. Critical values:
1009/// 1.933 (5%), 2.492 (2.5%), 3.857 (1%).
1010pub fn anderson_darling(data: &[u8]) -> TestResult {
1011    let name = "Anderson-Darling";
1012    let n = data.len();
1013    if n < 50 {
1014        return insufficient(name, 50, n);
1015    }
1016    // Map bytes to (0, 1): (value + 0.5) / 256
1017    let mut sorted: Vec<f64> = data.iter().map(|&b| (b as f64 + 0.5) / 256.0).collect();
1018    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1019
1020    let nf = n as f64;
1021    let mut s = 0.0;
1022    for i in 0..n {
1023        let idx = (i + 1) as f64;
1024        let u = sorted[i].clamp(1e-15, 1.0 - 1e-15);
1025        let u_rev = sorted[n - 1 - i].clamp(1e-15, 1.0 - 1e-15);
1026        s += (2.0 * idx - 1.0) * (u.ln() + (1.0 - u_rev).ln());
1027    }
1028    let a2 = -nf - s / nf;
1029    let a2_star = a2 * (1.0 + 0.75 / nf + 2.25 / (nf * nf));
1030
1031    let passed = a2_star < 2.492;
1032    let grade = if a2_star < 1.248 {
1033        'A'
1034    } else if a2_star < 1.933 {
1035        'B'
1036    } else if a2_star < 2.492 {
1037        'C'
1038    } else if a2_star < 3.857 {
1039        'D'
1040    } else {
1041        'F'
1042    };
1043    TestResult {
1044        name: name.to_string(),
1045        passed,
1046        p_value: None,
1047        statistic: a2_star,
1048        details: format!("A^2*={a2_star:.4}, 5% critical=2.492"),
1049        grade,
1050    }
1051}
1052
1053// ═══════════════════════════════════════════════════════════════════════════════
1054// 8. PATTERN TESTS
1055// ═══════════════════════════════════════════════════════════════════════════════
1056
1057/// Test 21: Overlapping template -- frequency of overlapping bit pattern (1,1,1,1).
1058pub fn overlapping_template(data: &[u8]) -> TestResult {
1059    let name = "Overlapping Template";
1060    let template: &[u8] = &[1, 1, 1, 1];
1061    let m = template.len();
1062    let bits = to_bits(data);
1063    let n = bits.len();
1064    if n < 1000 {
1065        return insufficient(name, 1000, n);
1066    }
1067
1068    let mut count = 0u64;
1069    for i in 0..=n - m {
1070        if bits[i..i + m] == *template {
1071            count += 1;
1072        }
1073    }
1074    let expected = (n - m + 1) as f64 / (1u64 << m) as f64;
1075    let std = (expected * (1.0 - 1.0 / (1u64 << m) as f64)).sqrt();
1076    if std < 1e-10 {
1077        return TestResult {
1078            name: name.to_string(),
1079            passed: false,
1080            p_value: None,
1081            statistic: 0.0,
1082            details: "Zero std".to_string(),
1083            grade: 'F',
1084        };
1085    }
1086    let z = (count as f64 - expected) / std;
1087    let norm = Normal::standard();
1088    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
1089    TestResult {
1090        name: name.to_string(),
1091        passed: TestResult::pass_from_p(Some(p), 0.01),
1092        p_value: Some(p),
1093        statistic: z.abs(),
1094        details: format!("count={count}, expected={expected:.0}"),
1095        grade: TestResult::grade_from_p(Some(p)),
1096    }
1097}
1098
1099/// Test 22: Non-overlapping template -- non-overlapping occurrences of (0,0,1,1).
1100pub fn non_overlapping_template(data: &[u8]) -> TestResult {
1101    let name = "Non-overlapping Template";
1102    let template: &[u8] = &[0, 0, 1, 1];
1103    let m = template.len();
1104    let bits = to_bits(data);
1105    let n = bits.len();
1106    if n < 1000 {
1107        return insufficient(name, 1000, n);
1108    }
1109
1110    let mut count = 0u64;
1111    let mut i = 0;
1112    while i + m <= n {
1113        if bits[i..i + m] == *template {
1114            count += 1;
1115            i += m;
1116        } else {
1117            i += 1;
1118        }
1119    }
1120    let expected = n as f64 / (1u64 << m) as f64;
1121    let var =
1122        n as f64 * (1.0 / (1u64 << m) as f64 - (2.0 * m as f64 - 1.0) / (1u64 << (2 * m)) as f64);
1123    let var = if var <= 0.0 { 1.0 } else { var };
1124    let z = (count as f64 - expected) / var.sqrt();
1125    let norm = Normal::standard();
1126    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
1127    TestResult {
1128        name: name.to_string(),
1129        passed: TestResult::pass_from_p(Some(p), 0.01),
1130        p_value: Some(p),
1131        statistic: z.abs(),
1132        details: format!("count={count}, expected={expected:.0}"),
1133        grade: TestResult::grade_from_p(Some(p)),
1134    }
1135}
1136
1137/// Test 23: Maurer's universal statistical test (L=6, Q=640).
1138pub fn maurers_universal(data: &[u8]) -> TestResult {
1139    let name = "Maurer's Universal";
1140    let l = 6usize;
1141    let q = 640usize;
1142    let bits = to_bits(data);
1143    let n_bits = bits.len();
1144    let total_blocks = n_bits / l;
1145    if total_blocks <= q {
1146        return insufficient(name, (q + 100) * l, n_bits);
1147    }
1148    let k = total_blocks - q;
1149    if k < 100 || q < 10 * (1 << l) {
1150        return insufficient(name, (q + 100) * l, n_bits);
1151    }
1152
1153    let num_patterns = 1usize << l;
1154    let mut table = vec![0usize; num_patterns];
1155
1156    // Initialization phase
1157    for i in 0..q {
1158        let mut block = 0usize;
1159        for j in 0..l {
1160            block = (block << 1) | bits[i * l + j] as usize;
1161        }
1162        table[block] = i + 1;
1163    }
1164
1165    // Test phase
1166    let mut total = 0.0f64;
1167    for i in q..q + k {
1168        let mut block = 0usize;
1169        for j in 0..l {
1170            block = (block << 1) | bits[i * l + j] as usize;
1171        }
1172        let prev = table[block];
1173        let distance = if prev > 0 {
1174            (i + 1 - prev) as f64
1175        } else {
1176            (i + 1) as f64
1177        };
1178        total += distance.log2();
1179        table[block] = i + 1;
1180    }
1181
1182    let fn_val = total / k as f64;
1183    let expected = 5.2177052;
1184    let variance = 2.954;
1185    let sigma = (variance / k as f64).sqrt();
1186    let z = (fn_val - expected).abs() / sigma.max(1e-10);
1187    let p = erfc(z / 2.0_f64.sqrt());
1188    TestResult {
1189        name: name.to_string(),
1190        passed: TestResult::pass_from_p(Some(p), 0.01),
1191        p_value: Some(p),
1192        statistic: fn_val,
1193        details: format!("fn={fn_val:.4}, expected={expected:.4}, L={l}"),
1194        grade: TestResult::grade_from_p(Some(p)),
1195    }
1196}
1197
1198// ═══════════════════════════════════════════════════════════════════════════════
1199// 9. ADVANCED TESTS
1200// ═══════════════════════════════════════════════════════════════════════════════
1201
/// GF(2) Gaussian elimination to compute binary matrix rank.
fn gf2_rank(matrix: &[u8], rows: usize, cols: usize) -> usize {
    // Work on a mutable row-major copy so the caller's data is untouched.
    let mut work: Vec<Vec<u8>> = matrix
        .chunks(cols)
        .take(rows)
        .map(<[u8]>::to_vec)
        .collect();
    let mut rank = 0usize;
    for col in 0..cols {
        // Find a pivot: a row at or below `rank` with a 1 in this column.
        if let Some(pivot) = (rank..rows).find(|&r| work[r][col] == 1) {
            work.swap(rank, pivot);
            // XOR the pivot row into every other row that has a 1 here,
            // clearing the column everywhere except the pivot position.
            let pivot_row = work[rank].clone();
            for r in 0..rows {
                if r != rank && work[r][col] == 1 {
                    for (cell, &p) in work[r].iter_mut().zip(pivot_row.iter()) {
                        *cell ^= p;
                    }
                }
            }
            rank += 1;
        }
    }
    rank
}
1233
1234/// Test 24: Binary matrix rank -- GF(2) Gaussian elimination on 32x32 binary matrices.
1235pub fn binary_matrix_rank(data: &[u8]) -> TestResult {
1236    let name = "Binary Matrix Rank";
1237    let bits = to_bits(data);
1238    let n = bits.len();
1239    let m_size = 32;
1240    let q_size = 32;
1241    let bits_per_matrix = m_size * q_size;
1242    let num_matrices = n / bits_per_matrix;
1243    if num_matrices < 38 {
1244        return insufficient(name, 38 * bits_per_matrix, n);
1245    }
1246
1247    let mut full_rank = 0u64;
1248    let mut rank_m1 = 0u64;
1249    let min_dim = m_size.min(q_size);
1250    for i in 0..num_matrices {
1251        let start = i * bits_per_matrix;
1252        let matrix = &bits[start..start + bits_per_matrix];
1253        let rank = gf2_rank(matrix, m_size, q_size);
1254        if rank == min_dim {
1255            full_rank += 1;
1256        } else if rank == min_dim - 1 {
1257            rank_m1 += 1;
1258        }
1259    }
1260    let rest = num_matrices as u64 - full_rank - rank_m1;
1261    let n_f = num_matrices as f64;
1262
1263    let p_full = 0.2888;
1264    let p_m1 = 0.5776;
1265    let p_rest = 0.1336;
1266    let chi2 = (full_rank as f64 - n_f * p_full).powi(2) / (n_f * p_full)
1267        + (rank_m1 as f64 - n_f * p_m1).powi(2) / (n_f * p_m1)
1268        + (rest as f64 - n_f * p_rest).powi(2) / (n_f * p_rest);
1269
1270    let dist = ChiSquared::new(2.0).unwrap();
1271    let p = dist.sf(chi2);
1272    TestResult {
1273        name: name.to_string(),
1274        passed: TestResult::pass_from_p(Some(p), 0.01),
1275        p_value: Some(p),
1276        statistic: chi2,
1277        details: format!("N={num_matrices}, full={full_rank}, full-1={rank_m1}"),
1278        grade: TestResult::grade_from_p(Some(p)),
1279    }
1280}
1281
/// Berlekamp-Massey algorithm for binary sequences. Returns the LFSR complexity.
///
/// Computes the length of the shortest linear feedback shift register that
/// generates `seq` over GF(2). Returns 0 for an empty or all-zero sequence.
/// The previous version panicked on an empty slice (`c[0]` indexed into a
/// zero-length vector); an explicit guard now handles that case.
fn berlekamp_massey(seq: &[u8]) -> usize {
    let n = seq.len();
    if n == 0 {
        return 0;
    }
    // c: current connection polynomial; b: polynomial saved at the last
    // length change. Coefficients are bits (0/1).
    let mut c = vec![0u8; n];
    let mut b = vec![0u8; n];
    c[0] = 1;
    b[0] = 1;
    let mut l: usize = 0; // current LFSR length
    let mut m: isize = -1; // index of the last length change

    for ni in 0..n {
        // Discrepancy: XOR of the LFSR's prediction with the actual bit.
        let mut d: u8 = seq[ni];
        for i in 1..=l {
            d ^= c[i] & seq[ni - i];
        }
        if d == 1 {
            let t = c.clone();
            let shift = (ni as isize - m) as usize;
            // Cancel the discrepancy by adding the shifted saved polynomial.
            for i in shift..n {
                c[i] ^= b[i - shift];
            }
            if l <= ni / 2 {
                // The register must grow to absorb this discrepancy.
                l = ni + 1 - l;
                m = ni as isize;
                b = t;
            }
        }
    }
    l
}
1312
1313/// Test 25: Linear complexity -- Berlekamp-Massey LFSR complexity on 200-bit blocks.
1314pub fn linear_complexity(data: &[u8]) -> TestResult {
1315    let name = "Linear Complexity";
1316    let block_size = 200usize;
1317    let bits = to_bits(data);
1318    let n = bits.len();
1319    let num_blocks = n / block_size;
1320    if num_blocks < 6 {
1321        return insufficient(name, 6 * block_size, n);
1322    }
1323
1324    let mut complexities = Vec::with_capacity(num_blocks);
1325    for i in 0..num_blocks {
1326        let start = i * block_size;
1327        let block = &bits[start..start + block_size];
1328        complexities.push(berlekamp_massey(block));
1329    }
1330
1331    let m = block_size as f64;
1332    let sign = if block_size.is_multiple_of(2) {
1333        1.0
1334    } else {
1335        -1.0
1336    };
1337    let mu = m / 2.0 + (9.0 + sign) / 36.0 - (m / 3.0 + 2.0 / 9.0) / 2.0_f64.powf(m);
1338
1339    let t_vals: Vec<f64> = complexities
1340        .iter()
1341        .map(|&c| sign * (c as f64 - mu) + 2.0 / 9.0)
1342        .collect();
1343
1344    let mut observed = [0u64; 7];
1345    for &t in &t_vals {
1346        let bin = if t <= -2.5 {
1347            0
1348        } else if t <= -1.5 {
1349            1
1350        } else if t <= -0.5 {
1351            2
1352        } else if t <= 0.5 {
1353            3
1354        } else if t <= 1.5 {
1355            4
1356        } else if t <= 2.5 {
1357            5
1358        } else {
1359            6
1360        };
1361        observed[bin] += 1;
1362    }
1363
1364    let mut probs = [0.010882, 0.03534, 0.08884, 0.5, 0.08884, 0.03534, 0.010882];
1365    let sum_rest: f64 = probs[..6].iter().sum();
1366    probs[6] = 1.0 - sum_rest;
1367
1368    let mut chi2 = 0.0;
1369    let n_f = num_blocks as f64;
1370    for i in 0..7 {
1371        let expected = probs[i] * n_f;
1372        if expected > 0.0 {
1373            let diff = observed[i] as f64 - expected;
1374            chi2 += diff * diff / expected;
1375        }
1376    }
1377
1378    let dist = ChiSquared::new(6.0).unwrap();
1379    let p = dist.sf(chi2);
1380    let mean_c: f64 = complexities.iter().map(|&c| c as f64).sum::<f64>() / num_blocks as f64;
1381    TestResult {
1382        name: name.to_string(),
1383        passed: TestResult::pass_from_p(Some(p), 0.01),
1384        p_value: Some(p),
1385        statistic: chi2,
1386        details: format!("N={num_blocks}, mean_complexity={mean_c:.1}"),
1387        grade: TestResult::grade_from_p(Some(p)),
1388    }
1389}
1390
1391/// Test 26: Cumulative sums (CUSUM) -- detect drift/bias.
1392pub fn cusum_test(data: &[u8]) -> TestResult {
1393    let name = "Cumulative Sums";
1394    let bits = to_bits(data);
1395    let n = bits.len();
1396    if n < 100 {
1397        return insufficient(name, 100, n);
1398    }
1399
1400    let mut cumsum = Vec::with_capacity(n);
1401    let mut s: i64 = 0;
1402    for &bit in &bits {
1403        s += if bit == 1 { 1 } else { -1 };
1404        cumsum.push(s);
1405    }
1406    let z = cumsum.iter().map(|&x| x.unsigned_abs()).max().unwrap() as f64;
1407    if z < 1e-10 {
1408        return TestResult {
1409            name: name.to_string(),
1410            passed: true,
1411            p_value: Some(1.0),
1412            statistic: 0.0,
1413            details: format!("max|S|=0, n={n}"),
1414            grade: 'A',
1415        };
1416    }
1417
1418    let nf = n as f64;
1419    let sqrt_n = nf.sqrt();
1420    let norm = Normal::standard();
1421    let k_start = ((-nf / z + 1.0) / 4.0).floor() as i64;
1422    let k_end = ((nf / z - 1.0) / 4.0).ceil() as i64;
1423    let mut s_val = 0.0;
1424    for k in k_start..=k_end {
1425        let kf = k as f64;
1426        s_val += norm.cdf((4.0 * kf + 1.0) * z / sqrt_n) - norm.cdf((4.0 * kf - 1.0) * z / sqrt_n);
1427    }
1428    let p = (1.0 - s_val).clamp(0.0, 1.0);
1429    TestResult {
1430        name: name.to_string(),
1431        passed: TestResult::pass_from_p(Some(p), 0.01),
1432        p_value: Some(p),
1433        statistic: z,
1434        details: format!("max|S|={z:.1}, n={n}"),
1435        grade: TestResult::grade_from_p(Some(p)),
1436    }
1437}
1438
1439/// Test 27: Random excursions -- cycles in cumulative sum random walk.
1440pub fn random_excursions(data: &[u8]) -> TestResult {
1441    let name = "Random Excursions";
1442    let bits = to_bits(data);
1443    let n = bits.len();
1444    if n < 1000 {
1445        return insufficient(name, 1000, n);
1446    }
1447
1448    // Build cumulative sum with leading and trailing zeros
1449    let mut cumsum = Vec::with_capacity(n + 2);
1450    cumsum.push(0i64);
1451    let mut s: i64 = 0;
1452    for &bit in &bits {
1453        s += if bit == 1 { 1 } else { -1 };
1454        cumsum.push(s);
1455    }
1456    cumsum.push(0);
1457
1458    let zeros: Vec<usize> = cumsum
1459        .iter()
1460        .enumerate()
1461        .filter_map(|(i, &v)| if v == 0 { Some(i) } else { None })
1462        .collect();
1463
1464    let j = if !zeros.is_empty() {
1465        zeros.len() - 1
1466    } else {
1467        0
1468    };
1469
1470    if j < 500 {
1471        return TestResult {
1472            name: name.to_string(),
1473            passed: true,
1474            p_value: None,
1475            statistic: j as f64,
1476            details: format!("Only {j} cycles (need 500 for reliable test)"),
1477            grade: 'B',
1478        };
1479    }
1480
1481    let expected_cycles = (n as f64) / (2.0 * PI * n as f64).sqrt();
1482    let ratio = j as f64 / expected_cycles.max(1.0);
1483    let passed = ratio > 0.5 && ratio < 2.0;
1484    let grade = if ratio > 0.8 && ratio < 1.2 {
1485        'A'
1486    } else if ratio > 0.6 && ratio < 1.5 {
1487        'B'
1488    } else if passed {
1489        'C'
1490    } else {
1491        'F'
1492    };
1493    TestResult {
1494        name: name.to_string(),
1495        passed,
1496        p_value: None,
1497        statistic: j as f64,
1498        details: format!("cycles={j}, expected~={expected_cycles:.0}"),
1499        grade,
1500    }
1501}
1502
1503/// Test 28: Birthday spacing -- spacing between repeated values, Poisson test.
1504pub fn birthday_spacing(data: &[u8]) -> TestResult {
1505    let name = "Birthday Spacing";
1506    let n = data.len();
1507    if n < 100 {
1508        return insufficient(name, 100, n);
1509    }
1510
1511    let values: Vec<u64> = if n >= 200 {
1512        let half = n / 2;
1513        (0..half)
1514            .map(|i| data[i * 2] as u64 * 256 + data[i * 2 + 1] as u64)
1515            .collect()
1516    } else {
1517        data.iter().map(|&b| b as u64).collect()
1518    };
1519
1520    let m = values.len();
1521    let mut sorted = values.clone();
1522    sorted.sort();
1523
1524    let mut spacings: Vec<u64> = Vec::with_capacity(m.saturating_sub(1));
1525    for i in 1..m {
1526        spacings.push(sorted[i] - sorted[i - 1]);
1527    }
1528    spacings.sort();
1529
1530    let mut dups = 0u64;
1531    for i in 1..spacings.len() {
1532        if spacings[i] == spacings[i - 1] {
1533            dups += 1;
1534        }
1535    }
1536
1537    let d = sorted.last().copied().unwrap_or(1).max(1) as f64;
1538    let mf = m as f64;
1539    let lambda = (mf * mf * mf / (4.0 * d)).max(0.01);
1540
1541    let p = if lambda > 0.0 {
1542        let poisson = Poisson::new(lambda).unwrap();
1543        let p_upper = if dups > 0 { poisson.sf(dups - 1) } else { 1.0 };
1544        let p_lower = poisson.cdf(dups);
1545        p_upper.max(p_lower).min(1.0)
1546    } else {
1547        1.0
1548    };
1549
1550    TestResult {
1551        name: name.to_string(),
1552        passed: TestResult::pass_from_p(Some(p), 0.01),
1553        p_value: Some(p),
1554        statistic: dups as f64,
1555        details: format!("duplicates={dups}, lambda={lambda:.2}, m={m}"),
1556        grade: TestResult::grade_from_p(Some(p)),
1557    }
1558}
1559
1560// ═══════════════════════════════════════════════════════════════════════════════
1561// 10. PRACTICAL TESTS
1562// ═══════════════════════════════════════════════════════════════════════════════
1563
1564/// Test 29: Bit avalanche -- adjacent bytes should differ by ~4 bits (50%).
1565pub fn bit_avalanche(data: &[u8]) -> TestResult {
1566    let name = "Bit Avalanche";
1567    let n = data.len();
1568    if n < 100 {
1569        return insufficient(name, 100, n);
1570    }
1571
1572    let mut total_diffs = 0u64;
1573    let pairs = n - 1;
1574    for i in 0..pairs {
1575        total_diffs += (data[i] ^ data[i + 1]).count_ones() as u64;
1576    }
1577    let mean_diff = total_diffs as f64 / pairs as f64;
1578    let expected = 4.0;
1579    let std = 2.0_f64.sqrt(); // binomial std for n=8, p=0.5
1580    let z = (mean_diff - expected).abs() / (std / (pairs as f64).sqrt());
1581    let norm = Normal::standard();
1582    let p = 2.0 * (1.0 - norm.cdf(z));
1583    TestResult {
1584        name: name.to_string(),
1585        passed: TestResult::pass_from_p(Some(p), 0.01),
1586        p_value: Some(p),
1587        statistic: mean_diff,
1588        details: format!("mean_diff={mean_diff:.3}/8 bits, expected=4.0"),
1589        grade: TestResult::grade_from_p(Some(p)),
1590    }
1591}
1592
1593/// Test 30: Monte Carlo pi -- estimate pi using (x,y) pairs in unit circle.
1594pub fn monte_carlo_pi(data: &[u8]) -> TestResult {
1595    let name = "Monte Carlo Pi";
1596    let n = data.len();
1597    if n < 200 {
1598        return insufficient(name, 200, n);
1599    }
1600    let pairs = n / 2;
1601    let mut inside = 0u64;
1602    for i in 0..pairs {
1603        let x = data[i] as f64 / 255.0;
1604        let y = data[pairs + i] as f64 / 255.0;
1605        if x * x + y * y <= 1.0 {
1606            inside += 1;
1607        }
1608    }
1609    let pi_est = 4.0 * inside as f64 / pairs as f64;
1610    let error = (pi_est - PI).abs() / PI;
1611    let grade = if error < 0.01 {
1612        'A'
1613    } else if error < 0.03 {
1614        'B'
1615    } else if error < 0.1 {
1616        'C'
1617    } else if error < 0.2 {
1618        'D'
1619    } else {
1620        'F'
1621    };
1622    TestResult {
1623        name: name.to_string(),
1624        passed: error < 0.05,
1625        p_value: None,
1626        statistic: pi_est,
1627        details: format!("pi~={pi_est:.6}, error={:.4}%", error * 100.0),
1628        grade,
1629    }
1630}
1631
1632/// Test 31: Mean and variance -- mean (~127.5) and variance (~5461.25) of uniform bytes.
1633pub fn mean_variance(data: &[u8]) -> TestResult {
1634    let name = "Mean & Variance";
1635    let n = data.len();
1636    if n < 50 {
1637        return insufficient(name, 50, n);
1638    }
1639    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
1640    let nf = n as f64;
1641    let mean: f64 = arr.iter().sum::<f64>() / nf;
1642    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / nf;
1643
1644    let expected_mean = 127.5;
1645    let expected_var = (256.0 * 256.0 - 1.0) / 12.0; // 5461.25
1646
1647    let z_mean = (mean - expected_mean).abs() / (expected_var / nf).sqrt();
1648    let norm = Normal::standard();
1649    let p_mean = 2.0 * (1.0 - norm.cdf(z_mean));
1650
1651    let chi2_var = (nf - 1.0) * var / expected_var;
1652    let chi_dist = ChiSquared::new(nf - 1.0).unwrap();
1653    let p_var = 2.0 * chi_dist.cdf(chi2_var).min(chi_dist.sf(chi2_var));
1654
1655    let p = p_mean.min(p_var);
1656    TestResult {
1657        name: name.to_string(),
1658        passed: TestResult::pass_from_p(Some(p), 0.01),
1659        p_value: Some(p),
1660        statistic: z_mean,
1661        details: format!("mean={mean:.2} (exp 127.5), var={var:.1} (exp {expected_var:.1})"),
1662        grade: TestResult::grade_from_p(Some(p)),
1663    }
1664}
1665
1666// ═══════════════════════════════════════════════════════════════════════════════
1667// Test battery
1668// ═══════════════════════════════════════════════════════════════════════════════
1669
1670/// Run the complete 31-test battery on a byte slice.
1671pub fn run_all_tests(data: &[u8]) -> Vec<TestResult> {
1672    let tests: Vec<fn(&[u8]) -> TestResult> = vec![
1673        // Frequency (3)
1674        monobit_frequency,
1675        block_frequency,
1676        byte_frequency,
1677        // Runs (2)
1678        runs_test,
1679        longest_run_of_ones,
1680        // Serial (2)
1681        serial_test,
1682        approximate_entropy,
1683        // Spectral (2)
1684        dft_spectral,
1685        spectral_flatness,
1686        // Entropy (5)
1687        shannon_entropy,
1688        min_entropy,
1689        permutation_entropy,
1690        compression_ratio,
1691        kolmogorov_complexity,
1692        // Correlation (4)
1693        autocorrelation,
1694        serial_correlation,
1695        lag_n_correlation,
1696        cross_correlation,
1697        // Distribution (2)
1698        ks_test,
1699        anderson_darling,
1700        // Pattern (3)
1701        overlapping_template,
1702        non_overlapping_template,
1703        maurers_universal,
1704        // Advanced (5)
1705        binary_matrix_rank,
1706        linear_complexity,
1707        cusum_test,
1708        random_excursions,
1709        birthday_spacing,
1710        // Practical (3)
1711        bit_avalanche,
1712        monte_carlo_pi,
1713        mean_variance,
1714    ];
1715
1716    tests
1717        .iter()
1718        .map(|test_fn| {
1719            match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| test_fn(data))) {
1720                Ok(result) => result,
1721                Err(_) => TestResult {
1722                    name: "Unknown".to_string(),
1723                    passed: false,
1724                    p_value: None,
1725                    statistic: 0.0,
1726                    details: "Test panicked".to_string(),
1727                    grade: 'F',
1728                },
1729            }
1730        })
1731        .collect()
1732}
1733
1734/// Calculate overall quality score (0-100) from test results.
1735///
1736/// Each grade maps to a score: A=100, B=75, C=50, D=25, F=0.
1737/// Returns the average across all tests.
1738pub fn calculate_quality_score(results: &[TestResult]) -> f64 {
1739    if results.is_empty() {
1740        return 0.0;
1741    }
1742    let total: f64 = results
1743        .iter()
1744        .map(|r| match r.grade {
1745            'A' => 100.0,
1746            'B' => 75.0,
1747            'C' => 50.0,
1748            'D' => 25.0,
1749            _ => 0.0,
1750        })
1751        .sum();
1752    total / results.len() as f64
1753}
1754
#[cfg(test)]
mod tests {
    use super::*;

    /// Generate pseudo-random data for testing (simple LCG).
    ///
    /// Deterministic 64-bit linear congruential generator seeded with a
    /// fixed constant; output bytes are taken from the upper state bits
    /// (`state >> 33`).
    fn pseudo_random(n: usize) -> Vec<u8> {
        let mut data = Vec::with_capacity(n);
        let mut state: u64 = 0xDEAD_BEEF_CAFE_BABE;
        for _ in 0..n {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    // Bit expansion is MSB-first within each byte.
    #[test]
    fn test_to_bits() {
        let data = [0b10110001u8];
        let bits = to_bits(&data);
        assert_eq!(bits, vec![1, 0, 1, 1, 0, 0, 0, 1]);
    }

    // One representative p-value per grade band, plus the None -> 'F' case.
    #[test]
    fn test_grade_from_p() {
        assert_eq!(TestResult::grade_from_p(Some(0.5)), 'A');
        assert_eq!(TestResult::grade_from_p(Some(0.05)), 'B');
        assert_eq!(TestResult::grade_from_p(Some(0.005)), 'C');
        assert_eq!(TestResult::grade_from_p(Some(0.0005)), 'D');
        assert_eq!(TestResult::grade_from_p(Some(0.00000001)), 'F');
        assert_eq!(TestResult::grade_from_p(None), 'F');
    }

    // Pass/fail threshold is inclusive on the passing side; None never passes.
    #[test]
    fn test_pass_from_p() {
        assert!(TestResult::pass_from_p(Some(0.05), 0.01));
        assert!(!TestResult::pass_from_p(Some(0.005), 0.01));
        assert!(!TestResult::pass_from_p(None, 0.01));
    }

    // Too-short input yields a failing "Insufficient" result, not a panic.
    #[test]
    fn test_insufficient_data() {
        let data = [0u8; 5];
        let result = monobit_frequency(&data);
        assert!(!result.passed);
        assert!(result.details.contains("Insufficient"));
    }

    // An all-zeros buffer should fail the majority of the battery.
    #[test]
    fn test_constant_data_fails() {
        let data = vec![0u8; 1000];
        let results = run_all_tests(&data);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(passed_count < results.len() / 2);
    }

    // LCG output is not cryptographic, but should pass most tests.
    #[test]
    fn test_pseudo_random_passes() {
        let data = pseudo_random(10000);
        let results = run_all_tests(&data);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(
            passed_count > results.len() / 2,
            "Only {passed_count}/{} tests passed",
            results.len()
        );
    }

    // One 'A' (100) and one 'F' (0) should average to 50.
    #[test]
    fn test_quality_score() {
        let results = vec![
            TestResult {
                name: "A".into(),
                passed: true,
                p_value: Some(0.5),
                statistic: 0.0,
                details: String::new(),
                grade: 'A',
            },
            TestResult {
                name: "F".into(),
                passed: false,
                p_value: Some(0.0),
                statistic: 0.0,
                details: String::new(),
                grade: 'F',
            },
        ];
        let score = calculate_quality_score(&results);
        assert!((score - 50.0).abs() < 0.01);
    }

    // The battery should always report exactly 31 results.
    #[test]
    fn test_all_31_tests_present() {
        let data = pseudo_random(10000);
        let results = run_all_tests(&data);
        assert_eq!(results.len(), 31);
    }

    // Monobit on sufficient data should produce a p-value.
    #[test]
    fn test_monobit_basic() {
        let data = pseudo_random(1000);
        let result = monobit_frequency(&data);
        assert!(result.p_value.is_some());
    }

    // Random bytes should be close to the 8 bits/byte entropy ceiling.
    #[test]
    fn test_shannon_entropy_random() {
        let data = pseudo_random(10000);
        let result = shannon_entropy(&data);
        assert!(
            result.statistic > 7.0,
            "Shannon entropy too low: {}",
            result.statistic
        );
    }

    // Random data should be essentially incompressible (ratio near 1).
    #[test]
    fn test_compression_ratio_random() {
        let data = pseudo_random(10000);
        let result = compression_ratio(&data);
        assert!(
            result.statistic > 0.9,
            "Compression ratio too low: {}",
            result.statistic
        );
    }

    // Empty input yields a score of zero, not a division by zero.
    #[test]
    fn test_calculate_quality_score_empty() {
        assert_eq!(calculate_quality_score(&[]), 0.0);
    }
}