// openentropy_tests/lib.rs
//! NIST SP 800-22 inspired randomness test battery.
//!
//! Provides 31 statistical tests for evaluating the quality of random byte sequences.
//! Each test returns a [`TestResult`] with a p-value (where applicable), a pass/fail
//! determination, and a letter grade (A through F).

use flate2::Compression;
use flate2::write::ZlibEncoder;
use rustfft::{FftPlanner, num_complex::Complex};
use statrs::distribution::{ChiSquared, ContinuousCDF, DiscreteCDF, Normal, Poisson};
use statrs::function::erf::erfc;
use std::collections::HashMap;
use std::f64::consts::PI;
use std::io::Write;

// ═══════════════════════════════════════════════════════════════════════════════
// Core types
// ═══════════════════════════════════════════════════════════════════════════════

/// Result of a single randomness test.
#[derive(Debug, Clone)]
pub struct TestResult {
    /// Human-readable test name.
    pub name: String,
    /// Pass/fail verdict at the test's significance threshold.
    pub passed: bool,
    /// P-value, when the test produces one.
    pub p_value: Option<f64>,
    /// Raw test statistic (meaning varies per test).
    pub statistic: f64,
    /// Free-form diagnostic text.
    pub details: String,
    /// Letter grade, 'A' (best) through 'F' (worst).
    pub grade: char,
}

impl TestResult {
    /// Assign a letter grade based on p-value.
    ///
    /// - A: p >= 0.1
    /// - B: p >= 0.01
    /// - C: p >= 0.001
    /// - D: p >= 0.0001
    /// - F: otherwise or None
    pub fn grade_from_p(p: Option<f64>) -> char {
        // A missing p-value (and NaN, which fails every `>=`) grades as F.
        p.map_or('F', |v| {
            if v >= 0.1 {
                'A'
            } else if v >= 0.01 {
                'B'
            } else if v >= 0.001 {
                'C'
            } else if v >= 0.0001 {
                'D'
            } else {
                'F'
            }
        })
    }

    /// Determine pass/fail from p-value against a threshold (default 0.01).
    pub fn pass_from_p(p: Option<f64>, threshold: f64) -> bool {
        // No p-value means the test could not be evaluated: fail.
        p.map_or(false, |v| v >= threshold)
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// Helpers
// ═══════════════════════════════════════════════════════════════════════════════

/// Unpack a byte slice into individual bits (MSB first per byte).
fn to_bits(data: &[u8]) -> Vec<u8> {
    let mut bits = Vec::with_capacity(data.len() * 8);
    // Each byte expands to its 8 bits, most significant first.
    bits.extend(
        data.iter()
            .flat_map(|&byte| (0..8).rev().map(move |shift| (byte >> shift) & 1)),
    );
    bits
}

73/// Return a failing `TestResult` when data is too short.
74fn insufficient(name: &str, needed: usize, got: usize) -> TestResult {
75    TestResult {
76        name: name.to_string(),
77        passed: false,
78        p_value: None,
79        statistic: 0.0,
80        details: format!("Insufficient data: need {needed}, got {got}"),
81        grade: 'F',
82    }
83}

// ═══════════════════════════════════════════════════════════════════════════════
// 1. FREQUENCY TESTS
// ═══════════════════════════════════════════════════════════════════════════════

89/// Test 1: Monobit frequency -- proportion of 1s vs 0s should be ~50%.
90pub fn monobit_frequency(data: &[u8]) -> TestResult {
91    let name = "Monobit Frequency";
92    let bits = to_bits(data);
93    let n = bits.len();
94    if n < 100 {
95        return insufficient(name, 100, n);
96    }
97    let s: i64 = bits
98        .iter()
99        .map(|&b| if b == 1 { 1i64 } else { -1i64 })
100        .sum();
101    let s_obs = (s as f64).abs() / (n as f64).sqrt();
102    let p = erfc(s_obs / 2.0_f64.sqrt());
103    TestResult {
104        name: name.to_string(),
105        passed: TestResult::pass_from_p(Some(p), 0.01),
106        p_value: Some(p),
107        statistic: s_obs,
108        details: format!("S={s}, n={n}"),
109        grade: TestResult::grade_from_p(Some(p)),
110    }
111}
112
113/// Test 2: Block frequency -- frequency within 128-bit blocks. Chi-squared test.
114pub fn block_frequency(data: &[u8]) -> TestResult {
115    let name = "Block Frequency";
116    let block_size: usize = 128;
117    let bits = to_bits(data);
118    let n = bits.len();
119    let num_blocks = n / block_size;
120    if num_blocks < 10 {
121        return insufficient(name, block_size * 10, n);
122    }
123    let mut chi2 = 0.0;
124    for i in 0..num_blocks {
125        let start = i * block_size;
126        let ones: usize = bits[start..start + block_size]
127            .iter()
128            .map(|&b| b as usize)
129            .sum();
130        let proportion = ones as f64 / block_size as f64;
131        chi2 += (proportion - 0.5) * (proportion - 0.5);
132    }
133    chi2 *= 4.0 * block_size as f64;
134    let dist = ChiSquared::new(num_blocks as f64).unwrap();
135    let p = dist.sf(chi2);
136    TestResult {
137        name: name.to_string(),
138        passed: TestResult::pass_from_p(Some(p), 0.01),
139        p_value: Some(p),
140        statistic: chi2,
141        details: format!("blocks={num_blocks}, M={block_size}"),
142        grade: TestResult::grade_from_p(Some(p)),
143    }
144}
145
146/// Test 3: Byte frequency -- chi-squared on byte value distribution (256 bins).
147pub fn byte_frequency(data: &[u8]) -> TestResult {
148    let name = "Byte Frequency";
149    let n = data.len();
150    if n < 256 {
151        return insufficient(name, 256, n);
152    }
153    let mut hist = [0u64; 256];
154    for &b in data {
155        hist[b as usize] += 1;
156    }
157    let expected = n as f64 / 256.0;
158    let chi2: f64 = hist
159        .iter()
160        .map(|&c| {
161            let diff = c as f64 - expected;
162            diff * diff / expected
163        })
164        .sum();
165    let dist = ChiSquared::new(255.0).unwrap();
166    let p = dist.sf(chi2);
167    TestResult {
168        name: name.to_string(),
169        passed: TestResult::pass_from_p(Some(p), 0.01),
170        p_value: Some(p),
171        statistic: chi2,
172        details: format!("n={n}, expected_per_bin={expected:.1}"),
173        grade: TestResult::grade_from_p(Some(p)),
174    }
175}

// ═══════════════════════════════════════════════════════════════════════════════
// 2. RUNS TESTS
// ═══════════════════════════════════════════════════════════════════════════════

181/// Test 4: Runs test -- number of uninterrupted runs of 0s or 1s.
182pub fn runs_test(data: &[u8]) -> TestResult {
183    let name = "Runs Test";
184    let bits = to_bits(data);
185    let n = bits.len();
186    if n < 100 {
187        return insufficient(name, 100, n);
188    }
189    let ones: usize = bits.iter().map(|&b| b as usize).sum();
190    let prop = ones as f64 / n as f64;
191    if (prop - 0.5).abs() >= 2.0 / (n as f64).sqrt() {
192        return TestResult {
193            name: name.to_string(),
194            passed: false,
195            p_value: Some(0.0),
196            statistic: 0.0,
197            details: format!("Pre-test failed: proportion={prop:.4}"),
198            grade: 'F',
199        };
200    }
201    let mut runs: usize = 1;
202    for i in 1..n {
203        if bits[i] != bits[i - 1] {
204            runs += 1;
205        }
206    }
207    let expected = 2.0 * n as f64 * prop * (1.0 - prop) + 1.0;
208    let std = 2.0 * (2.0 * n as f64).sqrt() * prop * (1.0 - prop);
209    if std < 1e-10 {
210        return TestResult {
211            name: name.to_string(),
212            passed: false,
213            p_value: Some(0.0),
214            statistic: 0.0,
215            details: "Zero variance".to_string(),
216            grade: 'F',
217        };
218    }
219    let z = (runs as f64 - expected).abs() / std;
220    let p = erfc(z / 2.0_f64.sqrt());
221    TestResult {
222        name: name.to_string(),
223        passed: TestResult::pass_from_p(Some(p), 0.01),
224        p_value: Some(p),
225        statistic: z,
226        details: format!("runs={runs}, expected={expected:.0}"),
227        grade: TestResult::grade_from_p(Some(p)),
228    }
229}
230
231/// Test 5: Longest run of ones -- within 8-bit blocks, chi-squared against theoretical probs.
232pub fn longest_run_of_ones(data: &[u8]) -> TestResult {
233    let name = "Longest Run of Ones";
234    let bits = to_bits(data);
235    let n = bits.len();
236    if n < 128 {
237        return insufficient(name, 128, n);
238    }
239    let block_size = 8;
240    let num_blocks = n / block_size;
241
242    // For each block, find the longest run of 1s
243    let mut observed = [0u64; 4]; // NIST bins for M=8: {≤1, 2, 3, ≥4}
244    for i in 0..num_blocks {
245        let start = i * block_size;
246        let block = &bits[start..start + block_size];
247        let mut max_run = 0u32;
248        let mut current_run = 0u32;
249        for &bit in block {
250            if bit == 1 {
251                current_run += 1;
252                if current_run > max_run {
253                    max_run = current_run;
254                }
255            } else {
256                current_run = 0;
257            }
258        }
259        match max_run {
260            0 | 1 => observed[0] += 1,
261            2 => observed[1] += 1,
262            3 => observed[2] += 1,
263            _ => observed[3] += 1,
264        }
265    }
266
267    // Theoretical probabilities for M=8
268    let probs = [0.2148, 0.3672, 0.2305, 0.1875];
269    let mut chi2 = 0.0;
270    for i in 0..4 {
271        let expected = probs[i] * num_blocks as f64;
272        if expected > 0.0 {
273            let diff = observed[i] as f64 - expected;
274            chi2 += diff * diff / expected;
275        }
276    }
277    let dist = ChiSquared::new(3.0).unwrap();
278    let p = dist.sf(chi2);
279    TestResult {
280        name: name.to_string(),
281        passed: TestResult::pass_from_p(Some(p), 0.01),
282        p_value: Some(p),
283        statistic: chi2,
284        details: format!("blocks={num_blocks}, M={block_size}"),
285        grade: TestResult::grade_from_p(Some(p)),
286    }
287}

// ═══════════════════════════════════════════════════════════════════════════════
// 3. SERIAL TESTS
// ═══════════════════════════════════════════════════════════════════════════════

/// Helper: compute psi-squared for the serial test.
///
/// Counts every overlapping m-bit pattern (with wraparound so all n start
/// positions contribute) and returns (2^m / n) * sum(count^2) - n.
fn psi_sq(bits: &[u8], n: usize, m: usize) -> f64 {
    if m < 1 {
        return 0.0;
    }
    let num_patterns = 1usize << m;
    let mut counts = vec![0u64; num_patterns];
    for start in 0..n {
        // Build the m-bit pattern starting at `start`, wrapping at the end.
        let pattern = (0..m).fold(0usize, |acc, j| (acc << 1) | bits[(start + j) % n] as usize);
        counts[pattern] += 1;
    }
    let sum_sq: f64 = counts.iter().map(|&c| (c as f64) * (c as f64)).sum();
    sum_sq * (num_patterns as f64) / (n as f64) - n as f64
}

311/// Test 6: Serial test -- frequency of overlapping m-bit patterns (m=4).
312pub fn serial_test(data: &[u8]) -> TestResult {
313    let name = "Serial Test";
314    let m = 4usize;
315    let mut bits = to_bits(data);
316    let mut n = bits.len();
317    if n > 20000 {
318        bits.truncate(20000);
319        n = 20000;
320    }
321    if n < (1 << m) + 10 {
322        return insufficient(name, (1 << m) + 10, n);
323    }
324
325    let psi_m = psi_sq(&bits, n, m);
326    let psi_m1 = psi_sq(&bits, n, m - 1);
327    let psi_m2 = if m >= 2 { psi_sq(&bits, n, m - 2) } else { 0.0 };
328    let delta1 = psi_m - psi_m1;
329    let delta2 = psi_m - 2.0 * psi_m1 + psi_m2;
330
331    let df1 = (1u64 << (m - 1)) as f64;
332    let df2 = (1u64 << (m - 2)) as f64;
333    let dist1 = ChiSquared::new(df1).unwrap();
334    let dist2 = ChiSquared::new(df2).unwrap();
335    let p1 = dist1.sf(delta1);
336    let p2 = dist2.sf(delta2.max(0.0));
337
338    // Use the more conservative (lower) p-value of the two serial statistics.
339    let p = p1.min(p2);
340    TestResult {
341        name: name.to_string(),
342        passed: TestResult::pass_from_p(Some(p), 0.01),
343        p_value: Some(p),
344        statistic: delta1,
345        details: format!("m={m}, n_bits={n}, p1={p1:.4}, p2={p2:.4}"),
346        grade: TestResult::grade_from_p(Some(p)),
347    }
348}
349
/// Test 7: Approximate entropy -- compare m and m+1 bit pattern frequencies (m=3).
///
/// Low ApEn indicates repetitive structure; for a random stream ApEn (base 2)
/// approaches 1 bit per symbol. Modeled on the NIST SP 800-22 approximate
/// entropy test, with phi computed in log base 2 and converted below.
pub fn approximate_entropy(data: &[u8]) -> TestResult {
    let name = "Approximate Entropy";
    let m = 3usize;
    let mut bits = to_bits(data);
    let mut n = bits.len();
    // Cap the input so the O(n * 2^m) pattern scans stay cheap.
    if n > 20000 {
        bits.truncate(20000);
        n = 20000;
    }
    if n < 64 {
        return insufficient(name, 64, n);
    }

    // phi(L) = sum over observed L-bit patterns of p * log2(p), counting
    // overlapping windows with wraparound so every start index contributes.
    let phi = |block_len: usize| -> f64 {
        let num_patterns = 1usize << block_len;
        let mut counts = vec![0u64; num_patterns];
        for i in 0..n {
            let mut val = 0usize;
            for j in 0..block_len {
                val = (val << 1) | bits[(i + j) % n] as usize;
            }
            counts[val] += 1;
        }
        let mut sum = 0.0;
        for &c in &counts {
            if c > 0 {
                let p = c as f64 / n as f64;
                sum += p * p.log2();
            }
        }
        sum
    };

    let phi_m = phi(m);
    let phi_m1 = phi(m + 1);
    let apen = phi_m - phi_m1;
    // NIST formula (natural log): chi2 = 2*n*(ln(2) - ApEn_ln).
    // Since phi uses log2, ApEn_log2 = ApEn_ln / ln(2), so:
    // chi2 = 2*n*ln(2)*(1 - ApEn_log2)
    let chi2 = 2.0 * n as f64 * 2.0_f64.ln() * (1.0 - apen);

    // Reference distribution: chi-squared with 2^m degrees of freedom.
    let df = (1u64 << m) as f64;
    let dist = ChiSquared::new(df).unwrap();
    let p = dist.sf(chi2);
    TestResult {
        name: name.to_string(),
        passed: TestResult::pass_from_p(Some(p), 0.01),
        p_value: Some(p),
        statistic: chi2,
        details: format!("ApEn={apen:.6}, m={m}"),
        grade: TestResult::grade_from_p(Some(p)),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// 4. SPECTRAL TESTS
// ═══════════════════════════════════════════════════════════════════════════════

409/// Test 8: DFT spectral -- detect periodic features via FFT.
410pub fn dft_spectral(data: &[u8]) -> TestResult {
411    let name = "DFT Spectral";
412    let bits = to_bits(data);
413    let n = bits.len();
414    if n < 64 {
415        return insufficient(name, 64, n);
416    }
417
418    let mut buffer: Vec<Complex<f64>> = bits
419        .iter()
420        .map(|&b| Complex {
421            re: if b == 1 { 1.0 } else { -1.0 },
422            im: 0.0,
423        })
424        .collect();
425
426    let mut planner = FftPlanner::new();
427    let fft = planner.plan_fft_forward(n);
428    fft.process(&mut buffer);
429
430    let half = n / 2;
431    let magnitudes: Vec<f64> = buffer[..half].iter().map(|c| c.norm()).collect();
432
433    let threshold = (2.995732274 * n as f64).sqrt();
434    let n0 = 0.95 * half as f64;
435    let n1 = magnitudes.iter().filter(|&&m| m < threshold).count() as f64;
436    let d = (n1 - n0) / (n as f64 * 0.95 * 0.05 / 4.0).sqrt();
437    let p = erfc(d.abs() / 2.0_f64.sqrt());
438    TestResult {
439        name: name.to_string(),
440        passed: TestResult::pass_from_p(Some(p), 0.01),
441        p_value: Some(p),
442        statistic: d,
443        details: format!("peaks_below_threshold={}/{half}", n1 as u64),
444        grade: TestResult::grade_from_p(Some(p)),
445    }
446}
447
448/// Test 9: Spectral flatness -- geometric/arithmetic mean ratio of power spectrum.
449pub fn spectral_flatness(data: &[u8]) -> TestResult {
450    let name = "Spectral Flatness";
451    let n = data.len();
452    if n < 64 {
453        return insufficient(name, 64, n);
454    }
455
456    let mean_val: f64 = data.iter().map(|&b| b as f64).sum::<f64>() / n as f64;
457    let mut buffer: Vec<Complex<f64>> = data
458        .iter()
459        .map(|&b| Complex {
460            re: b as f64 - mean_val,
461            im: 0.0,
462        })
463        .collect();
464
465    let mut planner = FftPlanner::new();
466    let fft = planner.plan_fft_forward(n);
467    fft.process(&mut buffer);
468
469    // Power spectrum, skip DC bin (index 0)
470    let half = n / 2;
471    if half < 2 {
472        return insufficient(name, 64, n);
473    }
474    let power: Vec<f64> = buffer[1..half]
475        .iter()
476        .map(|c| c.norm_sqr() + 1e-15)
477        .collect();
478
479    if power.is_empty() {
480        return insufficient(name, 64, n);
481    }
482
483    let log_sum: f64 = power.iter().map(|&p| p.ln()).sum();
484    let geo_mean = (log_sum / power.len() as f64).exp();
485    let arith_mean: f64 = power.iter().sum::<f64>() / power.len() as f64;
486    let flatness = geo_mean / arith_mean;
487
488    let passed = flatness > 0.5;
489    let grade = if flatness > 0.8 {
490        'A'
491    } else if flatness > 0.6 {
492        'B'
493    } else if flatness > 0.4 {
494        'C'
495    } else if flatness > 0.2 {
496        'D'
497    } else {
498        'F'
499    };
500    TestResult {
501        name: name.to_string(),
502        passed,
503        p_value: None,
504        statistic: flatness,
505        details: format!("flatness={flatness:.4} (1.0=white noise)"),
506        grade,
507    }
508}

// ═══════════════════════════════════════════════════════════════════════════════
// 5. ENTROPY TESTS
// ═══════════════════════════════════════════════════════════════════════════════

514/// Test 10: Shannon entropy -- bits per byte (max 8.0).
515pub fn shannon_entropy(data: &[u8]) -> TestResult {
516    let name = "Shannon Entropy";
517    let n = data.len();
518    if n < 16 {
519        return insufficient(name, 16, n);
520    }
521    let mut hist = [0u64; 256];
522    for &b in data {
523        hist[b as usize] += 1;
524    }
525    let mut h = 0.0;
526    for &c in &hist {
527        if c > 0 {
528            let p = c as f64 / n as f64;
529            h -= p * p.log2();
530        }
531    }
532    let ratio = h / 8.0;
533    let grade = if ratio > 0.95 {
534        'A'
535    } else if ratio > 0.85 {
536        'B'
537    } else if ratio > 0.7 {
538        'C'
539    } else if ratio > 0.5 {
540        'D'
541    } else {
542        'F'
543    };
544    TestResult {
545        name: name.to_string(),
546        passed: ratio > 0.85,
547        p_value: None,
548        statistic: h,
549        details: format!("{h:.4} / 8.0 bits ({:.1}%)", ratio * 100.0),
550        grade,
551    }
552}
553
554/// Test 11: Min-entropy (NIST SP 800-90B): -log2(p_max).
555pub fn min_entropy(data: &[u8]) -> TestResult {
556    let name = "Min-Entropy";
557    let n = data.len();
558    if n < 16 {
559        return insufficient(name, 16, n);
560    }
561    let mut hist = [0u64; 256];
562    for &b in data {
563        hist[b as usize] += 1;
564    }
565    let p_max = *hist.iter().max().unwrap() as f64 / n as f64;
566    let h_min = -(p_max + 1e-15).log2();
567    let ratio = h_min / 8.0;
568    let grade = if ratio > 0.9 {
569        'A'
570    } else if ratio > 0.75 {
571        'B'
572    } else if ratio > 0.5 {
573        'C'
574    } else if ratio > 0.25 {
575        'D'
576    } else {
577        'F'
578    };
579    TestResult {
580        name: name.to_string(),
581        passed: ratio > 0.7,
582        p_value: None,
583        statistic: h_min,
584        details: format!("{h_min:.4} / 8.0 bits ({:.1}%)", ratio * 100.0),
585        grade,
586    }
587}
588
589/// Test 12: Permutation entropy -- complexity of ordinal patterns (order=4).
590pub fn permutation_entropy(data: &[u8]) -> TestResult {
591    let name = "Permutation Entropy";
592    let order = 4usize;
593    let n = data.len();
594    if n < order + 10 {
595        return insufficient(name, order + 10, n);
596    }
597    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
598
599    let mut patterns: HashMap<Vec<usize>, u64> = HashMap::new();
600    for i in 0..n - order {
601        let window = &arr[i..i + order];
602        let mut indices: Vec<usize> = (0..order).collect();
603        indices.sort_by(|&a, &b| {
604            window[a]
605                .partial_cmp(&window[b])
606                .unwrap_or(std::cmp::Ordering::Equal)
607                .then(a.cmp(&b))
608        });
609        *patterns.entry(indices).or_insert(0) += 1;
610    }
611
612    let total: u64 = patterns.values().sum();
613    let mut h = 0.0;
614    for &c in patterns.values() {
615        let p = c as f64 / total as f64;
616        h -= p * p.log2();
617    }
618    // factorial(4) = 24
619    let h_max = 24.0_f64.log2();
620    let normalized = if h_max > 0.0 { h / h_max } else { 0.0 };
621    let grade = if normalized > 0.95 {
622        'A'
623    } else if normalized > 0.85 {
624        'B'
625    } else if normalized > 0.7 {
626        'C'
627    } else if normalized > 0.5 {
628        'D'
629    } else {
630        'F'
631    };
632    TestResult {
633        name: name.to_string(),
634        passed: normalized > 0.85,
635        p_value: None,
636        statistic: normalized,
637        details: format!("PE={h:.4}/{h_max:.4} = {normalized:.4}"),
638        grade,
639    }
640}
641
642/// Test 13: Compression ratio -- zlib compression ratio (random ~ 1.0+).
643pub fn compression_ratio(data: &[u8]) -> TestResult {
644    let name = "Compression Ratio";
645    let n = data.len();
646    if n < 32 {
647        return insufficient(name, 32, n);
648    }
649    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
650    encoder.write_all(data).unwrap();
651    let compressed = encoder.finish().unwrap();
652    let ratio = compressed.len() as f64 / n as f64;
653    let grade = if ratio > 0.95 {
654        'A'
655    } else if ratio > 0.85 {
656        'B'
657    } else if ratio > 0.7 {
658        'C'
659    } else if ratio > 0.5 {
660        'D'
661    } else {
662        'F'
663    };
664    TestResult {
665        name: name.to_string(),
666        passed: ratio > 0.85,
667        p_value: None,
668        statistic: ratio,
669        details: format!("{}/{n} = {ratio:.4}", compressed.len()),
670        grade,
671    }
672}
673
674/// Test 14: Kolmogorov complexity -- compression at levels 1 and 9, compute complexity and spread.
675pub fn kolmogorov_complexity(data: &[u8]) -> TestResult {
676    let name = "Kolmogorov Complexity";
677    let n = data.len();
678    if n < 32 {
679        return insufficient(name, 32, n);
680    }
681
682    let compress_at = |level: u32| -> usize {
683        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
684        encoder.write_all(data).unwrap();
685        encoder.finish().unwrap().len()
686    };
687
688    let c1 = compress_at(1);
689    let c9 = compress_at(9);
690    let complexity = c9 as f64 / n as f64;
691    let spread = (c1 as f64 - c9 as f64) / n as f64;
692    let grade = if complexity > 0.95 {
693        'A'
694    } else if complexity > 0.85 {
695        'B'
696    } else if complexity > 0.7 {
697        'C'
698    } else if complexity > 0.5 {
699        'D'
700    } else {
701        'F'
702    };
703    TestResult {
704        name: name.to_string(),
705        passed: complexity > 0.85,
706        p_value: None,
707        statistic: complexity,
708        details: format!("K~={complexity:.4}, spread={spread:.4}"),
709        grade,
710    }
711}

// ═══════════════════════════════════════════════════════════════════════════════
// 6. CORRELATION TESTS
// ═══════════════════════════════════════════════════════════════════════════════

/// Test 15: Autocorrelation -- at lags 1-50. Count violations of 2/sqrt(n) threshold.
///
/// Each lag's sample correlation should sit inside the ~95% band
/// `|r| < 2/sqrt(n)`, so roughly 5% of lags are expected to violate it by
/// chance; the violation count is tested against a Poisson approximation.
pub fn autocorrelation(data: &[u8]) -> TestResult {
    let name = "Autocorrelation";
    let max_lag = 50usize;
    let n = data.len();
    if n < max_lag + 10 {
        return insufficient(name, max_lag + 10, n);
    }
    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
    // Constant input: correlation is undefined, report outright failure.
    if var < 1e-10 {
        return TestResult {
            name: name.to_string(),
            passed: false,
            p_value: None,
            statistic: 1.0,
            details: "Zero variance".to_string(),
            grade: 'F',
        };
    }
    // ~95% band half-width for a single lag's correlation coefficient.
    let threshold = 2.0 / (n as f64).sqrt();
    let mut max_corr = 0.0f64;
    let mut violations = 0u64;
    for lag in 1..=max_lag.min(n - 1) {
        let mut sum = 0.0;
        let count = n - lag;
        for i in 0..count {
            sum += (arr[i] - mean) * (arr[i + lag] - mean);
        }
        // Normalized sample autocorrelation at this lag.
        let c = sum / (count as f64 * var);
        if c.abs() > max_corr {
            max_corr = c.abs();
        }
        if c.abs() > threshold {
            violations += 1;
        }
    }
    // Expected ~5% of lags outside the band; model the count as Poisson
    // (lambda floored at 1 to avoid a degenerate rate for small max_lag).
    let expected_violations = 0.05 * max_lag as f64;
    let lambda = expected_violations.max(1.0);
    // sf(k - 1) = P(X > k - 1) = P(X >= k): probability of observing at
    // least this many violations under the Poisson model.
    let p = if violations > 0 {
        let poisson = Poisson::new(lambda).unwrap();
        poisson.sf(violations - 1)
    } else {
        1.0
    };
    TestResult {
        name: name.to_string(),
        passed: TestResult::pass_from_p(Some(p), 0.01),
        p_value: Some(p),
        statistic: max_corr,
        details: format!("violations={violations}/{max_lag}, max|r|={max_corr:.4}"),
        grade: TestResult::grade_from_p(Some(p)),
    }
}

773/// Test 16: Serial correlation -- adjacent value correlation. Z-test.
774pub fn serial_correlation(data: &[u8]) -> TestResult {
775    let name = "Serial Correlation";
776    let n = data.len();
777    if n < 20 {
778        return insufficient(name, 20, n);
779    }
780    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
781    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
782    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
783    if var < 1e-10 {
784        return TestResult {
785            name: name.to_string(),
786            passed: false,
787            p_value: None,
788            statistic: 1.0,
789            details: "Zero variance".to_string(),
790            grade: 'F',
791        };
792    }
793    let mut sum = 0.0;
794    for i in 0..n - 1 {
795        sum += (arr[i] - mean) * (arr[i + 1] - mean);
796    }
797    let r = sum / ((n - 1) as f64 * var);
798    let z = r * (n as f64).sqrt();
799    let norm = Normal::standard();
800    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
801    TestResult {
802        name: name.to_string(),
803        passed: TestResult::pass_from_p(Some(p), 0.01),
804        p_value: Some(p),
805        statistic: r.abs(),
806        details: format!("r={r:.6}, z={z:.4}"),
807        grade: TestResult::grade_from_p(Some(p)),
808    }
809}
810
811/// Test 17: Lag-N correlation -- correlation at lags [1, 2, 4, 8, 16, 32].
812pub fn lag_n_correlation(data: &[u8]) -> TestResult {
813    let name = "Lag-N Correlation";
814    let lags: &[usize] = &[1, 2, 4, 8, 16, 32];
815    let n = data.len();
816    let max_lag = *lags.iter().max().unwrap();
817    if n < max_lag + 10 {
818        return insufficient(name, max_lag + 10, n);
819    }
820    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
821    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
822    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
823    if var < 1e-10 {
824        return TestResult {
825            name: name.to_string(),
826            passed: false,
827            p_value: None,
828            statistic: 1.0,
829            details: "Zero variance".to_string(),
830            grade: 'F',
831        };
832    }
833    let threshold = 2.0 / (n as f64).sqrt();
834    let mut max_corr = 0.0f64;
835    let mut details_parts = Vec::new();
836    for &lag in lags {
837        if lag >= n {
838            continue;
839        }
840        let mut sum = 0.0;
841        let count = n - lag;
842        for i in 0..count {
843            sum += (arr[i] - mean) * (arr[i + lag] - mean);
844        }
845        let c = sum / (count as f64 * var);
846        if c.abs() > max_corr {
847            max_corr = c.abs();
848        }
849        details_parts.push(format!("lag{lag}={c:.4}"));
850    }
851    let passed = max_corr < threshold;
852    let grade = if max_corr < threshold * 0.5 {
853        'A'
854    } else if max_corr < threshold {
855        'B'
856    } else if max_corr < threshold * 2.0 {
857        'C'
858    } else if max_corr < threshold * 4.0 {
859        'D'
860    } else {
861        'F'
862    };
863    TestResult {
864        name: name.to_string(),
865        passed,
866        p_value: None,
867        statistic: max_corr,
868        details: details_parts.join(", "),
869        grade,
870    }
871}
872
/// Test 18: Cross-correlation -- even vs odd byte independence. Pearson r.
///
/// Splits the stream into even-indexed and odd-indexed byte subsequences
/// and checks that the two halves are linearly uncorrelated.
pub fn cross_correlation(data: &[u8]) -> TestResult {
    let name = "Cross-Correlation";
    let n = data.len();
    if n < 100 {
        return insufficient(name, 100, n);
    }
    let even: Vec<f64> = data.iter().step_by(2).map(|&b| b as f64).collect();
    let odd: Vec<f64> = data.iter().skip(1).step_by(2).map(|&b| b as f64).collect();
    // Trim both halves to a common length (they differ by one for odd n).
    let min_len = even.len().min(odd.len());
    if min_len < 2 {
        return insufficient(name, 100, n);
    }
    let even = &even[..min_len];
    let odd = &odd[..min_len];

    // Pearson r: covariance over the product of standard deviations.
    let mean_e: f64 = even.iter().sum::<f64>() / min_len as f64;
    let mean_o: f64 = odd.iter().sum::<f64>() / min_len as f64;
    let mut cov = 0.0;
    let mut var_e = 0.0;
    let mut var_o = 0.0;
    for i in 0..min_len {
        let de = even[i] - mean_e;
        let do_ = odd[i] - mean_o;
        cov += de * do_;
        var_e += de * de;
        var_o += do_ * do_;
    }
    let denom = (var_e * var_o).sqrt();
    // Either half constant: Pearson r is undefined.
    if denom < 1e-10 {
        return TestResult {
            name: name.to_string(),
            passed: false,
            p_value: None,
            statistic: 0.0,
            details: "Zero variance in one or both halves".to_string(),
            grade: 'F',
        };
    }
    let r = cov / denom;

    // t-statistic for Pearson r (epsilon guards division when |r| ~ 1).
    // For large n, t ~ N(0,1), so a two-sided normal p-value is used in
    // place of Student's t.
    let t = r * ((min_len as f64 - 2.0) / (1.0 - r * r).max(1e-15)).sqrt();
    let norm = Normal::standard();
    let p = 2.0 * (1.0 - norm.cdf(t.abs()));
    TestResult {
        name: name.to_string(),
        passed: TestResult::pass_from_p(Some(p), 0.01),
        p_value: Some(p),
        statistic: r.abs(),
        details: format!("r={r:.6} (even vs odd bytes)"),
        grade: TestResult::grade_from_p(Some(p)),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// 7. DISTRIBUTION TESTS
// ═══════════════════════════════════════════════════════════════════════════════

/// Test 19: Kolmogorov-Smirnov test vs uniform distribution.
///
/// Compares the empirical CDF of the byte values (mapped into (0, 1))
/// against the uniform CDF; the supremum distance is converted to a
/// p-value via the asymptotic Kolmogorov distribution.
pub fn ks_test(data: &[u8]) -> TestResult {
    let name = "Kolmogorov-Smirnov";
    let n = data.len();
    if n < 50 {
        return insufficient(name, 50, n);
    }
    // Map discrete bytes to continuous [0,1] with continuity correction
    // (matching the Anderson-Darling test mapping)
    let mut normalized: Vec<f64> = data.iter().map(|&b| (b as f64 + 0.5) / 256.0).collect();
    normalized.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // KS statistic: max |F_n(x) - F(x)|
    // Both one-sided gaps are checked at each order statistic, i.e. the
    // empirical CDF just before and just after the jump at x.
    let mut d_max = 0.0f64;
    let nf = n as f64;
    for (i, &x) in normalized.iter().enumerate() {
        let f_n_plus = (i + 1) as f64 / nf;
        let f_n_minus = i as f64 / nf;
        // Uniform CDF on [0,1] is the identity.
        let f_x = x.clamp(0.0, 1.0);
        let d1 = (f_n_plus - f_x).abs();
        let d2 = (f_n_minus - f_x).abs();
        d_max = d_max.max(d1).max(d2);
    }

    // Asymptotic KS p-value (Kolmogorov distribution)
    // The 0.12 + 0.11/sqrt(n) term is the standard small-sample correction;
    // 100 terms of the alternating series is far more than convergence needs.
    let sqrt_n = nf.sqrt();
    let lambda = (sqrt_n + 0.12 + 0.11 / sqrt_n) * d_max;
    let mut p = 0.0;
    for k in 1..=100i32 {
        let sign = if k % 2 == 0 { -1.0 } else { 1.0 };
        p += sign * (-2.0 * (k as f64 * lambda).powi(2)).exp();
    }
    p = (2.0 * p).clamp(0.0, 1.0);

    TestResult {
        name: name.to_string(),
        passed: TestResult::pass_from_p(Some(p), 0.01),
        p_value: Some(p),
        statistic: d_max,
        details: format!("D={d_max:.6}, n={n}"),
        grade: TestResult::grade_from_p(Some(p)),
    }
}

976/// Test 20: Anderson-Darling -- A-squared statistic for uniform. Critical values:
977/// 1.933 (5%), 2.492 (2.5%), 3.857 (1%).
978pub fn anderson_darling(data: &[u8]) -> TestResult {
979    let name = "Anderson-Darling";
980    let n = data.len();
981    if n < 50 {
982        return insufficient(name, 50, n);
983    }
984    // Map bytes to (0, 1): (value + 0.5) / 256
985    let mut sorted: Vec<f64> = data.iter().map(|&b| (b as f64 + 0.5) / 256.0).collect();
986    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
987
988    let nf = n as f64;
989    let mut s = 0.0;
990    for i in 0..n {
991        let idx = (i + 1) as f64;
992        let u = sorted[i].clamp(1e-15, 1.0 - 1e-15);
993        let u_rev = sorted[n - 1 - i].clamp(1e-15, 1.0 - 1e-15);
994        s += (2.0 * idx - 1.0) * (u.ln() + (1.0 - u_rev).ln());
995    }
996    let a2 = -nf - s / nf;
997    let a2_star = a2 * (1.0 + 0.75 / nf + 2.25 / (nf * nf));
998
999    let passed = a2_star < 2.492;
1000    let grade = if a2_star < 1.248 {
1001        'A'
1002    } else if a2_star < 1.933 {
1003        'B'
1004    } else if a2_star < 2.492 {
1005        'C'
1006    } else if a2_star < 3.857 {
1007        'D'
1008    } else {
1009        'F'
1010    };
1011    TestResult {
1012        name: name.to_string(),
1013        passed,
1014        p_value: None,
1015        statistic: a2_star,
1016        details: format!("A^2*={a2_star:.4}, 5% critical=2.492"),
1017        grade,
1018    }
1019}
1020
1021// ═══════════════════════════════════════════════════════════════════════════════
1022// 8. PATTERN TESTS
1023// ═══════════════════════════════════════════════════════════════════════════════
1024
1025/// Test 21: Overlapping template -- frequency of overlapping bit pattern (1,1,1,1).
1026pub fn overlapping_template(data: &[u8]) -> TestResult {
1027    let name = "Overlapping Template";
1028    let template: &[u8] = &[1, 1, 1, 1];
1029    let m = template.len();
1030    let bits = to_bits(data);
1031    let n = bits.len();
1032    if n < 1000 {
1033        return insufficient(name, 1000, n);
1034    }
1035
1036    let mut count = 0u64;
1037    for i in 0..=n - m {
1038        if bits[i..i + m] == *template {
1039            count += 1;
1040        }
1041    }
1042    let expected = (n - m + 1) as f64 / (1u64 << m) as f64;
1043    let std = (expected * (1.0 - 1.0 / (1u64 << m) as f64)).sqrt();
1044    if std < 1e-10 {
1045        return TestResult {
1046            name: name.to_string(),
1047            passed: false,
1048            p_value: None,
1049            statistic: 0.0,
1050            details: "Zero std".to_string(),
1051            grade: 'F',
1052        };
1053    }
1054    let z = (count as f64 - expected) / std;
1055    let norm = Normal::standard();
1056    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
1057    TestResult {
1058        name: name.to_string(),
1059        passed: TestResult::pass_from_p(Some(p), 0.01),
1060        p_value: Some(p),
1061        statistic: z.abs(),
1062        details: format!("count={count}, expected={expected:.0}"),
1063        grade: TestResult::grade_from_p(Some(p)),
1064    }
1065}
1066
1067/// Test 22: Non-overlapping template -- non-overlapping occurrences of (0,0,1,1).
1068pub fn non_overlapping_template(data: &[u8]) -> TestResult {
1069    let name = "Non-overlapping Template";
1070    let template: &[u8] = &[0, 0, 1, 1];
1071    let m = template.len();
1072    let bits = to_bits(data);
1073    let n = bits.len();
1074    if n < 1000 {
1075        return insufficient(name, 1000, n);
1076    }
1077
1078    let mut count = 0u64;
1079    let mut i = 0;
1080    while i + m <= n {
1081        if bits[i..i + m] == *template {
1082            count += 1;
1083            i += m;
1084        } else {
1085            i += 1;
1086        }
1087    }
1088    let expected = n as f64 / (1u64 << m) as f64;
1089    let var =
1090        n as f64 * (1.0 / (1u64 << m) as f64 - (2.0 * m as f64 - 1.0) / (1u64 << (2 * m)) as f64);
1091    let var = if var <= 0.0 { 1.0 } else { var };
1092    let z = (count as f64 - expected) / var.sqrt();
1093    let norm = Normal::standard();
1094    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
1095    TestResult {
1096        name: name.to_string(),
1097        passed: TestResult::pass_from_p(Some(p), 0.01),
1098        p_value: Some(p),
1099        statistic: z.abs(),
1100        details: format!("count={count}, expected={expected:.0}"),
1101        grade: TestResult::grade_from_p(Some(p)),
1102    }
1103}
1104
1105/// Test 23: Maurer's universal statistical test (L=6, Q=640).
1106pub fn maurers_universal(data: &[u8]) -> TestResult {
1107    let name = "Maurer's Universal";
1108    let l = 6usize;
1109    let q = 640usize;
1110    let bits = to_bits(data);
1111    let n_bits = bits.len();
1112    let total_blocks = n_bits / l;
1113    if total_blocks <= q {
1114        return insufficient(name, (q + 100) * l, n_bits);
1115    }
1116    let k = total_blocks - q;
1117    if k < 100 || q < 10 * (1 << l) {
1118        return insufficient(name, (q + 100) * l, n_bits);
1119    }
1120
1121    let num_patterns = 1usize << l;
1122    let mut table = vec![0usize; num_patterns];
1123
1124    // Initialization phase
1125    for i in 0..q {
1126        let mut block = 0usize;
1127        for j in 0..l {
1128            block = (block << 1) | bits[i * l + j] as usize;
1129        }
1130        table[block] = i + 1;
1131    }
1132
1133    // Test phase
1134    let mut total = 0.0f64;
1135    for i in q..q + k {
1136        let mut block = 0usize;
1137        for j in 0..l {
1138            block = (block << 1) | bits[i * l + j] as usize;
1139        }
1140        let prev = table[block];
1141        let distance = if prev > 0 {
1142            (i + 1 - prev) as f64
1143        } else {
1144            (i + 1) as f64
1145        };
1146        total += distance.log2();
1147        table[block] = i + 1;
1148    }
1149
1150    let fn_val = total / k as f64;
1151    let expected = 5.2177052;
1152    let variance = 2.954;
1153    let sigma = (variance / k as f64).sqrt();
1154    let z = (fn_val - expected).abs() / sigma.max(1e-10);
1155    let p = erfc(z / 2.0_f64.sqrt());
1156    TestResult {
1157        name: name.to_string(),
1158        passed: TestResult::pass_from_p(Some(p), 0.01),
1159        p_value: Some(p),
1160        statistic: fn_val,
1161        details: format!("fn={fn_val:.4}, expected={expected:.4}, L={l}"),
1162        grade: TestResult::grade_from_p(Some(p)),
1163    }
1164}
1165
1166// ═══════════════════════════════════════════════════════════════════════════════
1167// 9. ADVANCED TESTS
1168// ═══════════════════════════════════════════════════════════════════════════════
1169
/// GF(2) Gaussian elimination to compute binary matrix rank.
///
/// `matrix` is a row-major array of 0/1 entries with `rows * cols` elements;
/// returns the rank over GF(2).
fn gf2_rank(matrix: &[u8], rows: usize, cols: usize) -> usize {
    // Work on an owned copy so elimination does not disturb the caller's data.
    let mut work: Vec<Vec<u8>> = matrix
        .chunks(cols)
        .take(rows)
        .map(|row| row.to_vec())
        .collect();
    let mut rank = 0usize;
    for col in 0..cols {
        // Locate a pivot: a row at or below `rank` with a 1 in this column.
        let Some(pivot) = (rank..rows).find(|&r| work[r][col] == 1) else {
            continue;
        };
        work.swap(rank, pivot);
        // XOR the pivot row into every other row carrying a 1 in this column
        // (full reduction above and below the pivot).
        let pivot_row = work[rank].clone();
        for (r, row) in work.iter_mut().enumerate() {
            if r != rank && row[col] == 1 {
                for (cell, &p) in row.iter_mut().zip(pivot_row.iter()) {
                    *cell ^= p;
                }
            }
        }
        rank += 1;
    }
    rank
}
1201
1202/// Test 24: Binary matrix rank -- GF(2) Gaussian elimination on 32x32 binary matrices.
1203pub fn binary_matrix_rank(data: &[u8]) -> TestResult {
1204    let name = "Binary Matrix Rank";
1205    let bits = to_bits(data);
1206    let n = bits.len();
1207    let m_size = 32;
1208    let q_size = 32;
1209    let bits_per_matrix = m_size * q_size;
1210    let num_matrices = n / bits_per_matrix;
1211    if num_matrices < 38 {
1212        return insufficient(name, 38 * bits_per_matrix, n);
1213    }
1214
1215    let mut full_rank = 0u64;
1216    let mut rank_m1 = 0u64;
1217    let min_dim = m_size.min(q_size);
1218    for i in 0..num_matrices {
1219        let start = i * bits_per_matrix;
1220        let matrix = &bits[start..start + bits_per_matrix];
1221        let rank = gf2_rank(matrix, m_size, q_size);
1222        if rank == min_dim {
1223            full_rank += 1;
1224        } else if rank == min_dim - 1 {
1225            rank_m1 += 1;
1226        }
1227    }
1228    let rest = num_matrices as u64 - full_rank - rank_m1;
1229    let n_f = num_matrices as f64;
1230
1231    let p_full = 0.2888;
1232    let p_m1 = 0.5776;
1233    let p_rest = 0.1336;
1234    let chi2 = (full_rank as f64 - n_f * p_full).powi(2) / (n_f * p_full)
1235        + (rank_m1 as f64 - n_f * p_m1).powi(2) / (n_f * p_m1)
1236        + (rest as f64 - n_f * p_rest).powi(2) / (n_f * p_rest);
1237
1238    let dist = ChiSquared::new(2.0).unwrap();
1239    let p = dist.sf(chi2);
1240    TestResult {
1241        name: name.to_string(),
1242        passed: TestResult::pass_from_p(Some(p), 0.01),
1243        p_value: Some(p),
1244        statistic: chi2,
1245        details: format!("N={num_matrices}, full={full_rank}, full-1={rank_m1}"),
1246        grade: TestResult::grade_from_p(Some(p)),
1247    }
1248}
1249
/// Berlekamp-Massey algorithm for binary sequences. Returns the LFSR
/// complexity (length of the shortest LFSR generating `seq`).
///
/// Returns 0 for an empty sequence; the previous version panicked on
/// `c[0] = 1` when `seq` was empty.
fn berlekamp_massey(seq: &[u8]) -> usize {
    let n = seq.len();
    if n == 0 {
        return 0;
    }
    let mut c = vec![0u8; n]; // current connection polynomial (c[0] = 1)
    let mut b = vec![0u8; n]; // connection polynomial at the last length change
    c[0] = 1;
    b[0] = 1;
    let mut l: usize = 0; // current LFSR length
    let mut m: isize = -1; // index of the last length change

    for ni in 0..n {
        // Discrepancy: predicted next bit (from the current LFSR) XOR actual.
        let mut d: u8 = seq[ni];
        for i in 1..=l {
            d ^= c[i] & seq[ni - i];
        }
        if d == 1 {
            // Correct the polynomial: c(x) ^= x^(ni - m) * b(x).
            let t = c.clone();
            let shift = (ni as isize - m) as usize;
            for i in shift..n {
                c[i] ^= b[i - shift];
            }
            // Grow the register only when the current length is insufficient
            // (equivalent to the classic 2L <= ni condition).
            if l <= ni / 2 {
                l = ni + 1 - l;
                m = ni as isize;
                b = t;
            }
        }
    }
    l
}
1280
1281/// Test 25: Linear complexity -- Berlekamp-Massey LFSR complexity on 200-bit blocks.
1282pub fn linear_complexity(data: &[u8]) -> TestResult {
1283    let name = "Linear Complexity";
1284    let block_size = 200usize;
1285    let bits = to_bits(data);
1286    let n = bits.len();
1287    let num_blocks = n / block_size;
1288    if num_blocks < 6 {
1289        return insufficient(name, 6 * block_size, n);
1290    }
1291
1292    let mut complexities = Vec::with_capacity(num_blocks);
1293    for i in 0..num_blocks {
1294        let start = i * block_size;
1295        let block = &bits[start..start + block_size];
1296        complexities.push(berlekamp_massey(block));
1297    }
1298
1299    let m = block_size as f64;
1300    let sign = if block_size.is_multiple_of(2) {
1301        1.0
1302    } else {
1303        -1.0
1304    };
1305    let mu = m / 2.0 + (9.0 + sign) / 36.0 - (m / 3.0 + 2.0 / 9.0) / 2.0_f64.powf(m);
1306
1307    let t_vals: Vec<f64> = complexities
1308        .iter()
1309        .map(|&c| sign * (c as f64 - mu) + 2.0 / 9.0)
1310        .collect();
1311
1312    let mut observed = [0u64; 7];
1313    for &t in &t_vals {
1314        let bin = if t <= -2.5 {
1315            0
1316        } else if t <= -1.5 {
1317            1
1318        } else if t <= -0.5 {
1319            2
1320        } else if t <= 0.5 {
1321            3
1322        } else if t <= 1.5 {
1323            4
1324        } else if t <= 2.5 {
1325            5
1326        } else {
1327            6
1328        };
1329        observed[bin] += 1;
1330    }
1331
1332    let mut probs = [0.010882, 0.03534, 0.08884, 0.5, 0.08884, 0.03534, 0.010882];
1333    let sum_rest: f64 = probs[..6].iter().sum();
1334    probs[6] = 1.0 - sum_rest;
1335
1336    let mut chi2 = 0.0;
1337    let n_f = num_blocks as f64;
1338    for i in 0..7 {
1339        let expected = probs[i] * n_f;
1340        if expected > 0.0 {
1341            let diff = observed[i] as f64 - expected;
1342            chi2 += diff * diff / expected;
1343        }
1344    }
1345
1346    let dist = ChiSquared::new(6.0).unwrap();
1347    let p = dist.sf(chi2);
1348    let mean_c: f64 = complexities.iter().map(|&c| c as f64).sum::<f64>() / num_blocks as f64;
1349    TestResult {
1350        name: name.to_string(),
1351        passed: TestResult::pass_from_p(Some(p), 0.01),
1352        p_value: Some(p),
1353        statistic: chi2,
1354        details: format!("N={num_blocks}, mean_complexity={mean_c:.1}"),
1355        grade: TestResult::grade_from_p(Some(p)),
1356    }
1357}
1358
1359/// Test 26: Cumulative sums (CUSUM) -- detect drift/bias.
1360pub fn cusum_test(data: &[u8]) -> TestResult {
1361    let name = "Cumulative Sums";
1362    let bits = to_bits(data);
1363    let n = bits.len();
1364    if n < 100 {
1365        return insufficient(name, 100, n);
1366    }
1367
1368    let mut cumsum = Vec::with_capacity(n);
1369    let mut s: i64 = 0;
1370    for &bit in &bits {
1371        s += if bit == 1 { 1 } else { -1 };
1372        cumsum.push(s);
1373    }
1374    let z = cumsum.iter().map(|&x| x.unsigned_abs()).max().unwrap() as f64;
1375    if z < 1e-10 {
1376        return TestResult {
1377            name: name.to_string(),
1378            passed: true,
1379            p_value: Some(1.0),
1380            statistic: 0.0,
1381            details: format!("max|S|=0, n={n}"),
1382            grade: 'A',
1383        };
1384    }
1385
1386    let nf = n as f64;
1387    let sqrt_n = nf.sqrt();
1388    let norm = Normal::standard();
1389    let k_start = ((-nf / z + 1.0) / 4.0).floor() as i64;
1390    let k_end = ((nf / z - 1.0) / 4.0).ceil() as i64;
1391    let mut s_val = 0.0;
1392    for k in k_start..=k_end {
1393        let kf = k as f64;
1394        s_val += norm.cdf((4.0 * kf + 1.0) * z / sqrt_n) - norm.cdf((4.0 * kf - 1.0) * z / sqrt_n);
1395    }
1396    let p = (1.0 - s_val).clamp(0.0, 1.0);
1397    TestResult {
1398        name: name.to_string(),
1399        passed: TestResult::pass_from_p(Some(p), 0.01),
1400        p_value: Some(p),
1401        statistic: z,
1402        details: format!("max|S|={z:.1}, n={n}"),
1403        grade: TestResult::grade_from_p(Some(p)),
1404    }
1405}
1406
1407/// Test 27: Random excursions -- cycles in cumulative sum random walk.
1408pub fn random_excursions(data: &[u8]) -> TestResult {
1409    let name = "Random Excursions";
1410    let bits = to_bits(data);
1411    let n = bits.len();
1412    if n < 1000 {
1413        return insufficient(name, 1000, n);
1414    }
1415
1416    // Build cumulative sum with leading and trailing zeros
1417    let mut cumsum = Vec::with_capacity(n + 2);
1418    cumsum.push(0i64);
1419    let mut s: i64 = 0;
1420    for &bit in &bits {
1421        s += if bit == 1 { 1 } else { -1 };
1422        cumsum.push(s);
1423    }
1424    cumsum.push(0);
1425
1426    let zeros: Vec<usize> = cumsum
1427        .iter()
1428        .enumerate()
1429        .filter_map(|(i, &v)| if v == 0 { Some(i) } else { None })
1430        .collect();
1431
1432    let j = if !zeros.is_empty() {
1433        zeros.len() - 1
1434    } else {
1435        0
1436    };
1437
1438    if j < 500 {
1439        return TestResult {
1440            name: name.to_string(),
1441            passed: true,
1442            p_value: None,
1443            statistic: j as f64,
1444            details: format!("Only {j} cycles (need 500 for reliable test)"),
1445            grade: 'B',
1446        };
1447    }
1448
1449    let expected_cycles = (n as f64) / (2.0 * PI * n as f64).sqrt();
1450    let ratio = j as f64 / expected_cycles.max(1.0);
1451    let passed = ratio > 0.5 && ratio < 2.0;
1452    let grade = if ratio > 0.8 && ratio < 1.2 {
1453        'A'
1454    } else if ratio > 0.6 && ratio < 1.5 {
1455        'B'
1456    } else if passed {
1457        'C'
1458    } else {
1459        'F'
1460    };
1461    TestResult {
1462        name: name.to_string(),
1463        passed,
1464        p_value: None,
1465        statistic: j as f64,
1466        details: format!("cycles={j}, expected~={expected_cycles:.0}"),
1467        grade,
1468    }
1469}
1470
1471/// Test 28: Birthday spacing -- spacing between repeated values, Poisson test.
1472pub fn birthday_spacing(data: &[u8]) -> TestResult {
1473    let name = "Birthday Spacing";
1474    let n = data.len();
1475    if n < 100 {
1476        return insufficient(name, 100, n);
1477    }
1478
1479    let values: Vec<u64> = if n >= 200 {
1480        let half = n / 2;
1481        (0..half)
1482            .map(|i| data[i * 2] as u64 * 256 + data[i * 2 + 1] as u64)
1483            .collect()
1484    } else {
1485        data.iter().map(|&b| b as u64).collect()
1486    };
1487
1488    let m = values.len();
1489    let mut sorted = values.clone();
1490    sorted.sort();
1491
1492    let mut spacings: Vec<u64> = Vec::with_capacity(m.saturating_sub(1));
1493    for i in 1..m {
1494        spacings.push(sorted[i] - sorted[i - 1]);
1495    }
1496    spacings.sort();
1497
1498    let mut dups = 0u64;
1499    for i in 1..spacings.len() {
1500        if spacings[i] == spacings[i - 1] {
1501            dups += 1;
1502        }
1503    }
1504
1505    let d = sorted.last().copied().unwrap_or(1).max(1) as f64;
1506    let mf = m as f64;
1507    let lambda = (mf * mf * mf / (4.0 * d)).max(0.01);
1508
1509    let p = if lambda > 0.0 {
1510        let poisson = Poisson::new(lambda).unwrap();
1511        let p_upper = if dups > 0 { poisson.sf(dups - 1) } else { 1.0 };
1512        let p_lower = poisson.cdf(dups);
1513        p_upper.max(p_lower).min(1.0)
1514    } else {
1515        1.0
1516    };
1517
1518    TestResult {
1519        name: name.to_string(),
1520        passed: TestResult::pass_from_p(Some(p), 0.01),
1521        p_value: Some(p),
1522        statistic: dups as f64,
1523        details: format!("duplicates={dups}, lambda={lambda:.2}, m={m}"),
1524        grade: TestResult::grade_from_p(Some(p)),
1525    }
1526}
1527
1528// ═══════════════════════════════════════════════════════════════════════════════
1529// 10. PRACTICAL TESTS
1530// ═══════════════════════════════════════════════════════════════════════════════
1531
1532/// Test 29: Bit avalanche -- adjacent bytes should differ by ~4 bits (50%).
1533pub fn bit_avalanche(data: &[u8]) -> TestResult {
1534    let name = "Bit Avalanche";
1535    let n = data.len();
1536    if n < 100 {
1537        return insufficient(name, 100, n);
1538    }
1539
1540    let mut total_diffs = 0u64;
1541    let pairs = n - 1;
1542    for i in 0..pairs {
1543        total_diffs += (data[i] ^ data[i + 1]).count_ones() as u64;
1544    }
1545    let mean_diff = total_diffs as f64 / pairs as f64;
1546    let expected = 4.0;
1547    let std = 2.0_f64.sqrt(); // binomial std for n=8, p=0.5
1548    let z = (mean_diff - expected).abs() / (std / (pairs as f64).sqrt());
1549    let norm = Normal::standard();
1550    let p = 2.0 * (1.0 - norm.cdf(z));
1551    TestResult {
1552        name: name.to_string(),
1553        passed: TestResult::pass_from_p(Some(p), 0.01),
1554        p_value: Some(p),
1555        statistic: mean_diff,
1556        details: format!("mean_diff={mean_diff:.3}/8 bits, expected=4.0"),
1557        grade: TestResult::grade_from_p(Some(p)),
1558    }
1559}
1560
1561/// Test 30: Monte Carlo pi -- estimate pi using (x,y) pairs in unit circle.
1562pub fn monte_carlo_pi(data: &[u8]) -> TestResult {
1563    let name = "Monte Carlo Pi";
1564    let n = data.len();
1565    if n < 200 {
1566        return insufficient(name, 200, n);
1567    }
1568    let pairs = n / 2;
1569    let mut inside = 0u64;
1570    for i in 0..pairs {
1571        let x = data[i] as f64 / 255.0;
1572        let y = data[pairs + i] as f64 / 255.0;
1573        if x * x + y * y <= 1.0 {
1574            inside += 1;
1575        }
1576    }
1577    let pi_est = 4.0 * inside as f64 / pairs as f64;
1578    let error = (pi_est - PI).abs() / PI;
1579    let grade = if error < 0.01 {
1580        'A'
1581    } else if error < 0.03 {
1582        'B'
1583    } else if error < 0.1 {
1584        'C'
1585    } else if error < 0.2 {
1586        'D'
1587    } else {
1588        'F'
1589    };
1590    TestResult {
1591        name: name.to_string(),
1592        passed: error < 0.05,
1593        p_value: None,
1594        statistic: pi_est,
1595        details: format!("pi~={pi_est:.6}, error={:.4}%", error * 100.0),
1596        grade,
1597    }
1598}
1599
1600/// Test 31: Mean and variance -- mean (~127.5) and variance (~5461.25) of uniform bytes.
1601pub fn mean_variance(data: &[u8]) -> TestResult {
1602    let name = "Mean & Variance";
1603    let n = data.len();
1604    if n < 50 {
1605        return insufficient(name, 50, n);
1606    }
1607    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
1608    let nf = n as f64;
1609    let mean: f64 = arr.iter().sum::<f64>() / nf;
1610    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / nf;
1611
1612    let expected_mean = 127.5;
1613    let expected_var = (256.0 * 256.0 - 1.0) / 12.0; // 5461.25
1614
1615    let z_mean = (mean - expected_mean).abs() / (expected_var / nf).sqrt();
1616    let norm = Normal::standard();
1617    let p_mean = 2.0 * (1.0 - norm.cdf(z_mean));
1618
1619    let chi2_var = (nf - 1.0) * var / expected_var;
1620    let chi_dist = ChiSquared::new(nf - 1.0).unwrap();
1621    let p_var = 2.0 * chi_dist.cdf(chi2_var).min(chi_dist.sf(chi2_var));
1622
1623    let p = p_mean.min(p_var);
1624    TestResult {
1625        name: name.to_string(),
1626        passed: TestResult::pass_from_p(Some(p), 0.01),
1627        p_value: Some(p),
1628        statistic: z_mean,
1629        details: format!("mean={mean:.2} (exp 127.5), var={var:.1} (exp {expected_var:.1})"),
1630        grade: TestResult::grade_from_p(Some(p)),
1631    }
1632}
1633
1634// ═══════════════════════════════════════════════════════════════════════════════
1635// Test battery
1636// ═══════════════════════════════════════════════════════════════════════════════
1637
1638/// Run the complete 31-test battery on a byte slice.
1639pub fn run_all_tests(data: &[u8]) -> Vec<TestResult> {
1640    let tests: Vec<fn(&[u8]) -> TestResult> = vec![
1641        // Frequency (3)
1642        monobit_frequency,
1643        block_frequency,
1644        byte_frequency,
1645        // Runs (2)
1646        runs_test,
1647        longest_run_of_ones,
1648        // Serial (2)
1649        serial_test,
1650        approximate_entropy,
1651        // Spectral (2)
1652        dft_spectral,
1653        spectral_flatness,
1654        // Entropy (5)
1655        shannon_entropy,
1656        min_entropy,
1657        permutation_entropy,
1658        compression_ratio,
1659        kolmogorov_complexity,
1660        // Correlation (4)
1661        autocorrelation,
1662        serial_correlation,
1663        lag_n_correlation,
1664        cross_correlation,
1665        // Distribution (2)
1666        ks_test,
1667        anderson_darling,
1668        // Pattern (3)
1669        overlapping_template,
1670        non_overlapping_template,
1671        maurers_universal,
1672        // Advanced (5)
1673        binary_matrix_rank,
1674        linear_complexity,
1675        cusum_test,
1676        random_excursions,
1677        birthday_spacing,
1678        // Practical (3)
1679        bit_avalanche,
1680        monte_carlo_pi,
1681        mean_variance,
1682    ];
1683
1684    tests
1685        .iter()
1686        .map(|test_fn| {
1687            match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| test_fn(data))) {
1688                Ok(result) => result,
1689                Err(_) => TestResult {
1690                    name: "Unknown".to_string(),
1691                    passed: false,
1692                    p_value: None,
1693                    statistic: 0.0,
1694                    details: "Test panicked".to_string(),
1695                    grade: 'F',
1696                },
1697            }
1698        })
1699        .collect()
1700}
1701
1702/// Calculate overall quality score (0-100) from test results.
1703///
1704/// Each grade maps to a score: A=100, B=75, C=50, D=25, F=0.
1705/// Returns the average across all tests.
1706pub fn calculate_quality_score(results: &[TestResult]) -> f64 {
1707    if results.is_empty() {
1708        return 0.0;
1709    }
1710    let total: f64 = results
1711        .iter()
1712        .map(|r| match r.grade {
1713            'A' => 100.0,
1714            'B' => 75.0,
1715            'C' => 50.0,
1716            'D' => 25.0,
1717            _ => 0.0,
1718        })
1719        .sum();
1720    total / results.len() as f64
1721}
1722
#[cfg(test)]
mod tests {
    use super::*;

    /// Generate pseudo-random data for testing (simple LCG).
    fn pseudo_random(n: usize) -> Vec<u8> {
        let mut data = Vec::with_capacity(n);
        let mut state: u64 = 0xDEAD_BEEF_CAFE_BABE;
        for _ in 0..n {
            // Knuth MMIX LCG constants; high bits have better statistical
            // quality than the low bits, hence the >> 33.
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    /// Bit expansion is MSB-first per byte.
    #[test]
    fn test_to_bits() {
        let data = [0b10110001u8];
        let bits = to_bits(&data);
        assert_eq!(bits, vec![1, 0, 1, 1, 0, 0, 0, 1]);
    }

    /// Grade boundaries match the thresholds documented on `grade_from_p`.
    #[test]
    fn test_grade_from_p() {
        assert_eq!(TestResult::grade_from_p(Some(0.5)), 'A');
        assert_eq!(TestResult::grade_from_p(Some(0.05)), 'B');
        assert_eq!(TestResult::grade_from_p(Some(0.005)), 'C');
        assert_eq!(TestResult::grade_from_p(Some(0.0005)), 'D');
        assert_eq!(TestResult::grade_from_p(Some(0.00000001)), 'F');
        assert_eq!(TestResult::grade_from_p(None), 'F');
    }

    /// Pass threshold is inclusive at p >= threshold; None always fails.
    #[test]
    fn test_pass_from_p() {
        assert!(TestResult::pass_from_p(Some(0.05), 0.01));
        assert!(!TestResult::pass_from_p(Some(0.005), 0.01));
        assert!(!TestResult::pass_from_p(None, 0.01));
    }

    /// Too-short input yields the standard "Insufficient" failure result.
    #[test]
    fn test_insufficient_data() {
        let data = [0u8; 5];
        let result = monobit_frequency(&data);
        assert!(!result.passed);
        assert!(result.details.contains("Insufficient"));
    }

    /// An all-zero buffer should fail the majority of the battery.
    #[test]
    fn test_constant_data_fails() {
        let data = vec![0u8; 1000];
        let results = run_all_tests(&data);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(passed_count < results.len() / 2);
    }

    /// LCG output is decent enough to pass most of the battery.
    #[test]
    fn test_pseudo_random_passes() {
        let data = pseudo_random(10000);
        let results = run_all_tests(&data);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(
            passed_count > results.len() / 2,
            "Only {passed_count}/{} tests passed",
            results.len()
        );
    }

    /// One 'A' (100) and one 'F' (0) average to a score of 50.
    #[test]
    fn test_quality_score() {
        let results = vec![
            TestResult {
                name: "A".into(),
                passed: true,
                p_value: Some(0.5),
                statistic: 0.0,
                details: String::new(),
                grade: 'A',
            },
            TestResult {
                name: "F".into(),
                passed: false,
                p_value: Some(0.0),
                statistic: 0.0,
                details: String::new(),
                grade: 'F',
            },
        ];
        let score = calculate_quality_score(&results);
        assert!((score - 50.0).abs() < 0.01);
    }

    /// The battery must always report exactly 31 results.
    #[test]
    fn test_all_31_tests_present() {
        let data = pseudo_random(10000);
        let results = run_all_tests(&data);
        assert_eq!(results.len(), 31);
    }

    /// Monobit on sufficient data produces a p-value (not an early return).
    #[test]
    fn test_monobit_basic() {
        let data = pseudo_random(1000);
        let result = monobit_frequency(&data);
        assert!(result.p_value.is_some());
    }

    /// Random bytes should have near-maximal (8-bit) Shannon entropy.
    #[test]
    fn test_shannon_entropy_random() {
        let data = pseudo_random(10000);
        let result = shannon_entropy(&data);
        assert!(
            result.statistic > 7.0,
            "Shannon entropy too low: {}",
            result.statistic
        );
    }

    /// Random bytes should be essentially incompressible (ratio near 1).
    #[test]
    fn test_compression_ratio_random() {
        let data = pseudo_random(10000);
        let result = compression_ratio(&data);
        assert!(
            result.statistic > 0.9,
            "Compression ratio too low: {}",
            result.statistic
        );
    }

    /// Empty input scores 0 rather than dividing by zero.
    #[test]
    fn test_calculate_quality_score_empty() {
        assert_eq!(calculate_quality_score(&[]), 0.0);
    }
}