// openentropy_tests/lib.rs
1//! NIST SP 800-22 inspired randomness test battery.
2//!
3//! Provides 31 statistical tests for evaluating the quality of random byte sequences.
4//! Each test returns a [`TestResult`] with a p-value (where applicable), a pass/fail
5//! determination, and a letter grade (A through F).
6
7use flate2::Compression;
8use flate2::write::ZlibEncoder;
9use rustfft::{FftPlanner, num_complex::Complex};
10use statrs::distribution::{ChiSquared, ContinuousCDF, DiscreteCDF, Normal, Poisson};
11use statrs::function::erf::erfc;
12use std::collections::HashMap;
13use std::f64::consts::PI;
14use std::io::Write;
15
16// ═══════════════════════════════════════════════════════════════════════════════
17// Core types
18// ═══════════════════════════════════════════════════════════════════════════════
19
/// Result of a single randomness test.
#[derive(Debug, Clone)]
pub struct TestResult {
    pub name: String,
    pub passed: bool,
    pub p_value: Option<f64>,
    pub statistic: f64,
    pub details: String,
    pub grade: char,
}

impl TestResult {
    /// Assign a letter grade based on p-value.
    ///
    /// - A: p >= 0.1
    /// - B: p >= 0.01
    /// - C: p >= 0.001
    /// - D: p >= 0.0001
    /// - F: otherwise or None
    pub fn grade_from_p(p: Option<f64>) -> char {
        match p {
            None => 'F',
            Some(p) => {
                if p >= 0.1 {
                    'A'
                } else if p >= 0.01 {
                    'B'
                } else if p >= 0.001 {
                    'C'
                } else if p >= 0.0001 {
                    'D'
                } else {
                    'F'
                }
            }
        }
    }

    /// Determine pass/fail from p-value against a threshold (default 0.01).
    pub fn pass_from_p(p: Option<f64>, threshold: f64) -> bool {
        p.map_or(false, |p| p >= threshold)
    }
}
57
58// ═══════════════════════════════════════════════════════════════════════════════
59// Helpers
60// ═══════════════════════════════════════════════════════════════════════════════
61
/// Unpack a byte slice into individual bits (MSB first per byte).
fn to_bits(data: &[u8]) -> Vec<u8> {
    data.iter()
        .flat_map(|&byte| (0..8).rev().map(move |shift| (byte >> shift) & 1))
        .collect()
}
72
73/// Return a failing `TestResult` when data is too short.
74fn insufficient(name: &str, needed: usize, got: usize) -> TestResult {
75    TestResult {
76        name: name.to_string(),
77        passed: false,
78        p_value: None,
79        statistic: 0.0,
80        details: format!("Insufficient data: need {needed}, got {got}"),
81        grade: 'F',
82    }
83}
84
85// ═══════════════════════════════════════════════════════════════════════════════
86// 1. FREQUENCY TESTS
87// ═══════════════════════════════════════════════════════════════════════════════
88
89/// Test 1: Monobit frequency -- proportion of 1s vs 0s should be ~50%.
90pub fn monobit_frequency(data: &[u8]) -> TestResult {
91    let name = "Monobit Frequency";
92    let bits = to_bits(data);
93    let n = bits.len();
94    if n < 100 {
95        return insufficient(name, 100, n);
96    }
97    let s: i64 = bits
98        .iter()
99        .map(|&b| if b == 1 { 1i64 } else { -1i64 })
100        .sum();
101    let s_obs = (s as f64).abs() / (n as f64).sqrt();
102    let p = erfc(s_obs / 2.0_f64.sqrt());
103    TestResult {
104        name: name.to_string(),
105        passed: TestResult::pass_from_p(Some(p), 0.01),
106        p_value: Some(p),
107        statistic: s_obs,
108        details: format!("S={s}, n={n}"),
109        grade: TestResult::grade_from_p(Some(p)),
110    }
111}
112
113/// Test 2: Block frequency -- frequency within 128-bit blocks. Chi-squared test.
114pub fn block_frequency(data: &[u8]) -> TestResult {
115    let name = "Block Frequency";
116    let block_size: usize = 128;
117    let bits = to_bits(data);
118    let n = bits.len();
119    let num_blocks = n / block_size;
120    if num_blocks < 10 {
121        return insufficient(name, block_size * 10, n);
122    }
123    let mut chi2 = 0.0;
124    for i in 0..num_blocks {
125        let start = i * block_size;
126        let ones: usize = bits[start..start + block_size]
127            .iter()
128            .map(|&b| b as usize)
129            .sum();
130        let proportion = ones as f64 / block_size as f64;
131        chi2 += (proportion - 0.5) * (proportion - 0.5);
132    }
133    chi2 *= 4.0 * block_size as f64;
134    let dist = ChiSquared::new(num_blocks as f64).unwrap();
135    let p = dist.sf(chi2);
136    TestResult {
137        name: name.to_string(),
138        passed: TestResult::pass_from_p(Some(p), 0.01),
139        p_value: Some(p),
140        statistic: chi2,
141        details: format!("blocks={num_blocks}, M={block_size}"),
142        grade: TestResult::grade_from_p(Some(p)),
143    }
144}
145
146/// Test 3: Byte frequency -- chi-squared on byte value distribution (256 bins).
147pub fn byte_frequency(data: &[u8]) -> TestResult {
148    let name = "Byte Frequency";
149    let n = data.len();
150    if n < 256 {
151        return insufficient(name, 256, n);
152    }
153    let mut hist = [0u64; 256];
154    for &b in data {
155        hist[b as usize] += 1;
156    }
157    let expected = n as f64 / 256.0;
158    let chi2: f64 = hist
159        .iter()
160        .map(|&c| {
161            let diff = c as f64 - expected;
162            diff * diff / expected
163        })
164        .sum();
165    let dist = ChiSquared::new(255.0).unwrap();
166    let p = dist.sf(chi2);
167    TestResult {
168        name: name.to_string(),
169        passed: TestResult::pass_from_p(Some(p), 0.01),
170        p_value: Some(p),
171        statistic: chi2,
172        details: format!("n={n}, expected_per_bin={expected:.1}"),
173        grade: TestResult::grade_from_p(Some(p)),
174    }
175}
176
177// ═══════════════════════════════════════════════════════════════════════════════
178// 2. RUNS TESTS
179// ═══════════════════════════════════════════════════════════════════════════════
180
181/// Test 4: Runs test -- number of uninterrupted runs of 0s or 1s.
182pub fn runs_test(data: &[u8]) -> TestResult {
183    let name = "Runs Test";
184    let bits = to_bits(data);
185    let n = bits.len();
186    if n < 100 {
187        return insufficient(name, 100, n);
188    }
189    let ones: usize = bits.iter().map(|&b| b as usize).sum();
190    let prop = ones as f64 / n as f64;
191    if (prop - 0.5).abs() >= 2.0 / (n as f64).sqrt() {
192        return TestResult {
193            name: name.to_string(),
194            passed: false,
195            p_value: Some(0.0),
196            statistic: 0.0,
197            details: format!("Pre-test failed: proportion={prop:.4}"),
198            grade: 'F',
199        };
200    }
201    let mut runs: usize = 1;
202    for i in 1..n {
203        if bits[i] != bits[i - 1] {
204            runs += 1;
205        }
206    }
207    let expected = 2.0 * n as f64 * prop * (1.0 - prop) + 1.0;
208    let std = 2.0 * (2.0 * n as f64).sqrt() * prop * (1.0 - prop);
209    if std < 1e-10 {
210        return TestResult {
211            name: name.to_string(),
212            passed: false,
213            p_value: Some(0.0),
214            statistic: 0.0,
215            details: "Zero variance".to_string(),
216            grade: 'F',
217        };
218    }
219    let z = (runs as f64 - expected).abs() / std;
220    let p = erfc(z / 2.0_f64.sqrt());
221    TestResult {
222        name: name.to_string(),
223        passed: TestResult::pass_from_p(Some(p), 0.01),
224        p_value: Some(p),
225        statistic: z,
226        details: format!("runs={runs}, expected={expected:.0}"),
227        grade: TestResult::grade_from_p(Some(p)),
228    }
229}
230
231/// Test 5: Longest run of ones -- within 8-bit blocks, chi-squared against theoretical probs.
232pub fn longest_run_of_ones(data: &[u8]) -> TestResult {
233    let name = "Longest Run of Ones";
234    let bits = to_bits(data);
235    let n = bits.len();
236    if n < 128 {
237        return insufficient(name, 128, n);
238    }
239    let block_size = 8;
240    let num_blocks = n / block_size;
241
242    // For each block, find the longest run of 1s
243    let mut observed = [0u64; 4]; // NIST bins for M=8: {≤1, 2, 3, ≥4}
244    for i in 0..num_blocks {
245        let start = i * block_size;
246        let block = &bits[start..start + block_size];
247        let mut max_run = 0u32;
248        let mut current_run = 0u32;
249        for &bit in block {
250            if bit == 1 {
251                current_run += 1;
252                if current_run > max_run {
253                    max_run = current_run;
254                }
255            } else {
256                current_run = 0;
257            }
258        }
259        match max_run {
260            0 | 1 => observed[0] += 1,
261            2 => observed[1] += 1,
262            3 => observed[2] += 1,
263            _ => observed[3] += 1,
264        }
265    }
266
267    // Theoretical probabilities for M=8
268    let probs = [0.2148, 0.3672, 0.2305, 0.1875];
269    let mut chi2 = 0.0;
270    for i in 0..4 {
271        let expected = probs[i] * num_blocks as f64;
272        if expected > 0.0 {
273            let diff = observed[i] as f64 - expected;
274            chi2 += diff * diff / expected;
275        }
276    }
277    let dist = ChiSquared::new(3.0).unwrap();
278    let p = dist.sf(chi2);
279    TestResult {
280        name: name.to_string(),
281        passed: TestResult::pass_from_p(Some(p), 0.01),
282        p_value: Some(p),
283        statistic: chi2,
284        details: format!("blocks={num_blocks}, M={block_size}"),
285        grade: TestResult::grade_from_p(Some(p)),
286    }
287}
288
289// ═══════════════════════════════════════════════════════════════════════════════
290// 3. SERIAL TESTS
291// ═══════════════════════════════════════════════════════════════════════════════
292
/// Helper: compute psi-squared for the serial test.
///
/// Counts every overlapping m-bit pattern (with wrap-around) over the first
/// `n` bits and returns (2^m / n) * sum(counts^2) - n.
fn psi_sq(bits: &[u8], n: usize, m: usize) -> f64 {
    if m < 1 {
        return 0.0;
    }
    let num_patterns = 1usize << m;
    let mut counts = vec![0u64; num_patterns];
    for start in 0..n {
        // Assemble the m-bit pattern starting at `start`, wrapping at n.
        let pattern = (0..m).fold(0usize, |acc, offset| {
            (acc << 1) | bits[(start + offset) % n] as usize
        });
        counts[pattern] += 1;
    }
    let sum_sq: f64 = counts.iter().map(|&c| (c as f64).powi(2)).sum();
    sum_sq * num_patterns as f64 / n as f64 - n as f64
}
310
311/// Test 6: Serial test -- frequency of overlapping m-bit patterns (m=4).
312pub fn serial_test(data: &[u8]) -> TestResult {
313    let name = "Serial Test";
314    let m = 4usize;
315    let mut bits = to_bits(data);
316    let mut n = bits.len();
317    if n > 20000 {
318        bits.truncate(20000);
319        n = 20000;
320    }
321    if n < (1 << m) + 10 {
322        return insufficient(name, (1 << m) + 10, n);
323    }
324
325    let psi_m = psi_sq(&bits, n, m);
326    let psi_m1 = psi_sq(&bits, n, m - 1);
327    let _psi_m2 = if m >= 2 { psi_sq(&bits, n, m - 2) } else { 0.0 };
328    let delta1 = psi_m - psi_m1;
329
330    let df = (1u64 << (m - 1)) as f64;
331    let dist = ChiSquared::new(df).unwrap();
332    let p = dist.sf(delta1);
333    TestResult {
334        name: name.to_string(),
335        passed: TestResult::pass_from_p(Some(p), 0.01),
336        p_value: Some(p),
337        statistic: delta1,
338        details: format!("m={m}, n_bits={n}"),
339        grade: TestResult::grade_from_p(Some(p)),
340    }
341}
342
343/// Test 7: Approximate entropy -- compare m and m+1 bit pattern frequencies (m=3).
344pub fn approximate_entropy(data: &[u8]) -> TestResult {
345    let name = "Approximate Entropy";
346    let m = 3usize;
347    let mut bits = to_bits(data);
348    let mut n = bits.len();
349    if n > 20000 {
350        bits.truncate(20000);
351        n = 20000;
352    }
353    if n < 64 {
354        return insufficient(name, 64, n);
355    }
356
357    let phi = |block_len: usize| -> f64 {
358        let num_patterns = 1usize << block_len;
359        let mut counts = vec![0u64; num_patterns];
360        for i in 0..n {
361            let mut val = 0usize;
362            for j in 0..block_len {
363                val = (val << 1) | bits[(i + j) % n] as usize;
364            }
365            counts[val] += 1;
366        }
367        let mut sum = 0.0;
368        for &c in &counts {
369            if c > 0 {
370                let p = c as f64 / n as f64;
371                sum += p * p.log2();
372            }
373        }
374        sum
375    };
376
377    let phi_m = phi(m);
378    let phi_m1 = phi(m + 1);
379    let apen = phi_m - phi_m1;
380    // NIST formula (natural log): chi2 = 2*n*(ln(2) - ApEn_ln).
381    // Since phi uses log2, ApEn_log2 = ApEn_ln / ln(2), so:
382    // chi2 = 2*n*ln(2)*(1 - ApEn_log2)
383    let chi2 = 2.0 * n as f64 * 2.0_f64.ln() * (1.0 - apen);
384
385    let df = (1u64 << m) as f64;
386    let dist = ChiSquared::new(df).unwrap();
387    let p = dist.sf(chi2);
388    TestResult {
389        name: name.to_string(),
390        passed: TestResult::pass_from_p(Some(p), 0.01),
391        p_value: Some(p),
392        statistic: chi2,
393        details: format!("ApEn={apen:.6}, m={m}"),
394        grade: TestResult::grade_from_p(Some(p)),
395    }
396}
397
398// ═══════════════════════════════════════════════════════════════════════════════
399// 4. SPECTRAL TESTS
400// ═══════════════════════════════════════════════════════════════════════════════
401
402/// Test 8: DFT spectral -- detect periodic features via FFT.
403pub fn dft_spectral(data: &[u8]) -> TestResult {
404    let name = "DFT Spectral";
405    let bits = to_bits(data);
406    let n = bits.len();
407    if n < 64 {
408        return insufficient(name, 64, n);
409    }
410
411    let mut buffer: Vec<Complex<f64>> = bits
412        .iter()
413        .map(|&b| Complex {
414            re: if b == 1 { 1.0 } else { -1.0 },
415            im: 0.0,
416        })
417        .collect();
418
419    let mut planner = FftPlanner::new();
420    let fft = planner.plan_fft_forward(n);
421    fft.process(&mut buffer);
422
423    let half = n / 2;
424    let magnitudes: Vec<f64> = buffer[..half].iter().map(|c| c.norm()).collect();
425
426    let threshold = (2.995732274 * n as f64).sqrt();
427    let n0 = 0.95 * half as f64;
428    let n1 = magnitudes.iter().filter(|&&m| m < threshold).count() as f64;
429    let d = (n1 - n0) / (n as f64 * 0.95 * 0.05 / 4.0).sqrt();
430    let p = erfc(d.abs() / 2.0_f64.sqrt());
431    TestResult {
432        name: name.to_string(),
433        passed: TestResult::pass_from_p(Some(p), 0.01),
434        p_value: Some(p),
435        statistic: d,
436        details: format!("peaks_below_threshold={}/{half}", n1 as u64),
437        grade: TestResult::grade_from_p(Some(p)),
438    }
439}
440
441/// Test 9: Spectral flatness -- geometric/arithmetic mean ratio of power spectrum.
442pub fn spectral_flatness(data: &[u8]) -> TestResult {
443    let name = "Spectral Flatness";
444    let n = data.len();
445    if n < 64 {
446        return insufficient(name, 64, n);
447    }
448
449    let mean_val: f64 = data.iter().map(|&b| b as f64).sum::<f64>() / n as f64;
450    let mut buffer: Vec<Complex<f64>> = data
451        .iter()
452        .map(|&b| Complex {
453            re: b as f64 - mean_val,
454            im: 0.0,
455        })
456        .collect();
457
458    let mut planner = FftPlanner::new();
459    let fft = planner.plan_fft_forward(n);
460    fft.process(&mut buffer);
461
462    // Power spectrum, skip DC bin (index 0)
463    let half = n / 2;
464    if half < 2 {
465        return insufficient(name, 64, n);
466    }
467    let power: Vec<f64> = buffer[1..half]
468        .iter()
469        .map(|c| c.norm_sqr() + 1e-15)
470        .collect();
471
472    if power.is_empty() {
473        return insufficient(name, 64, n);
474    }
475
476    let log_sum: f64 = power.iter().map(|&p| p.ln()).sum();
477    let geo_mean = (log_sum / power.len() as f64).exp();
478    let arith_mean: f64 = power.iter().sum::<f64>() / power.len() as f64;
479    let flatness = geo_mean / arith_mean;
480
481    let passed = flatness > 0.5;
482    let grade = if flatness > 0.8 {
483        'A'
484    } else if flatness > 0.6 {
485        'B'
486    } else if flatness > 0.4 {
487        'C'
488    } else if flatness > 0.2 {
489        'D'
490    } else {
491        'F'
492    };
493    TestResult {
494        name: name.to_string(),
495        passed,
496        p_value: None,
497        statistic: flatness,
498        details: format!("flatness={flatness:.4} (1.0=white noise)"),
499        grade,
500    }
501}
502
503// ═══════════════════════════════════════════════════════════════════════════════
504// 5. ENTROPY TESTS
505// ═══════════════════════════════════════════════════════════════════════════════
506
507/// Test 10: Shannon entropy -- bits per byte (max 8.0).
508pub fn shannon_entropy(data: &[u8]) -> TestResult {
509    let name = "Shannon Entropy";
510    let n = data.len();
511    if n < 16 {
512        return insufficient(name, 16, n);
513    }
514    let mut hist = [0u64; 256];
515    for &b in data {
516        hist[b as usize] += 1;
517    }
518    let mut h = 0.0;
519    for &c in &hist {
520        if c > 0 {
521            let p = c as f64 / n as f64;
522            h -= p * p.log2();
523        }
524    }
525    let ratio = h / 8.0;
526    let grade = if ratio > 0.95 {
527        'A'
528    } else if ratio > 0.85 {
529        'B'
530    } else if ratio > 0.7 {
531        'C'
532    } else if ratio > 0.5 {
533        'D'
534    } else {
535        'F'
536    };
537    TestResult {
538        name: name.to_string(),
539        passed: ratio > 0.85,
540        p_value: None,
541        statistic: h,
542        details: format!("{h:.4} / 8.0 bits ({:.1}%)", ratio * 100.0),
543        grade,
544    }
545}
546
547/// Test 11: Min-entropy (NIST SP 800-90B): -log2(p_max).
548pub fn min_entropy(data: &[u8]) -> TestResult {
549    let name = "Min-Entropy";
550    let n = data.len();
551    if n < 16 {
552        return insufficient(name, 16, n);
553    }
554    let mut hist = [0u64; 256];
555    for &b in data {
556        hist[b as usize] += 1;
557    }
558    let p_max = *hist.iter().max().unwrap() as f64 / n as f64;
559    let h_min = -(p_max + 1e-15).log2();
560    let ratio = h_min / 8.0;
561    let grade = if ratio > 0.9 {
562        'A'
563    } else if ratio > 0.75 {
564        'B'
565    } else if ratio > 0.5 {
566        'C'
567    } else if ratio > 0.25 {
568        'D'
569    } else {
570        'F'
571    };
572    TestResult {
573        name: name.to_string(),
574        passed: ratio > 0.7,
575        p_value: None,
576        statistic: h_min,
577        details: format!("{h_min:.4} / 8.0 bits ({:.1}%)", ratio * 100.0),
578        grade,
579    }
580}
581
582/// Test 12: Permutation entropy -- complexity of ordinal patterns (order=4).
583pub fn permutation_entropy(data: &[u8]) -> TestResult {
584    let name = "Permutation Entropy";
585    let order = 4usize;
586    let n = data.len();
587    if n < order + 10 {
588        return insufficient(name, order + 10, n);
589    }
590    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
591
592    let mut patterns: HashMap<Vec<usize>, u64> = HashMap::new();
593    for i in 0..n - order {
594        let window = &arr[i..i + order];
595        let mut indices: Vec<usize> = (0..order).collect();
596        indices.sort_by(|&a, &b| {
597            window[a]
598                .partial_cmp(&window[b])
599                .unwrap_or(std::cmp::Ordering::Equal)
600                .then(a.cmp(&b))
601        });
602        *patterns.entry(indices).or_insert(0) += 1;
603    }
604
605    let total: u64 = patterns.values().sum();
606    let mut h = 0.0;
607    for &c in patterns.values() {
608        let p = c as f64 / total as f64;
609        h -= p * p.log2();
610    }
611    // factorial(4) = 24
612    let h_max = 24.0_f64.log2();
613    let normalized = if h_max > 0.0 { h / h_max } else { 0.0 };
614    let grade = if normalized > 0.95 {
615        'A'
616    } else if normalized > 0.85 {
617        'B'
618    } else if normalized > 0.7 {
619        'C'
620    } else if normalized > 0.5 {
621        'D'
622    } else {
623        'F'
624    };
625    TestResult {
626        name: name.to_string(),
627        passed: normalized > 0.85,
628        p_value: None,
629        statistic: normalized,
630        details: format!("PE={h:.4}/{h_max:.4} = {normalized:.4}"),
631        grade,
632    }
633}
634
635/// Test 13: Compression ratio -- zlib compression ratio (random ~ 1.0+).
636pub fn compression_ratio(data: &[u8]) -> TestResult {
637    let name = "Compression Ratio";
638    let n = data.len();
639    if n < 32 {
640        return insufficient(name, 32, n);
641    }
642    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
643    encoder.write_all(data).unwrap();
644    let compressed = encoder.finish().unwrap();
645    let ratio = compressed.len() as f64 / n as f64;
646    let grade = if ratio > 0.95 {
647        'A'
648    } else if ratio > 0.85 {
649        'B'
650    } else if ratio > 0.7 {
651        'C'
652    } else if ratio > 0.5 {
653        'D'
654    } else {
655        'F'
656    };
657    TestResult {
658        name: name.to_string(),
659        passed: ratio > 0.85,
660        p_value: None,
661        statistic: ratio,
662        details: format!("{}/{n} = {ratio:.4}", compressed.len()),
663        grade,
664    }
665}
666
667/// Test 14: Kolmogorov complexity -- compression at levels 1 and 9, compute complexity and spread.
668pub fn kolmogorov_complexity(data: &[u8]) -> TestResult {
669    let name = "Kolmogorov Complexity";
670    let n = data.len();
671    if n < 32 {
672        return insufficient(name, 32, n);
673    }
674
675    let compress_at = |level: u32| -> usize {
676        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
677        encoder.write_all(data).unwrap();
678        encoder.finish().unwrap().len()
679    };
680
681    let c1 = compress_at(1);
682    let c9 = compress_at(9);
683    let complexity = c9 as f64 / n as f64;
684    let spread = (c1 as f64 - c9 as f64) / n as f64;
685    let grade = if complexity > 0.95 {
686        'A'
687    } else if complexity > 0.85 {
688        'B'
689    } else if complexity > 0.7 {
690        'C'
691    } else if complexity > 0.5 {
692        'D'
693    } else {
694        'F'
695    };
696    TestResult {
697        name: name.to_string(),
698        passed: complexity > 0.85,
699        p_value: None,
700        statistic: complexity,
701        details: format!("K~={complexity:.4}, spread={spread:.4}"),
702        grade,
703    }
704}
705
706// ═══════════════════════════════════════════════════════════════════════════════
707// 6. CORRELATION TESTS
708// ═══════════════════════════════════════════════════════════════════════════════
709
710/// Test 15: Autocorrelation -- at lags 1-50. Count violations of 2/sqrt(n) threshold.
711pub fn autocorrelation(data: &[u8]) -> TestResult {
712    let name = "Autocorrelation";
713    let max_lag = 50usize;
714    let n = data.len();
715    if n < max_lag + 10 {
716        return insufficient(name, max_lag + 10, n);
717    }
718    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
719    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
720    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
721    if var < 1e-10 {
722        return TestResult {
723            name: name.to_string(),
724            passed: false,
725            p_value: None,
726            statistic: 1.0,
727            details: "Zero variance".to_string(),
728            grade: 'F',
729        };
730    }
731    let threshold = 2.0 / (n as f64).sqrt();
732    let mut max_corr = 0.0f64;
733    let mut violations = 0u64;
734    for lag in 1..=max_lag.min(n - 1) {
735        let mut sum = 0.0;
736        let count = n - lag;
737        for i in 0..count {
738            sum += (arr[i] - mean) * (arr[i + lag] - mean);
739        }
740        let c = sum / (count as f64 * var);
741        if c.abs() > max_corr {
742            max_corr = c.abs();
743        }
744        if c.abs() > threshold {
745            violations += 1;
746        }
747    }
748    let expected_violations = 0.05 * max_lag as f64;
749    let lambda = expected_violations.max(1.0);
750    let p = if violations > 0 {
751        let poisson = Poisson::new(lambda).unwrap();
752        poisson.sf(violations - 1)
753    } else {
754        1.0
755    };
756    TestResult {
757        name: name.to_string(),
758        passed: TestResult::pass_from_p(Some(p), 0.01),
759        p_value: Some(p),
760        statistic: max_corr,
761        details: format!("violations={violations}/{max_lag}, max|r|={max_corr:.4}"),
762        grade: TestResult::grade_from_p(Some(p)),
763    }
764}
765
766/// Test 16: Serial correlation -- adjacent value correlation. Z-test.
767pub fn serial_correlation(data: &[u8]) -> TestResult {
768    let name = "Serial Correlation";
769    let n = data.len();
770    if n < 20 {
771        return insufficient(name, 20, n);
772    }
773    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
774    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
775    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
776    if var < 1e-10 {
777        return TestResult {
778            name: name.to_string(),
779            passed: false,
780            p_value: None,
781            statistic: 1.0,
782            details: "Zero variance".to_string(),
783            grade: 'F',
784        };
785    }
786    let mut sum = 0.0;
787    for i in 0..n - 1 {
788        sum += (arr[i] - mean) * (arr[i + 1] - mean);
789    }
790    let r = sum / ((n - 1) as f64 * var);
791    let z = r * (n as f64).sqrt();
792    let norm = Normal::standard();
793    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
794    TestResult {
795        name: name.to_string(),
796        passed: TestResult::pass_from_p(Some(p), 0.01),
797        p_value: Some(p),
798        statistic: r.abs(),
799        details: format!("r={r:.6}, z={z:.4}"),
800        grade: TestResult::grade_from_p(Some(p)),
801    }
802}
803
804/// Test 17: Lag-N correlation -- correlation at lags [1, 2, 4, 8, 16, 32].
805pub fn lag_n_correlation(data: &[u8]) -> TestResult {
806    let name = "Lag-N Correlation";
807    let lags: &[usize] = &[1, 2, 4, 8, 16, 32];
808    let n = data.len();
809    let max_lag = *lags.iter().max().unwrap();
810    if n < max_lag + 10 {
811        return insufficient(name, max_lag + 10, n);
812    }
813    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
814    let mean: f64 = arr.iter().sum::<f64>() / n as f64;
815    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / n as f64;
816    if var < 1e-10 {
817        return TestResult {
818            name: name.to_string(),
819            passed: false,
820            p_value: None,
821            statistic: 1.0,
822            details: "Zero variance".to_string(),
823            grade: 'F',
824        };
825    }
826    let threshold = 2.0 / (n as f64).sqrt();
827    let mut max_corr = 0.0f64;
828    let mut details_parts = Vec::new();
829    for &lag in lags {
830        if lag >= n {
831            continue;
832        }
833        let mut sum = 0.0;
834        let count = n - lag;
835        for i in 0..count {
836            sum += (arr[i] - mean) * (arr[i + lag] - mean);
837        }
838        let c = sum / (count as f64 * var);
839        if c.abs() > max_corr {
840            max_corr = c.abs();
841        }
842        details_parts.push(format!("lag{lag}={c:.4}"));
843    }
844    let passed = max_corr < threshold;
845    let grade = if max_corr < threshold * 0.5 {
846        'A'
847    } else if max_corr < threshold {
848        'B'
849    } else if max_corr < threshold * 2.0 {
850        'C'
851    } else if max_corr < threshold * 4.0 {
852        'D'
853    } else {
854        'F'
855    };
856    TestResult {
857        name: name.to_string(),
858        passed,
859        p_value: None,
860        statistic: max_corr,
861        details: details_parts.join(", "),
862        grade,
863    }
864}
865
866/// Test 18: Cross-correlation -- even vs odd byte independence. Pearson r.
867pub fn cross_correlation(data: &[u8]) -> TestResult {
868    let name = "Cross-Correlation";
869    let n = data.len();
870    if n < 100 {
871        return insufficient(name, 100, n);
872    }
873    let even: Vec<f64> = data.iter().step_by(2).map(|&b| b as f64).collect();
874    let odd: Vec<f64> = data.iter().skip(1).step_by(2).map(|&b| b as f64).collect();
875    let min_len = even.len().min(odd.len());
876    if min_len < 2 {
877        return insufficient(name, 100, n);
878    }
879    let even = &even[..min_len];
880    let odd = &odd[..min_len];
881
882    let mean_e: f64 = even.iter().sum::<f64>() / min_len as f64;
883    let mean_o: f64 = odd.iter().sum::<f64>() / min_len as f64;
884    let mut cov = 0.0;
885    let mut var_e = 0.0;
886    let mut var_o = 0.0;
887    for i in 0..min_len {
888        let de = even[i] - mean_e;
889        let do_ = odd[i] - mean_o;
890        cov += de * do_;
891        var_e += de * de;
892        var_o += do_ * do_;
893    }
894    let denom = (var_e * var_o).sqrt();
895    if denom < 1e-10 {
896        return TestResult {
897            name: name.to_string(),
898            passed: false,
899            p_value: None,
900            statistic: 0.0,
901            details: "Zero variance in one or both halves".to_string(),
902            grade: 'F',
903        };
904    }
905    let r = cov / denom;
906
907    // For large n, t ~ N(0,1)
908    let t = r * ((min_len as f64 - 2.0) / (1.0 - r * r).max(1e-15)).sqrt();
909    let norm = Normal::standard();
910    let p = 2.0 * (1.0 - norm.cdf(t.abs()));
911    TestResult {
912        name: name.to_string(),
913        passed: TestResult::pass_from_p(Some(p), 0.01),
914        p_value: Some(p),
915        statistic: r.abs(),
916        details: format!("r={r:.6} (even vs odd bytes)"),
917        grade: TestResult::grade_from_p(Some(p)),
918    }
919}
920
921// ═══════════════════════════════════════════════════════════════════════════════
922// 7. DISTRIBUTION TESTS
923// ═══════════════════════════════════════════════════════════════════════════════
924
925/// Test 19: Kolmogorov-Smirnov test vs uniform distribution.
926pub fn ks_test(data: &[u8]) -> TestResult {
927    let name = "Kolmogorov-Smirnov";
928    let n = data.len();
929    if n < 50 {
930        return insufficient(name, 50, n);
931    }
932    // Map discrete bytes to continuous [0,1] with continuity correction
933    // (matching the Anderson-Darling test mapping)
934    let mut normalized: Vec<f64> = data.iter().map(|&b| (b as f64 + 0.5) / 256.0).collect();
935    normalized.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
936
937    // KS statistic: max |F_n(x) - F(x)|
938    let mut d_max = 0.0f64;
939    let nf = n as f64;
940    for (i, &x) in normalized.iter().enumerate() {
941        let f_n_plus = (i + 1) as f64 / nf;
942        let f_n_minus = i as f64 / nf;
943        let f_x = x.clamp(0.0, 1.0);
944        let d1 = (f_n_plus - f_x).abs();
945        let d2 = (f_n_minus - f_x).abs();
946        d_max = d_max.max(d1).max(d2);
947    }
948
949    // Asymptotic KS p-value (Kolmogorov distribution)
950    let sqrt_n = nf.sqrt();
951    let lambda = (sqrt_n + 0.12 + 0.11 / sqrt_n) * d_max;
952    let mut p = 0.0;
953    for k in 1..=100i32 {
954        let sign = if k % 2 == 0 { -1.0 } else { 1.0 };
955        p += sign * (-2.0 * (k as f64 * lambda).powi(2)).exp();
956    }
957    p = (2.0 * p).clamp(0.0, 1.0);
958
959    TestResult {
960        name: name.to_string(),
961        passed: TestResult::pass_from_p(Some(p), 0.01),
962        p_value: Some(p),
963        statistic: d_max,
964        details: format!("D={d_max:.6}, n={n}"),
965        grade: TestResult::grade_from_p(Some(p)),
966    }
967}
968
/// Test 20: Anderson-Darling -- A-squared statistic for uniformity on (0, 1).
///
/// Critical values for a fully specified distribution (Stephens 1974):
/// 1.248 (~25%), 1.933 (10%), 2.492 (5%), 3.857 (1%). The test passes below
/// the 5% value (2.492); grades step through the table above.
pub fn anderson_darling(data: &[u8]) -> TestResult {
    let name = "Anderson-Darling";
    let n = data.len();
    if n < 50 {
        return insufficient(name, 50, n);
    }
    // Map bytes to (0, 1): (value + 0.5) / 256
    let mut sorted: Vec<f64> = data.iter().map(|&b| (b as f64 + 0.5) / 256.0).collect();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

    // A^2 = -n - (1/n) * sum (2i-1) [ln u_i + ln(1 - u_{n+1-i})],
    // with u clamped away from 0/1 so the logs stay finite.
    let nf = n as f64;
    let mut s = 0.0;
    for i in 0..n {
        let idx = (i + 1) as f64;
        let u = sorted[i].clamp(1e-15, 1.0 - 1e-15);
        let u_rev = sorted[n - 1 - i].clamp(1e-15, 1.0 - 1e-15);
        s += (2.0 * idx - 1.0) * (u.ln() + (1.0 - u_rev).ln());
    }
    let a2 = -nf - s / nf;
    // Small-sample correction factor for the fully-specified case.
    let a2_star = a2 * (1.0 + 0.75 / nf + 2.25 / (nf * nf));

    let passed = a2_star < 2.492;
    let grade = if a2_star < 1.248 {
        'A'
    } else if a2_star < 1.933 {
        'B'
    } else if a2_star < 2.492 {
        'C'
    } else if a2_star < 3.857 {
        'D'
    } else {
        'F'
    };
    TestResult {
        name: name.to_string(),
        passed,
        p_value: None,
        statistic: a2_star,
        details: format!("A^2*={a2_star:.4}, 5% critical=2.492"),
        grade,
    }
}
1013
1014// ═══════════════════════════════════════════════════════════════════════════════
1015// 8. PATTERN TESTS
1016// ═══════════════════════════════════════════════════════════════════════════════
1017
/// Test 21: Overlapping template -- frequency of overlapping bit pattern (1,1,1,1).
///
/// Counts every (possibly overlapping) occurrence of the 4-bit all-ones
/// template and compares the count to its expectation under randomness using
/// a two-sided normal approximation.
///
/// NOTE(review): the standard deviation below is the binomial
/// sqrt(E * (1 - 2^-m)), which treats overlapping window matches as
/// independent; overlapping occurrence counts are positively correlated, so
/// this understates the true variance (NIST's version of this test uses a
/// chi-squared statistic over block counts instead) -- confirm acceptable.
pub fn overlapping_template(data: &[u8]) -> TestResult {
    let name = "Overlapping Template";
    let template: &[u8] = &[1, 1, 1, 1];
    let m = template.len();
    let bits = to_bits(data);
    let n = bits.len();
    if n < 1000 {
        return insufficient(name, 1000, n);
    }

    // Slide a window one bit at a time; matches may overlap.
    let mut count = 0u64;
    for i in 0..=n - m {
        if bits[i..i + m] == *template {
            count += 1;
        }
    }
    // Each of the n-m+1 windows matches with probability 2^-m.
    let expected = (n - m + 1) as f64 / (1u64 << m) as f64;
    let std = (expected * (1.0 - 1.0 / (1u64 << m) as f64)).sqrt();
    if std < 1e-10 {
        // Degenerate guard; unreachable for n >= 1000 but kept defensively.
        return TestResult {
            name: name.to_string(),
            passed: false,
            p_value: None,
            statistic: 0.0,
            details: "Zero std".to_string(),
            grade: 'F',
        };
    }
    let z = (count as f64 - expected) / std;
    let norm = Normal::standard();
    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
    TestResult {
        name: name.to_string(),
        passed: TestResult::pass_from_p(Some(p), 0.01),
        p_value: Some(p),
        statistic: z.abs(),
        details: format!("count={count}, expected={expected:.0}"),
        grade: TestResult::grade_from_p(Some(p)),
    }
}
1059
1060/// Test 22: Non-overlapping template -- non-overlapping occurrences of (0,0,1,1).
1061pub fn non_overlapping_template(data: &[u8]) -> TestResult {
1062    let name = "Non-overlapping Template";
1063    let template: &[u8] = &[0, 0, 1, 1];
1064    let m = template.len();
1065    let bits = to_bits(data);
1066    let n = bits.len();
1067    if n < 1000 {
1068        return insufficient(name, 1000, n);
1069    }
1070
1071    let mut count = 0u64;
1072    let mut i = 0;
1073    while i + m <= n {
1074        if bits[i..i + m] == *template {
1075            count += 1;
1076            i += m;
1077        } else {
1078            i += 1;
1079        }
1080    }
1081    let expected = n as f64 / (1u64 << m) as f64;
1082    let var =
1083        n as f64 * (1.0 / (1u64 << m) as f64 - (2.0 * m as f64 - 1.0) / (1u64 << (2 * m)) as f64);
1084    let var = if var <= 0.0 { 1.0 } else { var };
1085    let z = (count as f64 - expected) / var.sqrt();
1086    let norm = Normal::standard();
1087    let p = 2.0 * (1.0 - norm.cdf(z.abs()));
1088    TestResult {
1089        name: name.to_string(),
1090        passed: TestResult::pass_from_p(Some(p), 0.01),
1091        p_value: Some(p),
1092        statistic: z.abs(),
1093        details: format!("count={count}, expected={expected:.0}"),
1094        grade: TestResult::grade_from_p(Some(p)),
1095    }
1096}
1097
1098/// Test 23: Maurer's universal statistical test (L=6, Q=640).
1099pub fn maurers_universal(data: &[u8]) -> TestResult {
1100    let name = "Maurer's Universal";
1101    let l = 6usize;
1102    let q = 640usize;
1103    let bits = to_bits(data);
1104    let n_bits = bits.len();
1105    let total_blocks = n_bits / l;
1106    if total_blocks <= q {
1107        return insufficient(name, (q + 100) * l, n_bits);
1108    }
1109    let k = total_blocks - q;
1110    if k < 100 || q < 10 * (1 << l) {
1111        return insufficient(name, (q + 100) * l, n_bits);
1112    }
1113
1114    let num_patterns = 1usize << l;
1115    let mut table = vec![0usize; num_patterns];
1116
1117    // Initialization phase
1118    for i in 0..q {
1119        let mut block = 0usize;
1120        for j in 0..l {
1121            block = (block << 1) | bits[i * l + j] as usize;
1122        }
1123        table[block] = i + 1;
1124    }
1125
1126    // Test phase
1127    let mut total = 0.0f64;
1128    for i in q..q + k {
1129        let mut block = 0usize;
1130        for j in 0..l {
1131            block = (block << 1) | bits[i * l + j] as usize;
1132        }
1133        let prev = table[block];
1134        let distance = if prev > 0 {
1135            (i + 1 - prev) as f64
1136        } else {
1137            (i + 1) as f64
1138        };
1139        total += distance.log2();
1140        table[block] = i + 1;
1141    }
1142
1143    let fn_val = total / k as f64;
1144    let expected = 5.2177052;
1145    let variance = 2.954;
1146    let sigma = (variance / k as f64).sqrt();
1147    let z = (fn_val - expected).abs() / sigma.max(1e-10);
1148    let p = erfc(z / 2.0_f64.sqrt());
1149    TestResult {
1150        name: name.to_string(),
1151        passed: TestResult::pass_from_p(Some(p), 0.01),
1152        p_value: Some(p),
1153        statistic: fn_val,
1154        details: format!("fn={fn_val:.4}, expected={expected:.4}, L={l}"),
1155        grade: TestResult::grade_from_p(Some(p)),
1156    }
1157}
1158
1159// ═══════════════════════════════════════════════════════════════════════════════
1160// 9. ADVANCED TESTS
1161// ═══════════════════════════════════════════════════════════════════════════════
1162
/// GF(2) Gaussian elimination to compute binary matrix rank.
///
/// `matrix` is a row-major slice of 0/1 bytes with `rows * cols` entries.
fn gf2_rank(matrix: &[u8], rows: usize, cols: usize) -> usize {
    // Copy each row into an owned buffer so elimination can mutate in place.
    let mut work: Vec<Vec<u8>> = (0..rows)
        .map(|r| matrix[r * cols..(r + 1) * cols].to_vec())
        .collect();

    let mut rank = 0usize;
    for col in 0..cols {
        // Locate a pivot: a row at or below `rank` with a 1 in this column.
        let Some(pivot) = (rank..rows).find(|&r| work[r][col] == 1) else {
            continue;
        };
        work.swap(rank, pivot);
        // Clear this column in every other row by XOR-ing in the pivot row.
        for row in 0..rows {
            if row != rank && work[row][col] == 1 {
                let pivot_row = work[rank].clone();
                for (cell, &p) in work[row].iter_mut().zip(pivot_row.iter()) {
                    *cell ^= p;
                }
            }
        }
        rank += 1;
    }
    rank
}
1194
1195/// Test 24: Binary matrix rank -- GF(2) Gaussian elimination on 32x32 binary matrices.
1196pub fn binary_matrix_rank(data: &[u8]) -> TestResult {
1197    let name = "Binary Matrix Rank";
1198    let bits = to_bits(data);
1199    let n = bits.len();
1200    let m_size = 32;
1201    let q_size = 32;
1202    let bits_per_matrix = m_size * q_size;
1203    let num_matrices = n / bits_per_matrix;
1204    if num_matrices < 38 {
1205        return insufficient(name, 38 * bits_per_matrix, n);
1206    }
1207
1208    let mut full_rank = 0u64;
1209    let mut rank_m1 = 0u64;
1210    let min_dim = m_size.min(q_size);
1211    for i in 0..num_matrices {
1212        let start = i * bits_per_matrix;
1213        let matrix = &bits[start..start + bits_per_matrix];
1214        let rank = gf2_rank(matrix, m_size, q_size);
1215        if rank == min_dim {
1216            full_rank += 1;
1217        } else if rank == min_dim - 1 {
1218            rank_m1 += 1;
1219        }
1220    }
1221    let rest = num_matrices as u64 - full_rank - rank_m1;
1222    let n_f = num_matrices as f64;
1223
1224    let p_full = 0.2888;
1225    let p_m1 = 0.5776;
1226    let p_rest = 0.1336;
1227    let chi2 = (full_rank as f64 - n_f * p_full).powi(2) / (n_f * p_full)
1228        + (rank_m1 as f64 - n_f * p_m1).powi(2) / (n_f * p_m1)
1229        + (rest as f64 - n_f * p_rest).powi(2) / (n_f * p_rest);
1230
1231    let dist = ChiSquared::new(2.0).unwrap();
1232    let p = dist.sf(chi2);
1233    TestResult {
1234        name: name.to_string(),
1235        passed: TestResult::pass_from_p(Some(p), 0.01),
1236        p_value: Some(p),
1237        statistic: chi2,
1238        details: format!("N={num_matrices}, full={full_rank}, full-1={rank_m1}"),
1239        grade: TestResult::grade_from_p(Some(p)),
1240    }
1241}
1242
/// Berlekamp-Massey algorithm for binary sequences. Returns the LFSR
/// complexity, i.e. the length of the shortest LFSR generating `seq`.
///
/// Returns 0 for an empty or all-zero sequence. (The original indexed `c[0]`
/// unconditionally and panicked on empty input.)
fn berlekamp_massey(seq: &[u8]) -> usize {
    let n = seq.len();
    if n == 0 {
        // Guard: the connection-polynomial buffers below assume n >= 1.
        return 0;
    }
    // c = current connection polynomial, b = previous one (GF(2) coefficients).
    let mut c = vec![0u8; n];
    let mut b = vec![0u8; n];
    c[0] = 1;
    b[0] = 1;
    let mut l: usize = 0; // current LFSR length
    let mut m: isize = -1; // index of the last length change

    for ni in 0..n {
        // Discrepancy between the LFSR prediction and the actual bit.
        let mut d: u8 = seq[ni];
        for i in 1..=l {
            d ^= c[i] & seq[ni - i];
        }
        if d == 1 {
            let shift = (ni as isize - m) as usize;
            // Snapshot c only when the LFSR length must grow; this avoids a
            // full clone on the common no-growth path.
            let snapshot = if l <= ni / 2 { Some(c.clone()) } else { None };
            for i in shift..n {
                c[i] ^= b[i - shift];
            }
            if let Some(t) = snapshot {
                l = ni + 1 - l;
                m = ni as isize;
                b = t;
            }
        }
    }
    l
}
1273
1274/// Test 25: Linear complexity -- Berlekamp-Massey LFSR complexity on 200-bit blocks.
1275pub fn linear_complexity(data: &[u8]) -> TestResult {
1276    let name = "Linear Complexity";
1277    let block_size = 200usize;
1278    let bits = to_bits(data);
1279    let n = bits.len();
1280    let num_blocks = n / block_size;
1281    if num_blocks < 6 {
1282        return insufficient(name, 6 * block_size, n);
1283    }
1284
1285    let mut complexities = Vec::with_capacity(num_blocks);
1286    for i in 0..num_blocks {
1287        let start = i * block_size;
1288        let block = &bits[start..start + block_size];
1289        complexities.push(berlekamp_massey(block));
1290    }
1291
1292    let m = block_size as f64;
1293    let sign = if block_size.is_multiple_of(2) {
1294        1.0
1295    } else {
1296        -1.0
1297    };
1298    let mu = m / 2.0 + (9.0 + sign) / 36.0 - (m / 3.0 + 2.0 / 9.0) / 2.0_f64.powf(m);
1299
1300    let t_vals: Vec<f64> = complexities
1301        .iter()
1302        .map(|&c| sign * (c as f64 - mu) + 2.0 / 9.0)
1303        .collect();
1304
1305    let mut observed = [0u64; 7];
1306    for &t in &t_vals {
1307        let bin = if t <= -2.5 {
1308            0
1309        } else if t <= -1.5 {
1310            1
1311        } else if t <= -0.5 {
1312            2
1313        } else if t <= 0.5 {
1314            3
1315        } else if t <= 1.5 {
1316            4
1317        } else if t <= 2.5 {
1318            5
1319        } else {
1320            6
1321        };
1322        observed[bin] += 1;
1323    }
1324
1325    let mut probs = [0.010882, 0.03534, 0.08884, 0.5, 0.08884, 0.03534, 0.010882];
1326    let sum_rest: f64 = probs[..6].iter().sum();
1327    probs[6] = 1.0 - sum_rest;
1328
1329    let mut chi2 = 0.0;
1330    let n_f = num_blocks as f64;
1331    for i in 0..7 {
1332        let expected = probs[i] * n_f;
1333        if expected > 0.0 {
1334            let diff = observed[i] as f64 - expected;
1335            chi2 += diff * diff / expected;
1336        }
1337    }
1338
1339    let dist = ChiSquared::new(6.0).unwrap();
1340    let p = dist.sf(chi2);
1341    let mean_c: f64 = complexities.iter().map(|&c| c as f64).sum::<f64>() / num_blocks as f64;
1342    TestResult {
1343        name: name.to_string(),
1344        passed: TestResult::pass_from_p(Some(p), 0.01),
1345        p_value: Some(p),
1346        statistic: chi2,
1347        details: format!("N={num_blocks}, mean_complexity={mean_c:.1}"),
1348        grade: TestResult::grade_from_p(Some(p)),
1349    }
1350}
1351
1352/// Test 26: Cumulative sums (CUSUM) -- detect drift/bias.
1353pub fn cusum_test(data: &[u8]) -> TestResult {
1354    let name = "Cumulative Sums";
1355    let bits = to_bits(data);
1356    let n = bits.len();
1357    if n < 100 {
1358        return insufficient(name, 100, n);
1359    }
1360
1361    let mut cumsum = Vec::with_capacity(n);
1362    let mut s: i64 = 0;
1363    for &bit in &bits {
1364        s += if bit == 1 { 1 } else { -1 };
1365        cumsum.push(s);
1366    }
1367    let z = cumsum.iter().map(|&x| x.unsigned_abs()).max().unwrap() as f64;
1368    if z < 1e-10 {
1369        return TestResult {
1370            name: name.to_string(),
1371            passed: true,
1372            p_value: Some(1.0),
1373            statistic: 0.0,
1374            details: format!("max|S|=0, n={n}"),
1375            grade: 'A',
1376        };
1377    }
1378
1379    let nf = n as f64;
1380    let sqrt_n = nf.sqrt();
1381    let norm = Normal::standard();
1382    let k_start = ((-nf / z + 1.0) / 4.0).floor() as i64;
1383    let k_end = ((nf / z - 1.0) / 4.0).ceil() as i64;
1384    let mut s_val = 0.0;
1385    for k in k_start..=k_end {
1386        let kf = k as f64;
1387        s_val += norm.cdf((4.0 * kf + 1.0) * z / sqrt_n) - norm.cdf((4.0 * kf - 1.0) * z / sqrt_n);
1388    }
1389    let p = (1.0 - s_val).clamp(0.0, 1.0);
1390    TestResult {
1391        name: name.to_string(),
1392        passed: TestResult::pass_from_p(Some(p), 0.01),
1393        p_value: Some(p),
1394        statistic: z,
1395        details: format!("max|S|={z:.1}, n={n}"),
1396        grade: TestResult::grade_from_p(Some(p)),
1397    }
1398}
1399
1400/// Test 27: Random excursions -- cycles in cumulative sum random walk.
1401pub fn random_excursions(data: &[u8]) -> TestResult {
1402    let name = "Random Excursions";
1403    let bits = to_bits(data);
1404    let n = bits.len();
1405    if n < 1000 {
1406        return insufficient(name, 1000, n);
1407    }
1408
1409    // Build cumulative sum with leading and trailing zeros
1410    let mut cumsum = Vec::with_capacity(n + 2);
1411    cumsum.push(0i64);
1412    let mut s: i64 = 0;
1413    for &bit in &bits {
1414        s += if bit == 1 { 1 } else { -1 };
1415        cumsum.push(s);
1416    }
1417    cumsum.push(0);
1418
1419    let zeros: Vec<usize> = cumsum
1420        .iter()
1421        .enumerate()
1422        .filter_map(|(i, &v)| if v == 0 { Some(i) } else { None })
1423        .collect();
1424
1425    let j = if !zeros.is_empty() {
1426        zeros.len() - 1
1427    } else {
1428        0
1429    };
1430
1431    if j < 500 {
1432        return TestResult {
1433            name: name.to_string(),
1434            passed: true,
1435            p_value: None,
1436            statistic: j as f64,
1437            details: format!("Only {j} cycles (need 500 for reliable test)"),
1438            grade: 'B',
1439        };
1440    }
1441
1442    let expected_cycles = (n as f64) / (2.0 * PI * n as f64).sqrt();
1443    let ratio = j as f64 / expected_cycles.max(1.0);
1444    let passed = ratio > 0.5 && ratio < 2.0;
1445    let grade = if ratio > 0.8 && ratio < 1.2 {
1446        'A'
1447    } else if ratio > 0.6 && ratio < 1.5 {
1448        'B'
1449    } else if passed {
1450        'C'
1451    } else {
1452        'F'
1453    };
1454    TestResult {
1455        name: name.to_string(),
1456        passed,
1457        p_value: None,
1458        statistic: j as f64,
1459        details: format!("cycles={j}, expected~={expected_cycles:.0}"),
1460        grade,
1461    }
1462}
1463
1464/// Test 28: Birthday spacing -- spacing between repeated values, Poisson test.
1465pub fn birthday_spacing(data: &[u8]) -> TestResult {
1466    let name = "Birthday Spacing";
1467    let n = data.len();
1468    if n < 100 {
1469        return insufficient(name, 100, n);
1470    }
1471
1472    let values: Vec<u64> = if n >= 200 {
1473        let half = n / 2;
1474        (0..half)
1475            .map(|i| data[i * 2] as u64 * 256 + data[i * 2 + 1] as u64)
1476            .collect()
1477    } else {
1478        data.iter().map(|&b| b as u64).collect()
1479    };
1480
1481    let m = values.len();
1482    let mut sorted = values.clone();
1483    sorted.sort();
1484
1485    let mut spacings: Vec<u64> = Vec::with_capacity(m.saturating_sub(1));
1486    for i in 1..m {
1487        spacings.push(sorted[i] - sorted[i - 1]);
1488    }
1489    spacings.sort();
1490
1491    let mut dups = 0u64;
1492    for i in 1..spacings.len() {
1493        if spacings[i] == spacings[i - 1] {
1494            dups += 1;
1495        }
1496    }
1497
1498    let d = sorted.last().copied().unwrap_or(1).max(1) as f64;
1499    let mf = m as f64;
1500    let lambda = (mf * mf * mf / (4.0 * d)).max(0.01);
1501
1502    let p = if lambda > 0.0 {
1503        let poisson = Poisson::new(lambda).unwrap();
1504        let p_upper = if dups > 0 { poisson.sf(dups - 1) } else { 1.0 };
1505        let p_lower = poisson.cdf(dups);
1506        p_upper.max(p_lower).min(1.0)
1507    } else {
1508        1.0
1509    };
1510
1511    TestResult {
1512        name: name.to_string(),
1513        passed: TestResult::pass_from_p(Some(p), 0.01),
1514        p_value: Some(p),
1515        statistic: dups as f64,
1516        details: format!("duplicates={dups}, lambda={lambda:.2}, m={m}"),
1517        grade: TestResult::grade_from_p(Some(p)),
1518    }
1519}
1520
1521// ═══════════════════════════════════════════════════════════════════════════════
1522// 10. PRACTICAL TESTS
1523// ═══════════════════════════════════════════════════════════════════════════════
1524
1525/// Test 29: Bit avalanche -- adjacent bytes should differ by ~4 bits (50%).
1526pub fn bit_avalanche(data: &[u8]) -> TestResult {
1527    let name = "Bit Avalanche";
1528    let n = data.len();
1529    if n < 100 {
1530        return insufficient(name, 100, n);
1531    }
1532
1533    let mut total_diffs = 0u64;
1534    let pairs = n - 1;
1535    for i in 0..pairs {
1536        total_diffs += (data[i] ^ data[i + 1]).count_ones() as u64;
1537    }
1538    let mean_diff = total_diffs as f64 / pairs as f64;
1539    let expected = 4.0;
1540    let std = 2.0_f64.sqrt(); // binomial std for n=8, p=0.5
1541    let z = (mean_diff - expected).abs() / (std / (pairs as f64).sqrt());
1542    let norm = Normal::standard();
1543    let p = 2.0 * (1.0 - norm.cdf(z));
1544    TestResult {
1545        name: name.to_string(),
1546        passed: TestResult::pass_from_p(Some(p), 0.01),
1547        p_value: Some(p),
1548        statistic: mean_diff,
1549        details: format!("mean_diff={mean_diff:.3}/8 bits, expected=4.0"),
1550        grade: TestResult::grade_from_p(Some(p)),
1551    }
1552}
1553
1554/// Test 30: Monte Carlo pi -- estimate pi using (x,y) pairs in unit circle.
1555pub fn monte_carlo_pi(data: &[u8]) -> TestResult {
1556    let name = "Monte Carlo Pi";
1557    let n = data.len();
1558    if n < 200 {
1559        return insufficient(name, 200, n);
1560    }
1561    let pairs = n / 2;
1562    let mut inside = 0u64;
1563    for i in 0..pairs {
1564        let x = data[i] as f64 / 255.0;
1565        let y = data[pairs + i] as f64 / 255.0;
1566        if x * x + y * y <= 1.0 {
1567            inside += 1;
1568        }
1569    }
1570    let pi_est = 4.0 * inside as f64 / pairs as f64;
1571    let error = (pi_est - PI).abs() / PI;
1572    let grade = if error < 0.01 {
1573        'A'
1574    } else if error < 0.03 {
1575        'B'
1576    } else if error < 0.1 {
1577        'C'
1578    } else if error < 0.2 {
1579        'D'
1580    } else {
1581        'F'
1582    };
1583    TestResult {
1584        name: name.to_string(),
1585        passed: error < 0.05,
1586        p_value: None,
1587        statistic: pi_est,
1588        details: format!("pi~={pi_est:.6}, error={:.4}%", error * 100.0),
1589        grade,
1590    }
1591}
1592
1593/// Test 31: Mean and variance -- mean (~127.5) and variance (~5461.25) of uniform bytes.
1594pub fn mean_variance(data: &[u8]) -> TestResult {
1595    let name = "Mean & Variance";
1596    let n = data.len();
1597    if n < 50 {
1598        return insufficient(name, 50, n);
1599    }
1600    let arr: Vec<f64> = data.iter().map(|&b| b as f64).collect();
1601    let nf = n as f64;
1602    let mean: f64 = arr.iter().sum::<f64>() / nf;
1603    let var: f64 = arr.iter().map(|x| (x - mean) * (x - mean)).sum::<f64>() / nf;
1604
1605    let expected_mean = 127.5;
1606    let expected_var = (256.0 * 256.0 - 1.0) / 12.0; // 5461.25
1607
1608    let z_mean = (mean - expected_mean).abs() / (expected_var / nf).sqrt();
1609    let norm = Normal::standard();
1610    let p_mean = 2.0 * (1.0 - norm.cdf(z_mean));
1611
1612    let chi2_var = (nf - 1.0) * var / expected_var;
1613    let chi_dist = ChiSquared::new(nf - 1.0).unwrap();
1614    let p_var = 2.0 * chi_dist.cdf(chi2_var).min(chi_dist.sf(chi2_var));
1615
1616    let p = p_mean.min(p_var);
1617    TestResult {
1618        name: name.to_string(),
1619        passed: TestResult::pass_from_p(Some(p), 0.01),
1620        p_value: Some(p),
1621        statistic: z_mean,
1622        details: format!("mean={mean:.2} (exp 127.5), var={var:.1} (exp {expected_var:.1})"),
1623        grade: TestResult::grade_from_p(Some(p)),
1624    }
1625}
1626
1627// ═══════════════════════════════════════════════════════════════════════════════
1628// Test battery
1629// ═══════════════════════════════════════════════════════════════════════════════
1630
1631/// Run the complete 31-test battery on a byte slice.
1632pub fn run_all_tests(data: &[u8]) -> Vec<TestResult> {
1633    let tests: Vec<fn(&[u8]) -> TestResult> = vec![
1634        // Frequency (3)
1635        monobit_frequency,
1636        block_frequency,
1637        byte_frequency,
1638        // Runs (2)
1639        runs_test,
1640        longest_run_of_ones,
1641        // Serial (2)
1642        serial_test,
1643        approximate_entropy,
1644        // Spectral (2)
1645        dft_spectral,
1646        spectral_flatness,
1647        // Entropy (5)
1648        shannon_entropy,
1649        min_entropy,
1650        permutation_entropy,
1651        compression_ratio,
1652        kolmogorov_complexity,
1653        // Correlation (4)
1654        autocorrelation,
1655        serial_correlation,
1656        lag_n_correlation,
1657        cross_correlation,
1658        // Distribution (2)
1659        ks_test,
1660        anderson_darling,
1661        // Pattern (3)
1662        overlapping_template,
1663        non_overlapping_template,
1664        maurers_universal,
1665        // Advanced (5)
1666        binary_matrix_rank,
1667        linear_complexity,
1668        cusum_test,
1669        random_excursions,
1670        birthday_spacing,
1671        // Practical (3)
1672        bit_avalanche,
1673        monte_carlo_pi,
1674        mean_variance,
1675    ];
1676
1677    tests
1678        .iter()
1679        .map(|test_fn| {
1680            match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| test_fn(data))) {
1681                Ok(result) => result,
1682                Err(_) => TestResult {
1683                    name: "Unknown".to_string(),
1684                    passed: false,
1685                    p_value: None,
1686                    statistic: 0.0,
1687                    details: "Test panicked".to_string(),
1688                    grade: 'F',
1689                },
1690            }
1691        })
1692        .collect()
1693}
1694
1695/// Calculate overall quality score (0-100) from test results.
1696///
1697/// Each grade maps to a score: A=100, B=75, C=50, D=25, F=0.
1698/// Returns the average across all tests.
1699pub fn calculate_quality_score(results: &[TestResult]) -> f64 {
1700    if results.is_empty() {
1701        return 0.0;
1702    }
1703    let total: f64 = results
1704        .iter()
1705        .map(|r| match r.grade {
1706            'A' => 100.0,
1707            'B' => 75.0,
1708            'C' => 50.0,
1709            'D' => 25.0,
1710            _ => 0.0,
1711        })
1712        .sum();
1713    total / results.len() as f64
1714}
1715
#[cfg(test)]
mod tests {
    use super::*;

    /// Generate pseudo-random data for testing (simple LCG).
    fn pseudo_random(n: usize) -> Vec<u8> {
        let mut data = Vec::with_capacity(n);
        let mut state: u64 = 0xDEAD_BEEF_CAFE_BABE;
        for _ in 0..n {
            // Advance the 64-bit LCG and emit high-order bits (the low bits
            // of a power-of-two-modulus LCG are much weaker).
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            data.push((state >> 33) as u8);
        }
        data
    }

    /// Bit extraction is MSB-first within each byte.
    #[test]
    fn test_to_bits() {
        let data = [0b10110001u8];
        let bits = to_bits(&data);
        assert_eq!(bits, vec![1, 0, 1, 1, 0, 0, 0, 1]);
    }

    /// Grade thresholds: A >= 0.1, B >= 0.01, C >= 0.001, D >= 0.0001, else F.
    #[test]
    fn test_grade_from_p() {
        assert_eq!(TestResult::grade_from_p(Some(0.5)), 'A');
        assert_eq!(TestResult::grade_from_p(Some(0.05)), 'B');
        assert_eq!(TestResult::grade_from_p(Some(0.005)), 'C');
        assert_eq!(TestResult::grade_from_p(Some(0.0005)), 'D');
        assert_eq!(TestResult::grade_from_p(Some(0.00000001)), 'F');
        assert_eq!(TestResult::grade_from_p(None), 'F');
    }

    /// Passing requires p >= threshold; a missing p-value always fails.
    #[test]
    fn test_pass_from_p() {
        assert!(TestResult::pass_from_p(Some(0.05), 0.01));
        assert!(!TestResult::pass_from_p(Some(0.005), 0.01));
        assert!(!TestResult::pass_from_p(None, 0.01));
    }

    /// Too-short input yields a failing "Insufficient" result, not a panic.
    #[test]
    fn test_insufficient_data() {
        let data = [0u8; 5];
        let result = monobit_frequency(&data);
        assert!(!result.passed);
        assert!(result.details.contains("Insufficient"));
    }

    /// All-zero input should fail the majority of the battery.
    #[test]
    fn test_constant_data_fails() {
        let data = vec![0u8; 1000];
        let results = run_all_tests(&data);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(passed_count < results.len() / 2);
    }

    /// LCG output should pass most tests (loose bound: more than half).
    #[test]
    fn test_pseudo_random_passes() {
        let data = pseudo_random(10000);
        let results = run_all_tests(&data);
        let passed_count = results.iter().filter(|r| r.passed).count();
        assert!(
            passed_count > results.len() / 2,
            "Only {passed_count}/{} tests passed",
            results.len()
        );
    }

    /// One A (100) and one F (0) should average to a score of 50.
    #[test]
    fn test_quality_score() {
        let results = vec![
            TestResult {
                name: "A".into(),
                passed: true,
                p_value: Some(0.5),
                statistic: 0.0,
                details: String::new(),
                grade: 'A',
            },
            TestResult {
                name: "F".into(),
                passed: false,
                p_value: Some(0.0),
                statistic: 0.0,
                details: String::new(),
                grade: 'F',
            },
        ];
        let score = calculate_quality_score(&results);
        assert!((score - 50.0).abs() < 0.01);
    }

    /// The battery advertises 31 tests; keep the count in sync with the docs.
    #[test]
    fn test_all_31_tests_present() {
        let data = pseudo_random(10000);
        let results = run_all_tests(&data);
        assert_eq!(results.len(), 31);
    }

    /// Monobit on valid-length input must produce a p-value.
    #[test]
    fn test_monobit_basic() {
        let data = pseudo_random(1000);
        let result = monobit_frequency(&data);
        assert!(result.p_value.is_some());
    }

    /// Random bytes should have near-maximal (close to 8-bit) Shannon entropy.
    #[test]
    fn test_shannon_entropy_random() {
        let data = pseudo_random(10000);
        let result = shannon_entropy(&data);
        assert!(
            result.statistic > 7.0,
            "Shannon entropy too low: {}",
            result.statistic
        );
    }

    /// Random bytes should be essentially incompressible.
    #[test]
    fn test_compression_ratio_random() {
        let data = pseudo_random(10000);
        let result = compression_ratio(&data);
        assert!(
            result.statistic > 0.9,
            "Compression ratio too low: {}",
            result.statistic
        );
    }

    /// An empty result set scores 0 rather than dividing by zero.
    #[test]
    fn test_calculate_quality_score_empty() {
        assert_eq!(calculate_quality_score(&[]), 0.0);
    }
}