// openentropy_core/conditioning.rs
//! Centralized entropy conditioning module.
//!
//! **ALL** post-processing of raw entropy lives here — no conditioning code
//! should exist in individual source implementations. Sources produce raw bytes;
//! this module is the single, auditable gateway for any transformation.
//!
//! # Architecture
//!
//! ```text
//! Source → Raw Bytes → Conditioning Layer (this module) → Output
//! ```
//!
//! # Conditioning Modes
//!
//! - **Raw**: No processing. XOR-combined bytes pass through unchanged.
//!   Preserves the actual hardware noise signal for research.
//! - **VonNeumann**: Debias only. Removes first-order bias without destroying
//!   the noise structure. Output is shorter than input (~25% yield).
//! - **Sha256**: Full SHA-256 conditioning with counter and timestamp mixing.
//!   Produces cryptographically strong output but destroys the raw signal.
//!
//! Most QRNG APIs (ANU, Outshift/Cisco) apply DRBG post-processing that makes
//! output indistinguishable from PRNG. The `Raw` mode here is what makes
//! openentropy useful for researchers studying actual hardware noise.
26use serde::Serialize;
27use sha2::{Digest, Sha256};
28use std::collections::HashMap;
29
/// How raw entropy bytes are post-processed before being returned.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum ConditioningMode {
    /// Pass-through: bytes are emitted exactly as collected.
    Raw,
    /// Von Neumann debiasing: removes first-order bias; shrinks the output.
    VonNeumann,
    /// SHA-256 conditioning (the default): cryptographically strong output.
    #[default]
    Sha256,
}
41
42impl std::fmt::Display for ConditioningMode {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        match self {
45            Self::Raw => write!(f, "raw"),
46            Self::VonNeumann => write!(f, "von_neumann"),
47            Self::Sha256 => write!(f, "sha256"),
48        }
49    }
50}
51
// ---------------------------------------------------------------------------
// Central conditioning gateway
// ---------------------------------------------------------------------------
55
56/// Apply the specified conditioning mode to raw entropy bytes.
57///
58/// This is the **single gateway** for all entropy conditioning. No other code
59/// in the crate should perform SHA-256, Von Neumann debiasing, or any other
60/// form of whitening/post-processing on entropy data.
61///
62/// - `Raw`: returns the input unchanged (truncated to `n_output`)
63/// - `VonNeumann`: debiases then truncates to `n_output`
64/// - `Sha256`: chained SHA-256 hashing to produce exactly `n_output` bytes
65pub fn condition(raw: &[u8], n_output: usize, mode: ConditioningMode) -> Vec<u8> {
66    match mode {
67        ConditioningMode::Raw => {
68            let mut out = raw.to_vec();
69            out.truncate(n_output);
70            out
71        }
72        ConditioningMode::VonNeumann => {
73            let debiased = von_neumann_debias(raw);
74            let mut out = debiased;
75            out.truncate(n_output);
76            out
77        }
78        ConditioningMode::Sha256 => sha256_condition_bytes(raw, n_output),
79    }
80}
81
// ---------------------------------------------------------------------------
// SHA-256 conditioning
// ---------------------------------------------------------------------------
85
86/// SHA-256 chained conditioning: stretches or compresses raw bytes to exactly
87/// `n_output` bytes using counter-mode hashing.
88///
89/// Each 32-byte output block is: SHA-256(state || chunk || counter).
90/// State is chained from the previous block's digest.
91pub fn sha256_condition_bytes(raw: &[u8], n_output: usize) -> Vec<u8> {
92    if raw.is_empty() {
93        return Vec::new();
94    }
95    let mut output = Vec::with_capacity(n_output);
96    let mut state = [0u8; 32];
97    let mut offset = 0;
98    let mut counter: u64 = 0;
99    while output.len() < n_output {
100        let end = (offset + 64).min(raw.len());
101        let chunk = &raw[offset..end];
102        let mut h = Sha256::new();
103        h.update(state);
104        h.update(chunk);
105        h.update(counter.to_le_bytes());
106        let digest: [u8; 32] = h.finalize().into();
107        output.extend_from_slice(&digest);
108
109        // Derive state separately from output for forward secrecy.
110        let mut sh = Sha256::new();
111        sh.update(digest);
112        sh.update(b"openentropy_state");
113        state = sh.finalize().into();
114
115        offset += 64;
116        counter += 1;
117        if offset >= raw.len() {
118            offset = 0;
119        }
120    }
121    output.truncate(n_output);
122    output
123}
124
125/// SHA-256 condition with explicit state, sample, counter, and extra data.
126/// Returns (new_state, 32-byte output).
127///
128/// The new state is derived separately from the output to provide forward
129/// secrecy: knowing the output does not reveal the internal state.
130pub fn sha256_condition(
131    state: &[u8; 32],
132    sample: &[u8],
133    counter: u64,
134    extra: &[u8],
135) -> ([u8; 32], [u8; 32]) {
136    let mut h = Sha256::new();
137    h.update(state);
138    h.update(sample);
139    h.update(counter.to_le_bytes());
140
141    let ts = std::time::SystemTime::now()
142        .duration_since(std::time::UNIX_EPOCH)
143        .unwrap_or_default();
144    h.update(ts.as_nanos().to_le_bytes());
145
146    h.update(extra);
147
148    let output: [u8; 32] = h.finalize().into();
149
150    // Derive state separately from output for forward secrecy.
151    let mut sh = Sha256::new();
152    sh.update(output);
153    sh.update(b"openentropy_state");
154    let new_state: [u8; 32] = sh.finalize().into();
155
156    (new_state, output)
157}
158
// ---------------------------------------------------------------------------
// Von Neumann debiasing
// ---------------------------------------------------------------------------
162
/// Von Neumann debiasing: extract unbiased bits from a biased bit stream.
///
/// Bits are consumed in non-overlapping pairs, MSB first within each byte:
/// (0,1) emits 0, (1,0) emits 1, and equal pairs are discarded. Only whole
/// output bytes are returned; a trailing group of fewer than 8 kept bits is
/// dropped. Expected yield is ~25% of the input bits for unbiased input.
pub fn von_neumann_debias(data: &[u8]) -> Vec<u8> {
    // Kept bits, in stream order (MSB-first within each source byte).
    let kept: Vec<u8> = data
        .iter()
        .flat_map(|&byte| {
            (0..4).filter_map(move |pair| {
                let hi = (byte >> (7 - 2 * pair)) & 1;
                let lo = (byte >> (6 - 2 * pair)) & 1;
                if hi != lo { Some(hi) } else { None }
            })
        })
        .collect();

    // Repack complete groups of 8 bits into bytes, MSB first.
    kept.chunks_exact(8)
        .map(|bits| bits.iter().fold(0u8, |acc, &b| (acc << 1) | b))
        .collect()
}
190
// ---------------------------------------------------------------------------
// XOR folding
// ---------------------------------------------------------------------------
194
/// XOR-fold: halve the data by XORing the first half against the second.
///
/// For odd-length input the trailing middle byte is XORed into the final
/// output byte instead of being dropped, so no input byte is silently lost.
/// Inputs shorter than two bytes are returned unchanged.
pub fn xor_fold(data: &[u8]) -> Vec<u8> {
    if data.len() < 2 {
        return data.to_vec();
    }
    let half = data.len() / 2;
    let (front, back) = data.split_at(half);
    let mut folded: Vec<u8> = front.iter().zip(back).map(|(a, b)| a ^ b).collect();
    if data.len() % 2 == 1 {
        if let Some(last) = folded.last_mut() {
            *last ^= data[data.len() - 1];
        }
    }
    folded
}
209
// ---------------------------------------------------------------------------
// Quick analysis utilities
// ---------------------------------------------------------------------------

// ---------------------------------------------------------------------------
// Min-entropy estimators
//
// Notes:
// - `mcv_estimate` follows the NIST 800-90B MCV style closely and is used as
//   the primary conservative estimate.
// - The other estimators are retained as NIST-inspired diagnostics. They are
//   useful for comparative/source characterization, but this implementation is
//   not a strict validation harness for 800-90B.
// ---------------------------------------------------------------------------
224
/// Min-entropy: H∞ = -log2(p_max), in bits per sample (0.0..=8.0 for bytes).
///
/// More conservative than Shannon entropy — it reflects the worst-case
/// probability of guessing a sample. Empty input reports 0.0.
pub fn min_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut histogram = [0u64; 256];
    for &value in data {
        histogram[value as usize] += 1;
    }
    let total = data.len() as f64;
    let max_count = histogram.iter().copied().max().unwrap_or(0);
    let p_max = max_count as f64 / total;
    if p_max <= 0.0 { 0.0 } else { -p_max.log2() }
}
243
/// Most Common Value (MCV) estimator, in the style of NIST SP 800-90B §6.3.1.
///
/// Bounds p_max from above with a 99% confidence interval and converts that
/// bound to min-entropy. Returns `(min_entropy_bits_per_sample, p_upper)`;
/// empty input reports `(0.0, 1.0)`.
pub fn mcv_estimate(data: &[u8]) -> (f64, f64) {
    if data.is_empty() {
        return (0.0, 1.0);
    }
    let mut histogram = [0u64; 256];
    for &value in data {
        histogram[value as usize] += 1;
    }
    let total = data.len() as f64;
    let p_hat = *histogram.iter().max().unwrap() as f64 / total;

    // 99% upper confidence bound: p_u = min(1, p_hat + z * stderr(p_hat)).
    let z = 2.576; // z_{0.995}
    let p_upper = (p_hat + z * (p_hat * (1.0 - p_hat) / total).sqrt()).min(1.0);

    let entropy = if p_upper >= 1.0 {
        0.0
    } else {
        (-p_upper.log2()).max(0.0)
    };
    (entropy, p_upper)
}
271
/// Collision estimator (NIST-inspired diagnostic).
///
/// A "collision" is any pair of adjacent equal samples (data[i] == data[i+1]),
/// per NIST's definition — not a repeat of a specific starting value. The mean
/// distance between successive collisions approximates 1/q with q = Σ p_i²,
/// and p_max ≤ sqrt(q) yields a min-entropy bound.
///
/// When fewer than two collisions occur no distance exists. `distances` being
/// empty already implies at most one collision (two would have produced a
/// distance), so — unlike the previous implementation — no second O(n) scan is
/// needed: `last_collision` tells us whether there was exactly one (fall back
/// to q̂ = 1/(n-1)) or none at all (return 8.0, a non-conservative maximum;
/// the MCV estimator remains the conservative primary bound).
///
/// Returns estimated min-entropy in bits per sample; inputs shorter than
/// 3 bytes report 0.0.
pub fn collision_estimate(data: &[u8]) -> f64 {
    if data.len() < 3 {
        return 0.0;
    }

    // Positions i where data[i] == data[i+1], and the gaps between them.
    let mut distances = Vec::new();
    let mut last_collision: Option<usize> = None;
    for (i, pair) in data.windows(2).enumerate() {
        if pair[0] == pair[1] {
            if let Some(prev) = last_collision {
                distances.push((i - prev) as f64);
            }
            last_collision = Some(i);
        }
    }

    if distances.is_empty() {
        if last_collision.is_none() {
            // No adjacent collisions at all: consistent with high entropy but
            // also with small samples. Diagnostic only — report the maximum.
            return 8.0;
        }
        // Exactly one collision over n-1 adjacent pairs:
        // q_hat = 1 / (n - 1), and p_max <= sqrt(q_hat).
        let q_hat = 1.0 / (data.len() - 1) as f64;
        let p_max = q_hat.sqrt().min(1.0);
        return if p_max <= 0.0 {
            8.0
        } else {
            (-p_max.log2()).min(8.0)
        };
    }

    let n_collisions = distances.len() as f64;
    let mean_dist = distances.iter().sum::<f64>() / n_collisions;

    // Conservative direction: lower-bound the mean distance (99% CI), which
    // raises q, raises p_max, and therefore lowers the entropy estimate.
    let variance = distances
        .iter()
        .map(|d| (d - mean_dist).powi(2))
        .sum::<f64>()
        / (n_collisions - 1.0).max(1.0);
    let std_err = (variance / n_collisions).sqrt();
    let z = 2.576; // 99% CI
    let mean_lower = (mean_dist - z * std_err).max(1.0);

    // Mean inter-collision distance ≈ 1/q  ⇒  q_upper ≈ 1/mean_lower,
    // and p_max <= sqrt(q_upper).
    let p_max = (1.0 / mean_lower).sqrt().min(1.0);
    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}
355
/// Markov estimator (NIST-inspired diagnostic).
///
/// Models first-order dependencies between consecutive bytes using a full
/// 256×256 transition table (a flat 64 KiB array) plus the initial byte
/// distribution. The per-sample probability is bounded by the largest of:
/// any initial probability p_init[s], or any transition probability into s
/// from any predecessor. The min-entropy bound is -log2 of that maximum.
///
/// Returns estimated min-entropy in bits per sample; inputs shorter than
/// 2 bytes report 0.0.
pub fn markov_estimate(data: &[u8]) -> f64 {
    if data.len() < 2 {
        return 0.0;
    }

    let total = data.len() as f64;

    // Marginal (initial) byte counts.
    let mut init_counts = [0u64; 256];
    for &value in data {
        init_counts[value as usize] += 1;
    }

    // First-order transition counts, flattened as [from * 256 + to].
    let mut transitions = vec![0u64; 256 * 256];
    for pair in data.windows(2) {
        transitions[pair[0] as usize * 256 + pair[1] as usize] += 1;
    }

    // Outgoing-transition totals per predecessor byte.
    let mut row_sums = [0u64; 256];
    for (from, sum) in row_sums.iter_mut().enumerate() {
        *sum = transitions[from * 256..(from + 1) * 256].iter().sum();
    }

    // p_max = max over s of max(p_init[s], max_pred p_trans[pred][s]):
    // a conservative per-sample bound under the first-order Markov model.
    let mut p_max = 0.0f64;
    for s in 0..256usize {
        p_max = p_max.max(init_counts[s] as f64 / total);
        for pred in 0..256usize {
            if row_sums[pred] > 0 {
                let p = transitions[pred * 256 + s] as f64 / row_sums[pred] as f64;
                p_max = p_max.max(p);
            }
        }
    }

    if p_max <= 0.0 {
        8.0
    } else {
        (-p_max.log2()).min(8.0)
    }
}
425
/// Compression estimator (NIST-inspired diagnostic) via Maurer's universal
/// statistic.
///
/// Maurer's f_n converges to the *Shannon* entropy rate, not min-entropy.
/// Since H∞ <= H_Shannon and the gap widens with distribution skew, the
/// 99%-lower-bounded statistic `f_lower` is mapped through the quadratic
/// penalty `H∞_est = f_lower² / 8` (8 = log2 of the 256-symbol alphabet):
/// 8.0 → 8.0 for uniform data, 4.0 → 2.0, 1.0 → 0.125.
///
/// Fix vs the previous implementation: the variance was computed in a second
/// full pass that re-derived every distance (with a duplicate 256-entry
/// position table). The log-distances are now recorded once and reused —
/// identical values, one pass.
///
/// Returns estimated min-entropy in bits per sample; inputs shorter than
/// 100 bytes report 0.0.
pub fn compression_estimate(data: &[u8]) -> f64 {
    if data.len() < 100 {
        return 0.0;
    }

    let bits_per_symbol = 8.0f64; // log2(alphabet_size) = log2(256)
    let q = 256.min(data.len() / 4); // initialization segment length
    let k = data.len() - q; // test segment length
    if k == 0 {
        return 0.0;
    }

    // Initialization segment: last (1-indexed) position of each byte value.
    let mut last_pos = [0usize; 256];
    for (i, &b) in data[..q].iter().enumerate() {
        last_pos[b as usize] = i + 1;
    }

    // Test segment, single pass: log2 of the gap back to each byte's
    // previous occurrence (skipped for never-before-seen values).
    let mut log_gaps = Vec::with_capacity(k);
    for (i, &b) in data[q..].iter().enumerate() {
        let pos = q + i + 1; // 1-indexed
        let prev = last_pos[b as usize];
        if prev > 0 {
            log_gaps.push(((pos - prev) as f64).log2());
        }
        last_pos[b as usize] = pos;
    }

    if log_gaps.is_empty() {
        return bits_per_symbol; // no value ever repeated
    }

    let count = log_gaps.len() as f64;
    let f_n = log_gaps.iter().sum::<f64>() / count;

    // 99% lower confidence bound on the Shannon-style statistic
    // (conservative direction).
    let variance = log_gaps.iter().map(|g| (g - f_n).powi(2)).sum::<f64>()
        / (count - 1.0).max(1.0);
    let std_err = (variance / count).sqrt();
    let z = 2.576; // z_{0.995}
    let f_lower = (f_n - z * std_err).max(0.0);

    // Quadratic Shannon → min-entropy penalty (see doc comment).
    (f_lower * f_lower / bits_per_symbol).min(bits_per_symbol)
}
515
/// t-Tuple estimator (NIST-inspired diagnostic).
///
/// For t = 1, 2, 3, finds the most frequent t-length window and converts its
/// frequency into a per-sample min-entropy of -log2(p_max)/t; the minimum
/// over all t is returned (most conservative). Inputs shorter than 20 bytes
/// report 0.0.
pub fn t_tuple_estimate(data: &[u8]) -> f64 {
    if data.len() < 20 {
        return 0.0;
    }

    let mut best = 8.0f64;
    for t in 1..=3usize {
        if data.len() < t + 1 {
            break;
        }
        let mut tuple_counts: HashMap<&[u8], u64> = HashMap::new();
        for window in data.windows(t) {
            *tuple_counts.entry(window).or_insert(0) += 1;
        }
        let n_tuples = (data.len() - t + 1) as f64;
        let top = tuple_counts.values().copied().max().unwrap_or(0) as f64;
        let p_max = top / n_tuples;
        if p_max > 0.0 {
            // Per-sample entropy contributed by the best t-tuple.
            best = best.min(-p_max.log2() / t as f64);
        }
    }
    best.min(8.0)
}
548
549/// Min-entropy estimate with diagnostic side metrics.
550///
551/// For professional operational use, `min_entropy` is the MCV-based estimate.
552/// Additional estimators are reported as diagnostics, and their minimum is
553/// exposed as `heuristic_floor`.
554pub fn min_entropy_estimate(data: &[u8]) -> MinEntropyReport {
555    let shannon = quick_shannon(data);
556    let (mcv_h, mcv_p_upper) = mcv_estimate(data);
557    let collision_h = collision_estimate(data);
558    let markov_h = markov_estimate(data);
559    let compression_h = compression_estimate(data);
560    let t_tuple_h = t_tuple_estimate(data);
561
562    let heuristic_floor = collision_h.min(markov_h).min(compression_h).min(t_tuple_h);
563
564    MinEntropyReport {
565        shannon_entropy: shannon,
566        min_entropy: mcv_h,
567        heuristic_floor,
568        mcv_estimate: mcv_h,
569        mcv_p_upper,
570        collision_estimate: collision_h,
571        markov_estimate: markov_h,
572        compression_estimate: compression_h,
573        t_tuple_estimate: t_tuple_h,
574        samples: data.len(),
575    }
576}
577
/// Min-entropy analysis report with individual estimator results.
///
/// Produced by [`min_entropy_estimate`]; serializable for API/report output.
#[derive(Debug, Clone, Serialize)]
pub struct MinEntropyReport {
    /// Shannon entropy (bits/byte, max 8.0). Upper bound, not conservative.
    pub shannon_entropy: f64,
    /// Primary conservative min-entropy estimate (bits/byte), MCV-based.
    pub min_entropy: f64,
    /// Minimum across heuristic diagnostic estimators.
    pub heuristic_floor: f64,
    /// Most Common Value estimator.
    pub mcv_estimate: f64,
    /// Upper bound on max probability from MCV.
    pub mcv_p_upper: f64,
    /// Collision estimator (diagnostic).
    pub collision_estimate: f64,
    /// Markov estimator (diagnostic).
    pub markov_estimate: f64,
    /// Compression estimator (diagnostic).
    pub compression_estimate: f64,
    /// t-Tuple estimator (diagnostic).
    pub t_tuple_estimate: f64,
    /// Number of samples analyzed.
    pub samples: usize,
}
602
impl std::fmt::Display for MinEntropyReport {
    // Multi-line human-readable summary (CLI/TUI). The exact layout is kept
    // stable deliberately — column widths line the numbers up vertically.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Min-Entropy Analysis ({} samples)", self.samples)?;
        writeln!(
            f,
            "  Shannon H:       {:.3} bits/byte  (upper bound)",
            self.shannon_entropy
        )?;
        writeln!(
            f,
            "  Min-Entropy H∞:  {:.3} bits/byte  (primary, MCV)",
            self.min_entropy
        )?;
        writeln!(
            f,
            "  Heuristic floor: {:.3} bits/byte  (diagnostic minimum)",
            self.heuristic_floor
        )?;
        writeln!(f, "  ─────────────────────────────────")?;
        writeln!(
            f,
            "  MCV:                 {:.3}  (p_upper={:.4})",
            self.mcv_estimate, self.mcv_p_upper
        )?;
        writeln!(f, "  Collision (diag):    {:.3}", self.collision_estimate)?;
        writeln!(f, "  Markov (diag):       {:.3}", self.markov_estimate)?;
        writeln!(
            f,
            "  Compression (diag):  {:.3}  (Maurer-inspired)",
            self.compression_estimate
        )?;
        writeln!(f, "  t-Tuple (diag):      {:.3}", self.t_tuple_estimate)?;
        Ok(())
    }
}
638
639/// Quick min-entropy estimate using only the MCV estimator (NIST SP 800-90B 6.3.1).
640///
641/// This is the fast path used by the entropy pool and TUI for per-collection
642/// health checks. It uses only the Most Common Value estimator — the most
643/// well-established and computationally cheap NIST estimator (O(n) single pass).
644///
645/// For a full multi-estimator breakdown, use [`min_entropy_estimate`] instead.
646pub fn quick_min_entropy(data: &[u8]) -> f64 {
647    mcv_estimate(data).0
648}
649
/// Shannon entropy of a byte slice in bits/byte (0.0 for empty input).
pub fn quick_shannon(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut histogram = [0u64; 256];
    for &value in data {
        histogram[value as usize] += 1;
    }
    let total = data.len() as f64;
    histogram
        .iter()
        .filter(|&&c| c > 0)
        .map(|&c| {
            let p = c as f64 / total;
            -p * p.log2()
        })
        .sum()
}
669
/// Lag-1 autocorrelation of a byte stream, r ∈ [-1, 1].
///
/// Values near 0 indicate no serial correlation (good for entropy); values
/// near ±1 mean consecutive samples strongly predict each other (bad).
///
/// Uses the biased (population) ACF estimator — denominator n, not n-1 —
/// i.e. the Box-Jenkins convention, which keeps the autocorrelation function
/// positive semi-definite. O(n), suitable for hot-path use. Near-constant
/// input (variance < 1e-10) and inputs shorter than 2 bytes report 0.0.
pub fn quick_autocorrelation_lag1(data: &[u8]) -> f64 {
    if data.len() < 2 {
        return 0.0;
    }
    let n = data.len();
    let samples: Vec<f64> = data.iter().map(|&b| f64::from(b)).collect();
    let mean = samples.iter().sum::<f64>() / n as f64;
    let variance = samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n as f64;
    if variance < 1e-10 {
        return 0.0;
    }
    let lag_sum: f64 = samples
        .windows(2)
        .map(|w| (w[0] - mean) * (w[1] - mean))
        .sum();
    // Biased ACF: normalize by n * variance (variance's own denominator).
    lag_sum / (n as f64 * variance)
}
700
/// Grade a source from its min-entropy (H∞) value.
///
/// This is the **single source of truth** for entropy grading: CLI commands,
/// server endpoints, and reports must call this rather than duplicating the
/// thresholds. Non-finite input (e.g. NaN) fails every guard and grades 'F'.
///
/// | Grade | Min-Entropy (H∞) |
/// |-------|-------------------|
/// | A     | ≥ 6.0             |
/// | B     | ≥ 4.0             |
/// | C     | ≥ 2.0             |
/// | D     | ≥ 1.0             |
/// | F     | < 1.0             |
pub fn grade_min_entropy(min_entropy: f64) -> char {
    match min_entropy {
        h if h >= 6.0 => 'A',
        h if h >= 4.0 => 'B',
        h if h >= 2.0 => 'C',
        h if h >= 1.0 => 'D',
        _ => 'F',
    }
}
727
728/// Quick quality assessment.
729pub fn quick_quality(data: &[u8]) -> QualityReport {
730    if data.len() < 16 {
731        return QualityReport {
732            samples: data.len(),
733            unique_values: 0,
734            shannon_entropy: 0.0,
735            compression_ratio: 0.0,
736            quality_score: 0.0,
737            grade: 'F',
738        };
739    }
740
741    let shannon = quick_shannon(data);
742
743    // Compression ratio — silenced errors are intentional: if compression
744    // fails, comp_ratio = 0 and the score degrades gracefully (loses the
745    // 20% compression component).
746    use flate2::Compression;
747    use flate2::write::ZlibEncoder;
748    use std::io::Write;
749    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best());
750    encoder.write_all(data).unwrap_or_default();
751    let compressed = encoder.finish().unwrap_or_default();
752    let comp_ratio = compressed.len() as f64 / data.len() as f64;
753
754    // Unique values
755    let mut seen = [false; 256];
756    for &b in data {
757        seen[b as usize] = true;
758    }
759    let unique = seen.iter().filter(|&&s| s).count();
760
761    let eff = shannon / 8.0;
762    let score = eff * 60.0 + comp_ratio.min(1.0) * 20.0 + (unique as f64 / 256.0).min(1.0) * 20.0;
763    let grade = if score >= 80.0 {
764        'A'
765    } else if score >= 60.0 {
766        'B'
767    } else if score >= 40.0 {
768        'C'
769    } else if score >= 20.0 {
770        'D'
771    } else {
772        'F'
773    };
774
775    QualityReport {
776        samples: data.len(),
777        unique_values: unique,
778        shannon_entropy: shannon,
779        compression_ratio: comp_ratio,
780        quality_score: score,
781        grade,
782    }
783}
784
/// Heuristic quality summary produced by [`quick_quality`].
#[derive(Debug, Clone)]
pub struct QualityReport {
    /// Number of bytes analyzed.
    pub samples: usize,
    /// Count of distinct byte values observed (0..=256).
    pub unique_values: usize,
    /// Shannon entropy in bits/byte (max 8.0).
    pub shannon_entropy: f64,
    /// zlib compressed-size / original-size ratio (near 1.0 ⇒ incompressible).
    pub compression_ratio: f64,
    /// Weighted composite score, 0-100.
    pub quality_score: f64,
    /// Letter grade 'A'-'F' derived from `quality_score` in 20-point bands.
    pub grade: char,
}
794
795#[cfg(test)]
796mod tests {
797    use super::*;
798
799    // -----------------------------------------------------------------------
800    // Conditioning mode tests
801    // -----------------------------------------------------------------------
802
803    #[test]
804    fn test_condition_raw_passthrough() {
805        let data = vec![1, 2, 3, 4, 5];
806        let out = condition(&data, 3, ConditioningMode::Raw);
807        assert_eq!(out, vec![1, 2, 3]);
808    }
809
810    #[test]
811    fn test_condition_raw_exact_length() {
812        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
813        let out = condition(&data, 100, ConditioningMode::Raw);
814        assert_eq!(out, data);
815    }
816
817    #[test]
818    fn test_condition_raw_truncates() {
819        let data: Vec<u8> = (0..100).map(|i| i as u8).collect();
820        let out = condition(&data, 50, ConditioningMode::Raw);
821        assert_eq!(out.len(), 50);
822        assert_eq!(out, &data[..50]);
823    }
824
825    #[test]
826    fn test_condition_sha256_produces_exact_length() {
827        let data = vec![42u8; 100];
828        for len in [1, 16, 32, 64, 100, 256] {
829            let out = condition(&data, len, ConditioningMode::Sha256);
830            assert_eq!(out.len(), len, "SHA256 should produce exactly {len} bytes");
831        }
832    }
833
834    #[test]
835    fn test_sha256_deterministic() {
836        let data = vec![42u8; 100];
837        let out1 = sha256_condition_bytes(&data, 64);
838        let out2 = sha256_condition_bytes(&data, 64);
839        assert_eq!(
840            out1, out2,
841            "SHA256 conditioning should be deterministic for same input"
842        );
843    }
844
845    #[test]
846    fn test_sha256_different_inputs_differ() {
847        let data1 = vec![1u8; 100];
848        let data2 = vec![2u8; 100];
849        let out1 = sha256_condition_bytes(&data1, 32);
850        let out2 = sha256_condition_bytes(&data2, 32);
851        assert_ne!(out1, out2);
852    }
853
854    #[test]
855    fn test_sha256_empty_input() {
856        let out = sha256_condition_bytes(&[], 32);
857        assert!(out.is_empty(), "Empty input should produce no output");
858    }
859
860    #[test]
861    fn test_von_neumann_reduces_size() {
862        let input = vec![0b10101010u8; 128];
863        let output = von_neumann_debias(&input);
864        assert!(output.len() < input.len());
865    }
866
867    #[test]
868    fn test_von_neumann_known_output() {
869        // Input: 0b10_10_10_10 = pairs (1,0)(1,0)(1,0)(1,0)
870        // Von Neumann: (1,0) -> 1, repeated 4 times = 4 bits = 1111 per byte
871        // But we need 8 bits for one output byte.
872        // Two input bytes = 8 pairs of bits -> each (1,0) -> 1, so 8 bits -> 0b11111111
873        let input = vec![0b10101010u8; 2];
874        let output = von_neumann_debias(&input);
875        assert_eq!(output.len(), 1);
876        assert_eq!(output[0], 0b11111111);
877    }
878
879    #[test]
880    fn test_von_neumann_alternating_01() {
881        // Input: 0b01_01_01_01 = pairs (0,1)(0,1)(0,1)(0,1)
882        // Von Neumann: (0,1) -> 0, repeated 4 times per byte
883        // Two input bytes = 8 pairs -> 8 zero bits -> 0b00000000
884        let input = vec![0b01010101u8; 2];
885        let output = von_neumann_debias(&input);
886        assert_eq!(output.len(), 1);
887        assert_eq!(output[0], 0b00000000);
888    }
889
890    #[test]
891    fn test_von_neumann_all_same_discards() {
892        // Input: all 0xFF = pairs (1,1)(1,1)... -> all discarded
893        let input = vec![0xFF; 100];
894        let output = von_neumann_debias(&input);
895        assert!(output.is_empty(), "All-ones should produce no output");
896    }
897
898    #[test]
899    fn test_von_neumann_all_zeros_discards() {
900        // Input: all 0x00 = pairs (0,0)(0,0)... -> all discarded
901        let input = vec![0x00; 100];
902        let output = von_neumann_debias(&input);
903        assert!(output.is_empty(), "All-zeros should produce no output");
904    }
905
906    #[test]
907    fn test_condition_modes_differ() {
908        let data: Vec<u8> = (0..256).map(|i| i as u8).collect();
909        let raw = condition(&data, 64, ConditioningMode::Raw);
910        let sha = condition(&data, 64, ConditioningMode::Sha256);
911        assert_ne!(raw, sha);
912    }
913
914    #[test]
915    fn test_conditioning_mode_display() {
916        assert_eq!(ConditioningMode::Raw.to_string(), "raw");
917        assert_eq!(ConditioningMode::VonNeumann.to_string(), "von_neumann");
918        assert_eq!(ConditioningMode::Sha256.to_string(), "sha256");
919    }
920
921    #[test]
922    fn test_conditioning_mode_default() {
923        assert_eq!(ConditioningMode::default(), ConditioningMode::Sha256);
924    }
925
926    // -----------------------------------------------------------------------
927    // XOR fold tests
928    // -----------------------------------------------------------------------
929
930    #[test]
931    fn test_xor_fold_basic() {
932        let data = vec![0xFF, 0x00, 0xAA, 0x55];
933        let folded = xor_fold(&data);
934        assert_eq!(folded.len(), 2);
935        assert_eq!(folded[0], 0xFF ^ 0xAA);
936        assert_eq!(folded[1], 0x55);
937    }
938
939    #[test]
940    fn test_xor_fold_single_byte() {
941        let data = vec![42];
942        let folded = xor_fold(&data);
943        assert_eq!(folded, vec![42]);
944    }
945
946    #[test]
947    fn test_xor_fold_empty() {
948        let folded = xor_fold(&[]);
949        assert!(folded.is_empty());
950    }
951
952    #[test]
953    fn test_xor_fold_odd_length() {
954        // With 5 bytes, half=2, so XOR data[0..2] with data[2..4],
955        // then XOR the trailing byte (5) into the last output byte.
956        let data = vec![1, 2, 3, 4, 5];
957        let folded = xor_fold(&data);
958        assert_eq!(folded.len(), 2);
959        assert_eq!(folded[0], 1 ^ 3);
960        assert_eq!(folded[1], (2 ^ 4) ^ 5);
961    }
962
963    // -----------------------------------------------------------------------
964    // Shannon entropy tests
965    // -----------------------------------------------------------------------
966
967    #[test]
968    fn test_shannon_empty() {
969        assert_eq!(quick_shannon(&[]), 0.0);
970    }
971
972    #[test]
973    fn test_shannon_single_byte() {
974        // One byte = one value, p=1.0, H = -1.0 * log2(1.0) = 0.0
975        assert_eq!(quick_shannon(&[42]), 0.0);
976    }
977
978    #[test]
979    fn test_shannon_all_same() {
980        let data = vec![0u8; 1000];
981        assert_eq!(quick_shannon(&data), 0.0);
982    }
983
984    #[test]
985    fn test_shannon_two_values_equal() {
986        // 50/50 split between two values = 1.0 bits
987        let mut data = vec![0u8; 500];
988        data.extend(vec![1u8; 500]);
989        let h = quick_shannon(&data);
990        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
991    }
992
993    #[test]
994    fn test_shannon_uniform_256() {
995        // Perfectly uniform over 256 values = 8.0 bits
996        let data: Vec<u8> = (0..=255).collect();
997        let h = quick_shannon(&data);
998        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
999    }
1000
1001    #[test]
1002    fn test_shannon_uniform_large() {
1003        // Large uniform sample — each value appears ~40 times
1004        let mut data = Vec::with_capacity(256 * 40);
1005        for _ in 0..40 {
1006            for b in 0..=255u8 {
1007                data.push(b);
1008            }
1009        }
1010        let h = quick_shannon(&data);
1011        assert!((h - 8.0).abs() < 0.01, "Expected ~8.0, got {h}");
1012    }
1013
1014    // -----------------------------------------------------------------------
1015    // Min-entropy estimator tests
1016    // -----------------------------------------------------------------------
1017
1018    #[test]
1019    fn test_min_entropy_empty() {
1020        assert_eq!(min_entropy(&[]), 0.0);
1021    }
1022
1023    #[test]
1024    fn test_min_entropy_all_same() {
1025        let data = vec![42u8; 1000];
1026        let h = min_entropy(&data);
1027        assert!(h < 0.01, "All-same should have ~0 min-entropy, got {h}");
1028    }
1029
1030    #[test]
1031    fn test_min_entropy_uniform() {
1032        let mut data = Vec::with_capacity(256 * 40);
1033        for _ in 0..40 {
1034            for b in 0..=255u8 {
1035                data.push(b);
1036            }
1037        }
1038        let h = min_entropy(&data);
1039        assert!(
1040            (h - 8.0).abs() < 0.1,
1041            "Uniform should have ~8.0 min-entropy, got {h}"
1042        );
1043    }
1044
1045    #[test]
1046    fn test_min_entropy_two_values() {
1047        let mut data = vec![0u8; 500];
1048        data.extend(vec![1u8; 500]);
1049        let h = min_entropy(&data);
1050        // p_max = 0.5, H∞ = -log2(0.5) = 1.0
1051        assert!((h - 1.0).abs() < 0.01, "Expected ~1.0, got {h}");
1052    }
1053
1054    #[test]
1055    fn test_min_entropy_biased() {
1056        // 90% value 0, 10% value 1: p_max=0.9, H∞ = -log2(0.9) ≈ 0.152
1057        let mut data = vec![0u8; 900];
1058        data.extend(vec![1u8; 100]);
1059        let h = min_entropy(&data);
1060        let expected = -(0.9f64.log2());
1061        assert!(
1062            (h - expected).abs() < 0.02,
1063            "Expected ~{expected:.3}, got {h}"
1064        );
1065    }
1066
1067    // -----------------------------------------------------------------------
1068    // MCV estimator tests
1069    // -----------------------------------------------------------------------
1070
1071    #[test]
1072    fn test_mcv_empty() {
1073        let (h, p) = mcv_estimate(&[]);
1074        assert_eq!(h, 0.0);
1075        assert_eq!(p, 1.0);
1076    }
1077
1078    #[test]
1079    fn test_mcv_all_same() {
1080        let data = vec![42u8; 1000];
1081        let (h, p_upper) = mcv_estimate(&data);
1082        assert!(h < 0.1, "All-same should have ~0 MCV entropy, got {h}");
1083        assert!((p_upper - 1.0).abs() < 0.01);
1084    }
1085
1086    #[test]
1087    fn test_mcv_uniform() {
1088        let mut data = Vec::with_capacity(256 * 100);
1089        for _ in 0..100 {
1090            for b in 0..=255u8 {
1091                data.push(b);
1092            }
1093        }
1094        let (h, _p_upper) = mcv_estimate(&data);
1095        assert!(h > 7.0, "Uniform should have high MCV entropy, got {h}");
1096    }
1097
1098    // -----------------------------------------------------------------------
1099    // Collision estimator tests
1100    // -----------------------------------------------------------------------
1101
1102    #[test]
1103    fn test_collision_too_short() {
1104        assert_eq!(collision_estimate(&[1, 2]), 0.0);
1105    }
1106
1107    #[test]
1108    fn test_collision_all_same() {
1109        let data = vec![0u8; 1000];
1110        let h = collision_estimate(&data);
1111        // All same -> every adjacent pair is a collision -> mean distance = 1
1112        // -> p_max = 1.0 -> H = 0
1113        assert!(
1114            h < 1.0,
1115            "All-same should have very low collision entropy, got {h}"
1116        );
1117    }
1118
1119    #[test]
1120    fn test_collision_uniform_large() {
1121        let mut data = Vec::with_capacity(256 * 100);
1122        for _ in 0..100 {
1123            for b in 0..=255u8 {
1124                data.push(b);
1125            }
1126        }
1127        let h = collision_estimate(&data);
1128        assert!(
1129            h > 3.0,
1130            "Uniform should have reasonable collision entropy, got {h}"
1131        );
1132    }
1133
1134    // -----------------------------------------------------------------------
1135    // Markov estimator tests
1136    // -----------------------------------------------------------------------
1137
1138    #[test]
1139    fn test_markov_too_short() {
1140        assert_eq!(markov_estimate(&[42]), 0.0);
1141    }
1142
1143    #[test]
1144    fn test_markov_all_same() {
1145        let data = vec![0u8; 1000];
1146        let h = markov_estimate(&data);
1147        assert!(h < 1.0, "All-same should have low Markov entropy, got {h}");
1148    }
1149
1150    #[test]
1151    fn test_markov_uniform_large() {
1152        // Byte-level Markov estimator finds the max transition probability across
1153        // all 256x256 = 65536 transitions. With ~25600 samples, the transition
1154        // matrix is very sparse (~0.4 counts per cell on average). Some cells will
1155        // get a disproportionate share by chance, making p_max high.
1156        //
1157        // This is the correct, expected behavior: the Markov estimator is inherently
1158        // conservative with small sample sizes relative to the state space.
1159        // With truly uniform IID data you'd need ~1M+ samples for the Markov
1160        // estimate to converge near 8.0.
1161        //
1162        // We verify it's meaningfully above zero (all-same baseline).
1163        let mut data = Vec::with_capacity(256 * 100);
1164        for i in 0..(256 * 100) {
1165            let v = ((i as u64)
1166                .wrapping_mul(6364136223846793005)
1167                .wrapping_add(1442695040888963407)
1168                >> 56) as u8;
1169            data.push(v);
1170        }
1171        let h = markov_estimate(&data);
1172        assert!(
1173            h > 0.1,
1174            "Pseudo-random should have Markov entropy > 0.1, got {h}"
1175        );
1176    }
1177
1178    // -----------------------------------------------------------------------
1179    // Compression estimator tests
1180    // -----------------------------------------------------------------------
1181
1182    #[test]
1183    fn test_compression_too_short() {
1184        assert_eq!(compression_estimate(&[1; 50]), 0.0);
1185    }
1186
1187    #[test]
1188    fn test_compression_all_same() {
1189        let data = vec![0u8; 1000];
1190        let h = compression_estimate(&data);
1191        assert!(
1192            h < 2.0,
1193            "All-same should have low compression entropy, got {h}"
1194        );
1195    }
1196
1197    #[test]
1198    fn test_compression_uniform_large() {
1199        let mut data = Vec::with_capacity(256 * 100);
1200        for _ in 0..100 {
1201            for b in 0..=255u8 {
1202                data.push(b);
1203            }
1204        }
1205        let h = compression_estimate(&data);
1206        assert!(
1207            h > 4.0,
1208            "Uniform should have reasonable compression entropy, got {h}"
1209        );
1210    }
1211
1212    // -----------------------------------------------------------------------
1213    // t-Tuple estimator tests
1214    // -----------------------------------------------------------------------
1215
1216    #[test]
1217    fn test_t_tuple_too_short() {
1218        assert_eq!(t_tuple_estimate(&[1; 10]), 0.0);
1219    }
1220
1221    #[test]
1222    fn test_t_tuple_all_same() {
1223        let data = vec![0u8; 1000];
1224        let h = t_tuple_estimate(&data);
1225        assert!(h < 0.1, "All-same should have ~0 t-tuple entropy, got {h}");
1226    }
1227
1228    #[test]
1229    fn test_t_tuple_uniform_large() {
1230        // t-Tuple estimator finds the most frequent t-length tuple and computes
1231        // -log2(p_max)/t. For t>1, pseudo-random data with sequential correlation
1232        // may show elevated tuple frequencies. We verify the result is well above
1233        // the all-same baseline (~0).
1234        let mut data = Vec::with_capacity(256 * 100);
1235        for i in 0..(256 * 100) {
1236            let v = ((i as u64)
1237                .wrapping_mul(6364136223846793005)
1238                .wrapping_add(1442695040888963407)
1239                >> 56) as u8;
1240            data.push(v);
1241        }
1242        let h = t_tuple_estimate(&data);
1243        assert!(
1244            h > 2.5,
1245            "Pseudo-random should have t-tuple entropy > 2.5, got {h}"
1246        );
1247    }
1248
1249    // -----------------------------------------------------------------------
1250    // Combined min-entropy report tests
1251    // -----------------------------------------------------------------------
1252
1253    #[test]
1254    fn test_min_entropy_estimate_all_same() {
1255        let data = vec![0u8; 1000];
1256        let report = min_entropy_estimate(&data);
1257        assert!(
1258            report.min_entropy < 1.0,
1259            "All-same combined estimate: {}",
1260            report.min_entropy
1261        );
1262        assert!(report.shannon_entropy < 0.01);
1263        assert_eq!(report.samples, 1000);
1264    }
1265
1266    #[test]
1267    fn test_min_entropy_estimate_uniform() {
1268        // Primary min-entropy is MCV-based; heuristic floor remains available
1269        // as an additional diagnostic view.
1270        let mut data = Vec::with_capacity(256 * 100);
1271        for i in 0..(256 * 100) {
1272            let v = ((i as u64)
1273                .wrapping_mul(6364136223846793005)
1274                .wrapping_add(1442695040888963407)
1275                >> 56) as u8;
1276            data.push(v);
1277        }
1278        let report = min_entropy_estimate(&data);
1279        assert!(
1280            report.min_entropy > 6.0,
1281            "Primary min-entropy should be high for uniform marginals: {}",
1282            report.min_entropy
1283        );
1284        assert!(
1285            report.shannon_entropy > 7.9,
1286            "Shannon should be near 8.0 for uniform marginals: {}",
1287            report.shannon_entropy
1288        );
1289        // MCV should be close to 8.0 for uniform-ish data
1290        assert!(
1291            report.mcv_estimate > 6.0,
1292            "MCV should be high for uniform data: {}",
1293            report.mcv_estimate
1294        );
1295        assert!(
1296            report.heuristic_floor <= report.min_entropy + 1e-9,
1297            "heuristic floor should not exceed primary min-entropy"
1298        );
1299    }
1300
1301    #[test]
1302    fn test_min_entropy_report_display() {
1303        let data = vec![0u8; 1000];
1304        let report = min_entropy_estimate(&data);
1305        let s = format!("{report}");
1306        assert!(s.contains("Min-Entropy Analysis"));
1307        assert!(s.contains("1000 samples"));
1308    }
1309
1310    #[test]
1311    fn test_quick_min_entropy_uses_mcv() {
1312        let data: Vec<u8> = (0..=255).collect();
1313        let quick = quick_min_entropy(&data);
1314        let (mcv_h, _) = mcv_estimate(&data);
1315        // quick_min_entropy uses MCV only — should match exactly
1316        assert!(
1317            (quick - mcv_h).abs() < f64::EPSILON,
1318            "quick_min_entropy ({quick}) should equal MCV estimate ({mcv_h})"
1319        );
1320    }
1321
1322    #[test]
1323    fn test_quick_min_entropy_leq_shannon() {
1324        // Min-entropy should always be <= Shannon entropy
1325        let data: Vec<u8> = (0..=255).cycle().take(2560).collect();
1326        let quick = quick_min_entropy(&data);
1327        let shannon = quick_shannon(&data);
1328        assert!(
1329            quick <= shannon + 0.01,
1330            "H∞ ({quick}) should be <= Shannon ({shannon})"
1331        );
1332    }
1333
1334    // -----------------------------------------------------------------------
1335    // Quality report tests
1336    // -----------------------------------------------------------------------
1337
1338    #[test]
1339    fn test_quality_too_short() {
1340        let q = quick_quality(&[1, 2, 3]);
1341        assert_eq!(q.grade, 'F');
1342        assert_eq!(q.quality_score, 0.0);
1343    }
1344
1345    #[test]
1346    fn test_quality_all_same() {
1347        let data = vec![0u8; 1000];
1348        let q = quick_quality(&data);
1349        assert!(
1350            q.grade == 'F' || q.grade == 'D',
1351            "All-same should grade poorly, got {}",
1352            q.grade
1353        );
1354        assert_eq!(q.unique_values, 1);
1355        assert!(q.shannon_entropy < 0.01);
1356    }
1357
1358    #[test]
1359    fn test_quality_uniform() {
1360        let mut data = Vec::with_capacity(256 * 40);
1361        for _ in 0..40 {
1362            for b in 0..=255u8 {
1363                data.push(b);
1364            }
1365        }
1366        let q = quick_quality(&data);
1367        assert!(
1368            q.grade == 'A' || q.grade == 'B',
1369            "Uniform should grade well, got {}",
1370            q.grade
1371        );
1372        assert_eq!(q.unique_values, 256);
1373        assert!(q.shannon_entropy > 7.9);
1374    }
1375
1376    // -----------------------------------------------------------------------
1377    // grade_min_entropy tests
1378    // -----------------------------------------------------------------------
1379
1380    #[test]
1381    fn test_grade_boundaries() {
1382        assert_eq!(grade_min_entropy(8.0), 'A');
1383        assert_eq!(grade_min_entropy(6.0), 'A');
1384        assert_eq!(grade_min_entropy(5.99), 'B');
1385        assert_eq!(grade_min_entropy(4.0), 'B');
1386        assert_eq!(grade_min_entropy(3.99), 'C');
1387        assert_eq!(grade_min_entropy(2.0), 'C');
1388        assert_eq!(grade_min_entropy(1.99), 'D');
1389        assert_eq!(grade_min_entropy(1.0), 'D');
1390        assert_eq!(grade_min_entropy(0.99), 'F');
1391        assert_eq!(grade_min_entropy(0.0), 'F');
1392    }
1393
1394    #[test]
1395    fn test_grade_negative() {
1396        assert_eq!(grade_min_entropy(-1.0), 'F');
1397    }
1398}