openentropy_core/sources/compression.rs

//! Compression and hash timing entropy sources.
//!
//! These sources exploit data-dependent branch prediction behaviour and
//! micro-architectural side-effects to extract timing entropy from
//! compression (zlib) and hashing (SHA-256) operations.
//!
//! **Raw output characteristics:** XOR-folded timing deltas between successive
//! operations. Shannon entropy ~5-7 bits/byte. The timing jitter is driven
//! by branch predictor state, cache contention, and pipeline hazards.
//!
//! Note: HashTimingSource uses SHA-256 as its *workload* (the thing being
//! timed) — this is NOT conditioning. The entropy comes from the timing
//! variation, not from the hash output.

use std::io::Write;
use std::time::Instant;

use flate2::Compression;
use flate2::write::ZlibEncoder;
use sha2::{Digest, Sha256};

use crate::source::{EntropySource, SourceCategory, SourceInfo};

use super::helpers::{extract_timing_entropy, mach_time};

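// Illustrative only: per the module docs the raw output is XOR-folded timing
// deltas, and `extract_timing_entropy` (in super::helpers) is presumably where
// that fold happens. A minimal sketch of the idea (assumed signature and fold
// strategy, not the actual helper):
//
//     fn xor_fold_deltas(timings: &[u64], n_out: usize) -> Vec<u8> {
//         // Deltas between successive timings carry the jitter; the absolute
//         // values mostly reflect workload size.
//         let deltas: Vec<u64> = timings
//             .windows(2)
//             .map(|w| w[1].wrapping_sub(w[0]))
//             .collect();
//         let mut out = vec![0u8; n_out];
//         for (i, d) in deltas.iter().copied().enumerate() {
//             // Fold each 64-bit delta to one byte, then XOR-accumulate so
//             // every raw sample influences the output.
//             let byte = (0..8).fold(0u8, |acc, s| acc ^ (d >> (8 * s)) as u8);
//             out[i % n_out] ^= byte;
//         }
//         out
//     }
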
// ---------------------------------------------------------------------------
// CompressionTimingSource
// ---------------------------------------------------------------------------

static COMPRESSION_TIMING_INFO: SourceInfo = SourceInfo {
    name: "compression_timing",
    description: "Zlib compression timing jitter from data-dependent branch prediction",
    physics: "Compresses varying data with zlib and measures per-operation timing. \
              Compression algorithms have heavily data-dependent branches (Huffman tree \
              traversal, LZ77 match finding). The CPU\u{2019}s branch predictor state from \
              ALL running code affects prediction accuracy for these branches. Pipeline \
              stalls from mispredictions create timing variation.",
    category: SourceCategory::Novel,
    platform_requirements: &[],
    entropy_rate_estimate: 1800.0,
    composite: false,
};

/// Entropy source that harvests timing jitter from zlib compression.
pub struct CompressionTimingSource;

impl EntropySource for CompressionTimingSource {
    fn info(&self) -> &SourceInfo {
        &COMPRESSION_TIMING_INFO
    }

    fn is_available(&self) -> bool {
        true
    }

    fn collect(&self, n_samples: usize) -> Vec<u8> {
        // 4x oversampling for better XOR-fold quality.
        let raw_count = n_samples * 4 + 64;
        let mut timings: Vec<u64> = Vec::with_capacity(raw_count);

        // Seed from high-resolution timer for per-call variation.
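        // (`| 1` guarantees a nonzero, odd seed for the workload LCG.)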
        let mut lcg: u64 = mach_time() | 1;

        for i in 0..raw_count {
            // Vary data size (128-512 bytes) to create more timing diversity.
            let data_len = 128 + (lcg as usize % 385);
            let mut data = vec![0u8; data_len];

            // First third: pseudo-random
            let third = data_len / 3;
            for byte in data[..third].iter_mut() {
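                // 6364136223846793005 is Knuth's MMIX LCG multiplier; the LCG
                // only shapes the workload bytes and carries no entropy claim.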
                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
                *byte = (lcg >> 32) as u8;
            }

            // Middle third: repeating pattern (highly compressible)
            for (j, byte) in data[third..third * 2].iter_mut().enumerate() {
                *byte = (j % 4) as u8;
            }

            // Last third: more pseudo-random
            for byte in data[third * 2..].iter_mut() {
                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(i as u64);
                *byte = (lcg >> 32) as u8;
            }

            let t0 = Instant::now();
            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
            let _ = encoder.write_all(&data);
            let _ = encoder.finish();
            let elapsed_ns = t0.elapsed().as_nanos() as u64;
            timings.push(elapsed_ns);
        }

        extract_timing_entropy(&timings, n_samples)
    }
}

// ---------------------------------------------------------------------------
// HashTimingSource
// ---------------------------------------------------------------------------

static HASH_TIMING_INFO: SourceInfo = SourceInfo {
    name: "hash_timing",
    description: "SHA-256 hashing timing jitter from micro-architectural side effects",
    physics: "Hashes data of varying sizes with SHA-256 and measures per-operation timing. \
              While SHA-256 is algorithmically constant-time, the actual execution time \
              varies due to: memory access patterns for the message schedule, cache line \
              alignment, TLB state, and CPU frequency scaling. The timing also captures \
              micro-architectural side effects from other processes.",
    category: SourceCategory::Novel,
    platform_requirements: &[],
    entropy_rate_estimate: 2000.0,
    composite: false,
};

/// Entropy source that harvests timing jitter from SHA-256 hashing.
/// Note: SHA-256 is used as the *workload* being timed, not for conditioning.
pub struct HashTimingSource;

impl EntropySource for HashTimingSource {
    fn info(&self) -> &SourceInfo {
        &HASH_TIMING_INFO
    }

    fn is_available(&self) -> bool {
        true
    }

    fn collect(&self, n_samples: usize) -> Vec<u8> {
        // 4x oversampling for better XOR-fold quality after delta computation.
        let raw_count = n_samples * 4 + 64;
        let mut timings: Vec<u64> = Vec::with_capacity(raw_count);

        // Seed from high-resolution timer for per-call variation.
        let mut lcg: u64 = mach_time() | 1;

        for i in 0..raw_count {
            // Wider range of sizes (32-2048 bytes) to create more timing diversity.
            let size = 32 + (lcg as usize % 2017);
            let mut data = Vec::with_capacity(size);
            for _ in 0..size {
                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
                data.push((lcg >> 32) as u8);
            }

            // SHA-256 is the WORKLOAD being timed — not conditioning.
            // Hash multiple rounds for smaller inputs to amplify timing variation.
            let rounds = if size < 256 { 3 } else { 1 };
            let t0 = Instant::now();
            for _ in 0..rounds {
                let mut hasher = Sha256::new();
                hasher.update(&data);
                let digest = hasher.finalize();
                std::hint::black_box(&digest);
                // Feed digest back as additional data to prevent loop elision
                if let Some(b) = data.last_mut() {
                    *b ^= digest[i % 32];
                }
            }
            let elapsed_ns = t0.elapsed().as_nanos() as u64;
            timings.push(elapsed_ns);
        }

        extract_timing_entropy(&timings, n_samples)
    }
}

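// Raw output from these sources is ~5-7 bits/byte (see module docs), so callers
// are expected to condition it before cryptographic use. A minimal sketch of one
// way a caller might do that with the `sha2` crate already used above; the
// `conditioned_block` helper is hypothetical, not an API of this module:
//
//     use sha2::{Digest, Sha256};
//
//     fn conditioned_block(src: &dyn EntropySource) -> [u8; 32] {
//         // Oversample the raw source, then compress it through SHA-256 so the
//         // output block is close to full entropy even if the raw bytes are not.
//         let raw = src.collect(256);
//         let digest = Sha256::digest(&raw);
//         let mut out = [0u8; 32];
//         out.copy_from_slice(&digest);
//         out
//     }
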
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[ignore] // Run with: cargo test -- --ignored
    fn compression_timing_collects_bytes() {
        let src = CompressionTimingSource;
        assert!(src.is_available());
        let data = src.collect(64);
        assert!(!data.is_empty());
    }

    #[test]
    #[ignore] // Run with: cargo test -- --ignored
    fn hash_timing_collects_bytes() {
        let src = HashTimingSource;
        assert!(src.is_available());
        let data = src.collect(64);
        assert!(!data.is_empty());
    }
}