Skip to main content

openentropy_core/sources/
compression.rs

//! Compression and hash timing entropy sources.
//!
//! These sources exploit data-dependent branch prediction behaviour and
//! micro-architectural side-effects to extract timing entropy from
//! compression (zlib) and hashing (SHA-256) operations.
//!
//! **Raw output characteristics:** XOR-folded timing deltas between successive
//! operations. The timing jitter is driven by branch predictor state,
//! cache contention, and pipeline hazards.
//!
//! Note: `HashTimingSource` uses SHA-256 as its *workload* (the thing being
//! timed) — this is NOT conditioning. The entropy comes from the timing
//! variation, not from the hash output.
14
15use std::io::Write;
16use std::time::Instant;
17
18use flate2::Compression;
19use flate2::write::ZlibEncoder;
20use sha2::{Digest, Sha256};
21
22use crate::source::{EntropySource, Platform, SourceCategory, SourceInfo};
23
24use super::helpers::{extract_timing_entropy, mach_time};
25
26// ---------------------------------------------------------------------------
27// CompressionTimingSource
28// ---------------------------------------------------------------------------
29
30static COMPRESSION_TIMING_INFO: SourceInfo = SourceInfo {
31    name: "compression_timing",
32    description: "Zlib compression timing jitter from data-dependent branch prediction",
33    physics: "Compresses varying data with zlib and measures per-operation timing. \
34              Compression algorithms have heavily data-dependent branches (Huffman tree \
35              traversal, LZ77 match finding). The CPU\u{2019}s branch predictor state from \
36              ALL running code affects prediction accuracy for these branches. Pipeline \
37              stalls from mispredictions create timing variation.",
38    category: SourceCategory::Signal,
39    platform: Platform::Any,
40    requirements: &[],
41    entropy_rate_estimate: 1800.0,
42    composite: false,
43};
44
45/// Entropy source that harvests timing jitter from zlib compression.
46pub struct CompressionTimingSource;
47
48impl EntropySource for CompressionTimingSource {
49    fn info(&self) -> &SourceInfo {
50        &COMPRESSION_TIMING_INFO
51    }
52
53    fn is_available(&self) -> bool {
54        true
55    }
56
57    fn collect(&self, n_samples: usize) -> Vec<u8> {
58        // 4x oversampling for better XOR-fold quality.
59        let raw_count = n_samples * 4 + 64;
60        let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
61
62        // Seed from high-resolution timer for per-call variation.
63        let mut lcg: u64 = mach_time() | 1;
64
65        for i in 0..raw_count {
66            // Vary data size (128-512 bytes) to create more timing diversity.
67            let data_len = 128 + (lcg as usize % 385);
68            let mut data = vec![0u8; data_len];
69
70            // First third: pseudo-random
71            let third = data_len / 3;
72            for byte in data[..third].iter_mut() {
73                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
74                *byte = (lcg >> 32) as u8;
75            }
76
77            // Middle third: repeating pattern (highly compressible)
78            for (j, byte) in data[third..third * 2].iter_mut().enumerate() {
79                *byte = (j % 4) as u8;
80            }
81
82            // Last third: more pseudo-random
83            for byte in data[third * 2..].iter_mut() {
84                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(i as u64);
85                *byte = (lcg >> 32) as u8;
86            }
87
88            let t0 = Instant::now();
89            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
90            let _ = encoder.write_all(&data);
91            let _ = encoder.finish();
92            let elapsed_ns = t0.elapsed().as_nanos() as u64;
93            timings.push(elapsed_ns);
94        }
95
96        extract_timing_entropy(&timings, n_samples)
97    }
98}
99
100// ---------------------------------------------------------------------------
101// HashTimingSource
102// ---------------------------------------------------------------------------
103
104static HASH_TIMING_INFO: SourceInfo = SourceInfo {
105    name: "hash_timing",
106    description: "SHA-256 hashing timing jitter from micro-architectural side effects",
107    physics: "SHA-256 hashes data of varying sizes and measures timing. While SHA-256 is \
108              algorithmically constant-time, the actual execution time varies due to: \
109              memory access patterns for the message schedule, cache line alignment, TLB \
110              state, and CPU frequency scaling. The timing also captures micro-architectural \
111              side effects from other processes.",
112    category: SourceCategory::Signal,
113    platform: Platform::Any,
114    requirements: &[],
115    entropy_rate_estimate: 2000.0,
116    composite: false,
117};
118
119/// Entropy source that harvests timing jitter from SHA-256 hashing.
120/// Note: SHA-256 is used as the *workload* being timed, not for conditioning.
121pub struct HashTimingSource;
122
123impl EntropySource for HashTimingSource {
124    fn info(&self) -> &SourceInfo {
125        &HASH_TIMING_INFO
126    }
127
128    fn is_available(&self) -> bool {
129        true
130    }
131
132    fn collect(&self, n_samples: usize) -> Vec<u8> {
133        // 4x oversampling for better XOR-fold quality after delta computation.
134        let raw_count = n_samples * 4 + 64;
135        let mut timings: Vec<u64> = Vec::with_capacity(raw_count);
136
137        // Seed from high-resolution timer for per-call variation.
138        let mut lcg: u64 = mach_time() | 1;
139
140        for i in 0..raw_count {
141            // Wider range of sizes (32-2048 bytes) to create more timing diversity.
142            let size = 32 + (lcg as usize % 2017);
143            let mut data = Vec::with_capacity(size);
144            for _ in 0..size {
145                lcg = lcg.wrapping_mul(6364136223846793005).wrapping_add(1);
146                data.push((lcg >> 32) as u8);
147            }
148
149            // SHA-256 is the WORKLOAD being timed — not conditioning.
150            // Hash multiple rounds for smaller inputs to amplify timing variation.
151            let rounds = if size < 256 { 3 } else { 1 };
152            let t0 = Instant::now();
153            for _ in 0..rounds {
154                let mut hasher = Sha256::new();
155                hasher.update(&data);
156                let digest = hasher.finalize();
157                std::hint::black_box(&digest);
158                // Feed digest back as additional data to prevent loop elision
159                if let Some(b) = data.last_mut() {
160                    *b ^= digest[i % 32];
161                }
162            }
163            let elapsed_ns = t0.elapsed().as_nanos() as u64;
164            timings.push(elapsed_ns);
165        }
166
167        extract_timing_entropy(&timings, n_samples)
168    }
169}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// The compression source reports the expected static metadata.
    #[test]
    fn compression_timing_info() {
        let source = CompressionTimingSource;
        let info = source.info();
        assert_eq!(source.name(), "compression_timing");
        assert_eq!(info.category, SourceCategory::Signal);
        assert!(!info.composite);
    }

    /// The hash source reports the expected static metadata.
    #[test]
    fn hash_timing_info() {
        let source = HashTimingSource;
        let info = source.info();
        assert_eq!(source.name(), "hash_timing");
        assert_eq!(info.category, SourceCategory::Signal);
        assert!(!info.composite);
    }

    /// Collecting from the compression source yields a non-empty buffer.
    #[test]
    #[ignore] // Run with: cargo test -- --ignored
    fn compression_timing_collects_bytes() {
        let source = CompressionTimingSource;
        assert!(source.is_available());
        let bytes = source.collect(64);
        assert!(!bytes.is_empty());
    }

    /// Collecting from the hash source yields a non-empty buffer.
    #[test]
    #[ignore] // Run with: cargo test -- --ignored
    fn hash_timing_collects_bytes() {
        let source = HashTimingSource;
        assert!(source.is_available());
        let bytes = source.collect(64);
        assert!(!bytes.is_empty());
    }
}