Skip to main content

shadowforge_lib/domain/analysis/
mod.rs

1//! Capacity estimation and chi-square detectability analysis.
2//!
3//! Pure domain logic — no I/O, no file system, no async runtime.
4
5use crate::domain::types::{CoverMedia, CoverMediaKind, DetectabilityRisk, StegoTechnique};
6
/// Chi-square score (in dB) strictly above which [`classify_risk`] reports
/// `DetectabilityRisk::High`.
const HIGH_THRESHOLD_DB: f64 = -6.0;
/// Chi-square score (in dB) strictly above which [`classify_risk`] reports
/// `DetectabilityRisk::Medium` (unless the high threshold is also exceeded);
/// scores at or below this value are `DetectabilityRisk::Low`.
const MEDIUM_THRESHOLD_DB: f64 = -12.0;
10
11/// Classify detectability risk from a chi-square score in dB.
12#[must_use]
13pub fn classify_risk(chi_square_db: f64) -> DetectabilityRisk {
14    if chi_square_db > HIGH_THRESHOLD_DB {
15        DetectabilityRisk::High
16    } else if chi_square_db > MEDIUM_THRESHOLD_DB {
17        DetectabilityRisk::Medium
18    } else {
19        DetectabilityRisk::Low
20    }
21}
22
23/// Compute recommended max payload bytes for a given capacity and risk.
24#[must_use]
25pub const fn recommended_payload(capacity_bytes: u64, risk: DetectabilityRisk) -> u64 {
26    match risk {
27        DetectabilityRisk::Low => capacity_bytes / 2,
28        DetectabilityRisk::Medium => capacity_bytes / 4,
29        DetectabilityRisk::High => capacity_bytes / 8,
30    }
31}
32
33/// Estimate embedding capacity for a cover/technique pair.
34///
35/// Returns capacity in bytes.
36#[must_use]
37pub fn estimate_capacity(cover: &CoverMedia, technique: StegoTechnique) -> u64 {
38    match technique {
39        StegoTechnique::LsbImage => estimate_image_lsb_capacity(cover),
40        StegoTechnique::DctJpeg => estimate_jpeg_dct_capacity(cover),
41        StegoTechnique::Palette => estimate_palette_capacity(cover),
42        StegoTechnique::LsbAudio => estimate_audio_lsb_capacity(cover),
43        StegoTechnique::PhaseEncoding | StegoTechnique::EchoHiding => {
44            // Audio techniques: ~1 bit per segment
45            estimate_audio_lsb_capacity(cover) / 8
46        }
47        StegoTechnique::ZeroWidthText => estimate_text_capacity(cover),
48        StegoTechnique::PdfContentStream => estimate_pdf_content_capacity(cover),
49        StegoTechnique::PdfMetadata => estimate_pdf_metadata_capacity(cover),
50        StegoTechnique::CorpusSelection => {
51            // Corpus reuses LsbImage capacity of the matched cover
52            estimate_image_lsb_capacity(cover)
53        }
54        StegoTechnique::DualPayload => {
55            // Dual payload splits capacity in half
56            estimate_image_lsb_capacity(cover) / 2
57        }
58    }
59}
60
61/// Chi-square statistic on byte value distribution.
62///
63/// Measures how uniformly distributed the LSBs are. A perfectly random
64/// distribution scores low (close to 0 dB below expected).
65#[must_use]
66#[expect(
67    clippy::cast_precision_loss,
68    reason = "byte histogram counts are small enough for f64"
69)]
70pub fn chi_square_score(data: &[u8]) -> f64 {
71    if data.is_empty() {
72        return 0.0;
73    }
74
75    // Build byte histogram (256 bins)
76    let mut histogram = [0u64; 256];
77    for &b in data {
78        // usize::from(u8) is always 0..=255, histogram has 256 entries
79        #[expect(
80            clippy::indexing_slicing,
81            reason = "u8 index into [_; 256] cannot be out of bounds"
82        )]
83        {
84            histogram[usize::from(b)] = histogram[usize::from(b)].strict_add(1);
85        }
86    }
87
88    let expected = data.len() as f64 / 256.0;
89    if expected < f64::EPSILON {
90        return 0.0;
91    }
92
93    let chi_sq: f64 = histogram
94        .iter()
95        .map(|&count| {
96            let diff = count as f64 - expected;
97            (diff * diff) / expected
98        })
99        .sum();
100
101    // Convert to dB scale relative to expected (255 degrees of freedom)
102    let normalised = chi_sq / 255.0;
103    if normalised < f64::EPSILON {
104        -100.0 // Essentially undetectable
105    } else {
106        10.0 * normalised.log10()
107    }
108}
109
110// ─── Private capacity estimators ──────────────────────────────────────────────
111
112const fn estimate_image_lsb_capacity(cover: &CoverMedia) -> u64 {
113    match cover.kind {
114        CoverMediaKind::PngImage | CoverMediaKind::BmpImage => {
115            // ~1 bit per colour channel per pixel, 3 channels
116            // Rough estimate: data.len() / 8 (header overhead subtracted)
117            let usable = cover.data.len().saturating_sub(54); // BMP header ~54
118            (usable / 8) as u64
119        }
120        CoverMediaKind::GifImage => (cover.data.len().saturating_sub(128) / 16) as u64,
121        _ => 0,
122    }
123}
124
125fn estimate_jpeg_dct_capacity(cover: &CoverMedia) -> u64 {
126    if cover.kind != CoverMediaKind::JpegImage {
127        return 0;
128    }
129    // ~1 bit per nonzero AC coefficient; rough: data_len / 16
130    (cover.data.len() / 16) as u64
131}
132
133const fn estimate_palette_capacity(cover: &CoverMedia) -> u64 {
134    match cover.kind {
135        CoverMediaKind::GifImage | CoverMediaKind::PngImage => {
136            // ~1 bit per palette entry reorder
137            (cover.data.len().saturating_sub(128) / 32) as u64
138        }
139        _ => 0,
140    }
141}
142
143fn estimate_audio_lsb_capacity(cover: &CoverMedia) -> u64 {
144    if cover.kind != CoverMediaKind::WavAudio {
145        return 0;
146    }
147    // WAV: 1 bit per sample, 16-bit samples -> data/16 bytes
148    let usable = cover.data.len().saturating_sub(44); // WAV header ~44
149    (usable / 16) as u64
150}
151
152use unicode_segmentation::UnicodeSegmentation;
153
154fn estimate_text_capacity(cover: &CoverMedia) -> u64 {
155    if cover.kind != CoverMediaKind::PlainText {
156        return 0;
157    }
158    // ~2 bits per grapheme boundary (ZWJ/ZWNJ)
159    let text = String::from_utf8_lossy(&cover.data);
160    let grapheme_count = text.graphemes(true).count();
161    // 2 bits at each boundary = grapheme_count / 4 bytes
162    (grapheme_count / 4) as u64
163}
164
165fn estimate_pdf_content_capacity(cover: &CoverMedia) -> u64 {
166    if cover.kind != CoverMediaKind::PdfDocument {
167        return 0;
168    }
169    // Rough: 1 bit per content-stream byte, ~10% of PDF is content stream
170    (cover.data.len() / 80) as u64
171}
172
173const fn estimate_pdf_metadata_capacity(_cover: &CoverMedia) -> u64 {
174    // Metadata fields: limited capacity (~256 bytes typical)
175    256
176}
177
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use std::collections::HashMap;

    /// Build a zero-filled cover of `size` bytes with the given kind and no
    /// metadata.
    fn make_cover(kind: CoverMediaKind, size: usize) -> CoverMedia {
        CoverMedia {
            kind,
            data: Bytes::from(vec![0u8; size]),
            metadata: HashMap::new(),
        }
    }

    #[test]
    fn classify_risk_thresholds() {
        assert_eq!(classify_risk(-1.0), DetectabilityRisk::High);
        assert_eq!(classify_risk(-5.9), DetectabilityRisk::High);
        assert_eq!(classify_risk(-7.0), DetectabilityRisk::Medium);
        assert_eq!(classify_risk(-11.9), DetectabilityRisk::Medium);
        assert_eq!(classify_risk(-13.0), DetectabilityRisk::Low);
        assert_eq!(classify_risk(-50.0), DetectabilityRisk::Low);
    }

    #[test]
    fn classify_risk_boundaries_fall_into_lower_band() {
        // Comparisons are strict, so a score exactly on a threshold belongs
        // to the band below it.
        assert_eq!(
            classify_risk(HIGH_THRESHOLD_DB),
            DetectabilityRisk::Medium
        );
        assert_eq!(classify_risk(MEDIUM_THRESHOLD_DB), DetectabilityRisk::Low);
    }

    #[test]
    fn recommended_payload_scales_with_risk() {
        assert_eq!(recommended_payload(1000, DetectabilityRisk::Low), 500);
        assert_eq!(recommended_payload(1000, DetectabilityRisk::Medium), 250);
        assert_eq!(recommended_payload(1000, DetectabilityRisk::High), 125);
    }

    #[test]
    fn estimate_capacity_png_lsb() {
        let cover = make_cover(CoverMediaKind::PngImage, 8192);
        let cap = estimate_capacity(&cover, StegoTechnique::LsbImage);
        // (8192 - 54) / 8 = 1017
        assert_eq!(cap, 1017);
    }

    #[test]
    fn estimate_capacity_wav_lsb() {
        let cover = make_cover(CoverMediaKind::WavAudio, 44100);
        let cap = estimate_capacity(&cover, StegoTechnique::LsbAudio);
        // (44100 - 44) / 16 = 2753 (integer division)
        assert_eq!(cap, 2753);
    }

    #[test]
    fn estimate_capacity_wrong_kind_returns_zero() {
        let cover = make_cover(CoverMediaKind::WavAudio, 1000);
        assert_eq!(estimate_capacity(&cover, StegoTechnique::LsbImage), 0);
    }

    #[test]
    fn chi_square_uniform_data_low_score() {
        // Uniform distribution: all byte values equally represented
        let data: Vec<u8> = (0..=255).cycle().take(256 * 100).collect();
        let score = chi_square_score(&data);
        assert!(
            score < HIGH_THRESHOLD_DB,
            "uniform data should score low: {score}"
        );
    }

    #[test]
    fn chi_square_biased_data_high_score() {
        // Heavily biased: all zeros
        let data = vec![0u8; 10000];
        let score = chi_square_score(&data);
        assert!(
            score > HIGH_THRESHOLD_DB,
            "biased data should score high: {score}"
        );
    }

    #[test]
    fn chi_square_empty_returns_zero() {
        assert!((chi_square_score(&[]) - 0.0).abs() < f64::EPSILON);
    }

    #[test]
    fn corpus_selection_uses_image_capacity() {
        let cover = make_cover(CoverMediaKind::PngImage, 4096);
        let lsb_cap = estimate_capacity(&cover, StegoTechnique::LsbImage);
        let corpus_cap = estimate_capacity(&cover, StegoTechnique::CorpusSelection);
        assert_eq!(lsb_cap, corpus_cap);
    }

    #[test]
    fn pdf_content_stream_has_capacity() {
        let cover = make_cover(CoverMediaKind::PdfDocument, 100_000);
        let cap = estimate_capacity(&cover, StegoTechnique::PdfContentStream);
        // 100_000 / 80 = 1250
        assert_eq!(cap, 1250);
    }

    // ─── Additional capacity estimator coverage ───────────────────────────

    #[test]
    fn jpeg_dct_capacity_for_jpeg() {
        let cover = make_cover(CoverMediaKind::JpegImage, 16_000);
        let cap = estimate_capacity(&cover, StegoTechnique::DctJpeg);
        assert_eq!(cap, 1000); // 16000 / 16
    }

    #[test]
    fn jpeg_dct_capacity_wrong_kind_returns_zero() {
        let cover = make_cover(CoverMediaKind::PngImage, 16_000);
        assert_eq!(estimate_capacity(&cover, StegoTechnique::DctJpeg), 0);
    }

    #[test]
    fn palette_capacity_for_gif() {
        let cover = make_cover(CoverMediaKind::GifImage, 4096);
        let cap = estimate_capacity(&cover, StegoTechnique::Palette);
        // (4096 - 128) / 32 = 124
        assert_eq!(cap, 124);
    }

    #[test]
    fn palette_capacity_wrong_kind_returns_zero() {
        let cover = make_cover(CoverMediaKind::WavAudio, 4096);
        assert_eq!(estimate_capacity(&cover, StegoTechnique::Palette), 0);
    }

    #[test]
    fn text_capacity_for_plain_text() {
        // The ASCII sentence below has 70 grapheme clusters -> 70 / 4 = 17
        let cover = CoverMedia {
            kind: CoverMediaKind::PlainText,
            data: Bytes::from(
                "hello world, this is a test of capacity estimation for zero-width text",
            ),
            metadata: HashMap::new(),
        };
        let cap = estimate_capacity(&cover, StegoTechnique::ZeroWidthText);
        assert_eq!(cap, 17);
    }

    #[test]
    fn text_capacity_wrong_kind_returns_zero() {
        let cover = make_cover(CoverMediaKind::PngImage, 1000);
        assert_eq!(estimate_capacity(&cover, StegoTechnique::ZeroWidthText), 0);
    }

    #[test]
    fn pdf_content_capacity_wrong_kind_returns_zero() {
        let cover = make_cover(CoverMediaKind::PngImage, 100_000);
        assert_eq!(
            estimate_capacity(&cover, StegoTechnique::PdfContentStream),
            0
        );
    }

    #[test]
    fn pdf_metadata_capacity_always_256() {
        let cover = make_cover(CoverMediaKind::PdfDocument, 1000);
        assert_eq!(estimate_capacity(&cover, StegoTechnique::PdfMetadata), 256);
        // Even for non-PDF types, metadata capacity is fixed
        let cover2 = make_cover(CoverMediaKind::PngImage, 1000);
        assert_eq!(estimate_capacity(&cover2, StegoTechnique::PdfMetadata), 256);
    }

    #[test]
    fn audio_lsb_wrong_kind_returns_zero() {
        let cover = make_cover(CoverMediaKind::PngImage, 44100);
        assert_eq!(estimate_capacity(&cover, StegoTechnique::LsbAudio), 0);
    }

    #[test]
    fn phase_encoding_is_audio_lsb_div_8() {
        let cover = make_cover(CoverMediaKind::WavAudio, 44100);
        let audio_cap = estimate_capacity(&cover, StegoTechnique::LsbAudio);
        let phase_cap = estimate_capacity(&cover, StegoTechnique::PhaseEncoding);
        assert_eq!(phase_cap, audio_cap / 8);
    }

    #[test]
    fn echo_hiding_same_as_phase_encoding() {
        let cover = make_cover(CoverMediaKind::WavAudio, 44100);
        let phase_cap = estimate_capacity(&cover, StegoTechnique::PhaseEncoding);
        let echo_cap = estimate_capacity(&cover, StegoTechnique::EchoHiding);
        assert_eq!(phase_cap, echo_cap);
    }

    #[test]
    fn dual_payload_is_half_image_lsb() {
        let cover = make_cover(CoverMediaKind::PngImage, 8192);
        let lsb_cap = estimate_capacity(&cover, StegoTechnique::LsbImage);
        let dual_cap = estimate_capacity(&cover, StegoTechnique::DualPayload);
        assert_eq!(dual_cap, lsb_cap / 2);
    }

    #[test]
    fn gif_lsb_image_capacity() {
        let cover = make_cover(CoverMediaKind::GifImage, 4096);
        let cap = estimate_capacity(&cover, StegoTechnique::LsbImage);
        // (4096 - 128) / 16 = 248
        assert_eq!(cap, 248);
    }

    #[test]
    fn bmp_lsb_same_as_png() {
        let cover_png = make_cover(CoverMediaKind::PngImage, 8192);
        let cover_bmp = make_cover(CoverMediaKind::BmpImage, 8192);
        assert_eq!(
            estimate_capacity(&cover_png, StegoTechnique::LsbImage),
            estimate_capacity(&cover_bmp, StegoTechnique::LsbImage)
        );
    }

    #[test]
    fn palette_capacity_for_png() {
        let cover = make_cover(CoverMediaKind::PngImage, 4096);
        let cap = estimate_capacity(&cover, StegoTechnique::Palette);
        assert_eq!(cap, 124); // Same formula as GIF
    }
}
395}