Skip to main content

shadowforge_lib/domain/corpus/
mod.rs

1//! Corpus steganography: zero-modification cover selection via ANN search.
2//!
3//! Given a payload to embed and a corpus of images, this module finds
4//! the image whose natural LSB bit pattern most closely matches the
5//! payload — minimising or eliminating modifications needed.
6
7use bytes::Bytes;
8
9/// Compute the Hamming distance between two byte slices of equal length.
10///
11/// Returns `None` if the slices differ in length.
12#[must_use]
13pub fn hamming_distance(a: &[u8], b: &[u8]) -> Option<u64> {
14    if a.len() != b.len() {
15        return None;
16    }
17    let mut dist: u64 = 0;
18    for (x, y) in a.iter().zip(b.iter()) {
19        dist = dist.strict_add(u64::from((x ^ y).count_ones()));
20    }
21    Some(dist)
22}
23
24/// Extract the LSB bit pattern from raw pixel bytes.
25///
26/// For each byte of pixel data, extracts the least significant bit and packs
27/// 8 bits into one output byte. The result length is `ceil(pixel_bytes.len() / 8)`.
28#[must_use]
29pub fn extract_lsb_pattern(pixel_bytes: &[u8]) -> Bytes {
30    let out_len = pixel_bytes.len().div_ceil(8);
31    let mut pattern = vec![0u8; out_len];
32
33    for (i, &byte) in pixel_bytes.iter().enumerate() {
34        let out_byte_idx = i / 8;
35        let bit_idx = 7 - (i % 8);
36        if byte & 1 == 1 {
37            // out_byte_idx = i/8 <= (pixel_bytes.len()-1)/8 < out_len
38            #[expect(
39                clippy::indexing_slicing,
40                reason = "out_byte_idx = i/8 < ceil(len/8) = out_len"
41            )]
42            {
43                pattern[out_byte_idx] |= 1 << bit_idx;
44            }
45        }
46    }
47
48    Bytes::from(pattern)
49}
50
51/// Expand a payload into a bit pattern of the same format as
52/// [`extract_lsb_pattern`] output (one bit per sample, packed into bytes).
53///
54/// This effectively returns the raw bytes padded to a target bit-count
55/// boundary. If `target_bits` is `None`, returns the payload bytes as-is.
56#[must_use]
57pub fn payload_to_bit_pattern(payload: &[u8], target_bits: Option<usize>) -> Bytes {
58    target_bits.map_or_else(
59        || Bytes::copy_from_slice(payload),
60        |target| {
61            let needed_bytes = target.div_ceil(8);
62            let mut result = Vec::with_capacity(needed_bytes);
63            result.extend_from_slice(payload);
64            result.resize(needed_bytes, 0);
65            Bytes::from(result)
66        },
67    )
68}
69
70/// Score a corpus entry's precomputed bit pattern against a payload pattern.
71///
72/// Returns the Hamming distance — lower is better. Returns `u64::MAX` if the
73/// patterns are incompatible in length.
74#[must_use]
75pub fn score_match(corpus_pattern: &[u8], payload_pattern: &[u8]) -> u64 {
76    // Compare only the overlapping prefix
77    let compare_len = corpus_pattern.len().min(payload_pattern.len());
78    if compare_len == 0 {
79        return u64::MAX;
80    }
81    match (
82        corpus_pattern.get(..compare_len),
83        payload_pattern.get(..compare_len),
84    ) {
85        (Some(a), Some(b)) => hamming_distance(a, b).unwrap_or(u64::MAX),
86        _ => u64::MAX,
87    }
88}
89
/// Decide whether a Hamming distance is small enough to count as a close
/// match.
///
/// A match is close when at most 5% of the bits differ, i.e. when
/// `distance * 20 <= total_bits`. Exactly 5% still qualifies.
///
/// Returns `false` when `total_bits` is zero. Also returns `false` when
/// `distance` is so large that `distance * 20` does not fit in a `u64`: such
/// a distance necessarily exceeds 5% of any `u64` bit count. This means the
/// `u64::MAX` "no usable score" sentinel returned by [`score_match`] is
/// rejected here instead of causing an overflow panic.
#[must_use]
pub const fn is_close_match(distance: u64, total_bits: u64) -> bool {
    if total_bits == 0 {
        return false;
    }
    // `checked_mul` instead of a panicking multiply: an overflowing product
    // is larger than every possible `total_bits`, so it can never be close.
    match distance.checked_mul(20) {
        Some(scaled) => scaled <= total_bits,
        None => false,
    }
}
101
#[cfg(test)]
mod tests {
    //! Unit tests for the corpus scoring helpers, including the boundary
    //! cases (empty input, partial bytes, unequal lengths) of each function.

    use super::*;

    // --- hamming_distance -------------------------------------------------

    #[test]
    fn hamming_distance_identical() {
        let a = [0xAA, 0x55, 0xFF];
        let b = [0xAA, 0x55, 0xFF];
        assert_eq!(hamming_distance(&a, &b), Some(0));
    }

    #[test]
    fn hamming_distance_one_bit() {
        let a = [0b0000_0000];
        let b = [0b0000_0001];
        assert_eq!(hamming_distance(&a, &b), Some(1));
    }

    #[test]
    fn hamming_distance_all_bits() {
        let a = [0x00];
        let b = [0xFF];
        assert_eq!(hamming_distance(&a, &b), Some(8));
    }

    #[test]
    fn hamming_distance_unequal_lengths() {
        let a = [0x00, 0x00];
        let b = [0xFF];
        assert_eq!(hamming_distance(&a, &b), None);
    }

    #[test]
    fn hamming_distance_empty_slices() {
        // Equal (zero) lengths are compatible; there is simply nothing to differ.
        let a: [u8; 0] = [];
        let b: [u8; 0] = [];
        assert_eq!(hamming_distance(&a, &b), Some(0));
    }

    // --- extract_lsb_pattern ----------------------------------------------

    #[test]
    fn extract_lsb_pattern_basic() {
        // 8 bytes: LSBs = 1,0,1,0,1,0,1,0 = 0xAA
        let pixels = [0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00];
        let pattern = extract_lsb_pattern(&pixels);
        assert_eq!(pattern.as_ref(), &[0xAA]);
    }

    #[test]
    fn extract_lsb_pattern_partial_byte() {
        // 3 bytes: LSBs = 1,1,0 → packed into 0b1100_0000 = 0xC0
        let pixels = [0x01, 0x03, 0x00];
        let pattern = extract_lsb_pattern(&pixels);
        assert_eq!(pattern.len(), 1);
        assert_eq!(pattern.as_ref(), &[0b1100_0000]);
    }

    #[test]
    fn extract_lsb_pattern_empty() {
        let pixels: [u8; 0] = [];
        let pattern = extract_lsb_pattern(&pixels);
        assert!(pattern.is_empty());
    }

    #[test]
    fn extract_lsb_pattern_spills_into_second_byte() {
        // 9 samples with LSB = 1: one full byte plus a single leading bit.
        let pixels = [0xFF; 9];
        let pattern = extract_lsb_pattern(&pixels);
        assert_eq!(pattern.as_ref(), &[0xFF, 0b1000_0000]);
    }

    #[test]
    fn extract_lsb_pattern_ignores_upper_bits() {
        // Every bit except the LSB is set, so the pattern must be all zeros.
        let pixels = [0xFE; 8];
        let pattern = extract_lsb_pattern(&pixels);
        assert_eq!(pattern.as_ref(), &[0x00]);
    }

    // --- payload_to_bit_pattern -------------------------------------------

    #[test]
    fn payload_to_bit_pattern_no_target() {
        let payload = b"hello";
        let result = payload_to_bit_pattern(payload, None);
        assert_eq!(result.as_ref(), b"hello");
    }

    #[test]
    fn payload_to_bit_pattern_with_padding() {
        let payload = b"\xFF";
        let result = payload_to_bit_pattern(payload, Some(16)); // 16 bits = 2 bytes
        assert_eq!(result.len(), 2);
        assert_eq!(result.as_ref(), &[0xFF, 0x00]);
    }

    #[test]
    fn payload_to_bit_pattern_rounds_up_to_whole_bytes() {
        // 9 bits do not fit in one byte, so the result occupies two.
        let payload = b"\xFF";
        let result = payload_to_bit_pattern(payload, Some(9));
        assert_eq!(result.as_ref(), &[0xFF, 0x00]);
    }

    #[test]
    fn payload_to_bit_pattern_truncates_longer_payload() {
        // A target shorter than the payload keeps only the leading bytes.
        let payload = b"\x01\x02\x03";
        let result = payload_to_bit_pattern(payload, Some(8));
        assert_eq!(result.as_ref(), &[0x01]);
    }

    #[test]
    fn payload_to_bit_pattern_zero_target() {
        let payload = b"\x01\x02";
        let result = payload_to_bit_pattern(payload, Some(0));
        assert!(result.is_empty());
    }

    // --- score_match ------------------------------------------------------

    #[test]
    fn score_match_identical() {
        let a = [0xAA, 0x55];
        let b = [0xAA, 0x55];
        assert_eq!(score_match(&a, &b), 0);
    }

    #[test]
    fn score_match_empty() {
        let a: [u8; 0] = [];
        let b: [u8; 0] = [];
        assert_eq!(score_match(&a, &b), u64::MAX);
    }

    #[test]
    fn score_match_one_side_empty() {
        let a = [0xAA];
        let b: [u8; 0] = [];
        assert_eq!(score_match(&a, &b), u64::MAX);
        assert_eq!(score_match(&b, &a), u64::MAX);
    }

    #[test]
    fn score_match_compares_overlapping_prefix() {
        // Only the first two bytes are compared; the third byte of `a` is ignored.
        let a = [0xFF, 0x00, 0x12];
        let b = [0xFF, 0x01];
        assert_eq!(score_match(&a, &b), 1);
        assert_eq!(score_match(&b, &a), 1);
    }

    // --- is_close_match ---------------------------------------------------

    #[test]
    fn is_close_match_zero_distance() {
        assert!(is_close_match(0, 100));
    }

    #[test]
    fn is_close_match_exactly_five_percent() {
        // 5 differing out of 100 total = exactly 5%
        assert!(is_close_match(5, 100));
    }

    #[test]
    fn is_close_match_above_threshold() {
        // 6 differing out of 100 total = 6% > 5%
        assert!(!is_close_match(6, 100));
    }

    #[test]
    fn is_close_match_zero_total() {
        assert!(!is_close_match(0, 0));
    }

    #[test]
    fn is_close_match_smallest_nonzero_distance() {
        // One differing bit is within 5% only once there are at least 20 bits.
        assert!(!is_close_match(1, 19));
        assert!(is_close_match(1, 20));
    }
}
201}