Skip to main content

shadowforge_lib/adapters/
corpus.rs

1//! Adapter implementing the [`CorpusIndex`] port for zero-modification
2//! steganographic cover selection.
3
4use std::cell::RefCell;
5use std::collections::HashMap;
6use std::path::Path;
7
8use sha2::{Digest, Sha256};
9
10use crate::domain::corpus;
11use crate::domain::errors::CorpusError;
12use crate::domain::ports::CorpusIndex;
13use crate::domain::types::{CorpusEntry, CoverMediaKind, Payload, StegoTechnique};
14
/// In-memory corpus index backed by a `HashMap<file_hash, CorpusEntry>`.
///
/// Search uses a linear scan with Hamming distance — sufficient for corpora
/// up to ~100 K images. Interior mutability via [`RefCell`] keeps the port
/// trait's `&self` receiver while allowing mutation during `add_to_index`
/// and `build_index`.
///
/// Entries are keyed by the SHA-256 digest of the file contents (see
/// `add_to_index`), so indexing two files with identical bytes leaves a
/// single entry: the later insert replaces the earlier one.
pub struct CorpusIndexImpl {
    /// Indexed covers, keyed by the same 32-byte digest that is stored in
    /// each entry's `file_hash` field.
    entries: RefCell<HashMap<[u8; 32], CorpusEntry>>,
}
24
25impl CorpusIndexImpl {
26    /// Create an empty corpus index.
27    #[must_use]
28    pub fn new() -> Self {
29        Self {
30            entries: RefCell::new(HashMap::new()),
31        }
32    }
33
34    /// Return the number of entries currently in the index.
35    #[must_use]
36    pub fn len(&self) -> usize {
37        self.entries.borrow().len()
38    }
39
40    /// Return `true` if the index contains no entries.
41    #[must_use]
42    pub fn is_empty(&self) -> bool {
43        self.entries.borrow().is_empty()
44    }
45}
46
47impl Default for CorpusIndexImpl {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53/// Detect the cover media kind from a file extension.
54fn kind_from_extension(path: &Path) -> Option<CoverMediaKind> {
55    let ext = path.extension()?.to_str()?.to_lowercase();
56    match ext.as_str() {
57        "png" => Some(CoverMediaKind::PngImage),
58        "bmp" => Some(CoverMediaKind::BmpImage),
59        "jpg" | "jpeg" => Some(CoverMediaKind::JpegImage),
60        "gif" => Some(CoverMediaKind::GifImage),
61        "wav" => Some(CoverMediaKind::WavAudio),
62        _ => None,
63    }
64}
65
66impl CorpusIndex for CorpusIndexImpl {
67    fn search(
68        &self,
69        payload: &Payload,
70        _technique: StegoTechnique,
71        max_results: usize,
72    ) -> Result<Vec<CorpusEntry>, CorpusError> {
73        let entries = self.entries.borrow();
74        if entries.is_empty() {
75            return Err(CorpusError::NoSuitableCover {
76                payload_bytes: payload.len() as u64,
77            });
78        }
79
80        let payload_pattern = corpus::payload_to_bit_pattern(payload.as_bytes(), None);
81
82        // Score each entry by Hamming distance and collect results
83        let mut scored: Vec<(u64, CorpusEntry)> = entries
84            .values()
85            .map(|entry| {
86                let dist = corpus::score_match(&entry.precomputed_bit_pattern, &payload_pattern);
87                (dist, entry.clone())
88            })
89            .collect();
90
91        scored.sort_by_key(|(dist, _)| *dist);
92        scored.truncate(max_results);
93
94        if scored.is_empty() {
95            return Err(CorpusError::NoSuitableCover {
96                payload_bytes: payload.len() as u64,
97            });
98        }
99
100        Ok(scored.into_iter().map(|(_, entry)| entry).collect())
101    }
102
103    fn add_to_index(&self, path: &Path) -> Result<CorpusEntry, CorpusError> {
104        let cover_kind = kind_from_extension(path).ok_or_else(|| CorpusError::AddFailed {
105            path: path.display().to_string(),
106            reason: "unsupported file extension".into(),
107        })?;
108
109        let data = std::fs::read(path).map_err(|e| CorpusError::AddFailed {
110            path: path.display().to_string(),
111            reason: e.to_string(),
112        })?;
113
114        let file_hash: [u8; 32] = Sha256::digest(&data).into();
115        let bit_pattern = corpus::extract_lsb_pattern(&data);
116
117        let entry = CorpusEntry {
118            file_hash,
119            path: path.display().to_string(),
120            cover_kind,
121            precomputed_bit_pattern: bit_pattern,
122        };
123
124        self.entries.borrow_mut().insert(file_hash, entry.clone());
125        Ok(entry)
126    }
127
128    fn build_index(&self, corpus_dir: &Path) -> Result<usize, CorpusError> {
129        if !corpus_dir.is_dir() {
130            return Err(CorpusError::IndexError {
131                reason: format!("{} is not a directory", corpus_dir.display()),
132            });
133        }
134
135        let mut count = 0usize;
136        let entries = std::fs::read_dir(corpus_dir).map_err(|e| CorpusError::IndexError {
137            reason: e.to_string(),
138        })?;
139
140        for entry in entries {
141            let entry = entry.map_err(|e| CorpusError::IndexError {
142                reason: e.to_string(),
143            })?;
144            let path = entry.path();
145            if path.is_file()
146                && kind_from_extension(&path).is_some()
147                && self.add_to_index(&path).is_ok()
148            {
149                count = count.strict_add(1);
150            }
151        }
152
153        Ok(count)
154    }
155}
156
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Build the bytes of a minimal 1×1, 24-bit uncompressed BMP whose single
    /// pixel has the given RGB colour.
    fn make_test_bmp(pixel_rgb: [u8; 3]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        let [red, green, blue] = pixel_rgb;
        // 14-byte file header + 40-byte DIB header + one row padded to 4 bytes.
        let total_len: u32 = 14 + 40 + 4;

        let mut out = Vec::new();

        // File header (14 bytes).
        out.write_all(b"BM")?;
        out.write_all(&total_len.to_le_bytes())?;
        out.write_all(&0u32.to_le_bytes())?; // reserved
        out.write_all(&54u32.to_le_bytes())?; // offset of the pixel data

        // BITMAPINFOHEADER (40 bytes).
        out.write_all(&40u32.to_le_bytes())?; // size of this header
        out.write_all(&1i32.to_le_bytes())?; // width in pixels
        out.write_all(&1i32.to_le_bytes())?; // height in pixels
        out.write_all(&1u16.to_le_bytes())?; // colour planes
        out.write_all(&24u16.to_le_bytes())?; // bits per pixel
        out.write_all(&0u32.to_le_bytes())?; // compression (none)
        out.write_all(&4u32.to_le_bytes())?; // image size: one padded row
        out.write_all(&2835i32.to_le_bytes())?; // horizontal resolution
        out.write_all(&2835i32.to_le_bytes())?; // vertical resolution
        out.write_all(&0u32.to_le_bytes())?; // palette colour count
        out.write_all(&0u32.to_le_bytes())?; // important colour count

        // The single pixel row: stored as B, G, R plus one padding byte.
        out.write_all(&[blue, green, red, 0])?;

        Ok(out)
    }

    /// Five distinct BMPs in a directory produce five indexed entries.
    #[test]
    fn build_index_counts_files() -> TestResult {
        let corpus_dir = tempfile::tempdir()?;
        for shade in 0..5 {
            let bmp = make_test_bmp([shade * 50, 0, 0])?;
            std::fs::write(corpus_dir.path().join(format!("img_{shade}.bmp")), bmp)?;
        }

        let index = CorpusIndexImpl::new();
        let indexed = index.build_index(corpus_dir.path())?;
        assert_eq!(indexed, 5);
        assert_eq!(index.len(), 5);
        Ok(())
    }

    /// Files without a supported extension are ignored by the directory scan.
    #[test]
    fn build_index_skips_non_image_files() -> TestResult {
        let corpus_dir = tempfile::tempdir()?;
        let root = corpus_dir.path();
        std::fs::write(root.join("readme.txt"), b"hello")?;
        std::fs::write(root.join("img.bmp"), make_test_bmp([0, 0, 0])?)?;

        let index = CorpusIndexImpl::new();
        assert_eq!(index.build_index(root)?, 1);
        Ok(())
    }

    /// A payload taken from an entry's own pattern ranks that entry first.
    #[test]
    fn search_returns_exact_match_first() -> TestResult {
        let corpus_dir = tempfile::tempdir()?;
        let root = corpus_dir.path();

        let target_data = make_test_bmp([0xFF, 0xFF, 0xFF])?;
        std::fs::write(root.join("target.bmp"), &target_data)?;
        // A second, different image so the ranking has something to order.
        std::fs::write(root.join("other.bmp"), make_test_bmp([0, 0, 0])?)?;

        let index = CorpusIndexImpl::new();
        index.build_index(root)?;

        let target_hash: [u8; 32] = Sha256::digest(&target_data).into();

        // Build the payload inside a scope so the map borrow is released
        // before `search` borrows the map again.
        let payload = {
            let map = index.entries.borrow();
            let target = map
                .get(&target_hash)
                .ok_or("target hash not found in index")?;
            Payload::from_bytes(target.precomputed_bit_pattern.to_vec())
        };

        let results = index.search(&payload, StegoTechnique::LsbImage, 5)?;
        assert!(!results.is_empty());
        let best = results.first().ok_or("no search results")?;
        assert_eq!(best.file_hash, target_hash);
        Ok(())
    }

    /// Searching an index with no entries is an error, not an empty list.
    #[test]
    fn search_empty_index_returns_error() {
        let index = CorpusIndexImpl::new();
        let payload = Payload::from_bytes(vec![0x42]);
        assert!(index.search(&payload, StegoTechnique::LsbImage, 5).is_err());
    }

    /// Adding a file whose extension is not a supported cover format fails.
    #[test]
    fn add_to_index_rejects_unsupported_extension() -> TestResult {
        let scratch = tempfile::tempdir()?;
        let text_file = scratch.path().join("readme.txt");
        std::fs::write(&text_file, b"not an image")?;

        let index = CorpusIndexImpl::new();
        let outcome = index.add_to_index(&text_file);
        assert!(outcome.is_err());
        Ok(())
    }

    /// Pointing `build_index` at a regular file is rejected.
    #[test]
    fn build_index_rejects_non_directory() -> TestResult {
        let plain_file = tempfile::NamedTempFile::new()?;
        let index = CorpusIndexImpl::new();
        assert!(index.build_index(plain_file.path()).is_err());
        Ok(())
    }

    /// `Default` yields an empty index.
    #[test]
    fn default_impl() {
        assert!(CorpusIndexImpl::default().is_empty());
    }
}