shadowforge_lib/adapters/
corpus.rs1use std::cell::RefCell;
5use std::collections::HashMap;
6use std::path::Path;
7
8use sha2::{Digest, Sha256};
9
10use crate::domain::corpus;
11use crate::domain::errors::CorpusError;
12use crate::domain::ports::CorpusIndex;
13use crate::domain::types::{CorpusEntry, CoverMediaKind, Payload, SpectralKey, StegoTechnique};
14
15pub struct CorpusIndexImpl {
22 entries: RefCell<HashMap<[u8; 32], CorpusEntry>>,
23 spectral_index: RefCell<HashMap<SpectralKey, Vec<[u8; 32]>>>,
25}
26
27impl CorpusIndexImpl {
28 #[must_use]
30 pub fn new() -> Self {
31 Self {
32 entries: RefCell::new(HashMap::new()),
33 spectral_index: RefCell::new(HashMap::new()),
34 }
35 }
36
37 #[must_use]
39 pub fn len(&self) -> usize {
40 self.entries.borrow().len()
41 }
42
43 #[must_use]
45 pub fn is_empty(&self) -> bool {
46 self.entries.borrow().is_empty()
47 }
48}
49
50impl Default for CorpusIndexImpl {
51 fn default() -> Self {
52 Self::new()
53 }
54}
55
56fn kind_from_extension(path: &Path) -> Option<CoverMediaKind> {
58 let ext = path.extension()?.to_str()?.to_lowercase();
59 match ext.as_str() {
60 "png" => Some(CoverMediaKind::PngImage),
61 "bmp" => Some(CoverMediaKind::BmpImage),
62 "jpg" | "jpeg" => Some(CoverMediaKind::JpegImage),
63 "gif" => Some(CoverMediaKind::GifImage),
64 "wav" => Some(CoverMediaKind::WavAudio),
65 _ => None,
66 }
67}
68
69impl CorpusIndex for CorpusIndexImpl {
70 fn search(
71 &self,
72 payload: &Payload,
73 _technique: StegoTechnique,
74 max_results: usize,
75 ) -> Result<Vec<CorpusEntry>, CorpusError> {
76 let entries = self.entries.borrow();
77 if entries.is_empty() {
78 return Err(CorpusError::NoSuitableCover {
79 payload_bytes: payload.len() as u64,
80 });
81 }
82
83 let payload_pattern = corpus::payload_to_bit_pattern(payload.as_bytes(), None);
84
85 let mut scored: Vec<(u64, CorpusEntry)> = entries
87 .values()
88 .map(|entry| {
89 let dist = corpus::score_match(&entry.precomputed_bit_pattern, &payload_pattern);
90 (dist, entry.clone())
91 })
92 .collect();
93
94 scored.sort_by_key(|(dist, _)| *dist);
95 scored.truncate(max_results);
96
97 if scored.is_empty() {
98 return Err(CorpusError::NoSuitableCover {
99 payload_bytes: payload.len() as u64,
100 });
101 }
102
103 Ok(scored.into_iter().map(|(_, entry)| entry).collect())
104 }
105
106 fn add_to_index(&self, path: &Path) -> Result<CorpusEntry, CorpusError> {
107 let cover_kind = kind_from_extension(path).ok_or_else(|| CorpusError::AddFailed {
108 path: path.display().to_string(),
109 reason: "unsupported file extension".into(),
110 })?;
111
112 let data = std::fs::read(path).map_err(|e| CorpusError::AddFailed {
113 path: path.display().to_string(),
114 reason: e.to_string(),
115 })?;
116
117 let file_hash: [u8; 32] = Sha256::digest(&data).into();
118 let bit_pattern = corpus::extract_lsb_pattern(&data);
119
120 let entry = CorpusEntry {
121 file_hash,
122 path: path.display().to_string(),
123 cover_kind,
124 precomputed_bit_pattern: bit_pattern,
125 spectral_key: None,
126 };
127
128 self.entries.borrow_mut().insert(file_hash, entry.clone());
129 Ok(entry)
132 }
133
134 fn build_index(&self, corpus_dir: &Path) -> Result<usize, CorpusError> {
135 if !corpus_dir.is_dir() {
136 return Err(CorpusError::IndexError {
137 reason: format!("{} is not a directory", corpus_dir.display()),
138 });
139 }
140
141 let mut count = 0usize;
142 let entries = std::fs::read_dir(corpus_dir).map_err(|e| CorpusError::IndexError {
143 reason: e.to_string(),
144 })?;
145
146 for entry in entries {
147 let entry = entry.map_err(|e| CorpusError::IndexError {
148 reason: e.to_string(),
149 })?;
150 let path = entry.path();
151 if path.is_file()
152 && kind_from_extension(&path).is_some()
153 && self.add_to_index(&path).is_ok()
154 {
155 count = count.strict_add(1);
156 }
157 }
158
159 Ok(count)
160 }
161
162 fn search_for_model(
163 &self,
164 payload: &Payload,
165 model_id: &str,
166 resolution: (u32, u32),
167 max_results: usize,
168 ) -> Result<Vec<CorpusEntry>, CorpusError> {
169 let key = SpectralKey {
170 model_id: model_id.to_string(),
171 resolution,
172 };
173 let spectral_index = self.spectral_index.borrow();
174 let hashes = spectral_index.get(&key).map_or(&[][..], Vec::as_slice);
175 if hashes.is_empty() {
176 return Err(CorpusError::NoSuitableCover {
177 payload_bytes: payload.len() as u64,
178 });
179 }
180
181 let entries = self.entries.borrow();
182 let payload_pattern = corpus::payload_to_bit_pattern(payload.as_bytes(), None);
183 let mut scored: Vec<(u64, CorpusEntry)> = hashes
184 .iter()
185 .filter_map(|h| entries.get(h))
186 .map(|entry| {
187 let dist = corpus::score_match(&entry.precomputed_bit_pattern, &payload_pattern);
188 (dist, entry.clone())
189 })
190 .collect();
191
192 scored.sort_by_key(|(dist, _)| *dist);
193 scored.truncate(max_results);
194
195 if scored.is_empty() {
196 return Err(CorpusError::NoSuitableCover {
197 payload_bytes: payload.len() as u64,
198 });
199 }
200
201 Ok(scored.into_iter().map(|(_, e)| e).collect())
202 }
203
204 fn model_stats(&self) -> Vec<(SpectralKey, usize)> {
205 let spectral_index = self.spectral_index.borrow();
206 let mut stats: Vec<(SpectralKey, usize)> = spectral_index
207 .iter()
208 .map(|(k, v)| (k.clone(), v.len()))
209 .collect();
210 stats.sort_by(|a, b| a.0.model_id.cmp(&b.0.model_id));
211 stats
212 }
213}
214
215impl CorpusIndexImpl {
216 pub fn add_entry_with_key(&self, entry: CorpusEntry) {
220 if let Some(ref key) = entry.spectral_key {
221 self.spectral_index
222 .borrow_mut()
223 .entry(key.clone())
224 .or_default()
225 .push(entry.file_hash);
226 }
227 self.entries.borrow_mut().insert(entry.file_hash, entry);
228 }
229}
230
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// Result type that lets the fallible tests use `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds the bytes of a 1x1, 24-bit uncompressed BMP whose single pixel
    /// has the given RGB colour.
    fn make_test_bmp(pixel_rgb: [u8; 3]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        let [red, green, blue] = pixel_rgb;
        let mut bmp = Vec::new();

        // File header: magic, total size, reserved, offset of the pixel data.
        bmp.write_all(b"BM")?;
        let file_size: u32 = 14 + 40 + 4;
        for field in [file_size, 0u32, 54u32] {
            bmp.write_all(&field.to_le_bytes())?;
        }

        // Info header: header size, width, height, planes, bits per pixel,
        // compression, image size, horizontal/vertical resolution, palette
        // size and important-colour count.
        bmp.write_all(&40u32.to_le_bytes())?;
        for dimension in [1i32, 1i32] {
            bmp.write_all(&dimension.to_le_bytes())?;
        }
        for field in [1u16, 24u16] {
            bmp.write_all(&field.to_le_bytes())?;
        }
        for field in [0u32, 4u32] {
            bmp.write_all(&field.to_le_bytes())?;
        }
        for pixels_per_metre in [2835i32, 2835i32] {
            bmp.write_all(&pixels_per_metre.to_le_bytes())?;
        }
        for field in [0u32, 0u32] {
            bmp.write_all(&field.to_le_bytes())?;
        }

        // Pixel row: BGR order, padded to four bytes.
        bmp.write_all(&[blue, green, red, 0])?;
        Ok(bmp)
    }

    #[test]
    fn build_index_counts_files() -> TestResult {
        let dir = tempfile::tempdir()?;
        for i in 0..5 {
            let bmp = make_test_bmp([i * 50, 0, 0])?;
            std::fs::write(dir.path().join(format!("img_{i}.bmp")), bmp)?;
        }

        let index = CorpusIndexImpl::new();
        let indexed = index.build_index(dir.path())?;
        assert_eq!(indexed, 5);
        assert_eq!(index.len(), 5);
        Ok(())
    }

    #[test]
    fn build_index_skips_non_image_files() -> TestResult {
        let dir = tempfile::tempdir()?;
        let root = dir.path();
        std::fs::write(root.join("readme.txt"), b"hello")?;
        std::fs::write(root.join("img.bmp"), make_test_bmp([0, 0, 0])?)?;

        let index = CorpusIndexImpl::new();
        assert_eq!(index.build_index(root)?, 1);
        Ok(())
    }

    #[test]
    fn search_returns_exact_match_first() -> TestResult {
        let dir = tempfile::tempdir()?;
        let target_data = make_test_bmp([0xFF, 0xFF, 0xFF])?;
        std::fs::write(dir.path().join("target.bmp"), &target_data)?;
        std::fs::write(dir.path().join("other.bmp"), make_test_bmp([0, 0, 0])?)?;

        let index = CorpusIndexImpl::new();
        index.build_index(dir.path())?;

        // Use the target's own stored pattern as the payload so it is an
        // exact match for that entry.
        let target_hash: [u8; 32] = Sha256::digest(&target_data).into();
        let pattern = index
            .entries
            .borrow()
            .get(&target_hash)
            .ok_or("target hash not found in index")?
            .precomputed_bit_pattern
            .to_vec();
        let payload = Payload::from_bytes(pattern);

        let results = index.search(&payload, StegoTechnique::LsbImage, 5)?;
        assert!(!results.is_empty());
        let best = results.first().ok_or("no search results")?;
        assert_eq!(best.file_hash, target_hash);
        Ok(())
    }

    #[test]
    fn search_empty_index_returns_error() {
        let index = CorpusIndexImpl::new();
        let payload = Payload::from_bytes(vec![0x42]);
        assert!(index
            .search(&payload, StegoTechnique::LsbImage, 5)
            .is_err());
    }

    #[test]
    fn add_to_index_rejects_unsupported_extension() -> TestResult {
        let dir = tempfile::tempdir()?;
        let text_file = dir.path().join("readme.txt");
        std::fs::write(&text_file, b"not an image")?;

        let index = CorpusIndexImpl::new();
        let outcome = index.add_to_index(&text_file);
        assert!(outcome.is_err());
        Ok(())
    }

    #[test]
    fn build_index_rejects_non_directory() -> TestResult {
        let file = tempfile::NamedTempFile::new()?;
        let index = CorpusIndexImpl::new();
        assert!(index.build_index(file.path()).is_err());
        Ok(())
    }

    #[test]
    fn default_impl() {
        assert!(CorpusIndexImpl::default().is_empty());
    }
}