shadowforge_lib/adapters/
corpus.rs1use std::cell::RefCell;
5use std::collections::HashMap;
6use std::path::Path;
7
8use sha2::{Digest, Sha256};
9
10use crate::domain::corpus;
11use crate::domain::errors::CorpusError;
12use crate::domain::ports::CorpusIndex;
13use crate::domain::types::{CorpusEntry, CoverMediaKind, Payload, StegoTechnique};
14
15pub struct CorpusIndexImpl {
22 entries: RefCell<HashMap<[u8; 32], CorpusEntry>>,
23}
24
25impl CorpusIndexImpl {
26 #[must_use]
28 pub fn new() -> Self {
29 Self {
30 entries: RefCell::new(HashMap::new()),
31 }
32 }
33
34 #[must_use]
36 pub fn len(&self) -> usize {
37 self.entries.borrow().len()
38 }
39
40 #[must_use]
42 pub fn is_empty(&self) -> bool {
43 self.entries.borrow().is_empty()
44 }
45}
46
47impl Default for CorpusIndexImpl {
48 fn default() -> Self {
49 Self::new()
50 }
51}
52
53fn kind_from_extension(path: &Path) -> Option<CoverMediaKind> {
55 let ext = path.extension()?.to_str()?.to_lowercase();
56 match ext.as_str() {
57 "png" => Some(CoverMediaKind::PngImage),
58 "bmp" => Some(CoverMediaKind::BmpImage),
59 "jpg" | "jpeg" => Some(CoverMediaKind::JpegImage),
60 "gif" => Some(CoverMediaKind::GifImage),
61 "wav" => Some(CoverMediaKind::WavAudio),
62 _ => None,
63 }
64}
65
66impl CorpusIndex for CorpusIndexImpl {
67 fn search(
68 &self,
69 payload: &Payload,
70 _technique: StegoTechnique,
71 max_results: usize,
72 ) -> Result<Vec<CorpusEntry>, CorpusError> {
73 let entries = self.entries.borrow();
74 if entries.is_empty() {
75 return Err(CorpusError::NoSuitableCover {
76 payload_bytes: payload.len() as u64,
77 });
78 }
79
80 let payload_pattern = corpus::payload_to_bit_pattern(payload.as_bytes(), None);
81
82 let mut scored: Vec<(u64, CorpusEntry)> = entries
84 .values()
85 .map(|entry| {
86 let dist = corpus::score_match(&entry.precomputed_bit_pattern, &payload_pattern);
87 (dist, entry.clone())
88 })
89 .collect();
90
91 scored.sort_by_key(|(dist, _)| *dist);
92 scored.truncate(max_results);
93
94 if scored.is_empty() {
95 return Err(CorpusError::NoSuitableCover {
96 payload_bytes: payload.len() as u64,
97 });
98 }
99
100 Ok(scored.into_iter().map(|(_, entry)| entry).collect())
101 }
102
103 fn add_to_index(&self, path: &Path) -> Result<CorpusEntry, CorpusError> {
104 let cover_kind = kind_from_extension(path).ok_or_else(|| CorpusError::AddFailed {
105 path: path.display().to_string(),
106 reason: "unsupported file extension".into(),
107 })?;
108
109 let data = std::fs::read(path).map_err(|e| CorpusError::AddFailed {
110 path: path.display().to_string(),
111 reason: e.to_string(),
112 })?;
113
114 let file_hash: [u8; 32] = Sha256::digest(&data).into();
115 let bit_pattern = corpus::extract_lsb_pattern(&data);
116
117 let entry = CorpusEntry {
118 file_hash,
119 path: path.display().to_string(),
120 cover_kind,
121 precomputed_bit_pattern: bit_pattern,
122 };
123
124 self.entries.borrow_mut().insert(file_hash, entry.clone());
125 Ok(entry)
126 }
127
128 fn build_index(&self, corpus_dir: &Path) -> Result<usize, CorpusError> {
129 if !corpus_dir.is_dir() {
130 return Err(CorpusError::IndexError {
131 reason: format!("{} is not a directory", corpus_dir.display()),
132 });
133 }
134
135 let mut count = 0usize;
136 let entries = std::fs::read_dir(corpus_dir).map_err(|e| CorpusError::IndexError {
137 reason: e.to_string(),
138 })?;
139
140 for entry in entries {
141 let entry = entry.map_err(|e| CorpusError::IndexError {
142 reason: e.to_string(),
143 })?;
144 let path = entry.path();
145 if path.is_file()
146 && kind_from_extension(&path).is_some()
147 && self.add_to_index(&path).is_ok()
148 {
149 count = count.strict_add(1);
150 }
151 }
152
153 Ok(count)
154 }
155}
156
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds a minimal 1x1, 24-bit, uncompressed BMP holding `pixel_rgb`.
    fn make_test_bmp(pixel_rgb: [u8; 3]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        let [red, green, blue] = pixel_rgb;
        let mut bmp = Vec::new();

        // File header (14 bytes).
        bmp.write_all(b"BM")?;
        let file_size: u32 = 14 + 40 + 4;
        bmp.write_all(&file_size.to_le_bytes())?;
        bmp.write_all(&0u32.to_le_bytes())?; // reserved
        bmp.write_all(&54u32.to_le_bytes())?; // offset of the pixel data

        // Info header (40 bytes).
        bmp.write_all(&40u32.to_le_bytes())?; // header size
        bmp.write_all(&1i32.to_le_bytes())?; // width
        bmp.write_all(&1i32.to_le_bytes())?; // height
        bmp.write_all(&1u16.to_le_bytes())?; // colour planes
        bmp.write_all(&24u16.to_le_bytes())?; // bits per pixel
        bmp.write_all(&0u32.to_le_bytes())?; // compression (none)
        bmp.write_all(&4u32.to_le_bytes())?; // pixel data size, row padded to 4 bytes
        bmp.write_all(&2835i32.to_le_bytes())?; // horizontal resolution
        bmp.write_all(&2835i32.to_le_bytes())?; // vertical resolution
        bmp.write_all(&0u32.to_le_bytes())?; // palette colours
        bmp.write_all(&0u32.to_le_bytes())?; // important colours

        // Pixel data: one pixel in B, G, R order plus one padding byte.
        bmp.write_all(&[blue, green, red, 0])?;

        Ok(bmp)
    }

    #[test]
    fn build_index_counts_files() -> TestResult {
        let dir = tempfile::tempdir()?;
        // Distinct red values give every file distinct contents (and hash).
        for i in 0u8..5 {
            let red = i * 50;
            let file = dir.path().join(format!("img_{i}.bmp"));
            std::fs::write(&file, make_test_bmp([red, 0, 0])?)?;
        }

        let index = CorpusIndexImpl::new();
        let indexed = index.build_index(dir.path())?;

        assert_eq!(indexed, 5);
        assert_eq!(index.len(), 5);
        Ok(())
    }

    #[test]
    fn build_index_skips_non_image_files() -> TestResult {
        let dir = tempfile::tempdir()?;
        let root = dir.path();
        std::fs::write(root.join("readme.txt"), b"hello")?;
        std::fs::write(root.join("img.bmp"), make_test_bmp([0, 0, 0])?)?;

        let index = CorpusIndexImpl::new();
        let indexed = index.build_index(root)?;

        assert_eq!(indexed, 1);
        Ok(())
    }

    #[test]
    fn search_returns_exact_match_first() -> TestResult {
        let dir = tempfile::tempdir()?;
        let root = dir.path();

        let target_data = make_test_bmp([0xFF, 0xFF, 0xFF])?;
        std::fs::write(root.join("target.bmp"), &target_data)?;
        std::fs::write(root.join("other.bmp"), make_test_bmp([0, 0, 0])?)?;

        let index = CorpusIndexImpl::new();
        index.build_index(root)?;

        let target_hash: [u8; 32] = Sha256::digest(&target_data).into();

        // Use the target's own precomputed pattern as the payload. The borrow
        // of the map is confined to this block so `search` can borrow it again.
        let payload = {
            let map = index.entries.borrow();
            let pattern = &map
                .get(&target_hash)
                .ok_or("target hash not found in index")?
                .precomputed_bit_pattern;
            Payload::from_bytes(pattern.to_vec())
        };

        let results = index.search(&payload, StegoTechnique::LsbImage, 5)?;
        assert!(!results.is_empty());

        let best = results.first().ok_or("no search results")?;
        assert_eq!(best.file_hash, target_hash);
        Ok(())
    }

    #[test]
    fn search_empty_index_returns_error() {
        let index = CorpusIndexImpl::new();
        let payload = Payload::from_bytes(vec![0x42]);

        assert!(index
            .search(&payload, StegoTechnique::LsbImage, 5)
            .is_err());
    }

    #[test]
    fn add_to_index_rejects_unsupported_extension() -> TestResult {
        let dir = tempfile::tempdir()?;
        let text_file = dir.path().join("readme.txt");
        std::fs::write(&text_file, b"not an image")?;

        let index = CorpusIndexImpl::new();
        let outcome = index.add_to_index(&text_file);

        assert!(outcome.is_err());
        Ok(())
    }

    #[test]
    fn build_index_rejects_non_directory() -> TestResult {
        let file = tempfile::NamedTempFile::new()?;
        let index = CorpusIndexImpl::new();

        assert!(index.build_index(file.path()).is_err());
        Ok(())
    }

    #[test]
    fn default_impl() {
        assert!(CorpusIndexImpl::default().is_empty());
    }
}