1use crate::{MetadataEntry, ScrubError, ScrubResult, Scrubber};
2
/// In-memory JPEG file whose EXIF metadata segment can be located and removed.
#[derive(Debug, Clone)]
pub struct JpegScrubber {
    /// Raw bytes of the file being processed.
    file_bytes: Vec<u8>,
}

impl JpegScrubber {
    /// Locates the first APP1 segment whose payload begins with the `Exif\0\0` signature.
    ///
    /// The scan skips the first two bytes (expected to hold the SOI marker) and then walks
    /// the marker segments one by one until it reaches the start of scan (SOS, `0xDA`) or
    /// the end of image (EOI, `0xD9`).
    ///
    /// # Returns
    ///
    /// `Some((offset, length))` where `offset` is the index of the segment's `0xFF` marker
    /// byte and `length` is the value of the segment's big-endian length field. That field
    /// counts itself and the payload but **not** the two marker bytes, so the complete
    /// segment occupies `file_bytes[offset..offset + 2 + length]`.
    ///
    /// `None` when no EXIF segment precedes SOS/EOI, or when the marker structure is
    /// malformed (a byte other than `0xFF` where a marker must start, a length field
    /// smaller than 2, or a segment that runs past the end of the data).
    fn find_exif_segment(&self) -> Option<(usize, usize)> {
        const EXIF_SIGNATURE: &[u8] = b"Exif\0\0";

        let bytes = self.file_bytes.as_slice();
        let mut offset = 2;

        // Every iteration needs the marker (2 bytes) and, for most markers, the length
        // field (2 more bytes), hence the 4-byte look-ahead.
        while offset + 4 <= bytes.len() {
            if bytes[offset] != 0xFF {
                return None;
            }

            let marker = bytes[offset + 1];
            match marker {
                // A marker may be preceded by any number of 0xFF fill bytes; step over one
                // and re-evaluate so the real marker code is read next.
                0xFF => {
                    offset += 1;
                    continue;
                }
                // TEM and RST0..RST7 are stand-alone markers without a length field.
                0x01 | 0xD0..=0xD7 => {
                    offset += 2;
                    continue;
                }
                // EOI or SOS: no metadata segments can follow.
                0xD9 | 0xDA => break,
                _ => {}
            }

            let length = usize::from(u16::from_be_bytes([bytes[offset + 2], bytes[offset + 3]]));
            let segment_end = offset + 2 + length;
            if length < 2 || segment_end > bytes.len() {
                return None;
            }

            // Compare against the payload slice only, so the signature can never be matched
            // using bytes that lie beyond the end of this segment.
            if marker == 0xE1 && bytes[offset + 4..segment_end].starts_with(EXIF_SIGNATURE) {
                return Some((offset, length));
            }

            offset = segment_end;
        }

        None
    }
}
69
70impl Scrubber for JpegScrubber {
71 fn new(file_bytes: Vec<u8>) -> Result<Self, ScrubError> {
72 if file_bytes.len() < 2 || file_bytes[0..2] != [0xFF, 0xD8] {
74 return Err(ScrubError::ParsingError("Not a valid JPEG file".into()));
75 }
76 eprintln!(
77 "DBG (JpegScrubber::new): Received file_bytes with length {}",
78 file_bytes.len()
79 ); Ok(Self { file_bytes })
81 }
82
83 fn view_metadata(&self) -> Result<Vec<MetadataEntry>, ScrubError> {
84 use nom_exif::{ExifIter, MediaParser, MediaSource};
85 use std::io::Cursor; let media_source = MediaSource::seekable(Cursor::new(&self.file_bytes)).map_err(|e| {
88 ScrubError::ParsingError(format!("Failed to create MediaSource: {:?}", e))
89 })?;
90
91 let mut parser = MediaParser::new();
92
93 let exif_iter_result = parser.parse(media_source);
94
95 let exif_iter: ExifIter = match exif_iter_result {
96 Ok(iter) => iter,
97 Err(_parse_error) => {
98 return Ok(Vec::new());
99 }
100 };
101
102 let mut metadata_entries = Vec::new();
103
104 for entry in exif_iter {
106 let tag_name = "<Tag Name Unavailable>".to_string();
111
112 let ifd_num_result = entry.ifd_index();
118 let ifd_num: usize = ifd_num_result; let category = match ifd_num {
121 0 => "IFD0".to_string(),
122 1 => "IFD1".to_string(),
123 2 => "EXIF".to_string(),
124 3 => "GPS".to_string(),
125 4 => "Interop".to_string(),
126 _ => format!("IFD_{}", ifd_num),
127 };
128
129 let opt_value_ref_result = entry.get_value();
133 let opt_value_ref: Option<_> = opt_value_ref_result; let value_string = match opt_value_ref {
141 Some(value_ref) => {
142 format!("{:?}", value_ref)
147 }
148 None => "<No Value>".to_string(),
149 };
150
151 metadata_entries.push(MetadataEntry {
152 key: tag_name,
153 value: value_string,
154 category,
155 });
156 }
157 Ok(metadata_entries)
158 }
159
160 fn scrub(&self) -> Result<ScrubResult, ScrubError> {
161 let metadata_removed = self.view_metadata()?; if let Some((start_offset, segment_length)) = self.find_exif_segment() {
164 eprintln!(
165 "DBG (scrub): Preparing to remove segment. Start: {}, Length: {}",
166 start_offset, segment_length
167 );
168
169 let original_len = self.file_bytes.len();
171 let part1_len = start_offset;
172 let part2_start = start_offset + segment_length;
173 let part2_len = original_len - part2_start;
174 let calculated_cleaned_len = part1_len + part2_len;
175
176 eprintln!(
177 "DBG (scrub): Original len: {}, Part1 len: {}, Part2 start: {}, Part2 len: {}, Calculated cleaned len: {}",
178 original_len, part1_len, part2_start, part2_len, calculated_cleaned_len
179 );
180
181 if part2_start > original_len {
182 eprintln!(
183 "DBG (scrub): ERROR - part2_start ({}) is beyond file length ({})",
184 part2_start, original_len
185 );
186 }
188
189 let mut cleaned_bytes = Vec::with_capacity(calculated_cleaned_len); eprintln!("DBG (scrub): Copying Part 1: indices [0..{})", start_offset);
191 cleaned_bytes.extend_from_slice(&self.file_bytes[..start_offset]);
192
193 eprintln!(
194 "DBG (scrub): Copying Part 2: indices [{}..{})",
195 part2_start, original_len
196 );
197 cleaned_bytes.extend_from_slice(&self.file_bytes[part2_start..]);
198
199 eprintln!(
200 "DBG (scrub): Final cleaned_bytes length: {}",
201 cleaned_bytes.len()
202 );
203
204 if !cleaned_bytes.is_empty() {
206 let first_len = std::cmp::min(10, cleaned_bytes.len());
207 let last_start = std::cmp::max(cleaned_bytes.len(), 10) - 10;
208 eprintln!(
209 "DBG (scrub): First {} bytes: {:?}",
210 first_len,
211 &cleaned_bytes[0..first_len]
212 );
213 eprintln!(
214 "DBG (scrub): Last 10 bytes: {:?}",
215 &cleaned_bytes[last_start..]
216 );
217 }
218
219 Ok(ScrubResult {
220 cleaned_file_bytes: cleaned_bytes,
221 metadata_removed,
222 })
223 } else {
224 eprintln!("DBG (scrub): No EXIF segment found");
225 Ok(ScrubResult {
226 cleaned_file_bytes: self.file_bytes.clone(),
227 metadata_removed: vec![],
228 })
229 }
230 }
231}
232
233#[cfg(test)]
237mod tests {
238 use super::*;
239
240 const TEST_JPEG_WITH_EXIF: &[u8] = &[
246 0xFF, 0xD8, 0xFF, 0xE1, 0x00, 0x4A, 0x45, 0x78, 0x69, 0x66, 0x00, 0x00, 0x4D, 0x4D, 0x00,
247 0x2A, 0x00, 0x00, 0x00, 0x08, 0x00, 0x02, 0x01, 0x0F, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0D,
248 0x00, 0x00, 0x00, 0x1A, 0x01, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00,
249 0x28, 0x00, 0x00, 0x00, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x43, 0x61, 0x6D, 0x65, 0x72,
250 0x61, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x4D, 0x6F, 0x64, 0x65, 0x6C, 0x00, 0xFF, 0xDB,
251 0x00, 0x43, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
252 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
253 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
254 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
255 0x01, 0x01, 0x01, 0x01, 0xFF, 0xC0, 0x00, 0x11, 0x08, 0x00, 0x01, 0x00, 0x01, 0x03, 0x01,
256 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xFF, 0xC4, 0x00, 0x1F, 0x00, 0x00, 0x01,
257 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
258 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xFF, 0xDA, 0x00, 0x0C,
259 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3F, 0x00, 0xF7, 0xC8, 0xFF, 0xD9,
260 ];
261
262 const TEST_JPEG_WITHOUT_EXIF: &[u8] = &[
269 0xFF, 0xD8, 0x43, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
270 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
271 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
272 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
273 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF, 0xC0, 0x00, 0x11, 0x08, 0x00, 0x01, 0x00, 0x01, 0x03,
274 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11, 0x01, 0xFF, 0xC4, 0x00, 0x1F, 0x00, 0x00,
275 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
276 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xFF, 0xDA, 0x00,
277 0x0C, 0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3F, 0x00, 0xF7, 0xC8, 0xFF, 0xD9,
278 ];
279
280 #[test]
281 fn new_jpeg_scrubber_works() {
282 assert!(JpegScrubber::new(TEST_JPEG_WITH_EXIF.to_vec()).is_ok());
283 let invalid_bytes = vec![0x01, 0x02, 0x03];
284 assert!(JpegScrubber::new(invalid_bytes).is_err());
285 }
286
287 #[test]
288 fn view_metadata_finds_exif_data() {
289 let scrubber = JpegScrubber::new(TEST_JPEG_WITH_EXIF.to_vec()).unwrap();
290 let metadata = scrubber.view_metadata().unwrap();
291 println!("Found meta {:?}", metadata); assert!(!metadata.is_empty(), "No metadata was found");
293
294 let model_entry_found = metadata.iter().any(|m| m.value.contains("st Camera"));
295 assert!(
296 model_entry_found,
297 "Camera model metadata entry (containing 'st Camera') not found. Metadata list: {:?}",
298 metadata
299 );
300 }
301
302 #[test]
303 fn scrub_removes_exif_segment_and_reports_it() {
304 eprintln!(
306 "DBG (Test): TEST_JPEG_WITH_EXIF length: {}",
307 TEST_JPEG_WITH_EXIF.len()
308 );
309
310 assert_eq!(
313 TEST_JPEG_WITH_EXIF.len(),
314 209,
315 "Test constant length has changed!"
316 );
317
318 let scrubber = JpegScrubber::new(TEST_JPEG_WITH_EXIF.to_vec()).unwrap();
320 let expected_metadata_removed = scrubber.view_metadata().unwrap();
321
322 assert!(
324 !expected_metadata_removed.is_empty(),
325 "Expected metadata to be present before scrubbing"
326 );
327
328 let result = scrubber.scrub().unwrap();
330
331 assert!(
335 result.cleaned_file_bytes.len() < TEST_JPEG_WITH_EXIF.len(),
336 "Scrubbed file size should be smaller than original. Original: {}, Scrubbed: {}",
337 TEST_JPEG_WITH_EXIF.len(),
338 result.cleaned_file_bytes.len()
339 );
340
341 assert!(
343 !result.metadata_removed.is_empty(),
344 "Metadata removed should not be empty"
345 );
346 let new_scrubber = JpegScrubber::new(result.cleaned_file_bytes.clone()).unwrap();
351 assert!(
352 new_scrubber.find_exif_segment().is_none(),
353 "EXIF segment should be removed from the scrubbed file"
354 );
355
356 assert_eq!(
358 result.cleaned_file_bytes, TEST_JPEG_WITHOUT_EXIF,
359 "Scrubbed bytes do not match expected clean JPEG"
360 );
361 }
362
363 #[test]
364 fn view_metadata_on_jpeg_without_exif_returns_empty() {
365 let scrubber = JpegScrubber::new(TEST_JPEG_WITHOUT_EXIF.to_vec()).unwrap();
366 let metadata = scrubber.view_metadata().unwrap();
367 println!("Metadata for clean JPEG: {:?}", metadata); assert!(
369 metadata.is_empty(),
370 "Metadata should be empty for a clean JPEG. Found: {:?}",
371 metadata
372 );
373 }
374
375 #[test]
376 fn scrub_on_jpeg_without_exif_does_nothing() {
377 let original_bytes = TEST_JPEG_WITHOUT_EXIF.to_vec();
378 let scrubber = JpegScrubber::new(original_bytes.clone()).unwrap();
379 let result = scrubber.scrub().unwrap();
380
381 assert_eq!(
382 result.cleaned_file_bytes, original_bytes,
383 "File bytes should not change when no EXIF data is present"
384 );
385 assert!(
386 result.metadata_removed.is_empty(),
387 "No metadata should be reported as removed. Found: {:?}",
388 result.metadata_removed
389 );
390 }
391
392 #[test]
393 fn _calculate_correct_without_exif_for_209_byte_input() {
394 println!(
397 "DBG: Using TEST_JPEG_WITH_EXIF with length {}",
398 TEST_JPEG_WITH_EXIF.len()
399 );
400
401 let start_remove_index = 2;
412 let segment_length = 74; let end_remove_index = start_remove_index + segment_length - 1; let start_keep_after_index = end_remove_index + 1; println!(
417 "DBG: Calculating removal from index {} for {} bytes (indices {} to {})",
418 start_remove_index, segment_length, start_remove_index, end_remove_index
419 );
420
421 assert!(
423 start_remove_index + segment_length <= TEST_JPEG_WITH_EXIF.len(),
424 "Segment exceeds file bounds"
425 );
426 assert!(
427 start_keep_after_index <= TEST_JPEG_WITH_EXIF.len(),
428 "Data after segment exceeds file bounds"
429 );
430
431 let part1_bytes = &TEST_JPEG_WITH_EXIF[..start_remove_index]; let part2_bytes = &TEST_JPEG_WITH_EXIF[start_keep_after_index..]; println!(
435 "DBG: Part 1 length: {}, Part 2 length: {}",
436 part1_bytes.len(),
437 part2_bytes.len()
438 );
439
440 let mut correct_without_exif_bytes: Vec<u8> =
441 Vec::with_capacity(part1_bytes.len() + part2_bytes.len());
442 correct_without_exif_bytes.extend_from_slice(part1_bytes);
443 correct_without_exif_bytes.extend_from_slice(part2_bytes);
444
445 println!(
447 "\n--- CORRECT TEST_JPEG_WITHOUT_EXIF ({} bytes) ---",
448 correct_without_exif_bytes.len()
449 );
450 println!("Replace the current TEST_JPEG_WITHOUT_EXIF constant with this array:");
451 print!("const TEST_JPEG_WITHOUT_EXIF: &[u8] = &[");
452 for (i, &byte) in correct_without_exif_bytes.iter().enumerate() {
453 if i % 16 == 0 {
454 print!("\n ");
455 }
456 print!(" 0x{:02X},", byte);
457 }
458 println!("\n];");
459 println!("--- END OF CORRECT ARRAY ---");
460
461 assert_eq!(
462 correct_without_exif_bytes.len(),
463 135,
464 "Expected 135 bytes for the scrubbed file"
465 );
466 println!(
467 "\nSUCCESS: Calculation completed. Copy the array above to update TEST_JPEG_WITHOUT_EXIF."
468 );
469
470 }
474
475 #[test]
476 fn _debug_test_jpeg_length() {
477 println!(
480 "--- DEBUG: TEST_JPEG_WITH_EXIF length is {} ---",
481 TEST_JPEG_WITH_EXIF.len()
482 );
483
484 let print_len = std::cmp::min(10, TEST_JPEG_WITH_EXIF.len());
486 println!(
487 "--- DEBUG: First {} bytes: {:?}",
488 print_len,
489 &TEST_JPEG_WITH_EXIF[..print_len]
490 );
491
492 }
495}