use crate::{MetadataEntry, ScrubError, ScrubResult, Scrubber};
/// Scrubber that operates on the raw bytes of a JPEG file.
#[derive(Debug, Clone)]
pub struct JpegScrubber {
    /// Complete contents of the JPEG file.
    file_bytes: Vec<u8>,
}

impl JpegScrubber {
    /// Locates the first APP1 (`0xE1`) segment whose payload begins with `Exif\0\0`.
    ///
    /// The scan starts right after the two-byte SOI marker and walks the marker segments in
    /// order. It stops at the first SOS (`0xDA`) or EOI (`0xD9`) marker, so data following
    /// the start of the scan is never inspected.
    ///
    /// Returns `Some((offset, length))` where `offset` is the index of the segment's `0xFF`
    /// marker byte and `length` is the value of the segment's big-endian length field. That
    /// field counts its own two bytes plus the payload but NOT the two marker bytes, so the
    /// whole segment occupies `offset..offset + 2 + length`.
    ///
    /// Returns `None` when no such segment exists or when the segment structure is corrupt
    /// (a byte other than `0xFF` where a marker is expected, a length below 2, or a length
    /// that runs past the end of the file).
    fn find_exif_segment(&self) -> Option<(usize, usize)> {
        const EXIF_SIGNATURE: &[u8] = b"Exif\0\0";

        let bytes = self.file_bytes.as_slice();
        let mut offset = 2;
        // Every iteration needs the marker (2 bytes) and the length field (2 bytes).
        while offset + 4 <= bytes.len() {
            if bytes[offset] != 0xFF {
                return None;
            }
            let marker = bytes[offset + 1];
            match marker {
                // A marker may be preceded by any number of 0xFF fill bytes; step over one
                // and re-examine so the real marker code is not mistaken for a length.
                0xFF => {
                    offset += 1;
                    continue;
                }
                // TEM and RST0..RST7 are stand-alone markers without a length field.
                0x01 | 0xD0..=0xD7 => {
                    offset += 2;
                    continue;
                }
                // EOI or SOS: no further header segments follow.
                0xD9 | 0xDA => break,
                _ => {}
            }

            let length = usize::from(u16::from_be_bytes([bytes[offset + 2], bytes[offset + 3]]));
            let segment_end = offset + 2 + length;
            if length < 2 || segment_end > bytes.len() {
                return None;
            }

            // Compare against the payload only, so a short APP1 segment can never match on
            // bytes that belong to whatever follows it.
            if marker == 0xE1 && bytes[offset + 4..segment_end].starts_with(EXIF_SIGNATURE) {
                return Some((offset, length));
            }
            offset = segment_end;
        }
        None
    }
}
impl Scrubber for JpegScrubber {
    /// Wraps `file_bytes` after checking that they start with the JPEG SOI marker (`FF D8`).
    ///
    /// # Errors
    ///
    /// Returns [`ScrubError::ParsingError`] when the input is shorter than two bytes or does
    /// not begin with `FF D8`.
    fn new(file_bytes: Vec<u8>) -> Result<Self, ScrubError> {
        if !file_bytes.starts_with(&[0xFF, 0xD8]) {
            return Err(ScrubError::ParsingError("Not a valid JPEG file".into()));
        }
        Ok(Self { file_bytes })
    }

    /// Lists the EXIF entries that `nom_exif` can read from the file.
    ///
    /// Each entry is reported with its `Debug`-formatted value and a category derived from
    /// the entry's IFD index. A file that the EXIF parser rejects yields an empty list
    /// rather than an error.
    ///
    /// # Errors
    ///
    /// Returns [`ScrubError::ParsingError`] when the `MediaSource` cannot be created from the
    /// file bytes.
    fn view_metadata(&self) -> Result<Vec<MetadataEntry>, ScrubError> {
        use nom_exif::{ExifIter, MediaParser, MediaSource};
        use std::io::Cursor;

        let media_source = MediaSource::seekable(Cursor::new(&self.file_bytes)).map_err(|e| {
            ScrubError::ParsingError(format!("Failed to create MediaSource: {:?}", e))
        })?;
        let mut parser = MediaParser::new();
        // Deliberate best effort: a parse failure is treated as "nothing to report".
        let exif_iter: ExifIter = match parser.parse(media_source) {
            Ok(iter) => iter,
            Err(_) => return Ok(Vec::new()),
        };

        let mut metadata_entries = Vec::new();
        for entry in exif_iter {
            let category = match entry.ifd_index() {
                0 => "IFD0".to_string(),
                1 => "IFD1".to_string(),
                2 => "EXIF".to_string(),
                3 => "GPS".to_string(),
                4 => "Interop".to_string(),
                other => format!("IFD_{}", other),
            };
            let value = match entry.get_value() {
                Some(value_ref) => format!("{:?}", value_ref),
                None => "<No Value>".to_string(),
            };
            metadata_entries.push(MetadataEntry {
                // NOTE(review): every entry gets the same placeholder key; resolving the
                // real tag name from the entry would make this list far more useful.
                key: "<Tag Name Unavailable>".to_string(),
                value,
                category,
            });
        }
        Ok(metadata_entries)
    }

    /// Returns a copy of the file with the first EXIF APP1 segment removed, together with
    /// the metadata entries that were readable before removal.
    ///
    /// Only the segment reported by `find_exif_segment` is cut out; every other byte of the
    /// file is copied unchanged. When no EXIF segment is found the bytes are returned as-is
    /// and `metadata_removed` is empty.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Scrubber::view_metadata`].
    fn scrub(&self) -> Result<ScrubResult, ScrubError> {
        let metadata_removed = self.view_metadata()?;

        let (segment_start, length_field) = match self.find_exif_segment() {
            Some(found) => found,
            None => {
                return Ok(ScrubResult {
                    cleaned_file_bytes: self.file_bytes.clone(),
                    metadata_removed: Vec::new(),
                });
            }
        };

        // The length field does not count the two marker bytes (`FF E1`) in front of it, so
        // the segment ends two bytes later than `segment_start + length_field`. Leaving the
        // `+ 2` out keeps the tail of the EXIF data in the output and corrupts the file.
        // `find_exif_segment` has already verified that this end lies within the file.
        let segment_end = segment_start + 2 + length_field;

        let mut cleaned_bytes =
            Vec::with_capacity(self.file_bytes.len() - (segment_end - segment_start));
        cleaned_bytes.extend_from_slice(&self.file_bytes[..segment_start]);
        cleaned_bytes.extend_from_slice(&self.file_bytes[segment_end..]);

        Ok(ScrubResult {
            cleaned_file_bytes: cleaned_bytes,
            metadata_removed,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes one marker segment: `FF <marker>`, the big-endian length field (payload
    /// length plus the two bytes of the field itself), then the payload.
    ///
    /// Computing the length here keeps the fixtures structurally valid by construction.
    fn segment(marker: u8, payload: &[u8]) -> Vec<u8> {
        let length = payload.len() + 2;
        assert!(
            length <= usize::from(u16::MAX),
            "payload too large for a single segment"
        );
        let mut bytes = vec![0xFF, marker];
        // Lossless: the assertion above bounds `length` by `u16::MAX`.
        bytes.extend_from_slice(&(length as u16).to_be_bytes());
        bytes.extend_from_slice(payload);
        bytes
    }

    /// APP1 payload: the `Exif\0\0` signature followed by a big-endian TIFF structure with
    /// two ASCII tags in IFD0, Make (`0x010F`) and Model (`0x0110`).
    fn exif_payload() -> Vec<u8> {
        let mut payload = Vec::new();
        payload.extend_from_slice(b"Exif\0\0");
        // TIFF header: "MM", magic 42, IFD0 at TIFF offset 8.
        payload.extend_from_slice(&[0x4D, 0x4D, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x08]);
        // IFD0 holds two entries.
        payload.extend_from_slice(&[0x00, 0x02]);
        // Make: ASCII, 12 bytes, stored at TIFF offset 0x26.
        payload.extend_from_slice(&[
            0x01, 0x0F, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x26,
        ]);
        // Model: ASCII, 11 bytes, stored at TIFF offset 0x32.
        payload.extend_from_slice(&[
            0x01, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x32,
        ]);
        // No further IFD.
        payload.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]);
        payload.extend_from_slice(b"Test Camera\0");
        payload.extend_from_slice(b"Test Model\0");
        payload
    }

    /// Everything that follows the metadata: DQT, SOF0, DHT, SOS, two bytes of scan data and
    /// EOI. The segment structure is valid; the image is not meant to be decoded.
    fn image_segments() -> Vec<u8> {
        let mut dqt = vec![0x00];
        dqt.extend_from_slice(&[0x01; 64]);
        let sof0: &[u8] = &[
            0x08, 0x00, 0x01, 0x00, 0x01, 0x03, 0x01, 0x22, 0x00, 0x02, 0x11, 0x01, 0x03, 0x11,
            0x01,
        ];
        let dht: &[u8] = &[
            // Table class / id.
            0x00,
            // Sixteen code-length counts.
            0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00,
            // Twelve symbol values.
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
        ];
        let sos: &[u8] = &[0x03, 0x01, 0x00, 0x02, 0x11, 0x03, 0x11, 0x00, 0x3F, 0x00];

        let mut bytes = Vec::new();
        bytes.extend(segment(0xDB, &dqt));
        bytes.extend(segment(0xC0, sof0));
        bytes.extend(segment(0xC4, dht));
        bytes.extend(segment(0xDA, sos));
        bytes.extend_from_slice(&[0xF7, 0xC8]);
        bytes.extend_from_slice(&[0xFF, 0xD9]);
        bytes
    }

    /// SOI, the EXIF APP1 segment, then the image segments.
    fn jpeg_with_exif() -> Vec<u8> {
        let mut bytes = vec![0xFF, 0xD8];
        bytes.extend(segment(0xE1, &exif_payload()));
        bytes.extend(image_segments());
        bytes
    }

    /// The same file with the EXIF APP1 segment left out.
    fn jpeg_without_exif() -> Vec<u8> {
        let mut bytes = vec![0xFF, 0xD8];
        bytes.extend(image_segments());
        bytes
    }

    #[test]
    fn new_jpeg_scrubber_works() {
        assert!(JpegScrubber::new(jpeg_with_exif()).is_ok());
        assert!(JpegScrubber::new(vec![0x01, 0x02, 0x03]).is_err());
        assert!(JpegScrubber::new(Vec::new()).is_err());
    }

    #[test]
    fn fixtures_differ_only_by_the_exif_segment() {
        let with_exif = jpeg_with_exif();
        let without_exif = jpeg_without_exif();
        let length_field = exif_payload().len() + 2;
        // Marker bytes (2) + length field and payload.
        assert_eq!(with_exif.len(), without_exif.len() + 2 + length_field);

        let scrubber = JpegScrubber::new(with_exif).unwrap();
        assert_eq!(scrubber.find_exif_segment(), Some((2, length_field)));
    }

    #[test]
    fn view_metadata_finds_exif_data() {
        let scrubber = JpegScrubber::new(jpeg_with_exif()).unwrap();
        let metadata = scrubber.view_metadata().unwrap();
        assert!(!metadata.is_empty(), "No metadata was found");
        assert!(
            metadata.iter().any(|m| m.value.contains("Test Camera")),
            "Camera make entry (containing 'Test Camera') not found. Metadata list: {:?}",
            metadata
        );
    }

    #[test]
    fn scrub_removes_exif_segment_and_reports_it() {
        let original = jpeg_with_exif();
        let scrubber = JpegScrubber::new(original.clone()).unwrap();
        assert!(
            !scrubber.view_metadata().unwrap().is_empty(),
            "Expected metadata to be present before scrubbing"
        );

        let result = scrubber.scrub().unwrap();
        assert!(
            result.cleaned_file_bytes.len() < original.len(),
            "Scrubbed file size should be smaller than original. Original: {}, Scrubbed: {}",
            original.len(),
            result.cleaned_file_bytes.len()
        );
        assert!(
            !result.metadata_removed.is_empty(),
            "Metadata removed should not be empty"
        );
        // Regression check: the bytes right after SOI must be the next marker (DQT), not
        // left-over EXIF data.
        assert!(
            result
                .cleaned_file_bytes
                .starts_with(&[0xFF, 0xD8, 0xFF, 0xDB]),
            "Scrubbed file must continue with the DQT marker right after SOI"
        );

        let new_scrubber = JpegScrubber::new(result.cleaned_file_bytes.clone()).unwrap();
        assert!(
            new_scrubber.find_exif_segment().is_none(),
            "EXIF segment should be removed from the scrubbed file"
        );
        assert_eq!(
            result.cleaned_file_bytes,
            jpeg_without_exif(),
            "Scrubbed bytes do not match expected clean JPEG"
        );
    }

    #[test]
    fn view_metadata_on_jpeg_without_exif_returns_empty() {
        let scrubber = JpegScrubber::new(jpeg_without_exif()).unwrap();
        let metadata = scrubber.view_metadata().unwrap();
        assert!(
            metadata.is_empty(),
            "Metadata should be empty for a clean JPEG. Found: {:?}",
            metadata
        );
    }

    #[test]
    fn scrub_on_jpeg_without_exif_does_nothing() {
        let original_bytes = jpeg_without_exif();
        let scrubber = JpegScrubber::new(original_bytes.clone()).unwrap();
        let result = scrubber.scrub().unwrap();
        assert_eq!(
            result.cleaned_file_bytes, original_bytes,
            "File bytes should not change when no EXIF data is present"
        );
        assert!(
            result.metadata_removed.is_empty(),
            "No metadata should be reported as removed. Found: {:?}",
            result.metadata_removed
        );
    }
}