use anyhow::{Context, Result, bail};
use std::fs::File;
use std::io::Write;
use std::path::Path;
use crate::models::{FileKind, SanitizationPlan};
use crate::sanitizers::Sanitizer;
/// Sanitizer for JPEG files.
///
/// Stateless unit type; the work is done by its `Sanitizer` implementation,
/// which removes APP1 (EXIF/XMP), APP13 and comment segments from the file.
pub struct JpegSanitizer;
impl Sanitizer for JpegSanitizer {
    /// Identifies this sanitizer as the handler for JPEG files.
    fn kind(&self) -> FileKind {
        FileKind::Jpeg
    }

    /// Reads `input` and reports which metadata items would be removed,
    /// without writing any output.
    ///
    /// # Errors
    /// Fails when the file cannot be read or is not a well-formed JPEG.
    fn plan(&self, input: &Path) -> Result<SanitizationPlan> {
        let raw = std::fs::read(input)
            .with_context(|| format!("failed to read JPEG {}", input.display()))?;
        strip_jpeg_metadata(&raw).map(|(_, removed_items)| SanitizationPlan { removed_items })
    }

    /// Reads `input`, writes the metadata-free JPEG to `output`, flushes it,
    /// and reports which items were removed.
    ///
    /// # Errors
    /// Fails when the file cannot be read, is not a well-formed JPEG, or the
    /// sanitized bytes cannot be written to `output`.
    fn sanitize(&self, input: &Path, output: &mut File) -> Result<SanitizationPlan> {
        let raw = std::fs::read(input)
            .with_context(|| format!("failed to read JPEG {}", input.display()))?;
        let (clean, removed_items) = strip_jpeg_metadata(&raw)?;
        // Write first, then flush; the first I/O error (if any) is propagated.
        output
            .write_all(&clean)
            .and_then(|()| output.flush())?;
        Ok(SanitizationPlan { removed_items })
    }
}
/// Removes metadata segments from an in-memory JPEG and reports what was dropped.
///
/// The marker segments following the SOI marker are walked one by one.
/// Segments that `metadata_label` recognises (APP1, APP13, comment) are
/// dropped and their labels collected; every other segment is copied through
/// unchanged, including any `0xFF` fill bytes in front of it. As soon as the
/// first start-of-scan (`0xDA`) or end-of-image (`0xD9`) marker is reached,
/// the remainder of the input is copied verbatim, so anything stored after
/// that point (including bytes following EOI) is preserved as-is.
///
/// Returns the sanitized bytes together with the labels of the removed items.
///
/// # Errors
/// Fails when the input does not start with an SOI marker, when data appears
/// outside of a marker segment, when a segment length is invalid or runs past
/// the end of the input, or when no end-of-image marker can be found.
fn strip_jpeg_metadata(bytes: &[u8]) -> Result<(Vec<u8>, Vec<String>)> {
    const EOI: [u8; 2] = [0xFF, 0xD9];

    if bytes.len() < 4 || bytes[0] != 0xFF || bytes[1] != 0xD8 {
        bail!("input is not a JPEG file");
    }
    let mut output = Vec::with_capacity(bytes.len());
    let mut removed_items = Vec::new();
    output.extend_from_slice(&bytes[..2]);
    let mut cursor = 2_usize;
    let mut saw_end_marker = false;
    while cursor < bytes.len() {
        let marker_start = cursor;
        // A marker is one or more 0xFF bytes followed by a non-0xFF marker code.
        while cursor < bytes.len() && bytes[cursor] == 0xFF {
            cursor += 1;
        }
        if cursor >= bytes.len() {
            break;
        }
        if marker_start == cursor {
            bail!("unexpected JPEG data outside of a marker segment");
        }
        let marker = bytes[cursor];
        // Position of the 0xFF directly in front of the marker code; any bytes
        // between `marker_start` and here are fill bytes.
        let prefix_start = cursor - 1;
        cursor += 1;
        match marker {
            0xD9 => {
                // End of image: keep the marker and whatever follows it.
                output.extend_from_slice(&bytes[marker_start..]);
                saw_end_marker = true;
                break;
            }
            0xDA => {
                let end = consume_segment_end(bytes, &mut cursor)?;
                // Look for EOI only after the scan header. Searching the whole
                // output could be satisfied by an `FF D9` byte pair that merely
                // occurs inside the payload of a retained header segment, which
                // would let a truncated file pass validation.
                if !bytes[end..].windows(2).any(|window| window == EOI) {
                    bail!("sanitized JPEG is missing an end-of-image marker");
                }
                // Scan header, entropy-coded data and the rest are copied verbatim.
                output.extend_from_slice(&bytes[marker_start..]);
                saw_end_marker = true;
                break;
            }
            // Treated as stand-alone markers: no length field follows them.
            0x01 | 0xD0..=0xD7 => {
                output.extend_from_slice(&bytes[marker_start..cursor]);
            }
            _ => {
                let end = consume_segment_end(bytes, &mut cursor)?;
                // Classify using the slice that starts at the marker's own 0xFF
                // so the payload offset is not shifted by preceding fill bytes.
                match metadata_label(marker, &bytes[prefix_start..end]) {
                    Some(label) => removed_items.push(label),
                    None => output.extend_from_slice(&bytes[marker_start..end]),
                }
            }
        }
    }
    if !saw_end_marker {
        bail!("JPEG file ended before an end-of-image marker was found");
    }
    Ok((output, removed_items))
}
/// Reads the big-endian 16-bit length field at `*cursor` and advances the
/// cursor to the end of that segment.
///
/// The length counts its own two bytes, so the segment ends at
/// `*cursor + length`. On success `*cursor` is set to that offset, which is
/// also returned; on failure `*cursor` is left untouched.
///
/// # Errors
/// Fails when fewer than two bytes remain, when the length is below 2, or
/// when the segment would extend past the end of `bytes`.
fn consume_segment_end(bytes: &[u8], cursor: &mut usize) -> Result<usize> {
    let start = *cursor;
    let length = match bytes.get(start..start + 2) {
        Some(&[hi, lo]) => usize::from(u16::from_be_bytes([hi, lo])),
        _ => bail!("unexpected end of JPEG segment"),
    };
    if length < 2 {
        bail!("JPEG segment length is invalid");
    }
    let segment_end = start + length;
    if segment_end > bytes.len() {
        bail!("JPEG segment extends past the end of the file");
    }
    *cursor = segment_end;
    Ok(segment_end)
}
/// Classifies a marker segment as removable metadata.
///
/// `marker` is the marker code and `segment` is the raw segment: one or more
/// `0xFF` prefix bytes, the marker code, the two length bytes, then the
/// payload. Returns a human-readable label when the segment should be
/// removed, or `None` when it should be kept.
///
/// APP1 (`0xE1`) is always reported, with the payload signature deciding
/// between EXIF, XMP and a generic label; APP13 (`0xED`) and comment (`0xFE`)
/// segments are always reported as well.
fn metadata_label(marker: u8, segment: &[u8]) -> Option<String> {
    // A marker may be preceded by extra 0xFF fill bytes. Skip them so the
    // payload is located correctly instead of assuming a fixed 4-byte header.
    let fill_bytes = segment
        .iter()
        .take_while(|&&byte| byte == 0xFF)
        .count()
        .saturating_sub(1);
    // Header = fill bytes + 0xFF + marker code + 2 length bytes.
    let payload = segment.get(4 + fill_bytes..).unwrap_or(&[]);

    let label = match marker {
        0xE1 if payload.starts_with(b"Exif\0\0") => "APP1/EXIF",
        0xE1 if payload.starts_with(b"http://ns.adobe.com/xap/1.0/\0") => "APP1/XMP",
        0xE1 => "APP1 metadata",
        0xED => "APP13/IPTC-Photoshop",
        0xFE => "JPEG comment",
        _ => return None,
    };
    Some(label.to_string())
}
#[cfg(test)]
mod tests {
    use super::strip_jpeg_metadata;

    /// Builds one marker segment: `FF <marker>`, a big-endian length that
    /// covers the payload plus the two length bytes, then the payload.
    fn segment(marker: u8, payload: &[u8]) -> Vec<u8> {
        let [hi, lo] = ((payload.len() + 2) as u16).to_be_bytes();
        [&[0xFF, marker, hi, lo][..], payload].concat()
    }

    /// Returns `true` when `needle` occurs as a contiguous run in `haystack`.
    fn contains(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|window| window == needle)
    }

    #[test]
    fn strips_metadata_segments_but_keeps_jpeg_structure() {
        // SOI, JFIF header, EXIF block, comment, EOI.
        let input = [
            vec![0xFF, 0xD8],
            segment(0xE0, b"JFIF\0\x01\x02"),
            segment(0xE1, b"Exif\0\0metadata"),
            segment(0xFE, b"comment"),
            vec![0xFF, 0xD9],
        ]
        .concat();

        let (output, removed) = strip_jpeg_metadata(&input).expect("jpeg should sanitize");

        // Structural markers and the JFIF segment survive.
        assert!(output.starts_with(&[0xFF, 0xD8]));
        assert!(contains(&output, &[0xFF, 0xD9]));
        assert!(contains(&output, &[0xFF, 0xE0, 0x00, 0x09]));

        // Metadata segments are gone and reported.
        assert!(!contains(&output, &[0xFF, 0xE1]));
        assert!(!contains(&output, &[0xFF, 0xFE]));
        for expected in ["APP1/EXIF", "JPEG comment"] {
            assert!(removed.iter().any(|item| item == expected));
        }
    }

    #[test]
    fn rejects_non_jpeg_input() {
        let message = strip_jpeg_metadata(b"not a jpeg")
            .expect_err("should reject invalid input")
            .to_string();
        assert!(message.contains("input is not a JPEG file"));
    }
}