use anyhow::{Context, Result, bail};
use std::fs::File;
use std::io::Write;
use std::path::Path;
use crate::models::{FileKind, SanitizationPlan};
use crate::sanitizers::Sanitizer;
/// The fixed 8-byte signature that every PNG file starts with.
const PNG_SIGNATURE: &[u8; 8] = &[0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n'];
/// Stateless sanitizer for PNG files.
///
/// The type carries no data, so it is cheap to copy and can be built with
/// `PngSanitizer` or `PngSanitizer::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct PngSanitizer;
impl Sanitizer for PngSanitizer {
    /// Identifies this sanitizer as the one handling PNG files.
    fn kind(&self) -> FileKind {
        FileKind::Png
    }

    /// Reads `input` and reports which items would be removed, without
    /// writing anything.
    ///
    /// Fails if the file cannot be read or if `strip_png_metadata` rejects
    /// its contents.
    fn plan(&self, input: &Path) -> Result<SanitizationPlan> {
        let raw = std::fs::read(input)
            .with_context(|| format!("failed to read PNG {}", input.display()))?;
        // Only the list of removed items matters here; the rewritten bytes are dropped.
        strip_png_metadata(&raw).map(|(_, removed_items)| SanitizationPlan { removed_items })
    }

    /// Reads `input`, writes the stripped PNG bytes to `output`, flushes it,
    /// and reports which items were removed.
    ///
    /// Fails if the file cannot be read, if `strip_png_metadata` rejects its
    /// contents, or if writing to or flushing `output` fails.
    fn sanitize(&self, input: &Path, output: &mut File) -> Result<SanitizationPlan> {
        let raw = std::fs::read(input)
            .with_context(|| format!("failed to read PNG {}", input.display()))?;
        let (cleaned, removed_items) = strip_png_metadata(&raw)?;
        output
            .write_all(&cleaned)
            .and_then(|()| output.flush())?;
        Ok(SanitizationPlan { removed_items })
    }
}
/// Rebuilds a PNG byte stream without its metadata chunks.
///
/// Walks the chunk sequence that follows the PNG signature. Every chunk for
/// which `is_metadata_chunk` returns `true` is dropped; all other chunks are
/// copied to the output byte-for-byte (length, type, payload and CRC).
/// Processing stops at the `IEND` chunk. Any bytes that follow `IEND` are
/// discarded as well, because data appended after the end marker is not part
/// of the image and can carry hidden or leftover content.
///
/// Returns the sanitized bytes together with one entry per removed item: the
/// chunk type (lossily decoded as UTF-8) for each dropped chunk, plus
/// `"trailing data after IEND"` when bytes after the end marker were dropped.
///
/// # Errors
///
/// Fails when `bytes` does not start with the PNG signature, when a chunk's
/// declared length runs past the end of the input, or when the input ends
/// before an `IEND` chunk is seen.
fn strip_png_metadata(bytes: &[u8]) -> Result<(Vec<u8>, Vec<String>)> {
    // Fixed per-chunk overhead: 4-byte length + 4-byte type + 4-byte CRC.
    const CHUNK_OVERHEAD: usize = 12;

    if !bytes.starts_with(PNG_SIGNATURE) {
        bail!("input is not a PNG file");
    }

    let mut output = Vec::with_capacity(bytes.len());
    output.extend_from_slice(PNG_SIGNATURE);
    let mut removed_items = Vec::new();
    let mut cursor = PNG_SIGNATURE.len();

    // Invariant: `cursor <= bytes.len()`, so the subtractions below cannot underflow.
    while bytes.len() - cursor >= CHUNK_OVERHEAD {
        let length = u32::from_be_bytes([
            bytes[cursor],
            bytes[cursor + 1],
            bytes[cursor + 2],
            bytes[cursor + 3],
        ]) as usize;
        let chunk_type = &bytes[cursor + 4..cursor + 8];

        // Compare against the space that is actually left instead of computing
        // `cursor + 12 + length` first: the length field is untrusted and that
        // sum could overflow `usize` on 32-bit targets.
        if length > bytes.len() - cursor - CHUNK_OVERHEAD {
            bail!("PNG chunk extends past end of file");
        }
        let chunk_end = cursor + CHUNK_OVERHEAD + length;

        if is_metadata_chunk(chunk_type) {
            removed_items.push(String::from_utf8_lossy(chunk_type).into_owned());
        } else {
            output.extend_from_slice(&bytes[cursor..chunk_end]);
        }

        if chunk_type == b"IEND" {
            // Nothing after the end marker belongs to the image; drop it and
            // report the removal rather than copying it through.
            if chunk_end < bytes.len() {
                removed_items.push(String::from("trailing data after IEND"));
            }
            return Ok((output, removed_items));
        }

        cursor = chunk_end;
    }

    bail!("PNG file ended before IEND")
}
/// Returns `true` when `chunk_type` is one of the chunk types this sanitizer
/// treats as metadata: `tEXt`, `zTXt`, `iTXt`, `eXIf` or `tIME`.
///
/// The comparison is exact and case-sensitive; any other value, including
/// slices that are not four bytes long, yields `false`.
fn is_metadata_chunk(chunk_type: &[u8]) -> bool {
    const METADATA_CHUNK_TYPES: [&[u8]; 5] = [b"tEXt", b"zTXt", b"iTXt", b"eXIf", b"tIME"];

    METADATA_CHUNK_TYPES
        .iter()
        .any(|known| *known == chunk_type)
}
#[cfg(test)]
mod tests {
    use super::{strip_png_metadata, PNG_SIGNATURE};

    /// Serialises one PNG chunk: big-endian length, type, payload, then a
    /// zeroed 4-byte CRC placeholder (the code under test never checks CRCs).
    fn chunk(chunk_type: &[u8; 4], payload: &[u8]) -> Vec<u8> {
        let length = u32::try_from(payload.len()).expect("test payload fits in a u32 length");
        let mut bytes = Vec::with_capacity(payload.len() + 12);
        bytes.extend_from_slice(&length.to_be_bytes());
        bytes.extend_from_slice(chunk_type);
        bytes.extend_from_slice(payload);
        bytes.extend_from_slice(&[0; 4]);
        bytes
    }

    /// Builds a PNG byte stream from the signature followed by `chunks`.
    fn png(chunks: &[Vec<u8>]) -> Vec<u8> {
        let mut bytes = PNG_SIGNATURE.to_vec();
        for part in chunks {
            bytes.extend_from_slice(part);
        }
        bytes
    }

    /// Reports whether the four-byte `chunk_type` tag occurs anywhere in `bytes`.
    fn contains_tag(bytes: &[u8], chunk_type: &[u8; 4]) -> bool {
        bytes.windows(4).any(|window| window == chunk_type)
    }

    #[test]
    fn strips_png_text_chunks() {
        let input = png(&[
            chunk(b"IHDR", &[0; 13]),
            chunk(b"tEXt", b"Comment\0secret"),
            chunk(b"IDAT", b"compressed"),
            chunk(b"IEND", &[]),
        ]);

        let (output, removed) = strip_png_metadata(&input).expect("png should sanitize");

        assert!(contains_tag(&output, b"IHDR"));
        assert!(contains_tag(&output, b"IDAT"));
        assert!(contains_tag(&output, b"IEND"));
        assert!(!contains_tag(&output, b"tEXt"));
        assert_eq!(removed, ["tEXt"]);
    }

    #[test]
    fn leaves_metadata_free_png_untouched() {
        let input = png(&[
            chunk(b"IHDR", &[0; 13]),
            chunk(b"IDAT", b"compressed"),
            chunk(b"IEND", &[]),
        ]);

        let (output, removed) = strip_png_metadata(&input).expect("png should sanitize");

        assert_eq!(output, input);
        assert!(removed.is_empty());
    }

    #[test]
    fn rejects_input_without_png_signature() {
        let error = strip_png_metadata(b"definitely not a png").unwrap_err();
        assert_eq!(error.to_string(), "input is not a PNG file");

        // Shorter than the signature itself.
        assert!(strip_png_metadata(b"\x89PNG").is_err());
        assert!(strip_png_metadata(b"").is_err());
    }

    #[test]
    fn rejects_chunk_longer_than_remaining_input() {
        let mut input = PNG_SIGNATURE.to_vec();
        // Header claims a 100-byte payload, but only the CRC placeholder follows.
        input.extend_from_slice(&100u32.to_be_bytes());
        input.extend_from_slice(b"IDAT");
        input.extend_from_slice(&[0; 4]);

        let error = strip_png_metadata(&input).unwrap_err();
        assert_eq!(error.to_string(), "PNG chunk extends past end of file");
    }

    #[test]
    fn rejects_png_without_iend() {
        let input = png(&[chunk(b"IHDR", &[0; 13]), chunk(b"IDAT", b"compressed")]);

        let error = strip_png_metadata(&input).unwrap_err();
        assert_eq!(error.to_string(), "PNG file ended before IEND");
    }
}