use std::ops::Range;
use crate::error::{ParsingError, ParsingErrorState};
/// Metadata gathered from a PNG chunk stream by `extract_chunks`.
#[derive(Debug)]
pub(crate) struct PngParseOut {
/// The EXIF source that was selected, or `None` when no usable one was encountered.
pub exif: Option<PngExifSource>,
/// Latin-1 decoded `(keyword, text)` pairs of the retained `tEXt` chunks, in file order.
/// Duplicate keywords are kept as separate entries.
pub text_chunks: Vec<(String, String)>,
}
/// Where the EXIF payload of a PNG was found.
#[derive(Debug)]
pub(crate) enum PngExifSource {
/// Data of an `eXIf` chunk, expressed as a byte range into the buffer that was
/// handed to `extract_chunks` (no bytes are copied).
EXif(Range<usize>),
/// Bytes hex-decoded from a `Raw profile type exif` or `Raw profile type APP1`
/// `tEXt` entry. For the APP1 flavour a leading `Exif\0\0` marker has already
/// been removed, and the bytes were accepted by `TiffHeader::parse`.
Legacy(Vec<u8>),
}
/// The eight bytes that `extract_chunks` requires at the very start of the buffer.
const PNG_SIGNATURE: &[u8; 8] = b"\x89PNG\r\n\x1a\n";
/// Largest declared `tEXt` data length (1 MiB) that is decoded; longer `tEXt`
/// chunks are skipped without looking at their contents.
const MAX_TEXT_CHUNK_SIZE: u32 = 1024 * 1024;
/// Budget (16 MiB) for the summed byte lengths of all keys and values kept in
/// `PngParseOut::text_chunks`; an entry that would exceed it is not stored.
const MAX_TEXT_CHUNKS_TOTAL: usize = 16 * 1024 * 1024;
/// Converts ISO-8859-1 (Latin-1) bytes into a `String`.
///
/// Every byte becomes the Unicode scalar value with the same number, so the
/// conversion is total: no input can fail to decode.
fn decode_latin1(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(char::from(byte));
    }
    text
}
/// Decodes the text of a "Raw profile type …" entry into raw bytes.
///
/// The first three lines of `s` are treated as a header and discarded without
/// being inspected (in the ImageMagick layout they are an empty line, the
/// profile name and the byte count). All remaining lines are joined and
/// hex-decoded with `hex_decode`.
///
/// Returns `Err(())` when `s` has fewer than three lines or when the remainder
/// is not valid hex.
fn decode_raw_profile_value(s: &str) -> Result<Vec<u8>, ()> {
    let mut rows = s.lines();
    // `nth(2)` consumes three lines and yields `None` if fewer are available.
    rows.nth(2).ok_or(())?;
    let mut hex = String::with_capacity(s.len());
    for row in rows {
        hex.push_str(row);
    }
    hex_decode(&hex)
}
/// Decodes a hexadecimal string into bytes.
///
/// Upper- and lower-case digits are accepted. Spaces, tabs, `\r` and `\n` may
/// appear anywhere (even between the two digits of one byte) and are ignored.
///
/// Returns `Err(())` on any other character or when the number of hex digits
/// is odd.
fn hex_decode(s: &str) -> Result<Vec<u8>, ()> {
    let mut decoded = Vec::with_capacity(s.len() / 2);
    // High nibble of the byte currently being assembled, if one has been read.
    let mut pending_high: Option<u8> = None;
    for byte in s.bytes() {
        if matches!(byte, b' ' | b'\n' | b'\r' | b'\t') {
            continue;
        }
        // `to_digit(16)` accepts exactly `0-9`, `a-f`, `A-F`; the value is < 16.
        let nibble = char::from(byte).to_digit(16).ok_or(())? as u8;
        if let Some(high) = pending_high.take() {
            decoded.push((high << 4) | nibble);
        } else {
            pending_high = Some(nibble);
        }
    }
    match pending_high {
        None => Ok(decoded),
        // A dangling high nibble means an odd number of digits.
        Some(_) => Err(()),
    }
}
/// Scans the chunk sequence of the PNG held in `buf` and collects its EXIF
/// source and `tEXt` entries.
///
/// Chunks are visited in order until `IEND`. The four CRC bytes of each chunk
/// are stepped over, never verified.
///
/// * `eXIf` — the data range is recorded as [`PngExifSource::EXif`]; it always
///   replaces a previously found legacy source.
/// * `tEXt` — chunks whose declared length exceeds `MAX_TEXT_CHUNK_SIZE` are
///   skipped. Otherwise the data is split at the first NUL into keyword and
///   text (chunks without a NUL are ignored), both decoded as Latin-1. The
///   keywords `Raw profile type APP1` (preferred) and `Raw profile type exif`
///   are additionally tried as a legacy EXIF source. The pair is stored in
///   `text_chunks` while the `MAX_TEXT_CHUNKS_TOTAL` budget allows.
/// * Any other chunk is skipped.
///
/// # Errors
///
/// * `ParsingError::Need(n)` — `buf` ends inside the signature, inside a chunk
///   header, or inside an `eXIf` / size-accepted `tEXt` chunk; `n` is the
///   number of missing bytes.
/// * `ParsingError::ClearAndSkip(n)` — an oversized `tEXt` chunk or any other
///   chunk extends past the end of `buf`; `n` is the number of its bytes that
///   lie beyond the buffer.
/// * `ParsingError::Failed` — the signature does not match, or a chunk length
///   plus its 12 bytes of framing overflows `usize`.
#[tracing::instrument(skip(buf))]
pub(crate) fn extract_chunks(buf: &[u8]) -> Result<PngParseOut, ParsingErrorState> {
    // Shorthand constructors for the three error shapes produced below.
    let need = |missing: usize| ParsingErrorState::new(ParsingError::Need(missing), None);
    let skip_ahead =
        |beyond: usize| ParsingErrorState::new(ParsingError::ClearAndSkip(beyond), None);
    let failed =
        |reason: &'static str| ParsingErrorState::new(ParsingError::Failed(reason.into()), None);

    let signature_len = PNG_SIGNATURE.len();
    if buf.len() < signature_len {
        return Err(need(signature_len - buf.len()));
    }
    if !buf.starts_with(PNG_SIGNATURE) {
        return Err(failed("PNG: bad signature"));
    }

    let mut found = PngParseOut {
        exif: None,
        text_chunks: Vec::new(),
    };
    let mut retained_text_bytes: usize = 0;
    // Rank of the EXIF source currently stored: 0 none, 1 legacy "exif",
    // 2 legacy "APP1", 3 native eXIf chunk.
    let mut best_exif_rank: u8 = 0;
    let mut pos = signature_len;

    loop {
        // `pos` only ever advances by a chunk span that was checked to fit,
        // so it never exceeds `buf.len()`.
        let available = buf.len() - pos;
        if available < 8 {
            return Err(need(8 - available));
        }
        let header = &buf[pos..pos + 8];
        let length = u32::from_be_bytes([header[0], header[1], header[2], header[3]]);
        let payload_len = length as usize;
        // Full on-disk size: 4 length + 4 type + payload + 4 CRC.
        let chunk_span = payload_len
            .checked_add(12)
            .ok_or_else(|| failed("PNG: chunk length overflows addressable size"))?;

        match &header[4..] {
            b"IEND" => break,
            b"eXIf" => {
                if chunk_span > available {
                    return Err(need(chunk_span - available));
                }
                let payload_start = pos + 8;
                found.exif = Some(PngExifSource::EXif(
                    payload_start..payload_start + payload_len,
                ));
                best_exif_rank = 3;
            }
            b"tEXt" => {
                let oversized = length > MAX_TEXT_CHUNK_SIZE;
                if chunk_span > available {
                    let beyond = chunk_span - available;
                    // Oversized text is never decoded, so it need not be buffered.
                    return Err(if oversized {
                        skip_ahead(beyond)
                    } else {
                        need(beyond)
                    });
                }
                if !oversized {
                    let payload = &buf[pos + 8..pos + 8 + payload_len];
                    if let Some(separator) = payload.iter().position(|&b| b == 0) {
                        let key = decode_latin1(&payload[..separator]);
                        let value = decode_latin1(&payload[separator + 1..]);
                        let rank: u8 = match key.as_str() {
                            "Raw profile type APP1" => 2,
                            "Raw profile type exif" => 1,
                            _ => 0,
                        };
                        // Rank 0 (ordinary text) can never beat the stored rank.
                        if rank > best_exif_rank {
                            if let Ok(mut tiff) = decode_raw_profile_value(&value) {
                                if key.ends_with("APP1") && tiff.starts_with(b"Exif\0\0") {
                                    tiff.drain(0..6);
                                }
                                if tiff.len() >= 8
                                    && crate::exif::TiffHeader::parse(&tiff).is_ok()
                                {
                                    found.exif = Some(PngExifSource::Legacy(tiff));
                                    best_exif_rank = rank;
                                }
                            }
                        }
                        let entry_bytes = key.len() + value.len();
                        if retained_text_bytes + entry_bytes <= MAX_TEXT_CHUNKS_TOTAL {
                            retained_text_bytes += entry_bytes;
                            found.text_chunks.push((key, value));
                        }
                    }
                }
            }
            _ => {
                if chunk_span > available {
                    return Err(skip_ahead(chunk_span - available));
                }
            }
        }
        pos += chunk_span;
    }
    Ok(found)
}
// Unit tests for the PNG chunk scanner and its hex / raw-profile helpers.
// All fixtures write zeroed CRC fields; `extract_chunks` steps over the CRC
// bytes without reading them.
#[cfg(test)]
mod tests {
use super::*;
// Hand-assembled PNG: signature, a 13-byte IHDR chunk and an empty IEND chunk.
fn build_minimal_png() -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(PNG_SIGNATURE);
out.extend_from_slice(&13u32.to_be_bytes());
out.extend_from_slice(b"IHDR");
out.extend_from_slice(&[0, 0, 0, 1, 0, 0, 0, 1, 8, 0, 0, 0, 0]);
// Zeroed IHDR CRC, followed by the zero length field of IEND.
out.extend_from_slice(&[0, 0, 0, 0]); out.extend_from_slice(&0u32.to_be_bytes());
out.extend_from_slice(b"IEND");
// Zeroed IEND CRC.
out.extend_from_slice(&[0, 0, 0, 0]); out
}
// A PNG with no metadata chunks yields an empty result.
#[test]
fn extract_chunks_minimal_png() {
let buf = build_minimal_png();
let result = extract_chunks(&buf).unwrap();
assert!(result.exif.is_none());
assert!(result.text_chunks.is_empty());
}
// Enough bytes for a signature, but the wrong ones: hard failure.
#[test]
fn extract_chunks_bad_signature() {
let buf = b"\x00\x00\x00\x00\x00\x00\x00\x00not_png".to_vec();
let err = extract_chunks(&buf).unwrap_err();
assert!(matches!(err.err, ParsingError::Failed(_)));
}
// Fewer than eight bytes: the scanner asks for more input instead of failing.
#[test]
fn extract_chunks_truncated_signature() {
let buf = b"\x89PNG".to_vec();
let err = extract_chunks(&buf).unwrap_err();
assert!(matches!(err.err, ParsingError::Need(_)));
}
// Serialises one chunk: big-endian length, type, data, zeroed CRC.
fn build_chunk(ctype: &[u8; 4], data: &[u8]) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(&(data.len() as u32).to_be_bytes());
out.extend_from_slice(ctype);
out.extend_from_slice(data);
out.extend_from_slice(&[0, 0, 0, 0]); out
}
// Wraps already-serialised chunks between signature + IHDR and IEND.
fn build_png_with_chunks(chunks: &[Vec<u8>]) -> Vec<u8> {
let mut out = Vec::new();
out.extend_from_slice(PNG_SIGNATURE);
out.extend_from_slice(&build_chunk(
b"IHDR",
&[0, 0, 0, 1, 0, 0, 0, 1, 8, 0, 0, 0, 0],
));
for c in chunks {
out.extend_from_slice(c);
}
out.extend_from_slice(&build_chunk(b"IEND", &[]));
out
}
// The reported range must address exactly the eXIf chunk data inside `buf`.
#[test]
fn extract_chunks_with_exif() {
let exif_payload = b"II*\x00\x08\x00\x00\x00MM\x00\x2a";
let exif_chunk = build_chunk(b"eXIf", exif_payload);
let buf = build_png_with_chunks(&[exif_chunk]);
let result = extract_chunks(&buf).unwrap();
let exif_range = match result.exif {
Some(PngExifSource::EXif(r)) => r,
_ => panic!("expected EXif source"),
};
assert_eq!(&buf[exif_range], exif_payload);
assert!(result.text_chunks.is_empty());
}
// A plain tEXt chunk is split at the NUL into keyword and text.
#[test]
fn extract_chunks_with_text() {
let mut text_data = Vec::new();
text_data.extend_from_slice(b"Title");
text_data.push(0);
text_data.extend_from_slice(b"Hello world");
let chunks = vec![build_chunk(b"tEXt", &text_data)];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
assert!(result.exif.is_none());
assert_eq!(result.text_chunks.len(), 1);
assert_eq!(result.text_chunks[0].0, "Title");
assert_eq!(result.text_chunks[0].1, "Hello world");
}
// Repeated keywords are kept as separate entries, in file order.
#[test]
fn extract_chunks_text_duplicate_keys() {
let mut t1 = Vec::new();
t1.extend_from_slice(b"Comment");
t1.push(0);
t1.extend_from_slice(b"first");
let mut t2 = Vec::new();
t2.extend_from_slice(b"Comment");
t2.push(0);
t2.extend_from_slice(b"second");
let chunks = vec![build_chunk(b"tEXt", &t1), build_chunk(b"tEXt", &t2)];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
assert_eq!(result.text_chunks.len(), 2);
assert_eq!(result.text_chunks[0], ("Comment".into(), "first".into()));
assert_eq!(result.text_chunks[1], ("Comment".into(), "second".into()));
}
// A tEXt chunk without a NUL separator is ignored rather than treated as an error.
#[test]
fn extract_chunks_text_no_nul_separator() {
let chunks = vec![build_chunk(b"tEXt", b"NoNulSeparator")];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
assert!(result.text_chunks.is_empty());
}
// Byte 0xE9 must come out as U+00E9, i.e. the text is decoded as Latin-1.
#[test]
fn extract_chunks_text_latin1_decode() {
let mut data = Vec::new();
data.extend_from_slice(b"Caption");
data.push(0);
data.extend_from_slice(b"caf\xE9");
let chunks = vec![build_chunk(b"tEXt", &data)];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
assert_eq!(result.text_chunks[0].1, "café");
}
// Only the eXIf header is present; its 100 data bytes and CRC are missing,
// so the scanner must request them with `Need`.
#[test]
fn extract_chunks_truncated_inside_exif() {
let mut buf = Vec::new();
buf.extend_from_slice(PNG_SIGNATURE);
buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
buf.extend_from_slice(&100u32.to_be_bytes());
buf.extend_from_slice(b"eXIf");
let err = extract_chunks(&buf).unwrap_err();
match err.err {
ParsingError::Need(n) => assert!(n >= 100),
other => panic!("expected Need(>=100), got {other:?}"),
}
}
// A chunk type the scanner does not decode that runs past the buffer is
// reported as `ClearAndSkip` instead of `Need`.
#[test]
fn extract_chunks_skips_large_idat() {
let mut buf = Vec::new();
buf.extend_from_slice(PNG_SIGNATURE);
buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
buf.extend_from_slice(&50_000u32.to_be_bytes());
buf.extend_from_slice(b"IDAT");
let err = extract_chunks(&buf).unwrap_err();
match err.err {
ParsingError::ClearAndSkip(n) => assert!(n >= 50_000),
other => panic!("expected ClearAndSkip(>=50_000), got {other:?}"),
}
}
// A tEXt chunk one byte over MAX_TEXT_CHUNK_SIZE is skipped, not buffered.
#[test]
fn extract_chunks_text_too_large_skipped() {
let mut buf = Vec::new();
buf.extend_from_slice(PNG_SIGNATURE);
buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
let bogus_length = MAX_TEXT_CHUNK_SIZE + 1;
buf.extend_from_slice(&bogus_length.to_be_bytes());
buf.extend_from_slice(b"tEXt");
let err = extract_chunks(&buf).unwrap_err();
assert!(matches!(err.err, ParsingError::ClearAndSkip(_)));
}
#[test]
fn hex_decode_basic() {
assert_eq!(hex_decode("4849").unwrap(), b"HI");
assert_eq!(hex_decode("48 49").unwrap(), b"HI");
assert_eq!(hex_decode("48\n49").unwrap(), b"HI");
assert_eq!(hex_decode("aBcD").unwrap(), vec![0xab, 0xcd]);
}
// Non-hex characters and an odd number of digits are both rejected.
#[test]
fn hex_decode_rejects_invalid() {
assert!(hex_decode("XX").is_err());
assert!(hex_decode("48a").is_err()); }
// Three header lines (empty, profile name, byte count) precede the hex body.
#[test]
fn decode_raw_profile_imagemagick_format() {
let v = "\nexif\n 4\n4849 5050\n";
let bytes = decode_raw_profile_value(v).unwrap();
assert_eq!(bytes, b"HIPP");
}
// Regression guard: a maximal declared tEXt length must produce an error, not a
// panic. Only the presence of an error is asserted, not its kind.
#[test]
fn extract_chunks_malicious_text_length_max_u32_does_not_panic() {
let mut buf = Vec::new();
buf.extend_from_slice(PNG_SIGNATURE);
buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
buf.extend_from_slice(&u32::MAX.to_be_bytes());
buf.extend_from_slice(b"tEXt");
let _err = extract_chunks(&buf).unwrap_err();
}
// Same guard for an unknown chunk type. Where `usize` is wider than 32 bits,
// `u32::MAX + 12` does not overflow, so the error then comes from the
// "chunk extends past the buffer" path; only the presence of an error is asserted.
#[test]
fn extract_chunks_chunk_length_overflow_is_rejected() {
let mut buf = Vec::new();
buf.extend_from_slice(PNG_SIGNATURE);
buf.extend_from_slice(&build_chunk(b"IHDR", &[0; 13]));
buf.extend_from_slice(&u32::MAX.to_be_bytes());
buf.extend_from_slice(b"XXXX");
let _err = extract_chunks(&buf).unwrap_err();
}
// 14-byte little-endian TIFF blob: "II", 0x002a, offset 8, then six zero bytes.
// The legacy tests below rely on `TiffHeader::parse` accepting it.
fn minimal_tiff_le() -> Vec<u8> {
let mut t = Vec::new();
t.extend_from_slice(b"II"); t.extend_from_slice(&[0x2a, 0x00]); t.extend_from_slice(&[0x08, 0, 0, 0]); t.extend_from_slice(&[0, 0]); t.extend_from_slice(&[0, 0, 0, 0]); t
}
// Formats `tiff` as a raw-profile text value: empty line, profile name,
// byte count, then lowercase hex wrapped at 72 characters per line.
fn raw_profile_value(profile_type: &str, tiff: &[u8]) -> String {
let hex: String = tiff.iter().map(|b| format!("{b:02x}")).collect();
let mut wrapped = String::new();
for chunk in hex.as_bytes().chunks(72) {
wrapped.push_str(std::str::from_utf8(chunk).unwrap());
wrapped.push('\n');
}
format!("\n{}\n {}\n{}", profile_type, tiff.len(), wrapped)
}
// A "Raw profile type exif" entry is used as EXIF and still listed as text.
#[test]
fn extract_chunks_legacy_exif() {
let tiff = minimal_tiff_le();
let value = raw_profile_value("exif", &tiff);
let mut data = Vec::new();
data.extend_from_slice(b"Raw profile type exif");
data.push(0);
data.extend_from_slice(value.as_bytes());
let chunks = vec![build_chunk(b"tEXt", &data)];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
match result.exif {
Some(PngExifSource::Legacy(bytes)) => assert_eq!(bytes, tiff),
other => panic!("expected Legacy, got {:?}", other),
}
assert_eq!(result.text_chunks.len(), 1);
assert_eq!(result.text_chunks[0].0, "Raw profile type exif");
}
// For the APP1 flavour the leading "Exif\0\0" marker must be stripped.
#[test]
fn extract_chunks_legacy_app1() {
let tiff = minimal_tiff_le();
let mut app1 = Vec::new();
app1.extend_from_slice(b"Exif\0\0");
app1.extend_from_slice(&tiff);
let value = raw_profile_value("app1", &app1);
let mut data = Vec::new();
data.extend_from_slice(b"Raw profile type APP1");
data.push(0);
data.extend_from_slice(value.as_bytes());
let chunks = vec![build_chunk(b"tEXt", &data)];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
match result.exif {
Some(PngExifSource::Legacy(bytes)) => assert_eq!(bytes, tiff),
other => panic!("expected Legacy, got {:?}", other),
}
}
// A native eXIf chunk that follows a legacy profile replaces it.
#[test]
fn extract_chunks_exif_overrides_legacy() {
let tiff_legacy = minimal_tiff_le();
// Four extra bytes make the eXIf payload distinguishable from the legacy one.
let tiff_exif = {
let mut t = minimal_tiff_le();
t.extend_from_slice(&[0xFF; 4]);
t
};
let legacy_value = raw_profile_value("exif", &tiff_legacy);
let mut legacy_data = Vec::new();
legacy_data.extend_from_slice(b"Raw profile type exif");
legacy_data.push(0);
legacy_data.extend_from_slice(legacy_value.as_bytes());
let chunks = vec![
build_chunk(b"tEXt", &legacy_data),
build_chunk(b"eXIf", &tiff_exif),
];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
match result.exif {
Some(PngExifSource::EXif(range)) => {
assert_eq!(&buf[range], tiff_exif);
}
other => panic!("expected EXif (eXIf wins), got {:?}", other),
}
}
// An undecodable legacy profile is not used as EXIF, but the entry is still
// kept as ordinary text and parsing succeeds.
#[test]
fn extract_chunks_invalid_legacy_silently_dropped() {
let mut data = Vec::new();
data.extend_from_slice(b"Raw profile type exif");
data.push(0);
data.extend_from_slice(b"not hex at all\nzzz");
let chunks = vec![build_chunk(b"tEXt", &data)];
let buf = build_png_with_chunks(&chunks);
let result = extract_chunks(&buf).unwrap();
assert!(result.exif.is_none(), "malformed legacy must be dropped");
assert_eq!(result.text_chunks.len(), 1);
}
}