use crate::core::{Error, ExtractionResult, Metadata, MetadataValue, Result};
use crate::parsers::Parser;
use crate::parsers::image::maybe_ocr_content;
use flate2::read::ZlibDecoder;
use image::io::Reader as ImageReader;
use std::io::{Cursor, Read};
/// Parser for PNG images (`image/png`).
///
/// Stateless unit struct: it carries no data of its own.
pub struct PngParser;
impl Parser for PngParser {
fn supported_types(&self) -> &[&str] {
&["image/png"]
}
fn parse(&self, data: &[u8], mime_type: &str) -> Result<ExtractionResult> {
let img = ImageReader::new(Cursor::new(data))
.with_guessed_format()
.map_err(|e| Error::ParseError(format!("Failed to read PNG: {}", e)))?
.decode()
.map_err(|e| Error::ParseError(format!("Failed to decode PNG: {}", e)))?;
let mut metadata = Metadata::new();
let width = img.width();
let height = img.height();
metadata.insert("width".to_string(), MetadataValue::Number(width as i64));
metadata.insert("height".to_string(), MetadataValue::Number(height as i64));
let color_type = img.color();
metadata.insert("color_type".to_string(), MetadataValue::Text(format!("{:?}", color_type)));
if let Ok(chunks) = Self::extract_png_chunks(data) {
for (key, value) in chunks {
metadata.insert(key, value);
}
}
let content = maybe_ocr_content(data, &mut metadata);
Ok(ExtractionResult {
mime_type: mime_type.to_string(),
content,
metadata,
detection_confidence: 0.0,
})
}
fn name(&self) -> &str {
"PngParser"
}
}
impl PngParser {
    /// Walks the PNG chunk stream and collects the textual chunks.
    ///
    /// Each `tEXt`, `iTXt` and `zTXt` chunk that parses successfully becomes
    /// one entry keyed `text_<keyword>`, `itext_<keyword>` or
    /// `ztext_<keyword>` respectively, with the chunk text as a
    /// `MetadataValue::Text`. Chunks that fail to parse are skipped. Chunk
    /// CRCs are not verified.
    ///
    /// Scanning stops at the `IEND` chunk, at the first chunk whose declared
    /// length runs past the end of `data`, or when fewer than 12 bytes remain.
    ///
    /// # Errors
    ///
    /// Returns `Error::ParseError` when `data` does not start with the 8-byte
    /// PNG signature.
    fn extract_png_chunks(data: &[u8]) -> Result<Vec<(String, MetadataValue)>> {
        const SIGNATURE: &[u8; 8] = b"\x89PNG\r\n\x1a\n";
        // Every chunk carries a 4-byte length and 4-byte type before the
        // payload, and a 4-byte CRC after it.
        const HEADER_LEN: usize = 8;
        const CRC_LEN: usize = 4;

        if !data.starts_with(SIGNATURE) {
            return Err(Error::ParseError("Invalid PNG signature".to_string()));
        }

        let mut chunks = Vec::new();
        let mut rest = &data[SIGNATURE.len()..];

        while rest.len() >= HEADER_LEN + CRC_LEN {
            let (header, body) = rest.split_at(HEADER_LEN);
            let length =
                u32::from_be_bytes([header[0], header[1], header[2], header[3]]) as usize;
            let chunk_type = &header[4..];

            // Compare against what is left instead of adding to an offset: a
            // crafted length can then never overflow `usize` on 32-bit targets.
            // `body` holds at least the CRC here, so the subtraction is safe.
            if length > body.len() - CRC_LEN {
                break;
            }
            let payload = &body[..length];

            let parsed = match chunk_type {
                b"tEXt" => Self::parse_text_chunk(payload).map(|entry| ("text", entry)),
                b"iTXt" => Self::parse_itext_chunk(payload).map(|entry| ("itext", entry)),
                b"zTXt" => Self::parse_ztext_chunk(payload).map(|entry| ("ztext", entry)),
                // IEND terminates the PNG datastream; bytes appended after it
                // are not chunks and must not be reported as metadata.
                b"IEND" => break,
                _ => None,
            };
            if let Some((prefix, (key, value))) = parsed {
                chunks.push((format!("{}_{}", prefix, key), MetadataValue::Text(value)));
            }

            rest = &body[length + CRC_LEN..];
        }

        Ok(chunks)
    }

    /// Parses a `tEXt` payload: `keyword NUL text`.
    ///
    /// Returns `None` when the payload has no NUL separator.
    fn parse_text_chunk(data: &[u8]) -> Option<(String, String)> {
        let (keyword, text) = Self::split_at_nul(data)?;
        Some((Self::decode_latin1(keyword), Self::decode_latin1(text)))
    }

    /// Parses an `iTXt` payload:
    /// `keyword NUL flag method language NUL translated-keyword NUL text`.
    ///
    /// The text is inflated when the compression flag is `1` and used as is
    /// otherwise; the compression method byte is not checked. The language
    /// tag and translated keyword are skipped.
    ///
    /// Returns `None` when any field or separator is missing, or when
    /// inflating compressed text fails.
    fn parse_itext_chunk(data: &[u8]) -> Option<(String, String)> {
        let (keyword, rest) = Self::split_at_nul(data)?;
        let (&compression_flag, rest) = rest.split_first()?;
        let (_compression_method, rest) = rest.split_first()?;
        let (_language_tag, rest) = Self::split_at_nul(rest)?;
        let (_translated_keyword, text_bytes) = Self::split_at_nul(rest)?;

        let text = if compression_flag == 1 {
            inflate_zlib(text_bytes)?
        } else {
            // iTXt text is UTF-8 by specification.
            String::from_utf8_lossy(text_bytes).into_owned()
        };
        Some((Self::decode_latin1(keyword), text))
    }

    /// Parses a `zTXt` payload: `keyword NUL method compressed-text`.
    ///
    /// The compression method byte is skipped without being checked.
    ///
    /// Returns `None` when the separator or method byte is missing, or when
    /// inflating the text fails.
    fn parse_ztext_chunk(data: &[u8]) -> Option<(String, String)> {
        let (keyword, rest) = Self::split_at_nul(data)?;
        let (_compression_method, compressed) = rest.split_first()?;
        let text = inflate_zlib(compressed)?;
        Some((Self::decode_latin1(keyword), text))
    }

    /// Splits `bytes` around its first NUL byte, dropping the NUL itself.
    fn split_at_nul(bytes: &[u8]) -> Option<(&[u8], &[u8])> {
        let nul = bytes.iter().position(|&b| b == 0)?;
        Some((&bytes[..nul], &bytes[nul + 1..]))
    }

    /// Decodes a field that the PNG specification defines as Latin-1.
    ///
    /// Valid UTF-8 is returned unchanged, because many encoders write UTF-8
    /// into these fields anyway. Anything else is read as Latin-1, where each
    /// byte maps to the Unicode code point of the same value, so accented
    /// characters survive instead of turning into U+FFFD.
    fn decode_latin1(bytes: &[u8]) -> String {
        match std::str::from_utf8(bytes) {
            Ok(valid) => valid.to_owned(),
            Err(_) => bytes.iter().map(|&b| char::from(b)).collect(),
        }
    }
}
/// Inflates a zlib stream and returns the result as text.
///
/// Invalid UTF-8 in the inflated bytes is replaced with U+FFFD.
///
/// Returns `None` when the stream cannot be decoded, or when the inflated
/// output exceeds `MAX_INFLATED_LEN`. The input comes from untrusted files,
/// and without a cap a few kilobytes of zlib data can expand to gigabytes.
fn inflate_zlib(bytes: &[u8]) -> Option<String> {
    /// Upper bound on the inflated size of a single text chunk (16 MiB).
    const MAX_INFLATED_LEN: usize = 16 * 1024 * 1024;

    // Read one byte past the cap so "exactly at the cap" and "over the cap"
    // can be told apart after the read.
    let mut limited = ZlibDecoder::new(bytes).take(MAX_INFLATED_LEN as u64 + 1);
    let mut out = Vec::with_capacity(bytes.len().saturating_mul(2).min(MAX_INFLATED_LEN));
    limited.read_to_end(&mut out).ok()?;

    if out.len() > MAX_INFLATED_LEN {
        return None;
    }
    Some(String::from_utf8_lossy(&out).into_owned())
}