use async_trait::async_trait;
use std::path::Path;
use crate::domain::error::DomainError;
use crate::domain::ir::{DocumentBuilder, ParsedBlock, ParsedSource};
use crate::domain::parser::FileParserBackend;
/// Parser backend for raster image files.
///
/// The parser is a stateless unit struct; its parse methods wrap the raw image
/// bytes in a single image block whose source is a base64 `data:` URI.
pub struct ImageParser;
/// File extensions (lowercase, without the leading dot) advertised by the image backend.
const SUPPORTED_EXTENSIONS: &[&str] = &["png", "jpg", "jpeg", "webp", "gif"];
/// Largest accepted image size, in units of 1024 * 1024 bytes; also shown in error messages.
const MAX_IMAGE_SIZE_MB: u64 = 50;
/// [`MAX_IMAGE_SIZE_MB`] expressed in bytes, for direct comparison with file and buffer lengths.
const MAX_IMAGE_SIZE_BYTES: u64 = MAX_IMAGE_SIZE_MB * 1024 * 1024;
impl ImageParser {
    /// Creates a new image parser. The type carries no state.
    #[must_use]
    pub fn new() -> Self {
        Self
    }

    /// Looks up the image MIME type for a file extension.
    ///
    /// `extension` is given without the leading dot and is matched
    /// ASCII-case-insensitively. Returns `None` for any extension that is not
    /// one of the known image extensions.
    fn mime_type_from_extension(extension: &str) -> Option<&'static str> {
        // Extension -> MIME table; keys are distinct, so scan order is irrelevant.
        const MIME_BY_EXTENSION: &[(&str, &str)] = &[
            ("png", "image/png"),
            ("jpg", "image/jpeg"),
            ("jpeg", "image/jpeg"),
            ("webp", "image/webp"),
            ("gif", "image/gif"),
        ];

        MIME_BY_EXTENSION
            .iter()
            .find(|(known, _)| extension.eq_ignore_ascii_case(known))
            .map(|&(_, mime)| mime)
    }

    /// Resolves the MIME type for uploaded image bytes.
    ///
    /// Precedence:
    /// 1. `content_type`, returned verbatim, when it starts with `image/`;
    /// 2. otherwise the extension of `filename_hint`, when it maps to a known
    ///    image MIME type.
    ///
    /// # Errors
    ///
    /// Returns an unsupported-file-type [`DomainError`] when neither source
    /// yields an image MIME type.
    fn determine_mime_type(
        filename_hint: Option<&str>,
        content_type: Option<&str>,
    ) -> Result<String, DomainError> {
        // A declared image content type wins over anything the filename says.
        if let Some(declared) = content_type.filter(|ct| ct.starts_with("image/")) {
            return Ok(declared.to_owned());
        }

        filename_hint
            .and_then(|name| Path::new(name).extension())
            .and_then(|ext| ext.to_str())
            .and_then(Self::mime_type_from_extension)
            .map(str::to_owned)
            .ok_or_else(|| {
                DomainError::unsupported_file_type("Unable to determine image MIME type")
            })
    }

    /// Encodes `bytes` with the `base64` crate's `STANDARD` engine and returns
    /// a URI of the form `data:<mime_type>;base64,<payload>`.
    fn build_data_uri(mime_type: &str, bytes: &[u8]) -> String {
        use base64::Engine;

        let payload = base64::engine::general_purpose::STANDARD.encode(bytes);
        format!("data:{mime_type};base64,{payload}")
    }
}
impl Default for ImageParser {
fn default() -> Self {
Self::new()
}
}
/// [`FileParserBackend`] implementation that turns one image into a document
/// containing a single [`ParsedBlock::Image`] whose `src` is a base64 data URI.
#[async_trait]
impl FileParserBackend for ImageParser {
    /// Identifier of this backend.
    fn id(&self) -> &'static str {
        "image"
    }

    /// File extensions (lowercase, without the dot) handled by this backend.
    fn supported_extensions(&self) -> &'static [&'static str] {
        SUPPORTED_EXTENSIONS
    }

    /// Parses the image file at `path` into a single-image document.
    ///
    /// The MIME type is derived from the file extension only.
    ///
    /// # Errors
    ///
    /// * I/O error when the file metadata or contents cannot be read.
    /// * Invalid-request error when the file is larger than
    ///   [`MAX_IMAGE_SIZE_BYTES`].
    /// * Unsupported-file-type error when the path has no UTF-8 extension or
    ///   the extension is not a known image extension.
    async fn parse_local_path(
        &self,
        path: &Path,
    ) -> Result<crate::domain::ir::ParsedDocument, DomainError> {
        let metadata = tokio::fs::metadata(path)
            .await
            .map_err(|e| DomainError::io_error(format!("Failed to read file metadata: {e}")))?;
        if metadata.len() > MAX_IMAGE_SIZE_BYTES {
            return Err(DomainError::invalid_request(format!(
                "Image file too large: {} bytes (max {} MB)",
                metadata.len(),
                MAX_IMAGE_SIZE_MB
            )));
        }

        // Validate the extension before touching the file contents, so an
        // unsupported file is rejected without reading up to 50 MB from disk.
        let extension = path
            .extension()
            .and_then(|s| s.to_str())
            .ok_or_else(|| DomainError::unsupported_file_type("no extension"))?;
        let mime_type = Self::mime_type_from_extension(extension)
            .ok_or_else(|| DomainError::unsupported_file_type(extension))?;

        let bytes = tokio::fs::read(path)
            .await
            .map_err(|e| DomainError::io_error(format!("Failed to read image file: {e}")))?;
        let data_uri = Self::build_data_uri(mime_type, &bytes);

        // Fall back to a placeholder when the file name is missing or not UTF-8.
        let filename = path
            .file_name()
            .and_then(|s| s.to_str())
            .unwrap_or("unknown");

        let document = DocumentBuilder::new(ParsedSource::LocalPath(path.display().to_string()))
            .content_type(mime_type)
            .original_filename(filename)
            .blocks(vec![ParsedBlock::Image {
                alt: None,
                title: None,
                src: Some(data_uri),
            }])
            .build();
        Ok(document)
    }

    /// Parses in-memory image `bytes` into a single-image document.
    ///
    /// The MIME type comes from `content_type` when it is an `image/*` type,
    /// otherwise from the extension of `filename_hint`. A missing
    /// `filename_hint` is recorded as `"unknown"`.
    ///
    /// # Errors
    ///
    /// * Invalid-request error when `bytes` is larger than
    ///   [`MAX_IMAGE_SIZE_BYTES`].
    /// * Unsupported-file-type error when no image MIME type can be determined.
    async fn parse_bytes(
        &self,
        filename_hint: Option<&str>,
        content_type: Option<&str>,
        bytes: bytes::Bytes,
    ) -> Result<crate::domain::ir::ParsedDocument, DomainError> {
        // Widen the buffer length rather than narrowing the limit: the
        // conversion is lossless wherever `usize` fits in `u64`, and a length
        // that somehow did not fit is necessarily over the limit.
        let size = u64::try_from(bytes.len()).unwrap_or(u64::MAX);
        if size > MAX_IMAGE_SIZE_BYTES {
            return Err(DomainError::invalid_request(format!(
                "Image file too large: {} bytes (max {} MB)",
                bytes.len(),
                MAX_IMAGE_SIZE_MB
            )));
        }

        let mime_type = Self::determine_mime_type(filename_hint, content_type)?;
        let data_uri = Self::build_data_uri(&mime_type, &bytes);

        // The same name (or placeholder) labels both the source and the document.
        let original_name = filename_hint.unwrap_or("unknown");
        let source = ParsedSource::Uploaded {
            original_name: original_name.to_owned(),
        };

        let document = DocumentBuilder::new(source)
            .content_type(mime_type)
            .original_filename(original_name)
            .blocks(vec![ParsedBlock::Image {
                alt: None,
                title: None,
                src: Some(data_uri),
            }])
            .build();
        Ok(document)
    }
}