#![allow(
dead_code,
clippy::uninlined_format_args,
clippy::needless_borrows_for_generic_args
)]
use std::path::Path;
use async_trait::async_trait;
use tokio::fs;
use super::traits::{ConverterMetadata, DocumentConverter};
use crate::Result;
use crate::types::{
ConversionOptions, ConversionOutput, ConversionResult, FileFormat, OutputFormat, OutputMetadata,
};
/// Converter that extracts text from image files via OCR.
///
/// The OCR backend is only compiled in when the `tesseract` Cargo feature is
/// enabled; without that feature the struct has no fields.
#[derive(Debug)]
pub struct ImageConverter {
/// Name of the OCR engine in use (only present with the `tesseract` feature).
#[cfg(feature = "tesseract")]
ocr_engine: Option<String>,
}
impl ImageConverter {
    /// Creates a new converter.
    ///
    /// With the `tesseract` feature enabled the engine name is recorded as
    /// `"tesseract"`; otherwise the converter carries no state.
    pub fn new() -> Self {
        Self {
            #[cfg(feature = "tesseract")]
            ocr_engine: Some("tesseract".to_string()),
        }
    }

    /// Runs Tesseract on `image_path` using the given `language` and returns
    /// the recognised text as UTF-8.
    ///
    /// # Errors
    ///
    /// Returns a conversion-failed error if Tesseract cannot be initialised,
    /// the image cannot be loaded, or text extraction fails.
    ///
    /// NOTE(review): this body contains no `.await`, so the Tesseract calls run
    /// synchronously on whichever task polls this future. Consider moving the
    /// work to a blocking thread pool if executor stalls become a problem.
    #[cfg(feature = "tesseract")]
    async fn ocr_image(&self, image_path: &Path, language: &str) -> Result<String> {
        use leptess::LepTess;

        let mut tesseract = LepTess::new(None, language).map_err(|e| {
            crate::TransmutationError::conversion_failed(&format!(
                "Failed to initialize Tesseract: {}",
                e
            ))
        })?;
        tesseract.set_image(image_path).map_err(|e| {
            crate::TransmutationError::conversion_failed(&format!("Failed to set image: {}", e))
        })?;
        let text = tesseract.get_utf8_text().map_err(|e| {
            crate::TransmutationError::conversion_failed(&format!("OCR failed: {}", e))
        })?;
        Ok(text)
    }

    /// OCRs `image_path` and formats the recognised text as Markdown.
    ///
    /// The output starts with a `# OCR Result` heading. The OCR text is split
    /// on blank lines (`"\n\n"`); every non-empty paragraph is trimmed and
    /// emitted followed by a blank line.
    ///
    /// # Errors
    ///
    /// Propagates any OCR error. When the crate is built without the
    /// `tesseract` feature this always returns a conversion-failed error.
    async fn image_to_markdown(&self, image_path: &Path, language: &str) -> Result<String> {
        #[cfg(feature = "tesseract")]
        {
            const HEADING: &str = "# OCR Result\n\n";

            let text = self.ocr_image(image_path, language).await?;

            // The result can never exceed heading + text + one trailing
            // separator, so a single allocation is enough.
            let mut markdown = String::with_capacity(HEADING.len() + text.len() + 2);
            markdown.push_str(HEADING);
            for paragraph in text.split("\n\n").map(str::trim).filter(|p| !p.is_empty()) {
                // Append directly instead of building a temporary via `format!`.
                markdown.push_str(paragraph);
                markdown.push_str("\n\n");
            }
            Ok(markdown)
        }
        #[cfg(not(feature = "tesseract"))]
        {
            // The parameters are only consumed by the OCR path; discard them
            // explicitly so feature-less builds do not warn about unused variables.
            let _ = (image_path, language);
            Err(crate::TransmutationError::conversion_failed(
                "OCR feature not enabled. Compile with --features tesseract",
            ))
        }
    }
}
impl Default for ImageConverter {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl DocumentConverter for ImageConverter {
    /// Image formats this converter accepts as input.
    fn supported_formats(&self) -> Vec<FileFormat> {
        vec![
            FileFormat::Jpeg,
            FileFormat::Png,
            FileFormat::Tiff,
            FileFormat::Bmp,
            FileFormat::Gif,
            FileFormat::Webp,
        ]
    }

    /// Output formats this converter advertises: Markdown and JSON.
    fn output_formats(&self) -> Vec<OutputFormat> {
        vec![
            OutputFormat::Markdown {
                split_pages: false,
                optimize_for_llm: true,
            },
            OutputFormat::Json {
                structured: true,
                include_metadata: true,
            },
        ]
    }

    /// Runs OCR on the image at `input` and returns the text in `output_format`.
    ///
    /// * `OutputFormat::Markdown` yields the Markdown document as UTF-8 bytes.
    /// * `OutputFormat::Json` yields a pretty-printed JSON object of the form
    ///   `{"ocr": {"text": <markdown>, "language": <lang>}}`.
    ///
    /// The OCR language is currently fixed to `"eng"`; `_options` is not
    /// consulted. Progress messages are written to stderr.
    ///
    /// # Errors
    ///
    /// * `UnsupportedFormat` for any other output format. This is checked
    ///   before the OCR pass so no work is wasted.
    /// * Any error from OCR, JSON serialisation, reading the input file's
    ///   metadata, or detecting the input format.
    /// * A conversion-failed error when the crate is built without the
    ///   `tesseract` feature.
    async fn convert(
        &self,
        input: &Path,
        output_format: OutputFormat,
        _options: ConversionOptions,
    ) -> Result<ConversionResult> {
        eprintln!("🔄 Image OCR (Tesseract)");
        eprintln!(" Image → OCR → {:?}", output_format);
        eprintln!();

        #[cfg(feature = "tesseract")]
        {
            // Reject unsupported targets up front instead of after the
            // (expensive) OCR pass has already run.
            let emit_json = match output_format {
                OutputFormat::Markdown { .. } => false,
                OutputFormat::Json { .. } => true,
                _ => {
                    return Err(crate::TransmutationError::UnsupportedFormat(format!(
                        "Output format {:?} not supported for images",
                        output_format
                    )));
                }
            };

            // Measure the real conversion time for the returned statistics.
            let started = std::time::Instant::now();
            let language = "eng";

            eprintln!("📸 Running OCR (language: {})...", language);
            let markdown = self.image_to_markdown(input, language).await?;

            let output_data = if emit_json {
                eprintln!("📝 Converting to JSON...");
                let json = serde_json::json!({
                    "ocr": {
                        "text": markdown,
                        "language": language,
                    }
                });
                serde_json::to_string_pretty(&json)?.into_bytes()
            } else {
                eprintln!("📝 Markdown generated!");
                markdown.into_bytes()
            };

            let output_size = output_data.len() as u64;
            let input_size = fs::metadata(input).await?.len();
            // Resolve the input format before announcing success so a failure
            // here is not preceded by a misleading "complete" message.
            let input_format = crate::utils::file_detect::detect_format(input).await?;
            eprintln!("✅ OCR complete!");

            Ok(ConversionResult {
                input_path: input.to_path_buf(),
                input_format,
                output_format,
                content: vec![ConversionOutput {
                    page_number: 1,
                    data: output_data,
                    metadata: OutputMetadata {
                        size_bytes: output_size,
                        chunk_count: 1,
                        token_count: None,
                    },
                }],
                metadata: crate::types::DocumentMetadata {
                    title: None,
                    author: None,
                    created: None,
                    modified: None,
                    page_count: 1,
                    language: Some(language.to_string()),
                    custom: std::collections::HashMap::new(),
                },
                statistics: crate::types::ConversionStatistics {
                    input_size_bytes: input_size,
                    output_size_bytes: output_size,
                    duration: started.elapsed(),
                    pages_processed: 1,
                    tables_extracted: 0,
                    images_extracted: 0,
                    cache_hit: false,
                },
            })
        }
        #[cfg(not(feature = "tesseract"))]
        {
            // `input` is only used by the OCR path; discard it explicitly so
            // feature-less builds do not warn about an unused variable.
            let _ = input;
            // The feature name matches the `cfg` gate used throughout this file.
            Err(crate::TransmutationError::conversion_failed(
                "OCR feature not enabled. Compile with --features tesseract",
            ))
        }
    }

    /// Static description of this converter, including its external
    /// dependency on `tesseract`.
    fn metadata(&self) -> ConverterMetadata {
        ConverterMetadata {
            name: "Image OCR Converter".to_string(),
            version: env!("CARGO_PKG_VERSION").to_string(),
            description: "Image to text converter using Tesseract OCR".to_string(),
            external_deps: vec!["tesseract".to_string()],
        }
    }
}