use crate::{
DriverCallback, DriverCategory, DriverContext, file_exists,
types::{Driver, DriverParameter},
};
use anyhow::Result;
use serde_json::{Value, json};
use std::collections::HashMap;
/// Driver that extracts text from a file by invoking the external `tesseract`
/// command-line program and returning what it prints to stdout.
///
/// The type carries no state; all inputs arrive through the `parameters` map
/// passed to `execute`.
#[derive(Debug)]
pub struct OcrDriver;
#[async_trait::async_trait]
impl Driver for OcrDriver {
fn name(&self) -> &str {
"ocr"
}
fn description(&self) -> &str {
"Extract text from images using OCR (requires Tesseract)"
}
fn usage_hint(&self) -> &str {
"Use this skill to extract text from images, scanned documents, or PDFs. \
Requires Tesseract OCR to be installed on the system."
}
fn parameters(&self) -> Vec<DriverParameter> {
vec![
DriverParameter {
name: "path".to_string(),
param_type: "string".to_string(),
description: "Path to the image or PDF file".to_string(),
required: true,
default: None,
example: Some(Value::String("/path/to/document.png".to_string())),
enum_values: None,
},
DriverParameter {
name: "language".to_string(),
param_type: "string".to_string(),
description: "Language code (eng, chi_sim, etc.)".to_string(),
required: false,
default: Some(Value::String("eng".to_string())),
example: Some(Value::String("chi_sim".to_string())),
enum_values: None,
},
]
}
fn example_call(&self) -> Value {
json!({
"action": "ocr",
"parameters": {
"path": "/documents/scan.jpg",
"language": "eng"
}
})
}
fn example_output(&self) -> String {
"Extracted text: Hello World! This is OCR text.".to_string()
}
fn category(&self) -> DriverCategory {
DriverCategory::Media
}
async fn execute(
&self,
parameters: &HashMap<String, Value>,
callback: Option<&dyn DriverCallback>,
context: Option<&DriverContext>,
) -> Result<String> {
let task_id = context.as_ref().and_then(|c| c.task_id()).map(String::from);
let driver_index = context.as_ref().and_then(|c| c.driver_index());
let step_name = context
.as_ref()
.and_then(|c| c.driver_name())
.map(String::from);
let cb = callback;
if let Some(cb) = cb {
cb.on_start(task_id.clone(), driver_index, step_name);
cb.on_log(
task_id.clone(),
driver_index,
Some("Starting OCR text extraction".to_string()),
);
cb.on_progress(task_id.clone(), driver_index, Some(10), None);
}
let path = parameters
.get("path")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;
let language = parameters
.get("language")
.and_then(|v| v.as_str())
.unwrap_or("eng");
if let Some(cb) = cb {
cb.on_log(
task_id.clone(),
driver_index,
Some(format!("Image: {}, language: {}", path, language)),
);
cb.on_progress(task_id.clone(), driver_index, Some(20), None);
}
if !file_exists(path) {
anyhow::bail!("File not found: {}", path);
}
if let Some(cb) = cb {
cb.on_log(
task_id.clone(),
driver_index,
Some(format!("File verified: {}", path)),
);
cb.on_progress(task_id.clone(), driver_index, Some(30), None);
}
if let Some(cb) = cb {
cb.on_log(
task_id.clone(),
driver_index,
Some("Running Tesseract OCR...".to_string()),
);
cb.on_progress(task_id.clone(), driver_index, Some(50), None);
}
#[cfg(not(target_os = "windows"))]
{
let output = std::process::Command::new("tesseract")
.args([path, "stdout", "-l", language])
.output()
.map_err(|e| {
anyhow::anyhow!("Tesseract not found: {}. Please install Tesseract OCR.", e)
})?;
if !output.status.success() {
anyhow::bail!("OCR failed: {}", String::from_utf8_lossy(&output.stderr));
}
let text = String::from_utf8(output.stdout)
.map_err(|e| anyhow::anyhow!("Invalid UTF-8 output: {}", e))?;
if text.trim().is_empty() {
anyhow::bail!("No text found in image");
}
let result_msg = format!("Extracted text: {}", text.trim());
if let Some(cb) = cb {
cb.on_log(
task_id.clone(),
driver_index,
Some(format!("Result: {}", result_msg)),
);
cb.on_progress(task_id.clone(), driver_index, Some(100), None);
cb.on_complete(
task_id.clone(),
driver_index,
Some("ocr".to_string()),
Some(result_msg.clone()),
);
}
Ok(result_msg)
}
#[cfg(target_os = "windows")]
{
let output = std::process::Command::new("tesseract")
.args([path, "stdout", "-l", language])
.output()
.map_err(|e| anyhow::anyhow!("Tesseract not found: {}", e))?;
if !output.status.success() {
anyhow::bail!("OCR failed: {}", String::from_utf8_lossy(&output.stderr));
}
let text = String::from_utf8_lossy(&output.stdout);
if text.trim().is_empty() {
anyhow::bail!("No text found in image");
}
let result_msg = format!("Extracted text: {}", text.trim());
if let Some(cb) = cb {
cb.on_log(
task_id.clone(),
driver_index,
Some(format!("Result: {}", result_msg)),
);
cb.on_progress(task_id.clone(), driver_index, Some(100), None);
cb.on_complete(
task_id.clone(),
driver_index,
Some("ocr".to_string()),
Some(result_msg.clone()),
);
}
Ok(result_msg)
}
}
}