use anyhow::{Result, anyhow};
use serde_json::{Value, json};
pub use rsclaw_kb::OcrClient;
impl super::runtime::AgentRuntime {
    /// Runs OCR on the image named by `args["image"]` and returns the recognized text.
    ///
    /// `image` is accepted in three forms:
    /// - an `http://` or `https://` URL, handed to the OCR client unchanged;
    /// - a `data:` URI, handed to the OCR client unchanged;
    /// - anything else is treated as a file path: it is resolved through
    ///   `canonicalize_external_path` with the default workspace, read from disk,
    ///   and inlined as a `data:{mime};base64,...` URI.
    ///
    /// `args["lang"]` is currently not forwarded: the client is invoked with the
    /// image payload only.
    ///
    /// On success the result is `{"text": ...}` with surrounding whitespace trimmed;
    /// when the client yields only whitespace, `text` is empty and a `note` is added.
    ///
    /// # Errors
    ///
    /// Returns `Err` only when the blocking OCR task cannot be joined. Every other
    /// failure (missing `image`, no configured client, unreadable file, failed OCR
    /// request) is reported as an `Ok` JSON object carrying `error` and `hint`
    /// fields rather than as an `Err`.
    pub(crate) async fn tool_ocr(&self, args: Value) -> Result<Value> {
        // A missing or non-string `image` is treated the same as an empty one.
        let image = args["image"].as_str().unwrap_or("").trim();
        if image.is_empty() {
            return Ok(json!({
                "error": "ocr: `image` is required",
                "hint": "Pass a file path, an http(s):// URL, or a data:image/...;base64,... URI."
            }));
        }

        let Some(client) = OcrClient::from_config() else {
            return Ok(json!({
                "error": "ocr: no OCR endpoint configured",
                "hint": "Set `kb.ocr` in rsclaw.json5 (model rsclaw-ocr-v1 needs no baseUrl), then retry."
            }));
        };

        let is_remote_or_inline = image.starts_with("http://")
            || image.starts_with("https://")
            || image.starts_with("data:");

        let payload = if is_remote_or_inline {
            // URLs and data URIs are passed through verbatim.
            image.to_owned()
        } else {
            // Local file: read it and inline the bytes as a base64 data URI.
            let workspace = self.default_workspace();
            let path = super::runtime::canonicalize_external_path(image, &workspace);
            let bytes = match tokio::fs::read(&path).await {
                Ok(b) => b,
                Err(e) => {
                    return Ok(json!({
                        "error": format!("ocr: cannot read image file `{image}`: {e}"),
                        "hint": "Use a path relative to the workspace, or pass an http(s) URL / data URI."
                    }));
                }
            };
            let mime = rsclaw_kb::canonicalize::detect_mime(&bytes, Some(image));
            use base64::Engine as _;
            let b64 = base64::engine::general_purpose::STANDARD.encode(&bytes);
            format!("data:{mime};base64,{b64}")
        };

        // The client call runs on the blocking pool so it does not occupy an async
        // worker thread; `client` and `payload` are moved into the closure.
        let text = tokio::task::spawn_blocking(move || client.ocr(&payload))
            .await
            .map_err(|e| anyhow!("ocr: task join failed: {e}"))?;

        match text {
            Ok(t) if !t.trim().is_empty() => Ok(json!({ "text": t.trim() })),
            Ok(_) => Ok(json!({ "text": "", "note": "OCR returned no text (blank or non-text image)." })),
            Err(e) => Ok(json!({
                "error": format!("ocr request failed: {e:#}"),
                "hint": "Endpoint may be down or the image unreadable. Retry once; if it persists tell the user."
            })),
        }
    }
}