use anyhow::{Context, Result};
/// Timeout, in seconds, handed to the HTTP client for each OCR request.
const OCR_TIMEOUT_SECS: u64 = 120;
/// Client that posts images to a remote OCR endpoint and returns the
/// recognised text.
pub struct OcrClient {
/// HTTP client used to send the OCR requests.
client: rsclaw_embed::FleetHttp,
/// Full endpoint URL, built as `{base}/agent/ocr`.
url: String,
/// Optional model name; sent as the `model` field of the request when set.
model: Option<String>,
/// Optional API key passed to the HTTP client with every request.
api_key: Option<String>,
/// Optional `lang` value; sent as the `lang` field of the request when set.
lang: Option<String>,
}
impl OcrClient {
    /// Builds a shared client from the loaded configuration.
    ///
    /// Returns `None` when:
    /// - the configuration cannot be loaded,
    /// - there is no `kb.ocr` section,
    /// - OCR is explicitly disabled (`enabled = false`; a missing flag counts
    ///   as enabled), or
    /// - no usable base URL is configured and the model is not an rsclaw
    ///   model (rsclaw models fall back to `RSCLAW_API_BASE_URL`).
    ///
    /// The API key configured on the OCR section is preferred. A missing *or
    /// empty* key falls back to [`rsclaw_provider_key`].
    pub fn from_config() -> Option<std::sync::Arc<Self>> {
        let cfg = rsclaw_config::load().ok()?;
        let oc = cfg.raw.kb.as_ref()?.ocr.clone()?;
        if !oc.enabled.unwrap_or(true) {
            return None;
        }
        let model_is_rsclaw = oc
            .model
            .as_deref()
            .map(rsclaw_embed::is_rsclaw_model)
            .unwrap_or(false);
        // Normalise before testing for emptiness so that a value made only of
        // whitespace and/or slashes is treated as "not configured" instead of
        // producing a relative `/agent/ocr` URL.
        let base_raw = oc.base_url.trim().trim_end_matches('/');
        let base = if !base_raw.is_empty() {
            base_raw.to_owned()
        } else if model_is_rsclaw {
            rsclaw_embed::RSCLAW_API_BASE_URL.to_owned()
        } else {
            return None;
        };
        let api_key = oc
            .api_key
            .as_ref()
            .and_then(|secret| secret.resolve_early())
            // An empty key is as good as no key: without this filter it would
            // suppress the provider/env fallback below.
            .filter(|key| !key.is_empty())
            .or_else(|| rsclaw_provider_key(&cfg));
        Some(std::sync::Arc::new(Self {
            client: rsclaw_embed::FleetHttp::new(None),
            url: format!("{base}/agent/ocr"),
            model: oc.model,
            api_key,
            lang: oc.lang,
        }))
    }

    /// Reports whether the loaded configuration has a `kb.ocr` section that
    /// is not disabled.
    ///
    /// This only inspects the `enabled` flag (missing means enabled). It does
    /// not validate the base URL or model, so [`OcrClient::from_config`] can
    /// still return `None` when this returns `true`.
    pub fn is_configured() -> bool {
        match rsclaw_config::load() {
            Ok(cfg) => cfg
                .raw
                .kb
                .as_ref()
                .and_then(|kb| kb.ocr.as_ref())
                .map_or(false, |ocr| ocr.enabled.unwrap_or(true)),
            Err(_) => false,
        }
    }

    /// Sends `image` to the OCR endpoint and returns the recognised text.
    ///
    /// `image` is forwarded verbatim as the `image` field of the JSON body
    /// (NOTE(review): the expected encoding — URL, base64, ... — is defined
    /// by the service, not by this code). `model` and `lang` are added to the
    /// body only when set on the client.
    ///
    /// The call is synchronous: inside a Tokio runtime it blocks in place on
    /// the current handle, otherwise it spins up a temporary runtime.
    ///
    /// The text is taken from the first of these response fields that holds a
    /// string: `content`, `text`, `choices[0].message.content`.
    ///
    /// # Errors
    ///
    /// Fails when the request cannot be sent, the server answers with an
    /// error status, the body is not JSON, or none of the fields above holds
    /// a string.
    ///
    /// # Panics
    ///
    /// `tokio::task::block_in_place` panics when called from a current-thread
    /// runtime, so inside Tokio this must run on a multi-threaded runtime.
    pub fn ocr(&self, image: &str) -> Result<String> {
        let mut body = serde_json::json!({
            "image": image,
            "stream": false,
        });
        if let Some(model) = &self.model {
            body["model"] = serde_json::json!(model);
        }
        if let Some(lang) = &self.lang {
            body["lang"] = serde_json::json!(lang);
        }
        let send = || async {
            let resp = self
                .client
                .post_following_redirects(
                    self.url.as_str(),
                    &body,
                    self.api_key.as_deref(),
                    false,
                    None,
                    Some(std::time::Duration::from_secs(OCR_TIMEOUT_SECS)),
                )
                .await?
                .error_for_status()?;
            anyhow::Ok(resp.json::<serde_json::Value>().await?)
        };
        let resp: serde_json::Value = match tokio::runtime::Handle::try_current() {
            // Already inside a runtime: hand the worker thread over to the
            // blocking call instead of nesting runtimes.
            Ok(handle) => tokio::task::block_in_place(|| handle.block_on(send()))
                .context("ocr request failed")?,
            // Plain synchronous caller: drive the request on a throwaway runtime.
            Err(_) => tokio::runtime::Runtime::new()
                .context("failed to create temp runtime for ocr")?
                .block_on(send())
                .context("ocr request failed")?,
        };
        let content = resp
            .get("content")
            .and_then(|v| v.as_str())
            .or_else(|| resp.get("text").and_then(|v| v.as_str()))
            .or_else(|| {
                resp.pointer("/choices/0/message/content")
                    .and_then(|v| v.as_str())
            })
            .context("ocr response missing `content`")?;
        Ok(content.to_owned())
    }
}
/// Resolves the API key for the `rsclaw` provider.
///
/// Sources are tried in order and the first non-empty value wins:
/// 1. the `api_key` of the `rsclaw` entry under `models.providers`,
/// 2. the `RSCLAW_API_KEY` environment variable,
/// 3. the `RSCLAW_KEY` environment variable.
///
/// Returns `None` when every source is missing or empty.
pub(crate) fn rsclaw_provider_key(
    cfg: &rsclaw_config::runtime::RuntimeConfig,
) -> Option<String> {
    let configured = cfg
        .raw
        .models
        .as_ref()
        .and_then(|models| models.providers.get("rsclaw"))
        .and_then(|provider| provider.api_key.as_ref())
        .and_then(|secret| secret.resolve_early());
    match configured {
        Some(key) if !key.is_empty() => Some(key),
        // Environment is consulted only when the config yields nothing usable.
        _ => ["RSCLAW_API_KEY", "RSCLAW_KEY"]
            .iter()
            .copied()
            .find_map(|name| std::env::var(name).ok().filter(|value| !value.is_empty())),
    }
}
/// Returns the normalised base URL of the `rsclaw` provider, if configured.
///
/// Normalisation trims surrounding whitespace and trailing slashes, drops a
/// trailing `/agent` path segment, then trims trailing slashes again. A value
/// that ends up empty yields `None`.
pub(crate) fn rsclaw_provider_base_url(
    cfg: &rsclaw_config::runtime::RuntimeConfig,
) -> Option<String> {
    let provider = cfg.raw.models.as_ref()?.providers.get("rsclaw")?;
    let raw = provider.base_url.as_ref()?;
    let trimmed = raw.trim().trim_end_matches('/');
    let without_agent = match trimmed.strip_suffix("/agent") {
        Some(prefix) => prefix,
        None => trimmed,
    };
    let root = without_agent.trim_end_matches('/');
    if root.is_empty() {
        return None;
    }
    Some(root.to_owned())
}
impl std::fmt::Debug for OcrClient {
    /// Formats only `url` and `model`; every other field, including the API
    /// key, is left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { url, model, .. } = self;
        let mut out = f.debug_struct("OcrClient");
        out.field("url", url);
        out.field("model", model);
        out.finish()
    }
}