1use crate::agent::inference::{ChatMessage, InferenceEngine};
2use base64::prelude::*;
3use serde_json::Value;
4use std::path::Path;
5
6pub fn encode_image_as_data_url(path: &Path) -> Result<String, String> {
7 if !path.exists() {
8 return Err(format!("File not found: {}", path.display()));
9 }
10
11 let data = std::fs::read(path).map_err(|e| format!("Failed to read image: {}", e))?;
12 let b64 = BASE64_STANDARD.encode(data);
13
14 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("png");
15 let mime = match ext.to_lowercase().as_str() {
16 "jpg" | "jpeg" => "image/jpeg",
17 "gif" => "image/gif",
18 "webp" => "image/webp",
19 _ => "image/png",
20 };
21
22 Ok(format!("data:{};base64,{}", mime, b64))
23}
24
25pub async fn vision_analyze(engine: &InferenceEngine, args: &Value) -> Result<String, String> {
26 let path_str = args
27 .get("path")
28 .and_then(|v| v.as_str())
29 .ok_or("Missing parameter: path")?;
30 let prompt = args
31 .get("prompt")
32 .and_then(|v| v.as_str())
33 .ok_or("Missing parameter: prompt")?;
34
35 let path = Path::new(path_str);
36 let url = encode_image_as_data_url(path).map_err(|e| {
37 if e.starts_with("File not found: ") {
38 format!("File not found: {}", path_str)
39 } else {
40 e
41 }
42 })?;
43
44 let messages = vec![
45 ChatMessage::system("You are a vision-capable technical assistant. Analyze the provided image (likely a screenshot, diagram, or UI mockup) and provide a concise technical summary or answer the specific query."),
46 ChatMessage::user_with_image(prompt, &url),
47 ];
48
49 let (text, _, _, _) = engine.call_with_tools(&messages, &[], None).await?;
51
52 Ok(text.unwrap_or_else(|| "The vision model returned an empty response.".to_string()))
53}