use std::path::PathBuf;
use serde_json::{json, Value};
use super::{claudette_home, ensure_dir, parse_json_input};
use crate::image_attach::{encode_base64_standard, image_mime_from_path};
use crate::test_runner::run_command_with_timeout;
/// Timeout, in seconds, passed to `run_command_with_timeout` for the screenshot command.
const SHOT_TIMEOUT_SECS: u64 = 15;
/// Timeout, in seconds, configured on the HTTP client that `image_describe` uses.
const DESCRIBE_TIMEOUT_SECS: u64 = 120;
/// Largest image, in bytes (8 MiB), that `image_describe` accepts; bigger files are rejected.
const DESCRIBE_MAX_IMAGE_BYTES: usize = 8 * 1024 * 1024;
/// Returns the function-calling schemas for the tools implemented in this module.
///
/// The result always holds two entries, in this order: `screenshot_capture`
/// (no parameters) and `image_describe` (required `path`, optional `question`).
pub(super) fn schemas() -> Vec<Value> {
    // Takes no arguments: the parameter object is declared but left empty.
    let screenshot_capture = json!({
        "type": "function",
        "function": {
            "name": "screenshot_capture",
            "description": "Capture the primary display to a PNG under ~/.claudette/files/. Returns {path}. Uses PowerShell on Windows, screencapture on macOS, gnome-screenshot/import on Linux.",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    });

    // Only `path` is mandatory; `question` falls back to a default prompt in the handler.
    let image_describe = json!({
        "type": "function",
        "function": {
            "name": "image_describe",
            "description": "Ask a vision-language model loaded in LM Studio to describe an image (PNG/JPG/GIF/WebP/BMP). Returns {description}. Requires a VLM (e.g. Qwen2.5-VL) loaded at LMS_API_URL/v1.",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": { "type": "string", "description": "Image path (absolute or ~/)." },
                    "question": { "type": "string", "description": "Optional question; default 'Describe this image.'" }
                },
                "required": ["path"]
            }
        }
    });

    vec![screenshot_capture, image_describe]
}
/// Routes a tool call to the matching handler in this module.
///
/// Returns `None` when `name` is not one of this module's tools, so the caller
/// can try elsewhere; otherwise returns `Some` with the handler's result.
pub(super) fn dispatch(name: &str, input: &str) -> Option<Result<String, String>> {
    match name {
        "screenshot_capture" => Some(run_screenshot_capture(input)),
        "image_describe" => Some(run_image_describe(input)),
        _ => None,
    }
}
/// Directory that receives captured screenshots: `files` under `claudette_home()`.
fn screenshots_dir() -> PathBuf {
    let home = claudette_home();
    home.join("files")
}
/// Handles the `screenshot_capture` tool call.
///
/// Ensures the screenshots directory exists, runs the platform-specific capture
/// command with a `SHOT_TIMEOUT_SECS` timeout, and verifies that the output
/// file was created. The tool input is ignored.
///
/// # Errors
///
/// Returns an error string when the directory cannot be prepared, when the
/// command reports failure (the message includes the exit code and the first
/// three lines of stderr and stdout), or when the command reports success but
/// the expected file is absent.
fn run_screenshot_capture(_input: &str) -> Result<String, String> {
    let dir = screenshots_dir();
    ensure_dir(&dir)?;

    // File name carries a local timestamp with one-second resolution.
    let stamp = chrono::Local::now().format("%Y%m%dT%H%M%S").to_string();
    let target = dir.join(format!("screenshot-{stamp}.png"));
    let target_str = target.display().to_string();

    // The third tuple element is not needed by this caller.
    let (program, argv, _script_owner) = build_screenshot_command(&target_str);
    let outcome = run_command_with_timeout(program, &argv, SHOT_TIMEOUT_SECS, None);

    if !outcome.success {
        // Keep the report short: at most three lines from each stream.
        let stderr_head = outcome.stderr.lines().take(3).collect::<Vec<_>>().join(" | ");
        let stdout_head = outcome.stdout.lines().take(3).collect::<Vec<_>>().join(" | ");
        return Err(format!(
            "screenshot_capture: command failed (exit {:?}): {} {}",
            outcome.exit_code, stderr_head, stdout_head,
        ));
    }

    // A zero exit status alone is not trusted; the file must actually be there.
    if target.exists() {
        let reply = json!({
            "ok": true,
            "path": target_str,
        });
        Ok(reply.to_string())
    } else {
        Err(format!(
            "screenshot_capture: command reported success but {} doesn't exist",
            target.display()
        ))
    }
}
/// Builds the Windows capture command: a PowerShell one-liner that copies the
/// primary screen into a bitmap and saves it as a PNG at `out_path`.
///
/// Returns `(program, args, owned)`; `owned` is always an empty `String` here.
///
/// NOTE(review): the generated script is leaked via `Box::leak` to obtain a
/// `'static` string for the argument vector, so every call leaks one script.
#[cfg(target_os = "windows")]
fn build_screenshot_command(out_path: &str) -> (&'static str, Vec<&str>, String) {
    // Inside a PowerShell single-quoted string, a literal `'` is written as `''`.
    let quoted_path = out_path.replace('\'', "''");
    let capture_script: String = format!(
        "Add-Type -AssemblyName System.Windows.Forms,System.Drawing; \
         $b = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds; \
         $bmp = New-Object System.Drawing.Bitmap $b.Width, $b.Height; \
         $g = [System.Drawing.Graphics]::FromImage($bmp); \
         $g.CopyFromScreen($b.Location, [System.Drawing.Point]::Empty, $b.Size); \
         $bmp.Save('{target}', [System.Drawing.Imaging.ImageFormat]::Png); \
         $g.Dispose(); $bmp.Dispose()",
        target = quoted_path
    );
    let script_ref: &'static str = Box::leak(capture_script.into_boxed_str());
    let argv = vec!["-NoProfile", "-NonInteractive", "-Command", script_ref];
    ("powershell", argv, String::new())
}
/// Builds the macOS capture command: `screencapture -x <out_path>`.
///
/// Returns `(program, args, owned)`. The argument vector borrows `out_path`
/// directly, so nothing is allocated or leaked; `owned` is always an empty
/// `String`.
#[cfg(target_os = "macos")]
fn build_screenshot_command(out_path: &str) -> (&'static str, Vec<&str>, String) {
    // The elided lifetime of the returned `Vec<&str>` is that of `out_path`,
    // so the path can be handed back as-is instead of leaking a copy per call.
    ("screencapture", vec!["-x", out_path], String::new())
}
/// Builds the capture command for non-Windows, non-macOS targets.
///
/// The command is `sh -c <script> sh <out_path>`. The script tries
/// `gnome-screenshot`, then ImageMagick's `import`, then `scrot`, and exits
/// with status 1 and a hint on stdout when none of them is on `PATH`.
///
/// Returns `(program, args, owned)`; `owned` is always an empty `String`.
///
/// The output path is passed as the positional parameter `$1` rather than
/// being spliced into the script text. That keeps the script a constant (no
/// per-call allocation to leak) and makes hand-rolled shell quoting
/// unnecessary: the shell never re-parses the path.
#[cfg(all(not(target_os = "windows"), not(target_os = "macos")))]
fn build_screenshot_command(out_path: &str) -> (&'static str, Vec<&str>, String) {
    const SCRIPT: &str = "if command -v gnome-screenshot >/dev/null 2>&1; then \
        gnome-screenshot -f \"$1\"; \
        elif command -v import >/dev/null 2>&1; then \
        import -window root \"$1\"; \
        elif command -v scrot >/dev/null 2>&1; then \
        scrot \"$1\"; \
        else \
        echo 'no screenshot tool on PATH (install gnome-screenshot, imagemagick, or scrot)'; \
        exit 1; \
        fi";
    // With `sh -c`, the first operand after the script becomes `$0` and the
    // next one `$1`; "sh" is only a placeholder name for `$0`.
    ("sh", vec!["-c", SCRIPT, "sh", out_path], String::new())
}
/// Handles the `image_describe` tool call.
///
/// `input` is a JSON object with a required string `path` and an optional
/// string `question` (an absent or empty question becomes
/// `"Describe this image."`). The image is sent as a base64 `data:` URL to
/// `{base}/v1/chat/completions`, where `base` comes from `LMS_API_URL`, then
/// `OLLAMA_HOST`, then `http://localhost:1234`. The model name comes from
/// `CLAUDETTE_VISION_MODEL` (default `"vision"`).
///
/// On success returns a JSON string with `ok`, `model` and `description`.
///
/// # Errors
///
/// Returns an error string when the input is invalid, the path is rejected by
/// `validate_read_path`, the extension is not a supported image type, the file
/// cannot be read or exceeds `DESCRIBE_MAX_IMAGE_BYTES`, the server cannot be
/// reached or answers with a non-success status, the body is not JSON, or the
/// JSON carries no text at `choices[0].message.content`.
fn run_image_describe(input: &str) -> Result<String, String> {
    let v = parse_json_input(input, "image_describe")?;
    let path_str = v
        .get("path")
        .and_then(Value::as_str)
        .ok_or("image_describe: missing 'path'")?;
    let question = v
        .get("question")
        .and_then(Value::as_str)
        .filter(|s| !s.is_empty())
        .unwrap_or("Describe this image.");

    let path = super::validate_read_path(path_str)?;
    let mime = image_mime_from_path(&path).ok_or_else(|| {
        format!(
            "image_describe: '{}' is not a supported image type (PNG/JPG/GIF/WebP/BMP)",
            path.display()
        )
    })?;

    let too_large = |size: u64| {
        format!(
            "image_describe: {} is {} bytes, exceeds {}",
            path.display(),
            size,
            DESCRIBE_MAX_IMAGE_BYTES
        )
    };

    // Check the size on disk first so an oversized file is rejected without
    // being pulled into memory. `usize -> u64` is a lossless widening here.
    let on_disk = std::fs::metadata(&path)
        .map_err(|e| format!("image_describe: read {} failed: {e}", path.display()))?
        .len();
    if on_disk > DESCRIBE_MAX_IMAGE_BYTES as u64 {
        return Err(too_large(on_disk));
    }

    let bytes = std::fs::read(&path)
        .map_err(|e| format!("image_describe: read {} failed: {e}", path.display()))?;
    // Re-check what was actually read: the file may have grown since the stat.
    if bytes.len() > DESCRIBE_MAX_IMAGE_BYTES {
        return Err(too_large(bytes.len() as u64));
    }
    let data_url = format!("data:{mime};base64,{}", encode_base64_standard(&bytes));

    let base = std::env::var("LMS_API_URL")
        .or_else(|_| std::env::var("OLLAMA_HOST"))
        .unwrap_or_else(|_| "http://localhost:1234".to_string());
    let model = std::env::var("CLAUDETTE_VISION_MODEL").unwrap_or_else(|_| "vision".to_string());
    // Tolerate a configured base URL that ends in `/` (avoids `//v1/...`).
    let url = format!("{}/v1/chat/completions", base.trim_end_matches('/'));

    let payload = json!({
        "model": model,
        "max_tokens": 512,
        "messages": [{
            "role": "user",
            "content": [
                { "type": "text", "text": question },
                { "type": "image_url", "image_url": { "url": data_url } }
            ]
        }]
    });

    let client = reqwest::blocking::Client::builder()
        .timeout(std::time::Duration::from_secs(DESCRIBE_TIMEOUT_SECS))
        .build()
        .map_err(|e| format!("image_describe: build client failed: {e}"))?;
    let resp = client.post(&url).json(&payload).send().map_err(|e| {
        format!(
            "image_describe: cannot reach {url} ({e}). Is LM Studio running with a vision \
             model loaded? See docs/vision.md."
        )
    })?;

    let status = resp.status();
    if !status.is_success() {
        let text = resp.text().unwrap_or_default();
        return Err(format!(
            "image_describe: HTTP {status}: {} \
             — confirm a vision-language model (Qwen2.5-VL, InternVL, etc.) is loaded.",
            text.chars().take(400).collect::<String>()
        ));
    }

    let data: Value = resp
        .json()
        .map_err(|e| format!("image_describe: parse failed: {e}"))?;
    // A 2xx reply without the expected text field is a failure, not an empty
    // description: report it (with a bounded excerpt) instead of claiming success.
    let description = data
        .pointer("/choices/0/message/content")
        .and_then(Value::as_str)
        .ok_or_else(|| {
            format!(
                "image_describe: response from {url} has no text at \
                 choices[0].message.content: {}",
                data.to_string().chars().take(400).collect::<String>()
            )
        })?;

    Ok(json!({
        "ok": true,
        "model": model,
        "description": description,
    })
    .to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The module advertises exactly its two tools, in declaration order.
    #[test]
    fn schemas_lists_two_tools() {
        let all = schemas();
        assert_eq!(all.len(), 2);

        let mut names: Vec<&str> = Vec::new();
        for schema in &all {
            if let Some(name) = schema.pointer("/function/name").and_then(Value::as_str) {
                names.push(name);
            }
        }
        assert_eq!(names, ["screenshot_capture", "image_describe"]);
    }

    /// An input object without `path` is rejected before any file access.
    #[test]
    fn image_describe_rejects_missing_path() {
        let outcome = run_image_describe("{}");
        let err = outcome.unwrap_err();
        assert!(err.contains("missing 'path'"), "got: {err}");
    }

    /// A `.txt` file must not be accepted as an image.
    #[test]
    fn image_describe_rejects_non_image_extension() {
        crate::with_temp_home(|home| {
            let bogus = home.join("claudette-not-an-image-xyz.txt");
            // Best effort: the assertion below also tolerates a "missing" error.
            let _ = std::fs::write(&bogus, "hi");

            let request = json!({ "path": bogus.to_string_lossy() }).to_string();
            let err = run_image_describe(&request).unwrap_err();

            let recognised = ["not a supported image type", "missing"]
                .iter()
                .any(|needle| err.contains(*needle));
            assert!(recognised, "got: {err}");
        });
    }

    /// Whatever the platform, the builder yields a program and at least one argument.
    #[test]
    fn build_screenshot_command_returns_non_empty_program() {
        let command = build_screenshot_command("/tmp/test.png");
        assert!(!command.0.is_empty());
        assert!(!command.1.is_empty());
    }
}