Skip to main content

agentic_vision/
capture.rs

1//! Image capture and thumbnail generation.
2
3use std::io::Cursor;
4use std::path::Path;
5use std::process::Command;
6
7use image::codecs::jpeg::JpegEncoder;
8use image::{DynamicImage, GenericImageView, ImageFormat};
9
10use crate::types::{CaptureSource, Rect, VisionError, VisionResult};
11
/// Maximum thumbnail dimension (width or height), in pixels.
///
/// `generate_thumbnail` downscales any image whose width or height exceeds
/// this value.
const MAX_THUMBNAIL_SIZE: u32 = 512;

/// JPEG quality passed to `JpegEncoder::new_with_quality` when encoding
/// thumbnails.
const THUMBNAIL_QUALITY: u8 = 85;
17
18/// Load an image from a file path.
19pub fn capture_from_file(path: &str) -> VisionResult<(DynamicImage, CaptureSource)> {
20    let img = image::open(path)?;
21    let source = CaptureSource::File {
22        path: path.to_string(),
23    };
24    Ok((img, source))
25}
26
27/// Load an image from base64-encoded data.
28pub fn capture_from_base64(data: &str, mime: &str) -> VisionResult<(DynamicImage, CaptureSource)> {
29    use base64::Engine;
30    let bytes = base64::engine::general_purpose::STANDARD
31        .decode(data)
32        .map_err(|e| crate::types::VisionError::InvalidInput(format!("Invalid base64: {e}")))?;
33
34    let format = match mime {
35        "image/png" => Some(ImageFormat::Png),
36        "image/jpeg" | "image/jpg" => Some(ImageFormat::Jpeg),
37        "image/webp" => Some(ImageFormat::WebP),
38        "image/gif" => Some(ImageFormat::Gif),
39        _ => None,
40    };
41
42    let img = if let Some(fmt) = format {
43        image::load_from_memory_with_format(&bytes, fmt)?
44    } else {
45        image::load_from_memory(&bytes)?
46    };
47
48    let source = CaptureSource::Base64 {
49        mime: mime.to_string(),
50    };
51    Ok((img, source))
52}
53
54/// Generate a JPEG thumbnail, preserving aspect ratio, max 512x512.
55pub fn generate_thumbnail(img: &DynamicImage) -> Vec<u8> {
56    let (w, h) = img.dimensions();
57
58    let thumb = if w > MAX_THUMBNAIL_SIZE || h > MAX_THUMBNAIL_SIZE {
59        img.resize(
60            MAX_THUMBNAIL_SIZE,
61            MAX_THUMBNAIL_SIZE,
62            image::imageops::FilterType::Lanczos3,
63        )
64    } else {
65        img.clone()
66    };
67
68    let rgb = thumb.to_rgb8();
69    let mut buf = Vec::new();
70    let mut cursor = Cursor::new(&mut buf);
71    let encoder = JpegEncoder::new_with_quality(&mut cursor, THUMBNAIL_QUALITY);
72    rgb.write_with_encoder(encoder).unwrap_or_else(|e| {
73        tracing::warn!("Failed to encode thumbnail as JPEG: {e}");
74    });
75    buf
76}
77
78// ---------------------------------------------------------------------------
79// Screenshot & clipboard capture
80// ---------------------------------------------------------------------------
81
/// Scope guard for a temporary file: the file at `path` is deleted when the
/// guard is dropped.
struct TempFileGuard {
    path: std::path::PathBuf,
}

impl Drop for TempFileGuard {
    /// Best-effort cleanup. A failed removal (for example because the file
    /// was never created) is deliberately ignored.
    fn drop(&mut self) {
        std::fs::remove_file(self.path.as_path()).ok();
    }
}
92
/// Platform-specific (macOS): capture a screenshot into the file at `temp_path`.
///
/// Runs `screencapture -x`, adding `-R x,y,w,h` when `region` is given. The
/// function writes the file as a side effect and returns nothing on success.
///
/// # Errors
/// Returns `VisionError::Capture` if `screencapture` cannot be started or
/// exits with a non-success status (its stderr is included in the message).
#[cfg(target_os = "macos")]
fn platform_screenshot(temp_path: &Path, region: Option<Rect>) -> VisionResult<()> {
    let mut cmd = Command::new("screencapture");
    cmd.arg("-x"); // silent, no sound

    if let Some(r) = region {
        cmd.arg("-R")
            .arg(format!("{},{},{},{}", r.x, r.y, r.w, r.h));
    }

    // Hand the path over as an OsStr. A lossy UTF-8 conversion could name a
    // different file than the one the caller reads back afterwards.
    cmd.arg(temp_path);

    let output = cmd
        .output()
        .map_err(|e| VisionError::Capture(format!("Failed to run screencapture: {e}")))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(VisionError::Capture(format!(
            "screencapture failed (check Screen Recording permission): {stderr}"
        )));
    }
    Ok(())
}
118
119#[cfg(target_os = "linux")]
120fn platform_screenshot(temp_path: &Path, region: Option<Rect>) -> VisionResult<()> {
121    let temp_str = temp_path.to_string_lossy();
122
123    let success = if let Some(r) = region {
124        // Region capture: try maim, then import (ImageMagick)
125        let geometry = format!("{}x{}+{}+{}", r.w, r.h, r.x, r.y);
126        let maim = Command::new("maim")
127            .arg("-g")
128            .arg(&geometry)
129            .arg(temp_str.as_ref())
130            .output();
131        match maim {
132            Ok(o) if o.status.success() => true,
133            _ => {
134                let import = Command::new("import")
135                    .arg("-window")
136                    .arg("root")
137                    .arg("-crop")
138                    .arg(&geometry)
139                    .arg(temp_str.as_ref())
140                    .output();
141                matches!(import, Ok(o) if o.status.success())
142            }
143        }
144    } else {
145        // Full-screen: try gnome-screenshot → scrot → maim
146        let gnome = Command::new("gnome-screenshot")
147            .arg("-f")
148            .arg(temp_str.as_ref())
149            .output();
150        match gnome {
151            Ok(o) if o.status.success() => true,
152            _ => {
153                let scrot = Command::new("scrot").arg(temp_str.as_ref()).output();
154                match scrot {
155                    Ok(o) if o.status.success() => true,
156                    _ => {
157                        let maim = Command::new("maim").arg(temp_str.as_ref()).output();
158                        matches!(maim, Ok(o) if o.status.success())
159                    }
160                }
161            }
162        }
163    };
164
165    if !success {
166        return Err(VisionError::Capture(
167            "No screenshot tool found. Install one of: gnome-screenshot, scrot, maim, or import (ImageMagick).".to_string(),
168        ));
169    }
170    Ok(())
171}
172
/// Stub for platforms other than macOS and Linux: always fails with
/// `VisionError::Capture`.
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn platform_screenshot(_temp_path: &Path, _region: Option<Rect>) -> VisionResult<()> {
    let message = String::from("Screenshot capture is not supported on this platform.");
    Err(VisionError::Capture(message))
}
179
/// Platform-specific (macOS): read image bytes from the system clipboard.
///
/// macOS clipboard images may be stored as PNG (`PNGf`) or TIFF (`TIFF`).
/// `screencapture -c` writes TIFF, while copy-image-from-browser typically
/// writes PNG. We try PNG first, then fall back to TIFF + `sips` conversion.
///
/// Clipboard data is exported through `osascript` into temporary files whose
/// names combine the process id with a per-call sequence number, so
/// concurrent calls within one process never share (or delete) each other's
/// files. All temporary files are removed when the function returns.
///
/// # Errors
/// Returns `VisionError::Capture` if `osascript` or `sips` cannot be run, if
/// the clipboard holds neither PNG nor TIFF data, if the TIFF to PNG
/// conversion fails, or if the converted file cannot be read.
#[cfg(target_os = "macos")]
fn platform_clipboard_bytes() -> VisionResult<Vec<u8>> {
    // Distinguishes calls made by different threads of the same process.
    static NEXT_READ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    /// Builds the AppleScript that writes clipboard data of the given
    /// four-character `class` to `dest`, raising `failure` if that is not
    /// possible.
    fn export_script(class: &str, dest: &Path, failure: &str) -> String {
        // Backslashes and double quotes must be escaped inside an
        // AppleScript string literal.
        let dest = dest
            .to_string_lossy()
            .replace('\\', "\\\\")
            .replace('"', "\\\"");
        format!(
            r#"try
    set imgData to the clipboard as «class {class}»
    set fp to open for access POSIX file "{dest}" with write permission
    write imgData to fp
    close access fp
on error
    error "{failure}"
end try"#
        )
    }

    let tag = format!(
        "{}_{}",
        std::process::id(),
        NEXT_READ.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
    );
    let tmp = std::env::temp_dir();

    // --- Attempt 1: read clipboard as PNG directly ---
    let png_path = tmp.join(format!("avis_clipboard_{tag}.png"));
    let _png_guard = TempFileGuard {
        path: png_path.clone(),
    };

    let png_run = Command::new("osascript")
        .arg("-e")
        .arg(export_script("PNGf", &png_path, "no png"))
        .output();

    // Any failure here (launch error, script error, unreadable or empty
    // file) simply falls through to the TIFF attempt.
    if matches!(&png_run, Ok(out) if out.status.success()) {
        if let Ok(bytes) = std::fs::read(&png_path) {
            if !bytes.is_empty() {
                return Ok(bytes);
            }
        }
    }

    // --- Attempt 2: read clipboard as TIFF, convert via sips ---
    let tiff_path = tmp.join(format!("avis_clipboard_{tag}.tiff"));
    let _tiff_guard = TempFileGuard {
        path: tiff_path.clone(),
    };
    let converted_path = tmp.join(format!("avis_clipboard_{tag}_conv.png"));
    let _conv_guard = TempFileGuard {
        path: converted_path.clone(),
    };

    let tiff_run = Command::new("osascript")
        .arg("-e")
        .arg(export_script("TIFF", &tiff_path, "no tiff"))
        .output()
        .map_err(|e| VisionError::Capture(format!("Failed to run osascript: {e}")))?;

    if !tiff_run.status.success() {
        let stderr = String::from_utf8_lossy(&tiff_run.stderr);
        return Err(VisionError::Capture(format!(
            "No image found in clipboard (tried PNG and TIFF): {stderr}"
        )));
    }

    // Convert TIFF → PNG using sips (ships with macOS). Paths are passed as
    // OsStr so they reach sips unmodified.
    let sips = Command::new("sips")
        .args(["-s", "format", "png"])
        .arg(&tiff_path)
        .arg("--out")
        .arg(&converted_path)
        .output()
        .map_err(|e| VisionError::Capture(format!("Failed to run sips: {e}")))?;

    if !sips.status.success() {
        let stderr = String::from_utf8_lossy(&sips.stderr);
        return Err(VisionError::Capture(format!(
            "Failed to convert TIFF clipboard image to PNG: {stderr}"
        )));
    }

    std::fs::read(&converted_path)
        .map_err(|e| VisionError::Capture(format!("Failed to read converted clipboard image: {e}")))
}
279
280#[cfg(target_os = "linux")]
281fn platform_clipboard_bytes() -> VisionResult<Vec<u8>> {
282    // Try xclip first, then wl-paste (Wayland)
283    let xclip = Command::new("xclip")
284        .args(["-selection", "clipboard", "-t", "image/png", "-o"])
285        .output();
286
287    if let Ok(o) = xclip {
288        if o.status.success() && !o.stdout.is_empty() {
289            return Ok(o.stdout);
290        }
291    }
292
293    let wl = Command::new("wl-paste")
294        .args(["--type", "image/png"])
295        .output();
296
297    if let Ok(o) = wl {
298        if o.status.success() && !o.stdout.is_empty() {
299            return Ok(o.stdout);
300        }
301    }
302
303    Err(VisionError::Capture(
304        "No image in clipboard. Requires xclip or wl-paste.".to_string(),
305    ))
306}
307
/// Stub for platforms other than macOS and Linux: always fails with
/// `VisionError::Capture`.
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
fn platform_clipboard_bytes() -> VisionResult<Vec<u8>> {
    let message = String::from("Clipboard capture is not supported on this platform.");
    Err(VisionError::Capture(message))
}
314
315/// Capture a screenshot, optionally of a specific screen region.
316///
317/// On macOS, uses `screencapture -x`. On Linux, tries `gnome-screenshot`,
318/// then falls back to `scrot` or `maim`. Windows is not currently supported.
319pub fn capture_screenshot(region: Option<Rect>) -> VisionResult<(DynamicImage, CaptureSource)> {
320    let temp_path =
321        std::env::temp_dir().join(format!("avis_screenshot_{}.png", std::process::id()));
322    let _guard = TempFileGuard {
323        path: temp_path.clone(),
324    };
325
326    platform_screenshot(&temp_path, region)?;
327
328    let img = image::open(&temp_path)
329        .map_err(|e| VisionError::Capture(format!("Failed to read screenshot file: {e}")))?;
330
331    Ok((img, CaptureSource::Screenshot { region }))
332}
333
334/// Capture an image from the system clipboard.
335///
336/// On macOS, uses `osascript` to extract PNG data. On Linux, uses `xclip`
337/// or `wl-paste`. Windows is not currently supported.
338pub fn capture_clipboard() -> VisionResult<(DynamicImage, CaptureSource)> {
339    let image_bytes = platform_clipboard_bytes()?;
340
341    if image_bytes.is_empty() {
342        return Err(VisionError::Capture(
343            "No image data found in clipboard.".to_string(),
344        ));
345    }
346
347    let img = image::load_from_memory(&image_bytes)
348        .map_err(|e| VisionError::Capture(format!("Failed to decode clipboard image: {e}")))?;
349
350    Ok((img, CaptureSource::Clipboard))
351}
352
/// Report whether `path` ends in a recognised image file extension.
///
/// Only the extension is inspected, case-insensitively; the file itself is
/// never opened. Paths without an extension are rejected.
pub fn is_supported_format(path: &str) -> bool {
    const KNOWN_EXTENSIONS: [&str; 9] = [
        "png", "jpg", "jpeg", "webp", "gif", "bmp", "tiff", "tif", "ico",
    ];

    match Path::new(path).extension().and_then(|ext| ext.to_str()) {
        Some(ext) => KNOWN_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
366
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `img` has a non-zero width and height.
    fn assert_non_empty(img: &DynamicImage) {
        let (w, h) = img.dimensions();
        assert!(w > 0 && h > 0);
    }

    /// An image already within the size limit still produces JPEG output.
    #[test]
    fn test_thumbnail_small_image() {
        let thumb = generate_thumbnail(&DynamicImage::new_rgb8(100, 100));
        assert!(!thumb.is_empty());
    }

    /// An oversized image is shrunk so both sides fit within the limit.
    #[test]
    fn test_thumbnail_large_image() {
        let thumb = generate_thumbnail(&DynamicImage::new_rgb8(2000, 1000));
        assert!(!thumb.is_empty());

        // The encoded bytes must decode again, at a bounded size.
        let (w, h) = image::load_from_memory(&thumb).unwrap().dimensions();
        assert!(w <= MAX_THUMBNAIL_SIZE);
        assert!(h <= MAX_THUMBNAIL_SIZE);
    }

    /// Extension matching is case-insensitive and rejects non-image files.
    #[test]
    fn test_supported_formats() {
        for accepted in ["test.png", "test.JPG", "test.webp"] {
            assert!(is_supported_format(accepted));
        }
        for rejected in ["test.txt", "test.pdf"] {
            assert!(!is_supported_format(rejected));
        }
    }

    /// Full-screen capture either works or fails with a `Capture` error.
    #[test]
    fn test_capture_screenshot_returns_sensible_result() {
        // Headless/CI machines are expected to fail with a Capture error;
        // a developer machine with display access may succeed. The point is
        // that nothing panics and no other error variant appears.
        match capture_screenshot(None) {
            Ok((img, CaptureSource::Screenshot { region: None })) => assert_non_empty(&img),
            Err(VisionError::Capture(_)) => {} // Expected on CI
            other => panic!("Unexpected result: {other:?}"),
        }
    }

    /// Clipboard capture either works or fails with a `Capture` error.
    #[test]
    fn test_capture_clipboard_returns_sensible_result() {
        // On CI, the clipboard is typically empty or inaccessible.
        match capture_clipboard() {
            Ok((img, CaptureSource::Clipboard)) => assert_non_empty(&img),
            Err(VisionError::Capture(_)) => {} // Expected on CI
            other => panic!("Unexpected result: {other:?}"),
        }
    }

    /// A zero-sized region must not panic on any platform.
    #[test]
    fn test_capture_screenshot_with_zero_region() {
        let empty_region = Rect {
            x: 0,
            y: 0,
            w: 0,
            h: 0,
        };
        match capture_screenshot(Some(empty_region)) {
            Ok(_) | Err(VisionError::Capture(_)) => {}
            other => panic!("Unexpected result: {other:?}"),
        }
    }
}
445}