dravr_browser/vision.rs
1// ABOUTME: Vision-LLM seam — the trait consumers implement to supply screenshot analysis
2// ABOUTME: Keeps dravr-browser free of any concrete LLM crate, preventing a dependency cycle
3//
4// SPDX-License-Identifier: MIT OR Apache-2.0
5// Copyright (c) 2026 dravr.ai
6
7use std::error::Error;
8
9use async_trait::async_trait;
10
/// Type-erased failure produced by a [`VisionAnalyzer`] implementation.
///
/// The concrete error is hidden behind a boxed trait object so that this crate
/// never has to name the consumer's own error type (an embacle provider error,
/// an HTTP error, etc.). The `Send + Sync` bounds keep the boxed error usable
/// across threads; the `'static` bound is the one `Box<dyn Trait>` already
/// implies and is only written out here for clarity.
pub type VisionError = Box<dyn Error + Send + Sync + 'static>;
16
17/// A vision-capable LLM reduced to the single operation page automation needs:
18/// analyze a screenshot against a prompt and return the model's text reply.
19///
20/// `dravr-browser` defines this trait so it does **not** depend on any concrete
21/// LLM crate. The consumer implements it — typically by wrapping its own LLM
22/// provider — and hands it to whatever flow needs a vision fallback.
23#[async_trait]
24pub trait VisionAnalyzer: Send + Sync {
25 /// Analyze a base64-encoded PNG screenshot against `prompt`; return the
26 /// model's text response.
27 ///
28 /// # Errors
29 ///
30 /// Returns an error if the underlying model call fails.
31 async fn analyze_screenshot(
32 &self,
33 prompt: &str,
34 screenshot_png_b64: &str,
35 ) -> Result<String, VisionError>;
36}