Skip to main content

mofa_foundation/llm/
vision.rs

//! Vision utilities for multi-modal message construction
//!
//! This module provides:
//! - MIME type detection
//! - Base64 encoding
//! - Structured content array generation
//! - Multi-modal message builders
8
9use crate::llm::types::{ChatMessage, ContentPart, ImageDetail, ImageUrl, MessageContent, Role};
10use anyhow::Result;
11use std::path::Path;
12
13/// Encode an image file as a data URL
14///
15/// # Arguments
16/// * `path` - Path to the image file
17///
18/// # Returns
19/// A data URL string in the format `data:<mime-type>;base64,<data>`
20///
21/// # Example
22/// ```ignore
23/// let url = encode_image_data_url(Path::new("/path/to/image.png"))?;
24/// assert!(url.starts_with("data:image/png;base64,"));
25/// ```
26pub fn encode_image_data_url(path: &Path) -> Result<String> {
27    use base64::Engine;
28    use base64::engine::general_purpose::STANDARD_NO_PAD;
29    use std::fs;
30
31    let bytes = fs::read(path)?;
32
33    let mime_type = infer::get_from_path(path)?
34        .ok_or_else(|| anyhow::anyhow!("Unknown MIME type for: {:?}", path))?
35        .mime_type()
36        .to_string();
37
38    let base64 = STANDARD_NO_PAD.encode(&bytes);
39    Ok(format!("data:{};base64,{}", mime_type, base64))
40}
41
42/// Encode an image file as an ImageUrl struct
43///
44/// # Arguments
45/// * `path` - Path to the image file
46///
47/// # Returns
48/// An ImageUrl struct suitable for use in ContentPart
49pub fn encode_image_url(path: &Path) -> Result<ImageUrl> {
50    let url = encode_image_data_url(path)?;
51    Ok(ImageUrl { url, detail: None })
52}
53
54/// Build multi-modal message content with images
55///
56/// # Arguments
57/// * `text` - The text content
58/// * `image_paths` - Paths to image files
59///
60/// # Returns
61/// MessageContent with both text and images
62///
63/// # Example
64/// ```ignore
65/// let content = build_vision_message(
66///     "What's in this image?",
67///     &["/path/to/image.png".to_string()]
68/// )?;
69/// ```
70pub fn build_vision_message(text: &str, image_paths: &[String]) -> Result<MessageContent> {
71    let mut parts = vec![ContentPart::Text {
72        text: text.to_string(),
73    }];
74
75    for path_str in image_paths {
76        let path = Path::new(path_str);
77        let image_url = encode_image_url(path)?;
78        parts.push(ContentPart::Image { image_url });
79    }
80
81    Ok(MessageContent::Parts(parts))
82}
83
84/// Build a ChatMessage with vision content
85///
86/// # Arguments
87/// * `text` - The text content
88/// * `image_paths` - Paths to image files
89///
90/// # Returns
91/// A ChatMessage ready to send to an LLM
92///
93/// # Example
94/// ```ignore
95/// let msg = build_vision_chat_message(
96///     "Describe this image",
97///     &["/path/to/image.jpg".to_string()]
98/// )?;
99/// ```
100pub fn build_vision_chat_message(text: &str, image_paths: &[String]) -> Result<ChatMessage> {
101    let content = build_vision_message(text, image_paths)?;
102
103    Ok(ChatMessage {
104        role: Role::User,
105        content: Some(content),
106        name: None,
107        tool_calls: None,
108        tool_call_id: None,
109    })
110}
111
112/// Build a ChatMessage with a single image
113///
114/// # Arguments
115/// * `text` - The text content
116/// * `image_path` - Path to a single image file
117///
118/// # Returns
119/// A ChatMessage with text and one image
120pub fn build_vision_chat_message_single(text: &str, image_path: &str) -> Result<ChatMessage> {
121    build_vision_chat_message(text, &[image_path.to_string()])
122}
123
124/// Create an ImageUrl from a URL string
125///
126/// # Arguments
127/// * `url` - URL string (can be a web URL or data URL)
128///
129/// # Returns
130/// An ImageUrl struct
131pub fn image_url_from_string(url: impl Into<String>) -> ImageUrl {
132    ImageUrl {
133        url: url.into(),
134        detail: None,
135    }
136}
137
138/// Create an ImageUrl with detail level
139///
140/// # Arguments
141/// * `url` - URL string
142/// * `detail` - Detail level (low, high, auto)
143///
144/// # Returns
145/// An ImageUrl struct with specified detail level
146pub fn image_url_with_detail(url: impl Into<String>, detail: ImageDetail) -> ImageUrl {
147    ImageUrl {
148        url: url.into(),
149        detail: Some(detail),
150    }
151}
152
153/// Extension trait for ImageDetail with helper methods
154pub trait ImageDetailExt {
155    /// Convert to string for API
156    fn as_str(&self) -> &str;
157}
158
159impl ImageDetailExt for ImageDetail {
160    fn as_str(&self) -> &str {
161        match self {
162            ImageDetail::Low => "low",
163            ImageDetail::High => "high",
164            ImageDetail::Auto => "auto",
165        }
166    }
167}
168
/// Check if a file is an image based on its extension
///
/// Only the extension is inspected; the file is never opened. The comparison
/// is case-insensitive and recognises `png`, `jpg`, `jpeg`, `gif`, `webp` and
/// `bmp`.
///
/// # Arguments
/// * `path` - Path to check
///
/// # Returns
/// `true` if the extension is one of the recognised image extensions;
/// `false` otherwise, including when the path has no extension or the
/// extension is not valid UTF-8.
pub fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 6] = ["png", "jpg", "jpeg", "gif", "webp", "bmp"];

    matches!(
        path.extension().and_then(|raw| raw.to_str()),
        Some(ext) if IMAGE_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known))
    )
}
185
186/// Get MIME type for a file path
187///
188/// # Arguments
189/// * `path` - Path to the file
190///
191/// # Returns
192/// The MIME type string or an error
193pub fn get_mime_type(path: &Path) -> Result<String> {
194    infer::get_from_path(path)?
195        .ok_or_else(|| anyhow::anyhow!("Unknown MIME type for: {:?}", path))
196        .map(|info| info.mime_type().to_string())
197}
198
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension matching is case-insensitive and covers every listed format.
    #[test]
    fn test_is_image_file() {
        assert!(is_image_file(Path::new("test.png")));
        assert!(is_image_file(Path::new("test.JPG")));
        assert!(is_image_file(Path::new("test.jpeg")));
        assert!(is_image_file(Path::new("test.gif")));
        assert!(is_image_file(Path::new("test.webp")));
        assert!(is_image_file(Path::new("test.bmp")));
        assert!(!is_image_file(Path::new("test.txt")));
        assert!(!is_image_file(Path::new("test.pdf")));
        // A path without any extension is never an image.
        assert!(!is_image_file(Path::new("test")));
    }

    /// Every detail level maps to its lowercase name.
    #[test]
    fn test_image_detail_as_str() {
        assert_eq!(ImageDetail::Low.as_str(), "low");
        assert_eq!(ImageDetail::High.as_str(), "high");
        assert_eq!(ImageDetail::Auto.as_str(), "auto");
    }

    /// A plain URL is stored verbatim with no detail level.
    #[test]
    fn test_image_url_from_string() {
        let url = image_url_from_string("https://example.com/image.png");
        assert_eq!(url.url, "https://example.com/image.png");
        assert!(url.detail.is_none());
    }

    /// The requested detail level itself is stored, not merely some level.
    #[test]
    fn test_image_url_with_detail() {
        let url = image_url_with_detail("https://example.com/image.png", ImageDetail::High);
        assert_eq!(url.url, "https://example.com/image.png");
        // Pin the variant: `is_some()` alone would also pass for Low or Auto.
        assert!(matches!(url.detail, Some(ImageDetail::High)));
    }
}