vision_chat/
vision_chat.rs

1#![allow(clippy::uninlined_format_args)]
2//! Vision chat example demonstrating image understanding capabilities.
3//!
4//! This example demonstrates:
5//! - Basic image understanding with URLs
6//! - Base64 image encoding and analysis
7//! - Multiple image analysis in a single message
8//! - Different detail levels (low, high, auto)
9//! - Conversation context with images
10//! - Comprehensive error handling
11//!
12//! Run with: `cargo run --example vision_chat`
13
14use openai_ergonomic::{
15    image_base64_part_with_detail, image_url_part_with_detail, text_part, Client, Detail, Error,
16    Response,
17};
18use std::io::{self, Write};
19
/// A sample image for demonstration (small test image in base64).
///
/// This is the raw base64 payload only; the base64 example passes it together
/// with the MIME type `image/png` when building the image message part.
const SAMPLE_BASE64_IMAGE: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==";
22
/// Sample image URLs for demonstration.
///
/// The first entry is the image used by the single-image examples; the
/// multi-image comparison sends the first and second entries together.
const SAMPLE_IMAGE_URLS: &[&str] = &[
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Vd-Orig.png/256px-Vd-Orig.png",
];
28
29#[tokio::main]
30async fn main() -> Result<(), Box<dyn std::error::Error>> {
31    println!("OpenAI Ergonomic - Vision Chat Example");
32    println!("======================================");
33    println!();
34
35    // Create client from environment variables
36    let client = match Client::from_env() {
37        Ok(client_builder) => {
38            println!("āœ“ Client initialized successfully");
39            client_builder.build()
40        }
41        Err(e) => {
42            eprintln!("āœ— Failed to initialize client: {e}");
43            eprintln!("Make sure OPENAI_API_KEY environment variable is set");
44            return Err(e.into());
45        }
46    };
47
48    println!("āœ“ Using vision-capable model for image understanding");
49    println!();
50
51    // Demonstrate various vision capabilities
52    demonstrate_basic_image_analysis(&client).await?;
53    demonstrate_multiple_images(&client).await?;
54    demonstrate_detail_levels(&client).await?;
55    demonstrate_base64_image(&client).await?;
56    demonstrate_conversation_with_images(&client).await?;
57    demonstrate_error_handling(&client).await?;
58
59    println!("šŸŽ‰ Vision chat example completed successfully!");
60    println!("This example demonstrated:");
61    println!("  • Basic image understanding with URLs");
62    println!("  • Multiple image analysis in single messages");
63    println!("  • Different detail levels (low, high, auto)");
64    println!("  • Base64 image encoding and analysis");
65    println!("  • Conversation context with images");
66    println!("  • Comprehensive error handling");
67
68    Ok(())
69}
70
71/// Demonstrate basic image analysis with a URL.
72async fn demonstrate_basic_image_analysis(
73    client: &Client,
74) -> Result<(), Box<dyn std::error::Error>> {
75    println!("šŸ–¼ļø  Example 1: Basic Image Analysis");
76    println!("----------------------------------");
77
78    let image_url = SAMPLE_IMAGE_URLS[0];
79    let question = "What do you see in this image? Please describe it in detail.";
80
81    println!("Image URL: {image_url}");
82    println!("Question: {question}");
83    print!("Assistant: ");
84    io::stdout().flush()?;
85
86    // Use the convenient user_with_image_url method
87    let chat_builder = client
88        .chat()
89        .system("You are a helpful AI assistant that can analyze images. Provide detailed, accurate descriptions of what you see.")
90        .user_with_image_url(question, image_url)
91        .temperature(0.3);
92
93    let response = client.send_chat(chat_builder).await?;
94
95    if let Some(content) = response.content() {
96        println!("{content}");
97
98        // Show usage information
99        if let Some(usage) = response.usage() {
100            println!("\nšŸ“Š Token usage:");
101            println!("  Prompt tokens: {}", usage.prompt_tokens);
102            println!("  Completion tokens: {}", usage.completion_tokens);
103            println!("  Total tokens: {}", usage.total_tokens);
104        }
105    } else {
106        println!("No response content received");
107    }
108
109    println!();
110    Ok(())
111}
112
113/// Demonstrate analysis of multiple images in a single message.
114async fn demonstrate_multiple_images(client: &Client) -> Result<(), Box<dyn std::error::Error>> {
115    println!("šŸ–¼ļøšŸ–¼ļø Example 2: Multiple Image Analysis");
116    println!("---------------------------------------");
117
118    let question = "Compare these two images. What are the differences and similarities?";
119
120    println!("Question: {question}");
121    println!("Image 1: {}", SAMPLE_IMAGE_URLS[0]);
122    println!("Image 2: {}", SAMPLE_IMAGE_URLS[1]);
123    print!("Assistant: ");
124    io::stdout().flush()?;
125
126    // Create message parts manually for multiple images
127    let parts = vec![
128        text_part(question),
129        image_url_part_with_detail(SAMPLE_IMAGE_URLS[0], Detail::Auto),
130        image_url_part_with_detail(SAMPLE_IMAGE_URLS[1], Detail::Auto),
131    ];
132
133    let chat_builder = client
134        .chat()
135        .system("You are an expert at comparing and analyzing images. Provide thoughtful comparisons focusing on visual elements, composition, and content.")
136        .user_with_parts(parts)
137        .temperature(0.4);
138
139    let response = client.send_chat(chat_builder).await?;
140
141    if let Some(content) = response.content() {
142        println!("{content}");
143    } else {
144        println!("No response content received");
145    }
146
147    println!();
148    Ok(())
149}
150
151/// Demonstrate different detail levels for image analysis.
152async fn demonstrate_detail_levels(client: &Client) -> Result<(), Box<dyn std::error::Error>> {
153    println!("šŸ” Example 3: Different Detail Levels");
154    println!("------------------------------------");
155
156    let image_url = SAMPLE_IMAGE_URLS[0];
157    let question = "Analyze this image";
158
159    // Test different detail levels
160    let detail_levels = vec![
161        (Detail::Low, "Low detail (faster, less detailed)"),
162        (Detail::High, "High detail (slower, more detailed)"),
163        (Detail::Auto, "Auto detail (balanced)"),
164    ];
165
166    for (detail, description) in detail_levels {
167        println!("\n{description}:");
168        print!("Assistant: ");
169        io::stdout().flush()?;
170
171        let chat_builder = client
172            .chat()
173            .system("Analyze the image and describe what you see. Adjust your response detail based on the image quality provided.")
174            .user_with_image_url_and_detail(question, image_url, detail)
175            .temperature(0.2)
176            .max_completion_tokens(100); // Limit response length for comparison
177
178        let response = client.send_chat(chat_builder).await?;
179
180        if let Some(content) = response.content() {
181            println!("{content}");
182        }
183    }
184
185    println!();
186    Ok(())
187}
188
189/// Demonstrate base64 image encoding and analysis.
190async fn demonstrate_base64_image(client: &Client) -> Result<(), Box<dyn std::error::Error>> {
191    println!("šŸ”¢ Example 4: Base64 Image Analysis");
192    println!("-----------------------------------");
193
194    let question = "What is this image? It's very small, what can you tell about it?";
195
196    println!("Question: {question}");
197    println!("Image: Small test image encoded as base64");
198    print!("Assistant: ");
199    io::stdout().flush()?;
200
201    // Create message parts with base64 image
202    let parts = vec![
203        text_part(question),
204        image_base64_part_with_detail(SAMPLE_BASE64_IMAGE, "image/png", Detail::High),
205    ];
206
207    let chat_builder = client
208        .chat()
209        .system("You are analyzing images provided in base64 format. Even if an image is very small or simple, try to provide what information you can.")
210        .user_with_parts(parts)
211        .temperature(0.3);
212
213    let response = client.send_chat(chat_builder).await?;
214
215    if let Some(content) = response.content() {
216        println!("{content}");
217    } else {
218        println!("No response content received");
219    }
220
221    println!();
222    Ok(())
223}
224
225/// Demonstrate conversation context with images.
226async fn demonstrate_conversation_with_images(
227    client: &Client,
228) -> Result<(), Box<dyn std::error::Error>> {
229    println!("šŸ’¬ Example 5: Conversation Context with Images");
230    println!("----------------------------------------------");
231
232    let image_url = SAMPLE_IMAGE_URLS[0];
233
234    // First message: Analyze the image
235    println!("Step 1: Initial image analysis");
236    print!("Assistant: ");
237    io::stdout().flush()?;
238
239    let mut chat_builder = client
240        .chat()
241        .system("You are having a conversation about images. Remember details from previous messages to maintain context.")
242        .user_with_image_url("What's the main subject of this image?", image_url)
243        .temperature(0.3);
244
245    let response1 = client.send_chat(chat_builder).await?;
246    let first_response = response1.content().unwrap_or("No response").to_string();
247    println!("{first_response}");
248
249    // Second message: Follow-up question (without re-uploading the image)
250    println!("\nStep 2: Follow-up question");
251    print!("Assistant: ");
252    io::stdout().flush()?;
253
254    chat_builder = client
255        .chat()
256        .system("You are having a conversation about images. Remember details from previous messages to maintain context.")
257        .user_with_image_url("What's the main subject of this image?", image_url)
258        .assistant(&first_response)
259        .user("What colors are most prominent in the image we just discussed?")
260        .temperature(0.3);
261
262    let response2 = client.send_chat(chat_builder).await?;
263
264    if let Some(content) = response2.content() {
265        println!("{content}");
266    }
267
268    // Third message: Ask for creative interpretation
269    println!("\nStep 3: Creative interpretation");
270    print!("Assistant: ");
271    io::stdout().flush()?;
272
273    let second_response = response2.content().unwrap_or("No response").to_string();
274
275    chat_builder = client
276        .chat()
277        .system("You are having a conversation about images. Remember details from previous messages to maintain context.")
278        .user_with_image_url("What's the main subject of this image?", image_url)
279        .assistant(&first_response)
280        .user("What colors are most prominent in the image we just discussed?")
281        .assistant(second_response)
282        .user("Based on our discussion, write a short poem inspired by this image.")
283        .temperature(0.7);
284
285    let response3 = client.send_chat(chat_builder).await?;
286
287    if let Some(content) = response3.content() {
288        println!("{content}");
289    }
290
291    println!();
292    Ok(())
293}
294
295/// Demonstrate error handling patterns for vision requests.
296async fn demonstrate_error_handling(client: &Client) -> Result<(), Box<dyn std::error::Error>> {
297    println!("āš ļø  Example 6: Error Handling Patterns");
298    println!("------------------------------------");
299
300    println!("Testing various error scenarios...\n");
301
302    // Test 1: Invalid image URL
303    println!("Test 1: Invalid image URL");
304    let invalid_url = "https://this-domain-does-not-exist-12345.com/image.jpg";
305
306    let invalid_builder = client
307        .chat()
308        .user_with_image_url("What do you see?", invalid_url)
309        .temperature(0.3);
310
311    match client.send_chat(invalid_builder).await {
312        Ok(_) => println!("āœ— Invalid URL request unexpectedly succeeded"),
313        Err(e) => match &e {
314            Error::Api {
315                status, message, ..
316            } => {
317                println!("āœ“ API properly rejected invalid URL ({status}): {message}");
318            }
319            Error::Http(reqwest_err) => {
320                println!("āœ“ HTTP error caught: {reqwest_err}");
321            }
322            Error::InvalidRequest(msg) => {
323                println!("āœ“ Validation caught invalid URL: {msg}");
324            }
325            _ => {
326                println!("ā„¹ļø  Other error type: {e}");
327            }
328        },
329    }
330
331    // Test 2: Empty message with image
332    println!("\nTest 2: Empty text with image");
333    let empty_text_builder = client
334        .chat()
335        .user_with_image_url("", SAMPLE_IMAGE_URLS[0])
336        .temperature(0.3);
337
338    match client.send_chat(empty_text_builder).await {
339        Ok(response) => {
340            if let Some(content) = response.content() {
341                println!(
342                    "āœ“ API handled empty text gracefully: {}",
343                    content.chars().take(50).collect::<String>()
344                );
345            }
346        }
347        Err(e) => {
348            println!("ā„¹ļø  Empty text error: {e}");
349        }
350    }
351
352    // Test 3: Malformed base64 data
353    println!("\nTest 3: Malformed base64 image data");
354    let malformed_base64 = "this-is-not-valid-base64!@#$%";
355    let malformed_parts = vec![
356        text_part("What is this?"),
357        image_base64_part_with_detail(malformed_base64, "image/png", Detail::Auto),
358    ];
359
360    let malformed_builder = client.chat().user_with_parts(malformed_parts);
361
362    match client.send_chat(malformed_builder).await {
363        Ok(_) => println!("āœ— Malformed base64 unexpectedly succeeded"),
364        Err(e) => match &e {
365            Error::Api {
366                status, message, ..
367            } => {
368                println!("āœ“ API properly rejected malformed base64 ({status}): {message}");
369            }
370            _ => {
371                println!("ā„¹ļø  Other error for malformed base64: {e}");
372            }
373        },
374    }
375
376    println!("\nšŸ›”ļø  Error handling patterns demonstrated:");
377    println!("  • Invalid image URL handling");
378    println!("  • Empty text with image handling");
379    println!("  • Malformed base64 data validation");
380    println!("  • API error classification");
381    println!("  • Network error handling");
382
383    println!();
384    Ok(())
385}