use paladin::application::services::paladin::paladin_builder::PaladinBuilder;
use paladin::application::services::paladin::paladin_execution_service::PaladinExecutionService;
use paladin::core::platform::container::vision::{ImageDetail, VisionContent, VisionError};
use paladin::infrastructure::resilience::circuit_breaker::CircuitBreaker;
use paladin::{OpenAIAdapter, OpenAIConfig};
use paladin_ports::output::llm_port::LlmPort;
use std::env;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
/// Entry point for the vision example.
///
/// Walks through the whole flow in order: configure the OpenAI adapter, confirm the
/// adapter reports vision support, build a circuit breaker and a vision-enabled
/// Paladin, analyze a local image (or a remote fallback URL when the local file is
/// missing), and then run the detail-level, base64, multi-image and error-handling
/// demonstrations.
///
/// # Errors
///
/// Returns an error when `OPENAI_API_KEY` cannot be read, when the adapter or the
/// Paladin cannot be constructed, or when any of the analysis calls fails.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The same remote image backs the missing-file fallback and both detail-level demos.
    const SAMPLE_IMAGE_URL: &str = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/280px-PNG_transparency_demonstration_1.png";
    const DESCRIBE_TASK: &str = "What do you see in this image? Describe it in detail.";

    env_logger::init();
    println!("🎯 Sentinel Vision System - Image Analysis Example\n");

    println!("📋 Step 1: Configuring OpenAI with GPT-4o (vision-capable)...");
    // A missing key is a user/environment problem, not a bug: report it as an error
    // through `main`'s `Result` instead of panicking.
    let api_key = env::var("OPENAI_API_KEY")
        .map_err(|e| format!("OPENAI_API_KEY environment variable not set: {}", e))?;
    let config = OpenAIConfig {
        api_key,
        base_url: "https://api.openai.com/v1".to_string(),
        organization: None,
        timeout_seconds: 300,
        max_retries: 3,
    };
    // The error is rendered with `Debug` because that is the only formatting bound
    // the original `expect` call proved the error type to have.
    let llm_adapter: Arc<dyn LlmPort> = Arc::new(
        OpenAIAdapter::new(config)
            .map_err(|e| format!("Failed to create OpenAI adapter: {:?}", e))?,
    );

    let capabilities = llm_adapter.get_capabilities();
    if !capabilities.supports_vision {
        eprintln!("❌ Error: Selected model does not support vision!");
        eprintln!("Please use a vision-capable model like gpt-4o or gpt-4-vision-preview");
        // Terminates immediately with status 1; `process::exit` does not run destructors.
        std::process::exit(1);
    }
    println!("✅ Vision support confirmed\n");

    println!("📋 Step 2: Setting up circuit breaker...");
    // NOTE(review): presumably (failure threshold, success threshold, reset timeout) —
    // confirm against `CircuitBreaker::new`.
    let circuit_breaker = Arc::new(CircuitBreaker::new(5, 3, Duration::from_secs(60)));
    println!("✅ Circuit breaker configured\n");

    println!("📋 Step 3: Building vision-enabled Paladin...");
    // `Arc::clone` makes it explicit that only the reference count is bumped; the
    // original handle is still needed for the execution service below.
    let paladin = PaladinBuilder::new(Arc::clone(&llm_adapter))
        .name("ImageAnalyzer")
        .system_prompt(
            "You are an expert image analyst. \
             Provide detailed, accurate descriptions of images. \
             Focus on key objects, scenes, actions, colors, and composition.",
        )
        .enable_vision(true)
        .model("gpt-4o")
        .temperature(0.7)
        .max_loops(1)
        .timeout_seconds(120)
        .build()
        .await
        .map_err(|e| format!("Failed to build Paladin: {:?}", e))?;
    println!(
        "✅ Paladin '{}' created with vision enabled\n",
        paladin.node.name
    );

    println!("📋 Step 4: Creating execution service...");
    // NOTE(review): the two trailing `None`s are optional collaborators whose meaning
    // is not visible here — confirm against `PaladinExecutionService::new`.
    let execution_service =
        PaladinExecutionService::new(llm_adapter, circuit_breaker, None, None);
    println!("✅ Execution service ready\n");

    println!("📋 Step 5: Preparing image for analysis...");
    let image_path = PathBuf::from("./examples/assets/sample_image.jpg");
    // Pick the image source once, then issue a single analysis call for either case.
    let vision_content = if image_path.exists() {
        // Print before moving the path into the content so no clone is needed.
        println!("📸 Image: {:?}", image_path);
        println!("🔍 Detail Level: Auto (balanced speed/quality)\n");
        vec![VisionContent::ImageFile {
            path: image_path,
            detail: ImageDetail::Auto,
        }]
    } else {
        println!("⚠️ Warning: Sample image not found at {:?}", image_path);
        println!(" Creating example with image URL instead...\n");
        vec![VisionContent::ImageUrl {
            url: SAMPLE_IMAGE_URL.to_string(),
            detail: ImageDetail::Auto,
        }]
    };
    analyze_image(&execution_service, &paladin, vision_content, DESCRIBE_TASK).await?;

    println!();
    print_banner("📊 Demonstrating Different Detail Levels");
    println!();

    let low_detail_image = vec![VisionContent::ImageUrl {
        url: SAMPLE_IMAGE_URL.to_string(),
        detail: ImageDetail::Low,
    }];
    println!("🔽 LOW DETAIL (Fast & Cheap - ~85 tokens)");
    analyze_image(
        &execution_service,
        &paladin,
        low_detail_image,
        "Quick summary of this image.",
    )
    .await?;

    let high_detail_image = vec![VisionContent::ImageUrl {
        url: SAMPLE_IMAGE_URL.to_string(),
        detail: ImageDetail::High,
    }];
    println!("\n🔼 HIGH DETAIL (Accurate & Detailed - ~170+ tokens)");
    analyze_image(
        &execution_service,
        &paladin,
        high_detail_image,
        "Provide an extremely detailed analysis of this image.",
    )
    .await?;

    println!();
    print_banner("📦 Demonstrating Base64-Encoded Image Processing");
    println!();
    demonstrate_base64_image(&execution_service, &paladin).await?;

    println!();
    print_banner("🖼️ Demonstrating Multiple Images in One Request");
    println!();
    demonstrate_multiple_images(&execution_service, &paladin).await?;

    println!();
    print_banner("⚠️ Demonstrating Error Handling Patterns");
    println!();
    // This demo reports its own failures and returns nothing, so there is no `?` here.
    demonstrate_error_handling(&execution_service, &paladin).await;

    println!();
    print_banner("✅ Vision analysis example completed successfully!");
    Ok(())
}

/// Prints `title` framed above and below by an 80-character `=` rule.
fn print_banner(title: &str) {
    let rule = "=".repeat(80);
    println!("{}", rule);
    println!("{}", title);
    println!("{}", rule);
}
/// Runs a single vision task and prints a formatted report of the outcome.
///
/// Announces `task`, sends it together with `images` through
/// `service.execute_with_vision`, times the call, and then prints the Paladin name,
/// elapsed seconds, loop count, token count, stop reason and the produced output.
///
/// # Errors
///
/// Propagates any error returned by `execute_with_vision`; nothing after the
/// "Analyzing" line is printed in that case.
async fn analyze_image(
    service: &PaladinExecutionService,
    paladin: &paladin::core::platform::container::paladin::Paladin,
    images: Vec<VisionContent>,
    task: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    println!("💭 Task: {}", task);
    println!("⏳ Analyzing...\n");

    // Only the execution call itself is timed.
    let timer = std::time::Instant::now();
    let outcome = service.execute_with_vision(paladin, task, images).await?;
    let elapsed_secs = timer.elapsed().as_secs_f64();

    // One divider string is built and reused for every horizontal rule in the report.
    let divider = "─".repeat(80);

    println!("{}", divider);
    println!("📊 ANALYSIS RESULTS");
    println!("{}", divider);
    println!("🤖 Paladin: {}", paladin.node.name);
    println!("⏱️ Execution Time: {:.2}s", elapsed_secs);
    println!("🔄 Loops: {}", outcome.loop_count);
    println!("🎫 Tokens: {}", outcome.token_count);
    println!("🛑 Stop Reason: {:?}", outcome.stop_reason);
    println!("{}", divider);
    println!("📝 OUTPUT:\n");
    println!("{}", outcome.output);
    println!("{}", divider);

    Ok(())
}
/// Demonstrates sending an inline base64-encoded image for analysis.
///
/// Prints a short introduction, wraps a hard-coded base64 PNG payload in a
/// `VisionContent::ImageBase64` at low detail, hands it to `analyze_image`, and
/// finishes with a usage tip.
///
/// # Errors
///
/// Propagates any error returned by `analyze_image`.
async fn demonstrate_base64_image(
    service: &PaladinExecutionService,
    paladin: &paladin::core::platform::container::paladin::Paladin,
) -> Result<(), Box<dyn std::error::Error>> {
    println!("Base64 encoding is ideal for dynamically generated or in-memory images.");
    println!("This example creates a small image and encodes it as base64.\n");

    println!("📦 Image Format: Base64-encoded PNG");
    println!("🔍 Detail Level: Low");
    println!("💭 Task: Analyze this base64-encoded image\n");

    // Hard-coded payload; it begins with the PNG signature and an IHDR for a 1x1 image.
    let encoded_png = String::from(
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==",
    );
    let payload = vec![VisionContent::ImageBase64 {
        data: encoded_png,
        media_type: String::from("image/png"),
        detail: ImageDetail::Low,
    }];

    analyze_image(service, paladin, payload, "What color is this image?").await?;

    println!("\n💡 Tip: Base64 encoding is handled automatically by the vision adapter.");
    println!(" Just provide the data and media_type, and the adapter handles the rest.\n");

    Ok(())
}
async fn demonstrate_multiple_images(
service: &PaladinExecutionService,
paladin: &paladin::core::platform::container::paladin::Paladin,
) -> Result<(), Box<dyn std::error::Error>> {
println!("You can analyze multiple images in a single request for comparison");
println!("or sequential analysis.\n");
let images = vec![
VisionContent::ImageUrl {
url: "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/400px-Cat03.jpg".to_string(),
detail: ImageDetail::Low,
},
VisionContent::ImageUrl {
url: "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Cat_November_2010-1a.jpg/400px-Cat_November_2010-1a.jpg".to_string(),
detail: ImageDetail::Low,
},
];
println!("📸 Number of Images: {}", images.len());
println!("💭 Task: Compare and describe the differences\n");
analyze_image(
service,
paladin,
images,
"Compare these two images. What are the key differences between them?",
)
.await?;
println!("\n💡 Tip: Multiple image support varies by provider.");
println!(" OpenAI GPT-4o and Anthropic Claude 3 both support multiple images per request.\n");
Ok(())
}
/// Demonstrates how vision failures surface and how to react to them.
///
/// Runs three parts in order:
/// 1. executes a request against a URL that is expected not to resolve and prints
///    whichever outcome occurs;
/// 2. calls `validate_format` on a base64 item declared as `image/bmp` and prints
///    the validation result;
/// 3. prints a recommended `match`-based handling snippet and a best-practice list.
///
/// Every failure is reported on stdout rather than propagated, so nothing is returned.
async fn demonstrate_error_handling(
    service: &PaladinExecutionService,
    paladin: &paladin::core::platform::container::paladin::Paladin,
) {
    println!("Vision operations can fail for various reasons. Here's how to handle them:\n");

    println!("1️⃣ Handling Invalid Image URL");
    let broken_link = vec![VisionContent::ImageUrl {
        url: "https://example.com/nonexistent-image-12345.jpg".to_string(),
        detail: ImageDetail::Auto,
    }];
    let attempt = service
        .execute_with_vision(paladin, "Describe this image", broken_link)
        .await;
    if let Err(failure) = attempt {
        println!(" ❌ Error handled gracefully: {}", failure);
        println!(" 💡 Tip: Always validate URLs before processing");
    } else {
        println!(" ✅ Unexpectedly succeeded");
    }
    println!();

    println!("2️⃣ Format Validation (Unsupported Format)");
    let bmp_payload = VisionContent::ImageBase64 {
        data: "invalid_base64".to_string(),
        media_type: "image/bmp".to_string(),
        detail: ImageDetail::Auto,
    };
    // The unsupported-format case gets a dedicated message; any other validation
    // error is shown through its `Debug` representation.
    match bmp_payload.validate_format() {
        Err(VisionError::UnsupportedFormat(reason)) => {
            println!(" ❌ Format validation failed: {}", reason);
            println!(" 💡 Tip: Supported formats are PNG, JPEG, GIF, and WebP");
        }
        Err(other) => println!(" ❌ Validation error: {:?}", other),
        Ok(_) => println!(" ✅ Format is valid"),
    }
    println!();

    println!("3️⃣ Recommended Error Handling Pattern");
    // Braces are doubled because these literals are format strings; each pair prints
    // as a single brace.
    println!(" ```rust");
    println!(" match service.execute_with_vision(paladin, task, images).await {{");
    println!(" Ok(result) => {{");
    println!(" // Process successful result");
    println!(r#" println!("Analysis: {{}}", result.output);"#);
    println!(" }}");
    println!(" Err(e) => {{");
    println!(" // Log and handle error gracefully");
    println!(r#" eprintln!("Vision error: {{}}", e);"#);
    println!(" // Optionally retry or use fallback strategy");
    println!(" }}");
    println!(" }}");
    println!(" ```\n");

    println!("💡 Best Practices:");
    // The last entry carries the trailing blank line that closes the section.
    for practice in [
        " • Validate image formats before sending to API",
        " • Use circuit breaker for fault tolerance",
        " • Implement retry logic with exponential backoff",
        " • Set appropriate timeouts for large images",
        " • Monitor token usage to stay within limits",
        " • Handle rate limits gracefully\n",
    ] {
        println!("{}", practice);
    }
}