use qai_sdk::types::ImageSource;
use qai_sdk::*;
use qai_sdk::LanguageModel;
#[tokio::main]
async fn main() -> Result<()> {
dotenvy::dotenv().ok();
println!("=== Image URL Input (OpenAI) ===");
let api_key = std::env::var("OPENAI_API_KEY").unwrap_or_default();
let model = OpenAIModel::new(api_key);
let prompt = Prompt {
messages: vec![Message {
role: Role::User,
content: vec![
Content::Text {
text: "What do you see in this image? Be brief.".to_string(),
},
Content::Image {
source: ImageSource::Url {
url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-702-702-702-702-702-702-702-702.jpg/1280px-Gfp-wisconsin-madison-the-nature-702.jpg".to_string(),
},
},
],
}],
};
let options = GenerateOptions {
model_id: "gpt-4o-mini".to_string(),
max_tokens: Some(200),
temperature: None,
top_p: None,
stop_sequences: None,
..Default::default()
};
let result = model.generate(prompt, options).await?;
println!("Vision response: {}\n", result.text);
println!("=== Base64 Image Input (Anthropic) ===");
let api_key = std::env::var("ANTHROPIC_API_KEY").unwrap_or_default();
let model = AnthropicModel::new(api_key);
let tiny_png_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==";
let prompt = Prompt {
messages: vec![Message {
role: Role::User,
content: vec![
Content::Text {
text: "Describe this image.".to_string(),
},
Content::Image {
source: ImageSource::Base64 {
media_type: "image/png".to_string(),
data: tiny_png_base64.to_string(),
},
},
],
}],
};
let options = GenerateOptions {
model_id: "claude-3-haiku-20240307".to_string(),
max_tokens: Some(100),
temperature: None,
top_p: None,
stop_sequences: None,
..Default::default()
};
let result = model.generate(prompt, options).await?;
println!("Vision response: {}\n", result.text);
println!("=== Multi-turn with Image (Google) ===");
let api_key = std::env::var("GOOGLE_GENERATIVE_AI_API_KEY").unwrap_or_default();
let model = GoogleModel::new(api_key);
let prompt = Prompt {
messages: vec![
Message {
role: Role::User,
content: vec![
Content::Text {
text: "Remember this image.".to_string(),
},
Content::Image {
source: ImageSource::Url {
url: "https://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Cat_August_2010-4.jpg/1200px-Cat_August_2010-4.jpg".to_string(),
},
},
],
},
Message {
role: Role::Assistant,
content: vec![Content::Text {
text: "I see a cat in the image.".to_string(),
}],
},
Message {
role: Role::User,
content: vec![Content::Text {
text: "What breed might it be?".to_string(),
}],
},
],
};
let options = GenerateOptions {
model_id: "gemini-1.5-flash".to_string(),
max_tokens: Some(200),
temperature: None,
top_p: None,
stop_sequences: None,
..Default::default()
};
let result = model.generate(prompt, options).await?;
println!("Multi-turn vision response: {}", result.text);
Ok(())
}