grok_api 0.1.71

Rust client library for the Grok AI API (xAI)
Documentation
//! Image understanding example — multimodal input with Grok 4.20.
//!
//! Grok 4.20 models accept images directly in the message content.
//! Supported formats: JPEG, PNG. Maximum size: 20 MiB per image.
//!
//! Run with:
//!   cargo run --example video_chat

use grok_api::{ChatMessage, ContentPart, GrokClient, ImageUrl, Result};

/// Model identifier passed to the request builder in `main` via `.model(MODEL)`.
///
/// Grok 4.20 non-reasoning is the recommended model for vision tasks —
/// fast, multimodal, and has a 2 M token context window.
const MODEL: &str = "grok-4.20-0309-non-reasoning";

/// Sends one image plus a text question to the model and prints the reply.
///
/// Steps, in order:
/// 1. Initialise tracing output and read the API key from `GROK_API_KEY`.
/// 2. Build a single user message made of an image part and a text part.
/// 3. Send it with a 300-token completion limit and print the answer.
/// 4. Print a template hint for base64 images and the token usage.
///
/// # Panics
///
/// Panics if the `GROK_API_KEY` environment variable cannot be read.
///
/// # Errors
///
/// Propagates any error returned by client construction or by the request.
#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt::init();

    // The key is read once up front so a missing variable fails before any
    // network activity.
    let api_key = std::env::var("GROK_API_KEY").expect("GROK_API_KEY environment variable not set");
    let client = GrokClient::new(&api_key)?;

    println!("🖼️  Grok API — Image Understanding\n");
    println!("Model: {MODEL}\n");

    // ── Example 1: public image URL ───────────────────────────────────────────
    // A publicly reachable image hosted at picsum.photos.
    let image_url = "https://picsum.photos/seed/grok/400/300.jpg";
    println!("Image: {image_url}\n");

    // The image part is listed first and the question second, matching the
    // order in which they are placed into the message.
    let picture = ContentPart::ImageUrl {
        image_url: ImageUrl {
            url: image_url.to_owned(),
            detail: Some(String::from("high")),
        },
    };
    let question = ContentPart::Text {
        text: String::from("What does this image show? Describe it briefly."),
    };
    let messages = vec![ChatMessage::user_parts(vec![picture, question])];

    println!("Sending image to {MODEL}");
    let response = client
        .chat_with_history(&messages)
        .model(MODEL)
        .max_tokens(300)
        .send()
        .await?;

    // Fall back to a fixed marker when the response carries no text content.
    let answer = response.content().unwrap_or("(no content)");
    println!("\n✅ Response:\n{}\n", answer);

    // ── Example 2: base64 image (template — fill in your own data) ────────────
    println!("── Base64 image (template) ──");
    println!("To send a local image, encode it to base64 and use:");
    println!(r#"  url: "data:image/jpeg;base64,<YOUR_BASE64_STRING>""#);
    println!();

    // ── Token usage ───────────────────────────────────────────────────────────
    // Assemble the whole summary line first; the cached count is appended only
    // when the response reports one.
    let usage = &response.usage;
    let mut summary = format!(
        "📊 Tokens — prompt: {}, completion: {}, total: {}",
        usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
    );
    if let Some(cached) = usage.cached_prompt_tokens {
        summary.push_str(&format!(", cached: {cached} 💰"));
    }
    println!("{summary}");

    println!("\n✨ Done!");
    Ok(())
}