llama_vision/main.rs
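// Vision-chat example for mistral.rs: load a Llama 3.2 Vision model with
// in-situ quantization, send an image plus a text prompt, and print the
// reply along with throughput statistics.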

use anyhow::Result;
use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder};

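// The official Meta checkpoint (commented out below) is gated on Hugging Face;
// the default is a Cephalo fine-tune with a 128k-token context window.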
// const MODEL_ID: &str = "meta-llama/Llama-3.2-11B-Vision-Instruct";
const MODEL_ID: &str = "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k";

#[tokio::main]
async fn main() -> Result<()> {
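    // Build the vision pipeline, applying in-situ quantization (ISQ) to
    // compress the weights to roughly 4 bits per weight (Q4K) at load time.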
    let model = VisionModelBuilder::new(MODEL_ID)
        .with_isq(IsqType::Q4K)
        .with_logging()
        .build()
        .await?;

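    // Fetch the sample image over HTTP using reqwest's async API; the
    // blocking client would panic if called from inside the Tokio runtime.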
    let bytes = reqwest::get(
        "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg",
    )
    .await?
    .bytes()
    .await?
    .to_vec();
    let image = image::load_from_memory(&bytes)?;

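    // Attach the image to a user message. The model handle is passed along so
    // the prompt can be laid out with this model's own chat/image template.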
    let messages = VisionMessages::new().add_image_message(
        TextMessageRole::User,
        "What is depicted here? Please describe the scene in detail.",
        vec![image],
        &model,
    )?;

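    // Send the request and await the complete (non-streaming) response.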
    let response = model.send_chat_request(messages).await?;

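    // Print the reply, then report prompt and completion throughput (tokens/s).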
    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}