Skip to main content

vision_auto_device_map/
main.rs

1use anyhow::Result;
2use mistralrs::{
3    AutoDeviceMapParams, DeviceMapSetting, IsqType, TextMessageRole, VisionMessages,
4    VisionModelBuilder,
5};
6
/// Model identifier passed to `VisionModelBuilder::new` in `main`.
///
/// Alternative model kept for reference:
/// `meta-llama/Llama-3.2-11B-Vision-Instruct`.
const MODEL_ID: &str = "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k";
9
10#[tokio::main]
11async fn main() -> Result<()> {
12    let auto_map_params = AutoDeviceMapParams::Text {
13        max_seq_len: 4096,
14        max_batch_size: 2,
15    };
16    let model = VisionModelBuilder::new(MODEL_ID)
17        .with_isq(IsqType::Q4K)
18        .with_logging()
19        .with_device_mapping(DeviceMapSetting::Auto(auto_map_params))
20        .build()
21        .await?;
22
23    let bytes = match reqwest::blocking::get(
24        "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg",
25    ) {
26        Ok(http_resp) => http_resp.bytes()?.to_vec(),
27        Err(e) => anyhow::bail!(e),
28    };
29    let image = image::load_from_memory(&bytes)?;
30
31    let messages = VisionMessages::new().add_image_message(
32        TextMessageRole::User,
33        "What is depicted here? Please describe the scene in detail.",
34        vec![image],
35        &model,
36    )?;
37
38    let response = model.send_chat_request(messages).await?;
39
40    println!("{}", response.choices[0].message.content.as_ref().unwrap());
41    dbg!(
42        response.usage.avg_prompt_tok_per_sec,
43        response.usage.avg_compl_tok_per_sec
44    );
45
46    Ok(())
47}