// qwen2_5vl/main.rs

1use anyhow::Result;
2use clap::Parser;
3use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder};
4use mistralrs_core::initialize_logging;
5use tokio::task;
6
7#[derive(Parser)]
8#[command(version, about, long_about = None)]
9struct Args {
10    #[arg(short, long, default_value = "Qwen/Qwen2.5-VL-3B-Instruct")]
11    model_id: String,
12}
13
14#[tokio::main]
15async fn main() -> Result<()> {
16    initialize_logging();
17    let args = Args::parse();
18
19    let bytes = task::spawn_blocking(|| {
20        match reqwest::blocking::get(
21            "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg",
22        ) {
23            Ok(http_resp) => Ok(http_resp.bytes()?.to_vec()),
24            Err(e) => anyhow::bail!(e),
25        }
26    }).await??;
27
28    let image = image::load_from_memory(&bytes)?;
29
30    let model = VisionModelBuilder::new(args.model_id)
31        .with_isq(IsqType::Q8_0)
32        .with_logging()
33        .build()
34        .await?;
35
36    let messages = VisionMessages::new().add_image_message(
37        TextMessageRole::User,
38        "What is depicted here?",
39        vec![image],
40        &model,
41    )?;
42
43    let response = model.send_chat_request(messages).await?;
44
45    println!("{}", response.choices[0].message.content.as_ref().unwrap());
46    dbg!(
47        response.usage.avg_prompt_tok_per_sec,
48        response.usage.avg_compl_tok_per_sec
49    );
50
51    Ok(())
52}