1use anyhow::Result;
2use clap::Parser;
3use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder};
4use mistralrs_core::initialize_logging;
5use tokio::task;
6
7#[derive(Parser)]
8#[command(version, about, long_about = None)]
9struct Args {
10 #[arg(short, long, default_value = "Qwen/Qwen2.5-VL-3B-Instruct")]
11 model_id: String,
12}
13
14#[tokio::main]
15async fn main() -> Result<()> {
16 initialize_logging();
17 let args = Args::parse();
18
19 let bytes = task::spawn_blocking(|| {
20 match reqwest::blocking::get(
21 "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg",
22 ) {
23 Ok(http_resp) => Ok(http_resp.bytes()?.to_vec()),
24 Err(e) => anyhow::bail!(e),
25 }
26 }).await??;
27
28 let image = image::load_from_memory(&bytes)?;
29
30 let model = VisionModelBuilder::new(args.model_id)
31 .with_isq(IsqType::Q8_0)
32 .with_logging()
33 .build()
34 .await?;
35
36 let messages = VisionMessages::new().add_image_message(
37 TextMessageRole::User,
38 "What is depicted here?",
39 vec![image],
40 &model,
41 )?;
42
43 let response = model.send_chat_request(messages).await?;
44
45 println!("{}", response.choices[0].message.content.as_ref().unwrap());
46 dbg!(
47 response.usage.avg_prompt_tok_per_sec,
48 response.usage.avg_compl_tok_per_sec
49 );
50
51 Ok(())
52}