1use anyhow::Result;
2use mistralrs::{
3 IsqType, LayerTopology, PagedAttentionMetaBuilder, TextMessageRole, TextMessages,
4 TextModelBuilder, Topology,
5};
6
7#[tokio::main]
8async fn main() -> Result<()> {
9 let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
10 .with_isq(IsqType::Q8_0)
11 .with_topology(
12 Topology::empty()
13 .with_range(
14 0..8,
15 LayerTopology {
16 isq: Some(IsqType::Q3K),
17 device: None,
18 },
19 )
20 .with_range(
21 8..16,
22 LayerTopology {
23 isq: Some(IsqType::Q4K),
24 device: None,
25 },
26 )
27 .with_range(
28 16..24,
29 LayerTopology {
30 isq: Some(IsqType::Q6K),
31 device: None,
32 },
33 )
34 .with_range(
35 24..32,
36 LayerTopology {
37 isq: Some(IsqType::Q8_0),
38 device: None,
39 },
40 ),
41 )
42 .with_logging()
43 .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
44 .build()
45 .await?;
46
47 let messages = TextMessages::new()
48 .add_message(
49 TextMessageRole::System,
50 "You are an AI agent with a specialty in programming.",
51 )
52 .add_message(
53 TextMessageRole::User,
54 "Hello! How are you? Please write generic binary search function in Rust.",
55 );
56
57 let response = model.send_chat_request(messages).await?;
58
59 println!("{}", response.choices[0].message.content.as_ref().unwrap());
60 dbg!(
61 response.usage.avg_prompt_tok_per_sec,
62 response.usage.avg_compl_tok_per_sec
63 );
64
65 Ok(())
66}