Skip to main content

topology/
main.rs

1use anyhow::Result;
2use mistralrs::{
3    IsqType, LayerTopology, PagedAttentionMetaBuilder, TextMessageRole, TextMessages,
4    TextModelBuilder, Topology,
5};
6
7#[tokio::main]
8async fn main() -> Result<()> {
9    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
10        .with_isq(IsqType::Q8_0)
11        .with_topology(
12            Topology::empty()
13                .with_range(
14                    0..8,
15                    LayerTopology {
16                        isq: Some(IsqType::Q3K),
17                        device: None,
18                    },
19                )
20                .with_range(
21                    8..16,
22                    LayerTopology {
23                        isq: Some(IsqType::Q4K),
24                        device: None,
25                    },
26                )
27                .with_range(
28                    16..24,
29                    LayerTopology {
30                        isq: Some(IsqType::Q6K),
31                        device: None,
32                    },
33                )
34                .with_range(
35                    24..32,
36                    LayerTopology {
37                        isq: Some(IsqType::Q8_0),
38                        device: None,
39                    },
40                ),
41        )
42        .with_logging()
43        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
44        .build()
45        .await?;
46
47    let messages = TextMessages::new()
48        .add_message(
49            TextMessageRole::System,
50            "You are an AI agent with a specialty in programming.",
51        )
52        .add_message(
53            TextMessageRole::User,
54            "Hello! How are you? Please write generic binary search function in Rust.",
55        );
56
57    let response = model.send_chat_request(messages).await?;
58
59    println!("{}", response.choices[0].message.content.as_ref().unwrap());
60    dbg!(
61        response.usage.avg_prompt_tok_per_sec,
62        response.usage.avg_compl_tok_per_sec
63    );
64
65    Ok(())
66}