Skip to main content

paged_attn/
main.rs

1use anyhow::Result;
2use mistralrs::{
3    IsqType, MemoryGpuConfig, PagedAttentionMetaBuilder, TextMessageRole, TextMessages,
4    TextModelBuilder,
5};
6
7#[tokio::main]
8async fn main() -> Result<()> {
9    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
10        .with_isq(IsqType::Q8_0)
11        .with_logging()
12        .with_paged_attn(|| {
13            PagedAttentionMetaBuilder::default()
14                .with_block_size(32)
15                .with_gpu_memory(MemoryGpuConfig::ContextSize(1024))
16                .build()
17        })?
18        .build()
19        .await?;
20
21    let messages = TextMessages::new()
22        .add_message(
23            TextMessageRole::System,
24            "You are an AI agent with a specialty in programming.",
25        )
26        .add_message(
27            TextMessageRole::User,
28            "Hello! How are you? Please write generic binary search function in Rust.",
29        );
30
31    let response = model.send_chat_request(messages).await?;
32
33    println!("{}", response.choices[0].message.content.as_ref().unwrap());
34    dbg!(
35        response.usage.avg_prompt_tok_per_sec,
36        response.usage.avg_compl_tok_per_sec
37    );
38
39    Ok(())
40}