use anyhow::Result;
use mistralrs::{
    IsqType, MemoryGpuConfig, PagedAttentionMetaBuilder, TextMessageRole, TextMessages,
    TextModelBuilder,
};

#[tokio::main]
async fn main() -> Result<()> {
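    // Load Phi-3.5-mini-instruct with in-situ quantization (ISQ) to Q8_0, logging enabled,
    // and paged attention using a 32-token block size and GPU memory sized for a
    // 1024-token context.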
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q8_0)
        .with_logging()
        .with_paged_attn(|| {
            PagedAttentionMetaBuilder::default()
                .with_block_size(32)
                .with_gpu_memory(MemoryGpuConfig::ContextSize(1024))
                .build()
        })?
        .build()
        .await?;
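
    // Compose the chat: a system prompt setting the assistant's role, followed by the user's request.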
    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write a generic binary search function in Rust.",
        );
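
    // Send the chat request and wait for the complete response.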
    let response = model.send_chat_request(messages).await?;
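
    // Print the generated text and the average prompt/completion throughput (tokens per second).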
    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}