1use anyhow::Result;
2use mistralrs::{
3 ChatCompletionChunkResponse, ChunkChoice, Delta, IsqType, PagedAttentionMetaBuilder,
4 RequestBuilder, Response, TextMessageRole, TextMessages, TextModelBuilder,
5};
6use std::io::Write;
7
8#[tokio::main]
9async fn main() -> Result<()> {
10 let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
11 .with_isq(IsqType::Q8_0)
12 .with_logging()
13 .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
14 .build()
15 .await?;
16
17 let messages = TextMessages::new()
18 .add_message(
19 TextMessageRole::System,
20 "You are an AI agent with a specialty in programming.",
21 )
22 .add_message(
23 TextMessageRole::User,
24 "Hello! How are you? Please write generic binary search function in Rust.",
25 );
26
27 let response = model.send_chat_request(messages).await?;
28
29 println!("{}", response.choices[0].message.content.as_ref().unwrap());
30 dbg!(
31 response.usage.avg_prompt_tok_per_sec,
32 response.usage.avg_compl_tok_per_sec
33 );
34
35 let request = RequestBuilder::new().return_logprobs(true).add_message(
37 TextMessageRole::User,
38 "Please write a mathematical equation where a few numbers are added.",
39 );
40
41 let mut stream = model.stream_chat_request(request).await?;
42
43 let stdout = std::io::stdout();
44 let lock = stdout.lock();
45 let mut buf = std::io::BufWriter::new(lock);
46 while let Some(chunk) = stream.next().await {
47 if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
48 if let Some(ChunkChoice {
49 delta:
50 Delta {
51 content: Some(content),
52 ..
53 },
54 ..
55 }) = choices.first()
56 {
57 buf.write_all(content.as_bytes())?;
58 };
59 } else {
60 }
62 }
63
64 Ok(())
65}