Skip to main content

simple_stream/
main.rs

1use anyhow::Result;
2use mistralrs::{
3    ChatCompletionChunkResponse, ChunkChoice, Delta, IsqType, PagedAttentionMetaBuilder,
4    RequestBuilder, Response, TextMessageRole, TextMessages, TextModelBuilder,
5};
6use std::io::Write;
7
8#[tokio::main]
9async fn main() -> Result<()> {
10    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
11        .with_isq(IsqType::Q8_0)
12        .with_logging()
13        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
14        .build()
15        .await?;
16
17    let messages = TextMessages::new()
18        .add_message(
19            TextMessageRole::System,
20            "You are an AI agent with a specialty in programming.",
21        )
22        .add_message(
23            TextMessageRole::User,
24            "Hello! How are you? Please write generic binary search function in Rust.",
25        );
26
27    let response = model.send_chat_request(messages).await?;
28
29    println!("{}", response.choices[0].message.content.as_ref().unwrap());
30    dbg!(
31        response.usage.avg_prompt_tok_per_sec,
32        response.usage.avg_compl_tok_per_sec
33    );
34
35    // Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability.
36    let request = RequestBuilder::new().return_logprobs(true).add_message(
37        TextMessageRole::User,
38        "Please write a mathematical equation where a few numbers are added.",
39    );
40
41    let mut stream = model.stream_chat_request(request).await?;
42
43    let stdout = std::io::stdout();
44    let lock = stdout.lock();
45    let mut buf = std::io::BufWriter::new(lock);
46    while let Some(chunk) = stream.next().await {
47        if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
48            if let Some(ChunkChoice {
49                delta:
50                    Delta {
51                        content: Some(content),
52                        ..
53                    },
54                ..
55            }) = choices.first()
56            {
57                buf.write_all(content.as_bytes())?;
58            };
59        } else {
60            // Handle errors
61        }
62    }
63
64    Ok(())
65}