
gguf_locally/main.rs

use anyhow::Result;
use mistralrs::{
    GgufModelBuilder, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages,
};

#[tokio::main]
async fn main() -> Result<()> {
    // We do not use any files from remote servers here; instead, we load the
    // chat template from the specified file, and the tokenizer and model from
    // a local GGUF file at the path specified.
    let model = GgufModelBuilder::new(
        "gguf_models/mistral_v0.1/",
        vec!["mistral-7b-instruct-v0.1.Q4_K_M.gguf"],
    )
    .with_chat_template("chat_templates/mistral.json")
    .with_logging()
    .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
    .build()
    .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "Hello! How are you? Please write a generic binary search function in Rust.",
    );
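
    // Hedged variation on the same `TextMessages` API: turns can be chained by
    // repeating `add_message`, e.g. to prepend a system prompt:
    //
    //     let messages = TextMessages::new()
    //         .add_message(TextMessageRole::System, "You are a concise Rust expert.")
    //         .add_message(TextMessageRole::User, "Please write a generic binary search function in Rust.");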

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    // Throughput statistics reported alongside the response.
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

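    // Hedged aside: for incremental output, the crate also exposes a streaming
    // API. A minimal sketch, assuming `stream_chat_request` and a
    // `Response::Chunk` variant as in the crate's streaming examples (left
    // commented out so this example stays focused on blocking requests):
    //
    //     let mut stream = model.stream_chat_request(messages).await?;
    //     while let Some(chunk) = stream.next().await {
    //         if let Response::Chunk(chunk) = chunk {
    //             print!("{}", chunk.choices[0].delta.content);
    //         }
    //     }
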
    // Next example: return some logprobs with the `RequestBuilder`, which
    // allows for greater configurability.
    let request = RequestBuilder::new().return_logprobs(true).add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    // Print the logprobs of the first three generated tokens.
    println!(
        "Logprobs: {:?}",
        &response.choices[0]
            .logprobs
            .as_ref()
            .unwrap()
            .content
            .as_ref()
            .unwrap()[0..3]
    );
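
    // A hedged sketch of walking the full list rather than just the first
    // three entries; `token` and `logprob` are assumed field names on each
    // entry, mirroring the OpenAI-style logprobs shape.
    for lp in response.choices[0]
        .logprobs
        .as_ref()
        .unwrap()
        .content
        .as_ref()
        .unwrap()
    {
        println!("{}: {}", lp.token, lp.logprob);
    }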

    Ok(())
}