Skip to main content

// speculative/main.rs

use anyhow::{Context, Result};
use mistralrs::{
    IsqType, RequestBuilder, SpeculativeConfig, TextMessageRole, TextMessages, TextModelBuilder,
    TextSpeculativeBuilder,
};

7#[tokio::main]
8async fn main() -> Result<()> {
9    let target = TextModelBuilder::new("meta-llama/Llama-3.1-8B-Instruct").with_logging();
10    let draft = TextModelBuilder::new("meta-llama/Llama-3.2-1B-Instruct")
11        .with_logging()
12        .with_isq(IsqType::Q8_0);
13    let spec_cfg = SpeculativeConfig { gamma: 16 };
14    let model = TextSpeculativeBuilder::new(target, draft, spec_cfg)?
15        .build()
16        .await?;
17
18    let messages = TextMessages::new()
19        .add_message(
20            TextMessageRole::System,
21            "You are an AI agent with a specialty in programming.",
22        )
23        .add_message(
24            TextMessageRole::User,
25            "Hello! How are you? Please write generic binary search function in Rust.",
26        );
27
28    let response = model.send_chat_request(messages).await?;
29
30    println!("{}", response.choices[0].message.content.as_ref().unwrap());
31    dbg!(
32        response.usage.avg_prompt_tok_per_sec,
33        response.usage.avg_compl_tok_per_sec
34    );
35
36    // Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability.
37    let request = RequestBuilder::new().return_logprobs(true).add_message(
38        TextMessageRole::User,
39        "Please write a mathematical equation where a few numbers are added.",
40    );
41
42    let response = model.send_chat_request(request).await?;
43
44    println!(
45        "Logprobs: {:?}",
46        &response.choices[0]
47            .logprobs
48            .as_ref()
49            .unwrap()
50            .content
51            .as_ref()
52            .unwrap()[0..3]
53    );
54
55    Ok(())
56}