use anyhow::Result;
use mistralrs::{
    IsqType, RequestBuilder, SpeculativeConfig, TextMessageRole, TextMessages, TextModelBuilder,
    TextSpeculativeBuilder,
};

7#[tokio::main]
8async fn main() -> Result<()> {
9 let target = TextModelBuilder::new("meta-llama/Llama-3.1-8B-Instruct").with_logging();
10 let draft = TextModelBuilder::new("meta-llama/Llama-3.2-1B-Instruct")
11 .with_logging()
12 .with_isq(IsqType::Q8_0);
13 let spec_cfg = SpeculativeConfig { gamma: 16 };
14 let model = TextSpeculativeBuilder::new(target, draft, spec_cfg)?
15 .build()
16 .await?;
17
18 let messages = TextMessages::new()
19 .add_message(
20 TextMessageRole::System,
21 "You are an AI agent with a specialty in programming.",
22 )
23 .add_message(
24 TextMessageRole::User,
25 "Hello! How are you? Please write generic binary search function in Rust.",
26 );
27
28 let response = model.send_chat_request(messages).await?;
29
30 println!("{}", response.choices[0].message.content.as_ref().unwrap());
31 dbg!(
32 response.usage.avg_prompt_tok_per_sec,
33 response.usage.avg_compl_tok_per_sec
34 );
35
36 let request = RequestBuilder::new().return_logprobs(true).add_message(
38 TextMessageRole::User,
39 "Please write a mathematical equation where a few numbers are added.",
40 );
41
42 let response = model.send_chat_request(request).await?;
43
44 println!(
45 "Logprobs: {:?}",
46 &response.choices[0]
47 .logprobs
48 .as_ref()
49 .unwrap()
50 .content
51 .as_ref()
52 .unwrap()[0..3]
53 );
54
55 Ok(())
56}