text_auto_device_map/
main.rs1use anyhow::Result;
2use mistralrs::{
3 AutoDeviceMapParams, DeviceMapSetting, IsqType, PagedAttentionMetaBuilder, RequestBuilder,
4 TextMessageRole, TextMessages, TextModelBuilder,
5};
6
7#[tokio::main]
8async fn main() -> Result<()> {
9 let auto_map_params = AutoDeviceMapParams::Text {
10 max_seq_len: 4096,
11 max_batch_size: 2,
12 };
13 let model = TextModelBuilder::new("meta-llama/Llama-3.3-70B-Instruct")
14 .with_isq(IsqType::Q8_0)
15 .with_logging()
16 .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
17 .with_device_mapping(DeviceMapSetting::Auto(auto_map_params))
18 .build()
19 .await?;
20
21 let messages = TextMessages::new()
22 .add_message(
23 TextMessageRole::System,
24 "You are an AI agent with a specialty in programming.",
25 )
26 .add_message(
27 TextMessageRole::User,
28 "Hello! How are you? Please write generic binary search function in Rust.",
29 );
30
31 let response = model.send_chat_request(messages).await?;
32
33 println!("{}", response.choices[0].message.content.as_ref().unwrap());
34 dbg!(
35 response.usage.avg_prompt_tok_per_sec,
36 response.usage.avg_compl_tok_per_sec
37 );
38
39 let request = RequestBuilder::new().return_logprobs(true).add_message(
41 TextMessageRole::User,
42 "Please write a mathematical equation where a few numbers are added.",
43 );
44
45 let response = model.send_chat_request(request).await?;
46
47 println!(
48 "Logprobs: {:?}",
49 &response.choices[0]
50 .logprobs
51 .as_ref()
52 .unwrap()
53 .content
54 .as_ref()
55 .unwrap()[0..3]
56 );
57
58 Ok(())
59}