Skip to main content

text_auto_device_map/
main.rs

1use anyhow::Result;
2use mistralrs::{
3    AutoDeviceMapParams, DeviceMapSetting, IsqType, PagedAttentionMetaBuilder, RequestBuilder,
4    TextMessageRole, TextMessages, TextModelBuilder,
5};
6
7#[tokio::main]
8async fn main() -> Result<()> {
9    let auto_map_params = AutoDeviceMapParams::Text {
10        max_seq_len: 4096,
11        max_batch_size: 2,
12    };
13    let model = TextModelBuilder::new("meta-llama/Llama-3.3-70B-Instruct")
14        .with_isq(IsqType::Q8_0)
15        .with_logging()
16        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
17        .with_device_mapping(DeviceMapSetting::Auto(auto_map_params))
18        .build()
19        .await?;
20
21    let messages = TextMessages::new()
22        .add_message(
23            TextMessageRole::System,
24            "You are an AI agent with a specialty in programming.",
25        )
26        .add_message(
27            TextMessageRole::User,
28            "Hello! How are you? Please write generic binary search function in Rust.",
29        );
30
31    let response = model.send_chat_request(messages).await?;
32
33    println!("{}", response.choices[0].message.content.as_ref().unwrap());
34    dbg!(
35        response.usage.avg_prompt_tok_per_sec,
36        response.usage.avg_compl_tok_per_sec
37    );
38
39    // Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability.
40    let request = RequestBuilder::new().return_logprobs(true).add_message(
41        TextMessageRole::User,
42        "Please write a mathematical equation where a few numbers are added.",
43    );
44
45    let response = model.send_chat_request(request).await?;
46
47    println!(
48        "Logprobs: {:?}",
49        &response.choices[0]
50            .logprobs
51            .as_ref()
52            .unwrap()
53            .content
54            .as_ref()
55            .unwrap()[0..3]
56    );
57
58    Ok(())
59}