pub struct TextModelBuilder { /* private fields */ }
Configure a text model with the various parameters for loading, running, and other inference behaviors.
Implementations

impl TextModelBuilder

pub fn new(model_id: impl ToString) -> Self
A few defaults are applied here:

- MoQE ISQ organization
- Token source is from the cache (.cache/huggingface/token)
- Maximum number of sequences running at once is 32
- Number of sequences to hold in the prefix cache is 16
- Automatic device mapping with model defaults according to AutoDeviceMapParams
- Web searching compatible with the OpenAI web_search_options setting is disabled
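Each of these defaults can be overridden with the corresponding builder method documented below. A minimal sketch, assuming a tokio async runtime; the model ID and values are placeholders rather than recommendations, and the TokenSource::CacheToken variant name is taken from the TokenSource enum (check its docs if it has changed):

use anyhow::Result;
use mistralrs::{TextModelBuilder, TokenSource};

#[tokio::main]
async fn main() -> Result<()> {
    // Override a few of the defaults listed above.
    let _model = TextModelBuilder::new("some-org/some-model")
        .with_token_source(TokenSource::CacheToken) // explicit form of the default token source
        .with_max_num_seqs(64)                      // default is 32
        .with_prefix_cache_n(Some(8))               // default is 16; None disables the prefix cacher
        .build()
        .await?;
    Ok(())
}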
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("google/gemma-2-9b-it")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let request = RequestBuilder::new().add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

More examples:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    // Bullet list regex
    let request = RequestBuilder::new()
        .set_constraint(mistralrs::Constraint::Regex(
            "(- [^\n]*\n)+(- [^\n]*)(\n\n)?".to_string(),
        ))
        .add_message(TextMessageRole::User, "Please write a few jokes.");

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

async fn main() -> Result<()> {
    let model = LoraModelBuilder::from_text_model_builder(
        TextModelBuilder::new("meta-llama/Llama-3.2-1B-Instruct").with_logging(),
        vec!["danielhanchen/llama-3.2-lora".to_string()],
    )
    .build()
    .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "Hello! How are you? Please write generic binary search function in Rust.",
    );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("openai/gpt-oss-20b")
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("zai-org/GLM-4.7-Flash")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model =
        XLoraModelBuilder::from_text_model_builder(
            TextModelBuilder::new("HuggingFaceH4/zephyr-7b-beta").with_logging(),
            "lamm-mit/x-lora",
            serde_json::from_reader(File::open("my-ordering-file.json").unwrap_or_else(|_| {
                panic!("Could not load ordering file at my-ordering-file.json")
            }))?,
        )
        .build()
        .await?;

    let messages =
        TextMessages::new().add_message(TextMessageRole::User, "Hello! What is graphene.");

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Additional examples:

- examples/web_search/main.rs
- examples/deepseekr1/main.rs
- examples/deepseekv2/main.rs
- examples/phi3_5_moe/main.rs
- examples/granite/main.rs
- examples/smollm3/main.rs
- examples/mixture_of_quant_experts/main.rs
- examples/imatrix/main.rs
- examples/custom_logits_processor/main.rs
- examples/paged_attn/main.rs
- examples/json_schema/main.rs
- examples/llguidance/main.rs
- examples/isq/main.rs
- examples/async/main.rs
- examples/simple/main.rs
- examples/anymoe_lora/main.rs
- examples/custom_search/main.rs
- examples/custom_tool_call/main.rs
- examples/speculative/main.rs
- examples/batching/main.rs
- examples/text_auto_device_map/main.rs
- examples/anymoe/main.rs
- examples/topology/main.rs
- examples/simple_stream/main.rs
- examples/qwen3/main.rs
- examples/tools/main.rs
- examples/perplexity/main.rs
- examples/agent/main.rs
- examples/multi_model/main.rs
- examples/agent_streaming/main.rs
- examples/mcp_client/main.rs
pub fn with_search(self, search_embedding_model: SearchEmbeddingModel) -> Self

Enable searching compatible with the OpenAI web_search_options setting. This loads the selected search embedding reranker (EmbeddingGemma by default).
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("NousResearch/Hermes-3-Llama-3.1-8B")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_search(SearchEmbeddingModel::default())
        .build()
        .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "What is the weather forecast for Boston?",
    );
    let messages =
        RequestBuilder::from(messages).with_web_search_options(WebSearchOptions::default());

    let response = model.send_chat_request(messages).await?;

    println!("What is the weather forecast for Boston?\n\n");
    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

pub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self

Override the search function used when web_search_options is enabled.
Examples found in repository:

async fn main() -> Result<()> {
    // Build the model enabling web-search support. We supply a custom search callback that is
    // used **instead** of the default remote web search when we set `web_search_options` later
    // in the request.

    // The EmbeddingGemma reranker is **not** required even when using a custom callback – it is
    // used inside the retrieval pipeline to cluster / rank the results that our callback returns.
    let model = TextModelBuilder::new("NousResearch/Hermes-3-Llama-3.1-8B")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_search_callback(Arc::new(|params: &mistralrs::SearchFunctionParameters| {
            // In a real application there could be network or database calls here – but for the
            // sake of demonstration we simply perform a local filesystem search.
            local_search(&params.query)
        }))
        .build()
        .await?;

    let messages =
        TextMessages::new().add_message(TextMessageRole::User, "Where is Cargo.toml in this repo?");

    // Enable searching for this request. Because we provided a custom search callback above, the
    // model will call **our** function instead of performing an online web search.
    let messages =
        RequestBuilder::from(messages).with_web_search_options(WebSearchOptions::default());

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

pub fn with_tool_callback(
    self,
    name: impl Into<String>,
    callback: Arc<ToolCallback>,
) -> Self

Register a callback for a specific tool name.
Examples found in repository:

async fn main() -> Result<()> {
    // Build the model and register the *tool callback*.
    let model = TextModelBuilder::new("NousResearch/Hermes-3-Llama-3.1-8B")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_tool_callback(
            "local_search",
            Arc::new(|f: &CalledFunction| {
                let args: serde_json::Value = serde_json::from_str(&f.arguments)?;
                let query = args["query"].as_str().unwrap_or("");
                Ok(serde_json::to_string(&local_search(query)?)?)
            }),
        )
        .build()
        .await?;

    // Define the JSON schema for the tool the model can call.
    let parameters = std::collections::HashMap::from([(
        "query".to_string(),
        serde_json::json!({"type": "string", "description": "Query"}),
    )]);
    let tool = Tool {
        tp: ToolType::Function,
        function: mistralrs::Function {
            description: Some("Local filesystem search".to_string()),
            name: "local_search".to_string(),
            parameters: Some(parameters),
        },
    };

    // Ask the user question and allow the model to call the tool automatically.
    let messages =
        TextMessages::new().add_message(TextMessageRole::User, "Where is Cargo.toml in this repo?");
    let messages = RequestBuilder::from(messages)
        .set_tools(vec![tool])
        .set_tool_choice(ToolChoice::Auto);

    let response = model.send_chat_request(messages).await?;
    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    Ok(())
}

pub fn with_tool_callback_and_tool(
    self,
    name: impl Into<String>,
    callback: Arc<ToolCallback>,
    tool: Tool,
) -> Self

Register a callback with an associated Tool definition that will be automatically added to requests when tool callbacks are active.
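No repository example accompanies this method. A minimal sketch, reusing the Tool and CalledFunction shapes from the with_tool_callback example above; the "echo" tool name and its behavior are illustrative:

// Register the callback and its Tool schema in one call, so the tool
// definition is attached to requests automatically (no set_tools needed).
let tool = Tool {
    tp: ToolType::Function,
    function: mistralrs::Function {
        description: Some("Echo the provided arguments".to_string()),
        name: "echo".to_string(),
        parameters: None,
    },
};
let builder = TextModelBuilder::new("NousResearch/Hermes-3-Llama-3.1-8B")
    .with_tool_callback_and_tool(
        "echo",
        // Hypothetical callback: return the raw JSON arguments as the tool result.
        Arc::new(|f: &CalledFunction| Ok(f.arguments.clone())),
        tool,
    );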
pub fn with_mcp_client(self, config: McpClientConfig) -> Self

Configure an MCP client to connect to external MCP servers and automatically register their tools for use in automatic tool calling.
Examples found in repository:

async fn main() -> Result<()> {
    // Simple MCP client configuration using defaults
    // Most fields use sensible defaults (enabled=true, UUID for id/prefix, no timeouts)
    let mcp_config_simple = McpClientConfig {
        servers: vec![McpServerConfig {
            name: "Filesystem Tools".to_string(),
            source: McpServerSource::Process {
                command: "npx".to_string(),
                args: vec![
                    "@modelcontextprotocol/server-filesystem".to_string(),
                    ".".to_string(),
                ],
                work_dir: None,
                env: None,
            },
            ..Default::default()
        }],
        ..Default::default()
    };

    // Alternative: Full configuration with multiple transport types
    let _mcp_config_full = McpClientConfig {
        servers: vec![
            // Example: Process-based MCP server (enabled by default)
            McpServerConfig {
                name: "Filesystem Tools".to_string(),
                source: McpServerSource::Process {
                    command: "npx".to_string(),
                    args: vec![
                        "@modelcontextprotocol/server-filesystem".to_string(),
                        ".".to_string(),
                    ],
                    work_dir: None,
                    env: None,
                },
                tool_prefix: Some("fs".to_string()),
                ..Default::default()
            },
            // Example: HTTP-based MCP server with Bearer token authentication (disabled by default)
            McpServerConfig {
                id: "hf_server".to_string(),
                name: "Hugging Face MCP".to_string(),
                source: McpServerSource::Http {
                    url: "https://hf.co/mcp".to_string(),
                    timeout_secs: Some(30),
                    headers: None, // Additional headers can be specified here if needed
                },
                enabled: false, // Disabled by default
                tool_prefix: Some("hf".to_string()), // Prefixes tool names to avoid conflicts
                resources: None,
                bearer_token: Some("hf_xxx".to_string()), // Replace with your actual Hugging Face token
            },
            //
            // // Example with both Bearer token and additional headers (uncomment HashMap import above)
            // McpServerConfig {
            //     id: "authenticated_server".to_string(),
            //     name: "Authenticated MCP Server".to_string(),
            //     source: McpServerSource::Http {
            //         url: "https://api.example.com/mcp".to_string(),
            //         timeout_secs: Some(60),
            //         headers: Some({
            //             let mut headers = HashMap::new();
            //             headers.insert("X-API-Version".to_string(), "v1".to_string());
            //             headers.insert("X-Client-ID".to_string(), "mistral-rs".to_string());
            //             headers
            //         }),
            //     },
            //     enabled: false,
            //     tool_prefix: Some("auth".to_string()),
            //     resources: None,
            //     bearer_token: Some("your-bearer-token".to_string()), // Will be added as Authorization: Bearer <token>
            // },
            // Example WebSocket-based MCP server (disabled by default)
            McpServerConfig {
                id: "websocket_server".to_string(),
                name: "WebSocket Example".to_string(),
                source: McpServerSource::WebSocket {
                    url: "wss://api.example.com/mcp".to_string(),
                    timeout_secs: Some(30),
                    headers: None,
                },
                enabled: false, // Disabled by default
                tool_prefix: Some("ws".to_string()),
                resources: None,
                bearer_token: Some("your-websocket-token".to_string()), // WebSocket Bearer token support
            },
        ],
        // Automatically discover and register tools from connected MCP servers
        auto_register_tools: true,
        // Timeout for individual tool calls (30 seconds)
        tool_timeout_secs: Some(30),
        // Maximum concurrent tool calls across all servers
        max_concurrent_calls: Some(5),
    };

    // Use the simple configuration for this example
    let mcp_config = mcp_config_simple;

    println!("Building model with MCP client support...");

    // Build the model with MCP client configuration
    // The MCP client will automatically connect to configured servers and discover available tools
    let model = TextModelBuilder::new("Qwen/Qwen3-4B".to_string())
        .with_isq(IsqType::Q8_0) // Use 8-bit quantization for efficiency
        .with_logging()
        .with_paged_attn(|| {
            PagedAttentionMetaBuilder::default()
                .with_gpu_memory(MemoryGpuConfig::ContextSize(8192))
                .build()
        })?
        .with_mcp_client(mcp_config) // This automatically connects to MCP servers and registers tools
        .build()
        .await?;

    println!("Model built successfully! MCP servers connected and tools registered.");
    println!("MCP tools are now available for automatic tool calling during conversations.");
    println!(
        "Note: Install filesystem server with: npx @modelcontextprotocol/server-filesystem . -y"
    );

    // Create a conversation that demonstrates MCP tool usage
    // The system message informs the model about available external tools
    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI assistant with access to external tools via MCP servers. \
             You can access filesystem operations and other external services \
             provided by connected MCP servers. Use these tools when appropriate to \
             help answer user questions. Tools are automatically available and you \
             can call them as needed.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! Can you list the files in the current directory and create a test.txt file?",
        );

    println!("\nSending chat request...");
    println!("The model will automatically use MCP tools if needed to answer the question.");
    let response = model.send_chat_request(messages).await?;

    println!("\nResponse:");
    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    // Display performance metrics
    println!("\nPerformance metrics:");
    println!(
        "Prompt tokens/sec: {:.2}",
        response.usage.avg_prompt_tok_per_sec
    );
    println!(
        "Completion tokens/sec: {:.2}",
        response.usage.avg_compl_tok_per_sec
    );

    // Display any MCP tool calls that were made during the conversation
    if let Some(tool_calls) = &response.choices[0].message.tool_calls {
        println!("\nMCP tool calls made:");
        for tool_call in tool_calls {
            println!(
                "- Tool: {} | Arguments: {}",
                tool_call.function.name, tool_call.function.arguments
            );
        }
    } else {
        println!("\nNo tool calls were made for this request.");
    }

    Ok(())
}

pub fn with_throughput_logging(self) -> Self

Enable runner throughput logging.
pub fn with_jinja_explicit(self, jinja_explicit: String) -> Self

Explicit JINJA chat template file (.jinja) to be used. If specified, this overrides all other chat templates.
pub fn with_topology(self, topology: Topology) -> Self

Set the model topology for use during loading. If there is an overlap, the topology type is used over the ISQ type.
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q8_0)
        .with_topology(
            Topology::empty()
                .with_range(
                    0..8,
                    LayerTopology {
                        isq: Some(IsqType::Q3K),
                        device: None,
                    },
                )
                .with_range(
                    8..16,
                    LayerTopology {
                        isq: Some(IsqType::Q4K),
                        device: None,
                    },
                )
                .with_range(
                    16..24,
                    LayerTopology {
                        isq: Some(IsqType::Q6K),
                        device: None,
                    },
                )
                .with_range(
                    24..32,
                    LayerTopology {
                        isq: Some(IsqType::Q8_0),
                        device: None,
                    },
                ),
        )
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

pub fn with_topology_from_path<P: AsRef<Path>>(self, path: P) -> Result<Self>

Set the model topology from a path. This preserves the path for unload/reload support. If there is an overlap, the topology type is used over the ISQ type.
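No repository example accompanies this method. A minimal sketch; the file name is illustrative, and the file format is whatever Topology accepts (the repository's topology examples use YAML):

// Unlike most builder methods, with_topology_from_path is fallible
// (it returns Result<Self>), hence the `?`.
let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
    .with_topology_from_path("topology.yml")? // hypothetical path
    .with_logging()
    .build()
    .await?;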
pub fn with_mixture_qexperts_isq(self) -> Self

Organize ISQ to enable MoQE (Mixture of Quantized Experts, https://arxiv.org/abs/2310.02410).
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-MoE-instruct")
        .with_isq(IsqType::Q4K)
        .with_mixture_qexperts_isq()
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

pub fn with_chat_template(self, chat_template: impl ToString) -> Self

Literal Jinja chat template OR a path (ending in .json) to one.

pub fn with_tokenizer_json(self, tokenizer_json: impl ToString) -> Self

Path to a discrete tokenizer.json file.
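A minimal sketch of supplying an explicit chat template and tokenizer; both paths below are placeholders:

// with_chat_template accepts either a literal Jinja template string
// or a path ending in .json; with_tokenizer_json takes a tokenizer.json path.
let builder = TextModelBuilder::new("some-org/some-model")
    .with_chat_template("chat_templates/llama3.json") // hypothetical path
    .with_tokenizer_json("local/tokenizer.json");     // hypothetical path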
pub fn with_loader_type(self, loader_type: NormalLoaderType) -> Self

Manually set the model loader type. Otherwise, it will attempt to automatically determine the loader type.
pub fn with_dtype(self, dtype: ModelDType) -> Self

Load the model in a certain dtype.
pub fn with_force_cpu(self) -> Self

Force usage of the CPU device. Do not use PagedAttention with this.
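A minimal sketch combining the two options; the ModelDType::BF16 variant name is assumed from the ModelDType enum and should be checked against its docs:

// Load in bf16 and pin execution to the CPU. Per the note above,
// do not combine with_force_cpu with PagedAttention.
let builder = TextModelBuilder::new("some-org/some-model")
    .with_dtype(ModelDType::BF16) // assumed variant name
    .with_force_cpu();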
pub fn with_token_source(self, token_source: TokenSource) -> Self

Source of the Hugging Face token.
pub fn with_hf_revision(self, revision: impl ToString) -> Self

Set the revision to use for a Hugging Face remote model.
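A minimal sketch using both options together; the TokenSource::EnvVar variant name is assumed from the TokenSource enum, and the revision string is illustrative:

// Read the HF token from an environment variable and pin a revision
// (a branch name, tag, or commit hash on the Hub).
let builder = TextModelBuilder::new("meta-llama/Llama-3.2-1B-Instruct")
    .with_token_source(TokenSource::EnvVar("HF_TOKEN".to_string()))
    .with_hf_revision("main");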
pub fn with_isq(self, isq: IsqType) -> Self

Use ISQ of a certain type. If there is an overlap, the topology type is used over the ISQ type.
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("google/gemma-2-9b-it")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let request = RequestBuilder::new().add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

More examples:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    // Bullet list regex
    let request = RequestBuilder::new()
        .set_constraint(mistralrs::Constraint::Regex(
            "(- [^\n]*\n)+(- [^\n]*)(\n\n)?".to_string(),
        ))
        .add_message(TextMessageRole::User, "Please write a few jokes.");

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("zai-org/GLM-4.7-Flash")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("NousResearch/Hermes-3-Llama-3.1-8B")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_search(SearchEmbeddingModel::default())
        .build()
        .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "What is the weather forecast for Boston?",
    );
    let messages =
        RequestBuilder::from(messages).with_web_search_options(WebSearchOptions::default());

    let response = model.send_chat_request(messages).await?;

    println!("What is the weather forecast for Boston?\n\n");
    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("deepseek-ai/DeepSeek-R1")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("deepseek-ai/DeepSeek-V2-Lite")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Additional examples:

- examples/phi3_5_moe/main.rs
- examples/granite/main.rs
- examples/smollm3/main.rs
- examples/mixture_of_quant_experts/main.rs
- examples/imatrix/main.rs
- examples/custom_logits_processor/main.rs
- examples/paged_attn/main.rs
- examples/json_schema/main.rs
- examples/llguidance/main.rs
- examples/isq/main.rs
- examples/async/main.rs
- examples/simple/main.rs
- examples/anymoe_lora/main.rs
- examples/custom_search/main.rs
- examples/custom_tool_call/main.rs
- examples/speculative/main.rs
- examples/batching/main.rs
- examples/text_auto_device_map/main.rs
- examples/anymoe/main.rs
- examples/topology/main.rs
- examples/simple_stream/main.rs
- examples/qwen3/main.rs
- examples/tools/main.rs
- examples/perplexity/main.rs
- examples/agent/main.rs
- examples/multi_model/main.rs
- examples/agent_streaming/main.rs
- examples/mcp_client/main.rs
pub fn with_imatrix(self, path: PathBuf) -> Self

Utilise this imatrix file during ISQ. Incompatible with specifying a calibration file.
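A minimal sketch; the imatrix file path is a placeholder (see examples/imatrix/main.rs in the repository for a complete program):

// Apply a precomputed importance matrix while quantizing with ISQ.
let builder = TextModelBuilder::new("meta-llama/Llama-3.2-3B-Instruct")
    .with_isq(IsqType::Q4K)
    .with_imatrix("imatrix.dat".into()); // hypothetical file name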
pub fn with_calibration_file(self, path: PathBuf) -> Self

Utilise this calibration file to collect an imatrix. Incompatible with specifying an imatrix file.
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("meta-llama/Llama-3.2-3B-Instruct")
        .with_isq(IsqType::Q4K)
        .with_calibration_file("calibration_data/calibration_datav3_small.txt".into())
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

More examples:

async fn main() -> Result<()> {
    let args = Args::parse();

    let quant = if let Some(isq) = &args.isq {
        Some(parse_isq_value(isq, None).map_err(anyhow::Error::msg)?)
    } else {
        None
    };

    let prompt_chunksize = 1024;
    let mut model_builder = TextModelBuilder::new(&args.model_id).with_logging();
    if let Some(quant) = quant {
        model_builder = model_builder.with_isq(quant);
    }
    if let Some(calibration_file) = &args.calibration_file {
        model_builder = model_builder.with_calibration_file(calibration_file.clone());
    }

    let model = model_builder.build().await?;

    let text = read_to_string(&args.file)?;
    let tokens = model
        .tokenize(Either::Right(text), None, false, false, None)
        .await?;
    let bos_token = model
        .tokenize(Either::Right(" ".to_string()), None, true, false, None)
        .await?[0];
    let inner = model.inner();

    println!("Using bos token id `{bos_token}`.");

    let n_chunks = tokens.len().div_ceil(prompt_chunksize);
    let mut ppl_measurements = Vec::new();
    for (i, chunk) in tokens.chunks(prompt_chunksize).enumerate() {
        let start = Instant::now();
        let (logits, tokens) = {
            let chunk = [vec![bos_token], chunk.to_vec()].concat();
            process_chunk(inner, chunk).await?
        };

        // Upcast to float if we need to compute the loss to avoid potential precision issues
        let logits = logits.to_device(&Device::Cpu)?.to_dtype(DType::F32)?;
        // Shift so that tokens < n predict n
        let shift_logits = logits.narrow(0, 0, logits.dim(0)? - 1)?.contiguous()?;
        let shift_labels = Tensor::from_slice(&tokens[1..], (tokens.len() - 1,), &Device::Cpu)?;

        let loss_fct = cross_entropy_loss(&shift_logits, &shift_labels)?;
        let perplexity = loss_fct.exp()?.to_scalar::<f32>()?;
        let end = Instant::now();

        ppl_measurements.push(perplexity);
        println!(
            "Chunk {i}/{n_chunks} ({} tokens): Perplexity for `{}`, ISQ `{:?}`, {}s: {perplexity}",
            tokens.len(),
            args.file,
            quant,
            end.duration_since(start).as_secs_f32(),
        );
    }

    let mean = ppl_measurements.iter().sum::<f32>() / ppl_measurements.len() as f32;
    let variance = ppl_measurements
        .iter()
        .map(|e| (mean - e).powf(2.))
        .sum::<f32>()
        / ppl_measurements.len() as f32;
    let std_dev = variance.sqrt();
    println!();
    println!(
        "Final perplexity for `{}`, ISQ `{:?}`: {}±{} ppl",
        args.file, quant, mean, std_dev
    );

    Ok(())
}

pub fn with_paged_attn(
    self,
    paged_attn_cfg: impl FnOnce() -> Result<PagedAttentionConfig>,
) -> Result<Self>

Enable PagedAttention. Configure PagedAttention with a PagedAttentionConfig object, which can be created with sensible values with a PagedAttentionMetaBuilder.

If PagedAttention is not supported (query with paged_attn_supported), this will do nothing.
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("google/gemma-2-9b-it")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let request = RequestBuilder::new().add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

More examples:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    // Bullet list regex
    let request = RequestBuilder::new()
        .set_constraint(mistralrs::Constraint::Regex(
            "(- [^\n]*\n)+(- [^\n]*)(\n\n)?".to_string(),
        ))
        .add_message(TextMessageRole::User, "Please write a few jokes.");

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("deepseek-ai/DeepSeek-R1")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("deepseek-ai/DeepSeek-V2-Lite")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-MoE-instruct")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("ibm-granite/granite-4.0-tiny-preview")
        .with_isq(IsqType::Q8_0)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Additional examples:

- examples/smollm3/main.rs
- examples/mixture_of_quant_experts/main.rs
- examples/imatrix/main.rs
- examples/custom_logits_processor/main.rs
- examples/paged_attn/main.rs
- examples/json_schema/main.rs
- examples/llguidance/main.rs
- examples/isq/main.rs
- examples/async/main.rs
- examples/simple/main.rs
- examples/uqff/main.rs
- examples/anymoe_lora/main.rs
- examples/batching/main.rs
- examples/text_auto_device_map/main.rs
- examples/anymoe/main.rs
- examples/topology/main.rs
- examples/simple_stream/main.rs
- examples/agent/main.rs
- examples/agent_streaming/main.rs
- examples/mcp_client/main.rs
pub fn with_max_num_seqs(self, max_num_seqs: usize) -> Self

Set the maximum number of sequences which can be run at once.

pub fn with_no_kv_cache(self) -> Self

Disable the KV cache. This trades performance for lower memory usage.

pub fn with_prefix_cache_n(self, n_seqs: Option<usize>) -> Self

Set the number of sequences to hold in the prefix cache. Set to None to disable the prefix cacher.
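These three options govern scheduling and memory use. A minimal sketch combining two of them; the values are illustrative, not tuned recommendations:

// Allow up to 16 concurrent sequences and keep 4 in the prefix cache.
// The KV cache stays enabled because with_no_kv_cache is not called.
let builder = TextModelBuilder::new("some-org/some-model")
    .with_max_num_seqs(16)
    .with_prefix_cache_n(Some(4));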
pub fn with_logging(self) -> Self

Enable logging.
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("google/gemma-2-9b-it")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let request = RequestBuilder::new().add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

More examples:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    // Bullet list regex
    let request = RequestBuilder::new()
        .set_constraint(mistralrs::Constraint::Regex(
            "(- [^\n]*\n)+(- [^\n]*)(\n\n)?".to_string(),
        ))
        .add_message(TextMessageRole::User, "Please write a few jokes.");

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

async fn main() -> Result<()> {
    let model = LoraModelBuilder::from_text_model_builder(
        TextModelBuilder::new("meta-llama/Llama-3.2-1B-Instruct").with_logging(),
        vec!["danielhanchen/llama-3.2-lora".to_string()],
    )
    .build()
    .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "Hello! How are you? Please write generic binary search function in Rust.",
    );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("openai/gpt-oss-20b")
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("zai-org/GLM-4.7-Flash")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model =
        XLoraModelBuilder::from_text_model_builder(
            TextModelBuilder::new("HuggingFaceH4/zephyr-7b-beta").with_logging(),
            "lamm-mit/x-lora",
            serde_json::from_reader(File::open("my-ordering-file.json").unwrap_or_else(|_| {
                panic!("Could not load ordering file at my-ordering-file.json")
            }))?,
        )
        .build()
        .await?;

    let messages =
        TextMessages::new().add_message(TextMessageRole::User, "Hello! What is graphene.");

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Additional examples:

- examples/web_search/main.rs
- examples/deepseekr1/main.rs
- examples/deepseekv2/main.rs
- examples/phi3_5_moe/main.rs
- examples/granite/main.rs
- examples/smollm3/main.rs
- examples/mixture_of_quant_experts/main.rs
- examples/imatrix/main.rs
- examples/custom_logits_processor/main.rs
- examples/paged_attn/main.rs
- examples/json_schema/main.rs
- examples/llguidance/main.rs
- examples/isq/main.rs
- examples/async/main.rs
- examples/simple/main.rs
- examples/uqff/main.rs
- examples/anymoe_lora/main.rs
- examples/custom_search/main.rs
- examples/custom_tool_call/main.rs
- examples/speculative/main.rs
- examples/batching/main.rs
- examples/text_auto_device_map/main.rs
- examples/anymoe/main.rs
- examples/topology/main.rs
- examples/simple_stream/main.rs
- examples/qwen3/main.rs
- examples/tools/main.rs
- examples/perplexity/main.rs
- examples/agent/main.rs
- examples/agent_streaming/main.rs
- examples/mcp_client/main.rs
pub fn with_device_mapping(self, device_mapping: DeviceMapSetting) -> Self

Provide metadata to initialize the device mapper.
Examples found in repository:

async fn main() -> Result<()> {
    let auto_map_params = AutoDeviceMapParams::Text {
        max_seq_len: 4096,
        max_batch_size: 2,
    };
    let model = TextModelBuilder::new("meta-llama/Llama-3.3-70B-Instruct")
        .with_isq(IsqType::Q8_0)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .with_device_mapping(DeviceMapSetting::Auto(auto_map_params))
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    // Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability.
    let request = RequestBuilder::new().return_logprobs(true).add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    println!(
        "Logprobs: {:?}",
        &response.choices[0]
            .logprobs
            .as_ref()
            .unwrap()
            .content
            .as_ref()
            .unwrap()[0..3]
    );

    Ok(())
}

pub fn from_uqff(self, path: Vec<PathBuf>) -> Self

👎 Deprecated: Use UqffTextModelBuilder to load a UQFF model instead of the generic from_uqff.

Path to read a .uqff file from. Other necessary configuration files must be present at this location. For example, these include:

- residual.safetensors
- tokenizer.json
- config.json
- More depending on the model
pub fn write_uqff(self, path: PathBuf) -> Self

Path to write a .uqff file to and serialize the other necessary files.

The parent (part of the path excluding the filename) will determine where any other files serialized are written to. For example, these include:

- residual.safetensors
- tokenizer.json
- config.json
- More depending on the model
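A minimal sketch of quantizing with ISQ and serializing the result to UQFF; the output path is illustrative (see examples/uqff/main.rs in the repository for a complete program):

// Writes the .uqff file plus supporting files (tokenizer.json,
// config.json, ...) into the parent directory of the given path.
let _model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
    .with_isq(IsqType::Q4K)
    .write_uqff("phi3.5-q4k.uqff".into()) // hypothetical output path
    .build()
    .await?;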
pub fn from_hf_cache_pathf(self, hf_cache_path: PathBuf) -> Self

Cache path for Hugging Face models downloaded locally.
pub fn with_device(self, device: Device) -> Self

Set the main device to load this model onto. Automatic device mapping will be performed starting with this device.
pub fn with_matformer_config_path(self, path: PathBuf) -> Self

Path to a Matryoshka Transformer configuration CSV file.

pub fn with_matformer_slice_name(self, name: String) -> Self

Name of the slice to use from the Matryoshka Transformer configuration.
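These two options are used together. A minimal sketch; the model ID, CSV path, and slice name are all placeholders:

// Select a slice of a Matryoshka Transformer model from its config CSV.
let builder = TextModelBuilder::new("some-org/some-matformer-model")
    .with_matformer_config_path("matformer_config.csv".into()) // hypothetical path
    .with_matformer_slice_name("slice_name".to_string());      // hypothetical slice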
pub async fn build(self) -> Result<Model>
Examples found in repository:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("google/gemma-2-9b-it")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let request = RequestBuilder::new().add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

More examples:

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    // Bullet list regex
    let request = RequestBuilder::new()
        .set_constraint(mistralrs::Constraint::Regex(
            "(- [^\n]*\n)+(- [^\n]*)(\n\n)?".to_string(),
        ))
        .add_message(TextMessageRole::User, "Please write a few jokes.");

    let response = model.send_chat_request(request).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("openai/gpt-oss-20b")
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("zai-org/GLM-4.7-Flash")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("NousResearch/Hermes-3-Llama-3.1-8B")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_search(SearchEmbeddingModel::default())
        .build()
        .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "What is the weather forecast for Boston?",
    );
    let messages =
        RequestBuilder::from(messages).with_web_search_options(WebSearchOptions::default());

    let response = model.send_chat_request(messages).await?;

    println!("What is the weather forecast for Boston?\n\n");
    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

async fn main() -> Result<()> {
    let model = TextModelBuilder::new("deepseek-ai/DeepSeek-R1")
        .with_isq(IsqType::Q4K)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Additional examples:

- examples/deepseekv2/main.rs
- examples/phi3_5_moe/main.rs
- examples/granite/main.rs
- examples/smollm3/main.rs
- examples/mixture_of_quant_experts/main.rs
- examples/imatrix/main.rs
- examples/custom_logits_processor/main.rs
- examples/paged_attn/main.rs
- examples/json_schema/main.rs
- examples/llguidance/main.rs
- examples/isq/main.rs
- examples/async/main.rs
- examples/simple/main.rs
- examples/uqff/main.rs
- examples/custom_search/main.rs
- examples/custom_tool_call/main.rs
- examples/batching/main.rs
- examples/text_auto_device_map/main.rs
- examples/topology/main.rs
- examples/simple_stream/main.rs
- examples/qwen3/main.rs
- examples/tools/main.rs
- examples/perplexity/main.rs
- examples/agent/main.rs
- examples/agent_streaming/main.rs
- examples/mcp_client/main.rs
Trait Implementations

impl Clone for TextModelBuilder

fn clone(&self) -> TextModelBuilder
fn clone_from(&mut self, source: &Self)

impl From<TextModelBuilder> for AnyModelBuilder

fn from(b: TextModelBuilder) -> Self

impl From<UqffTextModelBuilder> for TextModelBuilder

fn from(value: UqffTextModelBuilder) -> Self
Auto Trait Implementations
impl Freeze for TextModelBuilder
impl !RefUnwindSafe for TextModelBuilder
impl Send for TextModelBuilder
impl Sync for TextModelBuilder
impl Unpin for TextModelBuilder
impl !UnwindSafe for TextModelBuilder
Blanket Implementations

impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T
where
    T: Clone,

impl<T> Downcast for T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true; otherwise converts self into a Right variant.

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true; otherwise converts self into a Right variant.

impl<F, T> IntoSample<T> for F
where
    T: FromSample<F>,

fn into_sample(self) -> T

impl<T> Pointable for T

impl<T> PolicyExt for T
where
    T: ?Sized,

impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,

fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset.

fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).

fn to_subset_unchecked(&self) -> SS
Same as self.to_subset but without any property checks. Always succeeds.

fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.