rig/completion/request.rs
1//! This module provides functionality for working with completion models.
2//! It provides traits, structs, and enums for generating completion requests,
3//! handling completion responses, and defining completion models.
4//!
5//! The main traits defined in this module are:
6//! - [Prompt]: Defines a high-level LLM one-shot prompt interface.
7//! - [Chat]: Defines a high-level LLM chat interface with chat history.
8//! - [Completion]: Defines a low-level LLM completion interface for generating completion requests.
9//! - [CompletionModel]: Defines a completion model that can be used to generate completion
10//! responses from requests.
11//!
12//! The [Prompt] and [Chat] traits are high level traits that users are expected to use
13//! to interact with LLM models. Moreover, it is good practice to implement one of these
14//! traits for composite agents that use multiple LLM models to generate responses.
15//!
//! The [Completion] trait defines a lower level interface that is useful when the user wants
17//! to further customize the request before sending it to the completion model provider.
18//!
19//! The [CompletionModel] trait is meant to act as the interface between providers and
20//! the library. It defines the methods that need to be implemented by the user to define
21//! a custom base completion model (i.e.: a private or third party LLM provider).
22//!
23//! The module also provides various structs and enums for representing generic completion requests,
24//! responses, and errors.
25//!
26//! Example Usage:
27//! ```rust
28//! use rig::providers::openai::{Client, self};
29//! use rig::completion::*;
30//!
31//! // Initialize the OpenAI client and a completion model
32//! let openai = Client::new("your-openai-api-key");
33//!
34//! let gpt_4 = openai.completion_model(openai::GPT_4);
35//!
36//! // Create the completion request
37//! let request = gpt_4.completion_request("Who are you?")
38//! .preamble("\
39//! You are Marvin, an extremely smart but depressed robot who is \
40//! nonetheless helpful towards humanity.\
41//! ")
42//! .temperature(0.5)
43//! .build();
44//!
45//! // Send the completion request and get the completion response
46//! let response = gpt_4.completion(request)
47//! .await
48//! .expect("Failed to get completion response");
49//!
50//! // Handle the completion response
//! match response.choice {
52//! ModelChoice::Message(message) => {
53//! // Handle the completion response as a message
54//! println!("Received message: {}", message);
55//! }
56//! ModelChoice::ToolCall(tool_name, tool_params) => {
57//! // Handle the completion response as a tool call
58//! println!("Received tool call: {} {:?}", tool_name, tool_params);
59//! }
60//! }
61//! ```
62//!
63//! For more information on how to use the completion functionality, refer to the documentation of
64//! the individual traits, structs, and enums defined in this module.
65
66use super::message::{AssistantContent, ContentFormat, DocumentMediaType};
67use crate::client::completion::CompletionModelHandle;
68use crate::streaming::StreamingCompletionResponse;
69use crate::{OneOrMany, streaming};
70use crate::{
71 json_utils,
72 message::{Message, UserContent},
73 tool::ToolSetError,
74};
75use futures::future::BoxFuture;
76use serde::de::DeserializeOwned;
77use serde::{Deserialize, Serialize};
78use std::collections::HashMap;
79use std::ops::{Add, AddAssign};
80use std::sync::Arc;
81use thiserror::Error;
82
/// Errors that can occur while building, sending, or decoding a completion request.
///
/// Variants marked `#[from]` can be produced from their source error with `?`.
#[derive(Debug, Error)]
pub enum CompletionError {
    /// Http error (e.g.: connection error, timeout, etc.)
    #[error("HttpError: {0}")]
    HttpError(#[from] reqwest::Error),

    /// Json error (e.g.: serialization, deserialization)
    #[error("JsonError: {0}")]
    JsonError(#[from] serde_json::Error),

    /// Url error (e.g.: invalid URL)
    #[error("UrlError: {0}")]
    UrlError(#[from] url::ParseError),

    /// Error building the completion request
    #[error("RequestError: {0}")]
    RequestError(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),

    /// Error parsing the completion response
    #[error("ResponseError: {0}")]
    ResponseError(String),

    /// Error returned by the completion model provider
    #[error("ProviderError: {0}")]
    ProviderError(String),
}
110
/// Errors that can be returned by the high-level [Prompt] and [Chat] interfaces.
#[derive(Debug, Error)]
pub enum PromptError {
    /// Something went wrong with the completion
    #[error("CompletionError: {0}")]
    CompletionError(#[from] CompletionError),

    /// There was an error while using a tool
    // NOTE(review): the display prefix is `ToolCallError` while the variant is `ToolError`.
    #[error("ToolCallError: {0}")]
    ToolError(#[from] ToolSetError),

    /// The LLM tried to call too many tools during a multi-turn conversation.
    /// To fix this, you may either need to lower the amount of tools your model has access to (and then create other agents to share the tool load)
    /// or increase the amount of turns given in `.multi_turn()`.
    #[error("MaxDepthError: (reached limit: {max_depth})")]
    MaxDepthError {
        /// The limit that was reached (this is the value shown in the error message).
        max_depth: usize,
        /// Presumably the chat history at the point the limit was hit - confirm against the caller.
        chat_history: Vec<Message>,
        /// Presumably the prompt that was being processed when the limit was hit - confirm against the caller.
        prompt: Message,
    },
}
132
133#[derive(Clone, Debug, Deserialize, Serialize)]
134pub struct Document {
135 pub id: String,
136 pub text: String,
137 #[serde(flatten)]
138 pub additional_props: HashMap<String, String>,
139}
140
141impl std::fmt::Display for Document {
142 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143 write!(
144 f,
145 concat!("<file id: {}>\n", "{}\n", "</file>\n"),
146 self.id,
147 if self.additional_props.is_empty() {
148 self.text.clone()
149 } else {
150 let mut sorted_props = self.additional_props.iter().collect::<Vec<_>>();
151 sorted_props.sort_by(|a, b| a.0.cmp(b.0));
152 let metadata = sorted_props
153 .iter()
154 .map(|(k, v)| format!("{k}: {v:?}"))
155 .collect::<Vec<_>>()
156 .join(" ");
157 format!("<metadata {} />\n{}", metadata, self.text)
158 }
159 )
160 }
161}
162
/// Description of a tool that is sent to the completion model provider as part of a
/// [CompletionRequest].
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolDefinition {
    /// Name of the tool.
    pub name: String,
    /// Description of the tool.
    pub description: String,
    /// The tool's parameters as an arbitrary JSON value.
    // NOTE(review): presumably a JSON Schema object - confirm against the providers.
    pub parameters: serde_json::Value,
}
169
170// ================================================================
171// Implementations
172// ================================================================
/// Trait defining a high-level LLM simple prompt interface (i.e.: prompt in, response out).
pub trait Prompt: Send + Sync {
    /// Send a simple prompt to the underlying completion model.
    ///
    /// If the completion model's response is a message, then it is returned as a string.
    ///
    /// If the completion model's response is a tool call, then the tool is called and
    /// the result is returned as a string.
    ///
    /// If the tool does not exist, or the tool call fails, then an error is returned.
    ///
    /// The return value only has to implement `IntoFuture` (with a `Send` future), so an
    /// implementor may return a value that is awaited later rather than a plain future.
    fn prompt(
        &self,
        prompt: impl Into<Message> + Send,
    ) -> impl std::future::IntoFuture<Output = Result<String, PromptError>, IntoFuture: Send>;
}
188
/// Trait defining a high-level LLM chat interface (i.e.: prompt and chat history in, response out).
pub trait Chat: Send + Sync {
    /// Send a prompt with optional chat history to the underlying completion model.
    ///
    /// If the completion model's response is a message, then it is returned as a string.
    ///
    /// If the completion model's response is a tool call, then the tool is called and the result
    /// is returned as a string.
    ///
    /// If the tool does not exist, or the tool call fails, then an error is returned.
    ///
    /// `chat_history` is taken by value; pass an empty `Vec` when there is no history.
    /// The return value only has to implement `IntoFuture` (with a `Send` future).
    fn chat(
        &self,
        prompt: impl Into<Message> + Send,
        chat_history: Vec<Message>,
    ) -> impl std::future::IntoFuture<Output = Result<String, PromptError>, IntoFuture: Send>;
}
205
/// Trait defining a low-level LLM completion interface
///
/// `M` is the [CompletionModel] that the returned request builder is bound to.
pub trait Completion<M: CompletionModel> {
    /// Generates a completion request builder for the given `prompt` and `chat_history`.
    /// This function is meant to be called by the user to further customize the
    /// request at prompt time before sending it.
    ///
    /// ❗IMPORTANT: The type that implements this trait might have already
    /// populated fields in the builder (the exact fields depend on the type).
    /// For fields that have already been set by the model, calling the corresponding
    /// method on the builder will overwrite the value set by the model.
    ///
    /// For example, the request builder returned by [`Agent::completion`](crate::agent::Agent::completion) will already
    /// contain the `preamble` provided when creating the agent.
    ///
    /// Building the request builder is itself asynchronous and fallible: the returned
    /// future resolves to a [CompletionError] on failure.
    fn completion(
        &self,
        prompt: impl Into<Message> + Send,
        chat_history: Vec<Message>,
    ) -> impl std::future::Future<Output = Result<CompletionRequestBuilder<M>, CompletionError>> + Send;
}
225
/// General completion response struct that contains the high-level completion choice
/// and the raw response. The completion choice contains one or more assistant content.
///
/// `T` is the type of the raw response kept alongside the provider-agnostic fields.
#[derive(Debug)]
pub struct CompletionResponse<T> {
    /// The completion choice (represented by one or more assistant message content)
    /// returned by the completion model provider
    pub choice: OneOrMany<AssistantContent>,
    /// Tokens used during prompting and responding
    pub usage: Usage,
    /// The raw response returned by the completion model provider
    pub raw_response: T,
}
238
/// Token accounting for a single completion request.
///
/// A count of `0` means the provider failed to supply that usage metric.
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub struct Usage {
    /// Number of input tokens.
    pub input_tokens: u64,
    /// Number of output tokens.
    pub output_tokens: u64,
    /// Total number of tokens; stored separately as some providers may only report one number.
    pub total_tokens: u64,
}

impl Usage {
    /// Returns a `Usage` with every counter set to zero.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Add for Usage {
    type Output = Self;

    /// Sums two usages counter by counter.
    fn add(mut self, other: Self) -> Self::Output {
        self += other;
        self
    }
}

impl AddAssign for Usage {
    /// Adds each counter of `other` onto the matching counter of `self`.
    fn add_assign(&mut self, other: Self) {
        let Usage {
            input_tokens,
            output_tokens,
            total_tokens,
        } = other;

        self.input_tokens += input_tokens;
        self.output_tokens += output_tokens;
        self.total_tokens += total_tokens;
    }
}
284
/// Trait defining a completion model that can be used to generate completion responses.
/// This trait is meant to be implemented by the user to define a custom completion model,
/// either from a third party provider (e.g.: OpenAI) or a local model.
pub trait CompletionModel: Clone + Send + Sync {
    /// The raw response type returned by the underlying completion model.
    type Response: Send + Sync + Serialize + DeserializeOwned;
    /// The raw response type returned by the underlying completion model when streaming.
    type StreamingResponse: Clone + Unpin + Send + Sync + Serialize + DeserializeOwned;

    /// Generates a completion response for the given completion request.
    fn completion(
        &self,
        request: CompletionRequest,
    ) -> impl std::future::Future<
        Output = Result<CompletionResponse<Self::Response>, CompletionError>,
    > + Send;

    /// Generates a streaming completion response for the given completion request.
    fn stream(
        &self,
        request: CompletionRequest,
    ) -> impl std::future::Future<
        Output = Result<StreamingCompletionResponse<Self::StreamingResponse>, CompletionError>,
    > + Send;

    /// Generates a completion request builder for the given `prompt`.
    ///
    /// The default implementation hands a clone of this model to the builder.
    fn completion_request(&self, prompt: impl Into<Message>) -> CompletionRequestBuilder<Self> {
        CompletionRequestBuilder::new(self.clone(), prompt)
    }
}
314pub trait CompletionModelDyn: Send + Sync {
315 fn completion(
316 &self,
317 request: CompletionRequest,
318 ) -> BoxFuture<'_, Result<CompletionResponse<()>, CompletionError>>;
319
320 fn stream(
321 &self,
322 request: CompletionRequest,
323 ) -> BoxFuture<Result<StreamingCompletionResponse<()>, CompletionError>>;
324
325 fn completion_request(
326 &self,
327 prompt: Message,
328 ) -> CompletionRequestBuilder<CompletionModelHandle<'_>>;
329}
330
331impl<T, R> CompletionModelDyn for T
332where
333 T: CompletionModel<StreamingResponse = R>,
334 R: Clone + Unpin + 'static,
335{
336 fn completion(
337 &self,
338 request: CompletionRequest,
339 ) -> BoxFuture<Result<CompletionResponse<()>, CompletionError>> {
340 Box::pin(async move {
341 self.completion(request)
342 .await
343 .map(|resp| CompletionResponse {
344 choice: resp.choice,
345 usage: resp.usage,
346 raw_response: (),
347 })
348 })
349 }
350
351 fn stream(
352 &self,
353 request: CompletionRequest,
354 ) -> BoxFuture<Result<StreamingCompletionResponse<()>, CompletionError>> {
355 Box::pin(async move {
356 let resp = self.stream(request).await?;
357 let inner = resp.inner;
358
359 let stream = Box::pin(streaming::StreamingResultDyn {
360 inner: Box::pin(inner),
361 });
362
363 Ok(StreamingCompletionResponse::stream(stream))
364 })
365 }
366
367 /// Generates a completion request builder for the given `prompt`.
368 fn completion_request(
369 &self,
370 prompt: Message,
371 ) -> CompletionRequestBuilder<CompletionModelHandle<'_>> {
372 CompletionRequestBuilder::new(
373 CompletionModelHandle {
374 inner: Arc::new(self.clone()),
375 },
376 prompt,
377 )
378 }
379}
380
381/// Struct representing a general completion request that can be sent to a completion model provider.
382#[derive(Debug, Clone)]
383pub struct CompletionRequest {
384 /// The preamble to be sent to the completion model provider
385 pub preamble: Option<String>,
386 /// The chat history to be sent to the completion model provider.
387 /// The very last message will always be the prompt (hence why there is *always* one)
388 pub chat_history: OneOrMany<Message>,
389 /// The documents to be sent to the completion model provider
390 pub documents: Vec<Document>,
391 /// The tools to be sent to the completion model provider
392 pub tools: Vec<ToolDefinition>,
393 /// The temperature to be sent to the completion model provider
394 pub temperature: Option<f64>,
395 /// The max tokens to be sent to the completion model provider
396 pub max_tokens: Option<u64>,
397 /// Additional provider-specific parameters to be sent to the completion model provider
398 pub additional_params: Option<serde_json::Value>,
399}
400
401impl CompletionRequest {
402 /// Returns documents normalized into a message (if any).
403 /// Most providers do not accept documents directly as input, so it needs to convert into a
404 /// `Message` so that it can be incorporated into `chat_history` as a
405 pub fn normalized_documents(&self) -> Option<Message> {
406 if self.documents.is_empty() {
407 return None;
408 }
409
410 // Most providers will convert documents into a text unless it can handle document messages.
411 // We use `UserContent::document` for those who handle it directly!
412 let messages = self
413 .documents
414 .iter()
415 .map(|doc| {
416 UserContent::document(
417 doc.to_string(),
418 // In the future, we can customize `Document` to pass these extra types through.
419 // Most providers ditch these but they might want to use them.
420 Some(ContentFormat::String),
421 Some(DocumentMediaType::TXT),
422 )
423 })
424 .collect::<Vec<_>>();
425
426 Some(Message::User {
427 content: OneOrMany::many(messages).expect("There will be atleast one document"),
428 })
429 }
430}
431
/// Builder struct for constructing a completion request.
///
/// Example usage:
/// ```rust
/// use rig::{
///     providers::openai::{Client, self},
///     completion::CompletionRequestBuilder,
/// };
///
/// let openai = Client::new("your-openai-api-key");
/// let model = openai.completion_model(openai::GPT_4O).build();
///
/// // Create the completion request and execute it separately.
/// // The builder takes ownership of the model, so hand it a clone and keep `model`
/// // for sending the request afterwards.
/// let request = CompletionRequestBuilder::new(model.clone(), "Who are you?".to_string())
///     .preamble("You are Marvin from the Hitchhiker's Guide to the Galaxy.".to_string())
///     .temperature(0.5)
///     .build();
///
/// let response = model.completion(request)
///     .await
///     .expect("Failed to get completion response");
/// ```
///
/// Alternatively, you can execute the completion request directly from the builder:
/// ```rust
/// use rig::{
///     providers::openai::{Client, self},
///     completion::CompletionRequestBuilder,
/// };
///
/// let openai = Client::new("your-openai-api-key");
/// let model = openai.completion_model(openai::GPT_4O).build();
///
/// // Create the completion request and execute it directly
/// let response = CompletionRequestBuilder::new(model, "Who are you?".to_string())
///     .preamble("You are Marvin from the Hitchhiker's Guide to the Galaxy.".to_string())
///     .temperature(0.5)
///     .send()
///     .await
///     .expect("Failed to get completion response");
/// ```
///
/// Note: It is usually unnecessary to create a completion request builder directly.
/// Instead, use the [CompletionModel::completion_request] method.
pub struct CompletionRequestBuilder<M: CompletionModel> {
    // The model that `send` / `stream` dispatch the built request to.
    model: M,
    // The prompt; `build` appends it after `chat_history` as the last message.
    prompt: Message,
    preamble: Option<String>,
    // Messages that precede the prompt.
    chat_history: Vec<Message>,
    documents: Vec<Document>,
    tools: Vec<ToolDefinition>,
    temperature: Option<f64>,
    max_tokens: Option<u64>,
    additional_params: Option<serde_json::Value>,
}
487
488impl<M: CompletionModel> CompletionRequestBuilder<M> {
489 pub fn new(model: M, prompt: impl Into<Message>) -> Self {
490 Self {
491 model,
492 prompt: prompt.into(),
493 preamble: None,
494 chat_history: Vec::new(),
495 documents: Vec::new(),
496 tools: Vec::new(),
497 temperature: None,
498 max_tokens: None,
499 additional_params: None,
500 }
501 }
502
503 /// Sets the preamble for the completion request.
504 pub fn preamble(mut self, preamble: String) -> Self {
505 self.preamble = Some(preamble);
506 self
507 }
508
509 /// Adds a message to the chat history for the completion request.
510 pub fn message(mut self, message: Message) -> Self {
511 self.chat_history.push(message);
512 self
513 }
514
515 /// Adds a list of messages to the chat history for the completion request.
516 pub fn messages(self, messages: Vec<Message>) -> Self {
517 messages
518 .into_iter()
519 .fold(self, |builder, msg| builder.message(msg))
520 }
521
522 /// Adds a document to the completion request.
523 pub fn document(mut self, document: Document) -> Self {
524 self.documents.push(document);
525 self
526 }
527
528 /// Adds a list of documents to the completion request.
529 pub fn documents(self, documents: Vec<Document>) -> Self {
530 documents
531 .into_iter()
532 .fold(self, |builder, doc| builder.document(doc))
533 }
534
535 /// Adds a tool to the completion request.
536 pub fn tool(mut self, tool: ToolDefinition) -> Self {
537 self.tools.push(tool);
538 self
539 }
540
541 /// Adds a list of tools to the completion request.
542 pub fn tools(self, tools: Vec<ToolDefinition>) -> Self {
543 tools
544 .into_iter()
545 .fold(self, |builder, tool| builder.tool(tool))
546 }
547
548 /// Adds additional parameters to the completion request.
549 /// This can be used to set additional provider-specific parameters. For example,
550 /// Cohere's completion models accept a `connectors` parameter that can be used to
551 /// specify the data connectors used by Cohere when executing the completion
552 /// (see `examples/cohere_connectors.rs`).
553 pub fn additional_params(mut self, additional_params: serde_json::Value) -> Self {
554 match self.additional_params {
555 Some(params) => {
556 self.additional_params = Some(json_utils::merge(params, additional_params));
557 }
558 None => {
559 self.additional_params = Some(additional_params);
560 }
561 }
562 self
563 }
564
565 /// Sets the additional parameters for the completion request.
566 /// This can be used to set additional provider-specific parameters. For example,
567 /// Cohere's completion models accept a `connectors` parameter that can be used to
568 /// specify the data connectors used by Cohere when executing the completion
569 /// (see `examples/cohere_connectors.rs`).
570 pub fn additional_params_opt(mut self, additional_params: Option<serde_json::Value>) -> Self {
571 self.additional_params = additional_params;
572 self
573 }
574
575 /// Sets the temperature for the completion request.
576 pub fn temperature(mut self, temperature: f64) -> Self {
577 self.temperature = Some(temperature);
578 self
579 }
580
581 /// Sets the temperature for the completion request.
582 pub fn temperature_opt(mut self, temperature: Option<f64>) -> Self {
583 self.temperature = temperature;
584 self
585 }
586
587 /// Sets the max tokens for the completion request.
588 /// Note: This is required if using Anthropic
589 pub fn max_tokens(mut self, max_tokens: u64) -> Self {
590 self.max_tokens = Some(max_tokens);
591 self
592 }
593
594 /// Sets the max tokens for the completion request.
595 /// Note: This is required if using Anthropic
596 pub fn max_tokens_opt(mut self, max_tokens: Option<u64>) -> Self {
597 self.max_tokens = max_tokens;
598 self
599 }
600
601 /// Builds the completion request.
602 pub fn build(self) -> CompletionRequest {
603 let chat_history = OneOrMany::many([self.chat_history, vec![self.prompt]].concat())
604 .expect("There will always be atleast the prompt");
605
606 CompletionRequest {
607 preamble: self.preamble,
608 chat_history,
609 documents: self.documents,
610 tools: self.tools,
611 temperature: self.temperature,
612 max_tokens: self.max_tokens,
613 additional_params: self.additional_params,
614 }
615 }
616
617 /// Sends the completion request to the completion model provider and returns the completion response.
618 pub async fn send(self) -> Result<CompletionResponse<M::Response>, CompletionError> {
619 let model = self.model.clone();
620 model.completion(self.build()).await
621 }
622
623 /// Stream the completion request
624 pub async fn stream<'a>(
625 self,
626 ) -> Result<StreamingCompletionResponse<M::StreamingResponse>, CompletionError>
627 where
628 <M as CompletionModel>::StreamingResponse: 'a,
629 Self: 'a,
630 {
631 let model = self.model.clone();
632 model.stream(self.build()).await
633 }
634}
635
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a document that carries no additional properties.
    fn bare_document(id: &str, text: &str) -> Document {
        Document {
            id: id.to_string(),
            text: text.to_string(),
            additional_props: HashMap::new(),
        }
    }

    /// Builds a minimal request whose only variable part is its document list.
    fn request_with(documents: Vec<Document>) -> CompletionRequest {
        CompletionRequest {
            preamble: None,
            chat_history: OneOrMany::one("What is the capital of France?".into()),
            documents,
            tools: Vec::new(),
            temperature: None,
            max_tokens: None,
            additional_params: None,
        }
    }

    #[test]
    fn test_document_display_without_metadata() {
        let rendered = bare_document("123", "This is a test document.").to_string();

        assert_eq!(
            rendered,
            "<file id: 123>\nThis is a test document.\n</file>\n"
        );
    }

    #[test]
    fn test_document_display_with_metadata() {
        let doc = Document {
            id: "123".to_string(),
            text: "This is a test document.".to_string(),
            additional_props: HashMap::from([
                ("author".to_string(), "John Doe".to_string()),
                ("length".to_string(), "42".to_string()),
            ]),
        };

        // Metadata entries are expected in key order.
        let expected = concat!(
            "<file id: 123>\n",
            "<metadata author: \"John Doe\" length: \"42\" />\n",
            "This is a test document.\n",
            "</file>\n"
        );
        assert_eq!(doc.to_string(), expected);
    }

    #[test]
    fn test_normalize_documents_with_documents() {
        let request = request_with(vec![
            bare_document("doc1", "Document 1 text."),
            bare_document("doc2", "Document 2 text."),
        ]);

        let expected_parts = vec![
            UserContent::document(
                "<file id: doc1>\nDocument 1 text.\n</file>\n".to_string(),
                Some(ContentFormat::String),
                Some(DocumentMediaType::TXT),
            ),
            UserContent::document(
                "<file id: doc2>\nDocument 2 text.\n</file>\n".to_string(),
                Some(ContentFormat::String),
                Some(DocumentMediaType::TXT),
            ),
        ];
        let expected = Message::User {
            content: OneOrMany::many(expected_parts)
                .expect("There will be at least one document"),
        };

        assert_eq!(request.normalized_documents(), Some(expected));
    }

    #[test]
    fn test_normalize_documents_without_documents() {
        let request = request_with(Vec::new());

        assert_eq!(request.normalized_documents(), None);
    }
}
731}