openai_tools/common/message.rs
1//! Message module for OpenAI tools.
2//!
3//! This module provides data structures and functionality for handling OpenAI API messages,
4//! including text content, images, tool calls, and multi-modal interactions. It serves as
5//! the foundation for communication between users and OpenAI models.
6//!
7//! ## Core Components
8//!
9//! - [`Message`] - The main message structure containing role, content, and metadata
10//! - [`Content`] - Represents different types of content (text, images, etc.)
11//! - [`ToolCall`] - Represents function calls made by OpenAI models
12//!
13//! ## Supported Content Types
14//!
15//! The module supports various content types for rich interactions:
16//!
17//! - **Text content**: Plain text messages
18//! - **Image content**: Images from URLs or local files (PNG, JPEG, GIF)
19//! - **Multi-modal content**: Combining text and images in a single message
20//!
21//! ## Usage in the Library
22//!
23//! This module is used throughout the OpenAI tools library:
24//!
25//! - In [`crate::chat::request`] - For Chat Completion API message handling
26//! - In [`crate::responses::request`] - For Responses API message processing
27//! - In [`crate::chat::response`] - For parsing OpenAI API responses
28//!
29//! ## Examples
30//!
31//! ### Basic Text Message
32//!
33//! ```rust,no_run
34//! use openai_tools::common::message::Message;
35//! use openai_tools::common::role::Role;
36//!
37//! # fn main() {
38//! let message = Message::from_string(Role::User, "Hello, how are you?");
39//! # }
40//! ```
41//!
42//! ### Multi-modal Message with Text and Image
43//!
44//! ```rust,no_run
45//! use openai_tools::common::message::{Message, Content};
46//! use openai_tools::common::role::Role;
47//!
48//! # fn main() {
49//! let contents = vec![
50//! Content::from_text("What's in this image?"),
51//! Content::from_image_file("path/to/image.png"),
52//! ];
53//! let message = Message::from_message_array(Role::User, contents);
54//! # }
55//! ```
56//!
57
58use crate::common::{function::Function, role::Role};
59use base64::prelude::*;
60use serde::{ser::SerializeStruct, Deserialize, Serialize};
61
/// Represents a tool call made by an OpenAI model.
///
/// Tool calls are generated when an OpenAI model decides to invoke a function
/// or tool as part of its response. This structure contains the metadata
/// necessary to identify and execute the requested function.
///
/// # Fields
///
/// * `id` - Unique identifier for this tool call
/// * `type_name` - The type of tool call (typically "function")
/// * `function` - The function details including name and arguments
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::ToolCall;
/// use openai_tools::common::function::Function;
///
/// // Tool calls are typically received from OpenAI API responses
/// // and contain function invocation details
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ToolCall {
    /// Unique identifier for this tool call; echo it back (e.g. via
    /// `Message::from_tool_call_response`) so the API can match call and result.
    pub id: String,
    /// The type of tool call (e.g., "function"); serialized as `type`,
    /// renamed because `type` is a reserved word in Rust.
    #[serde(rename = "type")]
    pub type_name: String,
    /// The function to be called with its arguments
    pub function: Function,
}
93
/// Represents different types of content that can be included in a message.
///
/// Content can be either text or images, supporting multi-modal interactions
/// with OpenAI models. Images can be provided as URLs or loaded from local files
/// and are automatically encoded as base64 data URLs.
///
/// # Supported Image Formats
///
/// * PNG
/// * JPEG/JPG
/// * GIF
///
/// # Fields
///
/// * `type_name` - The type of content ("input_text" or "input_image")
/// * `text` - Optional text content
/// * `image_url` - Optional image URL or base64 data URL
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::Content;
///
/// // Create text content
/// let text_content = Content::from_text("Hello, world!");
///
/// // Create image content from URL
/// let image_content = Content::from_image_url("https://example.com/image.png");
///
/// // Create image content from local file
/// let file_content = Content::from_image_file("path/to/image.png");
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Content {
    /// The type of content ("input_text" or "input_image"); serialized as
    /// `type`, renamed because `type` is a reserved word in Rust.
    #[serde(rename = "type")]
    pub type_name: String,
    /// Text payload; `Some` only for "input_text" content. Omitted from
    /// serialization when `None` so image content stays minimal on the wire.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    /// Image URL or base64 data URL; `Some` only for "input_image" content.
    /// Omitted from serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
}
138
139impl Content {
140 /// Creates a new Content instance with text content.
141 ///
142 /// # Arguments
143 ///
144 /// * `text` - The text content to include
145 ///
146 /// # Returns
147 ///
148 /// A new Content instance with type "input_text"
149 ///
150 /// # Examples
151 ///
152 /// ```rust,no_run
153 /// use openai_tools::common::message::Content;
154 ///
155 /// let content = Content::from_text("Hello, world!");
156 /// assert_eq!(content.type_name, "input_text");
157 /// ```
158 pub fn from_text<T: AsRef<str>>(text: T) -> Self {
159 Self { type_name: "input_text".to_string(), text: Some(text.as_ref().to_string()), image_url: None }
160 }
161
162 /// Creates a new Content instance with an image URL.
163 ///
164 /// # Arguments
165 ///
166 /// * `image_url` - The URL of the image
167 ///
168 /// # Returns
169 ///
170 /// A new Content instance with type "input_image"
171 ///
172 /// # Examples
173 ///
174 /// ```rust
175 /// use openai_tools::common::message::Content;
176 ///
177 /// let content = Content::from_image_url("https://example.com/image.png");
178 /// assert_eq!(content.type_name, "input_image");
179 /// ```
180 pub fn from_image_url<T: AsRef<str>>(image_url: T) -> Self {
181 Self { type_name: "input_image".to_string(), text: None, image_url: Some(image_url.as_ref().to_string()) }
182 }
183
184 /// Creates a new Content instance from a local image file.
185 ///
186 /// This method reads an image file from the filesystem, encodes it as base64,
187 /// and creates a data URL suitable for use with OpenAI APIs.
188 ///
189 /// # Arguments
190 ///
191 /// * `file_path` - Path to the image file
192 ///
193 /// # Returns
194 ///
195 /// A new Content instance with type "input_image" and base64-encoded image data
196 ///
197 /// # Panics
198 ///
199 /// Panics if:
200 /// - The file cannot be opened
201 /// - The image cannot be decoded
202 /// - The image format is unsupported
203 /// - The image cannot be encoded to the buffer
204 ///
205 /// # Supported Formats
206 ///
207 /// * PNG
208 /// * JPEG/JPG
209 /// * GIF
210 ///
211 /// # Examples
212 ///
213 /// ```rust,no_run
214 /// use openai_tools::common::message::Content;
215 ///
216 /// let content = Content::from_image_file("path/to/image.png");
217 /// assert_eq!(content.type_name, "input_image");
218 /// ```
219 pub fn from_image_file<T: AsRef<str>>(file_path: T) -> Self {
220 let ext = file_path.as_ref();
221 let ext = std::path::Path::new(&ext).extension().and_then(|s| s.to_str()).unwrap();
222 let img = image::ImageReader::open(file_path.as_ref()).expect("Failed to open image file").decode().expect("Failed to decode image");
223 let img_fmt = match ext {
224 "png" => image::ImageFormat::Png,
225 "jpg" | "jpeg" => image::ImageFormat::Jpeg,
226 "gif" => image::ImageFormat::Gif,
227 _ => panic!("Unsupported image format"),
228 };
229 let mut buf = std::io::Cursor::new(Vec::new());
230 img.write_to(&mut buf, img_fmt).expect("Failed to write image to buffer");
231 let base64_string = BASE64_STANDARD.encode(buf.into_inner());
232 let image_url = format!("data:image/{ext};base64,{base64_string}");
233 Self { type_name: "input_image".to_string(), text: None, image_url: Some(image_url) }
234 }
235}
236
237/// Represents a message in an OpenAI conversation.
238///
239/// Messages are the core communication unit between users and OpenAI models.
240/// They can contain various types of content including text, images, tool calls,
241/// and metadata like refusals and annotations.
242///
243/// # Content Types
244///
245/// A message can contain either:
246/// - Single content (`content` field) - for simple text messages
247/// - Multiple content items (`content_list` field) - for multi-modal messages
248///
249/// # Fields
250///
251/// * `role` - The role of the message sender (User, Assistant, System, etc.)
252/// * `content` - Optional single content item
253/// * `content_list` - Optional list of content items for multi-modal messages
254/// * `tool_calls` - Optional list of tool calls made by the assistant
255/// * `tool_call_id` - Optional tool call ID for tracking specific tool calls
256/// * `refusal` - Optional refusal message if the model declined to respond
257/// * `annotations` - Optional list of annotations or metadata
258///
259/// # Examples
260///
261/// ```rust,no_run
262/// use openai_tools::common::message::{Message, Content};
263/// use openai_tools::common::role::Role;
264///
265/// // Simple text message
266/// let message = Message::from_string(Role::User, "Hello!");
267///
268/// // Multi-modal message with text and image
269/// let contents = vec![
270/// Content::from_text("What's in this image?"),
271/// Content::from_image_url("https://example.com/image.png"),
272/// ];
273/// let message = Message::from_message_array(Role::User, contents);
274/// ```
275#[derive(Debug, Clone)]
276pub struct Message {
277 /// The role of the message sender
278 pub role: Role,
279 /// Optional single content item
280 pub content: Option<Content>,
281 /// Optional list of content items for multi-modal messages
282 pub content_list: Option<Vec<Content>>,
283 /// Optional list of tool calls made by the assistant
284 pub tool_calls: Option<Vec<ToolCall>>,
285 /// Optional refusal message if the model declined to respond
286 pub refusal: Option<String>,
287 /// Optional tool call ID for tracking specific tool calls
288 pub tool_call_id: Option<String>,
289 /// Optional list of annotations or metadata
290 pub annotations: Option<Vec<String>>,
291}
292
293/// Custom serialization implementation for Message.
294///
295/// This implementation ensures that messages are serialized correctly for the OpenAI API,
296/// handling the mutually exclusive nature of `content` and `content_list` fields.
297/// Either `content` or `content_list` must be present, but not both.
298/// Additionally, it handles optional fields like `tool_call_id` for tool call responses.
299impl Serialize for Message {
300 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
301 where
302 S: serde::Serializer,
303 {
304 let mut state = serializer.serialize_struct("Message", 3)?;
305 state.serialize_field("role", &self.role)?;
306
307 // Ensure that either content or contents is present, but not both
308 if self.role != Role::Assistant {
309 if (self.content.is_none() && self.content_list.is_none()) || (self.content.is_some() && self.content_list.is_some()) {
310 return Err(serde::ser::Error::custom("Message must have either content or contents"));
311 }
312 }
313
314 // Serialize optional fields
315 if let Some(content) = &self.content {
316 state.serialize_field("content", &content.text)?;
317 }
318 if let Some(contents) = &self.content_list {
319 state.serialize_field("content", contents)?;
320 }
321 if let Some(tool_call_id) = &self.tool_call_id {
322 state.serialize_field("tool_call_id", tool_call_id)?;
323 }
324 if let Some(tool_calls) = &self.tool_calls {
325 state.serialize_field("tool_calls", tool_calls)?;
326 }
327
328 state.end()
329 }
330}
331
332/// Custom deserialization implementation for Message.
333///
334/// This implementation handles the deserialization of messages from OpenAI API responses,
335/// converting string content to Content objects and handling optional fields including
336/// `tool_call_id` for tool call tracking.
337impl<'de> Deserialize<'de> for Message {
338 fn deserialize<D>(deserializer: D) -> Result<Message, D::Error>
339 where
340 D: serde::Deserializer<'de>,
341 {
342 #[derive(Deserialize)]
343 struct MessageData {
344 role: Role,
345 content: Option<String>,
346 tool_calls: Option<Vec<ToolCall>>,
347 refusal: Option<String>,
348 annotations: Option<Vec<String>>,
349 }
350
351 let data = MessageData::deserialize(deserializer)?;
352 let content = if let Some(text) = data.content { Some(Content::from_text(text)) } else { None };
353
354 Ok(Message {
355 role: data.role,
356 content,
357 content_list: None,
358 tool_calls: data.tool_calls,
359 tool_call_id: None,
360 refusal: data.refusal,
361 annotations: data.annotations,
362 })
363 }
364}
365
366impl Message {
367 /// Creates a new Message with a single text content.
368 ///
369 /// This is a convenience method for creating simple text messages.
370 ///
371 /// # Arguments
372 ///
373 /// * `role` - The role of the message sender
374 /// * `message` - The text content of the message
375 ///
376 /// # Returns
377 ///
378 /// A new Message instance with the specified role and text content
379 ///
380 /// # Examples
381 ///
382 /// ```rust,no_run
383 /// use openai_tools::common::message::Message;
384 /// use openai_tools::common::role::Role;
385 ///
386 /// let message = Message::from_string(Role::User, "Hello, how are you?");
387 /// ```
388 pub fn from_string<T: AsRef<str>>(role: Role, message: T) -> Self {
389 Self {
390 role,
391 content: Some(Content::from_text(message.as_ref())),
392 content_list: None,
393 tool_calls: None,
394 tool_call_id: None,
395 refusal: None,
396 annotations: None,
397 }
398 }
399
400 /// Creates a new Message with multiple content items.
401 ///
402 /// This method is used for multi-modal messages that contain multiple
403 /// types of content such as text and images.
404 ///
405 /// # Arguments
406 ///
407 /// * `role` - The role of the message sender
408 /// * `contents` - Vector of content items to include in the message
409 ///
410 /// # Returns
411 ///
412 /// A new Message instance with the specified role and content list
413 ///
414 /// # Examples
415 ///
416 /// ```rust,no_run
417 /// use openai_tools::common::message::{Message, Content};
418 /// use openai_tools::common::role::Role;
419 ///
420 /// let contents = vec![
421 /// Content::from_text("What's in this image?"),
422 /// Content::from_image_url("https://example.com/image.png"),
423 /// ];
424 /// let message = Message::from_message_array(Role::User, contents);
425 /// ```
426 pub fn from_message_array(role: Role, contents: Vec<Content>) -> Self {
427 Self { role, content: None, content_list: Some(contents), tool_calls: None, tool_call_id: None, refusal: None, annotations: None }
428 }
429
430 /// Creates a new Message as a response to a specific tool call.
431 ///
432 /// This method is used to create messages that respond to tool calls made by
433 /// OpenAI models. The message will have the Assistant role and includes the
434 /// tool call ID for tracking purposes.
435 ///
436 /// # Arguments
437 ///
438 /// * `tool_call_response` - The response content for the tool call
439 /// * `tool_call_id` - The ID of the tool call this message is responding to
440 ///
441 /// # Returns
442 ///
443 /// A new Message instance with Assistant role, response content, and tool call ID
444 ///
445 /// # Examples
446 ///
447 /// ```rust,no_run
448 /// use openai_tools::common::message::Message;
449 ///
450 /// let response = Message::from_tool_call_response(
451 /// "The weather in Tokyo is 25°C and sunny",
452 /// "tool_call_123"
453 /// );
454 /// ```
455 pub fn from_tool_call_response<T: AsRef<str>>(tool_call_response: T, tool_call_id: T) -> Self {
456 Self {
457 role: Role::Tool,
458 content: Some(Content::from_text(tool_call_response.as_ref())),
459 content_list: None,
460 tool_calls: None,
461 tool_call_id: Some(tool_call_id.as_ref().to_string()),
462 refusal: None,
463 annotations: None,
464 }
465 }
466 /// Calculates the approximate token count for the message content.
467 ///
468 /// This method uses the tiktoken library to estimate the number of tokens
469 /// that would be consumed by this message when sent to OpenAI's API.
470 /// Only text content is counted; images are not included in the calculation.
471 ///
472 /// # Returns
473 ///
474 /// The estimated number of tokens for the text content in this message
475 ///
476 /// # Examples
477 ///
478 /// ```rust,no_run
479 /// use openai_tools::common::message::Message;
480 /// use openai_tools::common::role::Role;
481 ///
482 /// let message = Message::from_string(Role::User, "Hello, world!");
483 /// let token_count = message.get_input_token_count();
484 /// ```
485 pub fn get_input_token_count(&self) -> usize {
486 let bpe = tiktoken_rs::o200k_base().unwrap();
487 if let Some(content) = &self.content {
488 bpe.encode_with_special_tokens(&content.clone().text.unwrap()).len()
489 } else if let Some(contents) = &self.content_list {
490 let mut total_tokens = 0;
491 for content in contents {
492 if let Some(text) = &content.text {
493 total_tokens += bpe.encode_with_special_tokens(text).len();
494 }
495 }
496 total_tokens
497 } else {
498 0 // No content to count tokens for
499 }
500 }
501}