openai_tools/common/
message.rs

//! Message module for OpenAI tools.
//!
//! This module provides data structures and functionality for handling OpenAI API messages,
//! including text content, images, tool calls, and multi-modal interactions. It serves as
//! the foundation for communication between users and OpenAI models.
//!
//! ## Core Components
//!
//! - [`Message`] - The main message structure containing role, content, and metadata
//! - [`Content`] - Represents different types of content (text, images, etc.)
//! - [`ToolCall`] - Represents function calls made by OpenAI models
//!
//! ## Supported Content Types
//!
//! The module supports various content types for rich interactions:
//!
//! - **Text content**: Plain text messages
//! - **Image content**: Images from URLs or local files (PNG, JPEG, GIF)
//! - **Multi-modal content**: Combining text and images in a single message
//!
//! ## Usage in the Library
//!
//! This module is used throughout the OpenAI tools library:
//!
//! - In [`crate::chat::request`] - For Chat Completion API message handling
//! - In [`crate::responses::request`] - For Responses API message processing
//! - In [`crate::chat::response`] - For parsing OpenAI API responses
//!
//! ## Examples
//!
//! ### Basic Text Message
//!
//! ```rust,no_run
//! use openai_tools::common::message::Message;
//! use openai_tools::common::role::Role;
//!
//! # fn main() {
//! let message = Message::from_string(Role::User, "Hello, how are you?");
//! # }
//! ```
//!
//! ### Multi-modal Message with Text and Image
//!
//! ```rust,no_run
//! use openai_tools::common::message::{Message, Content};
//! use openai_tools::common::role::Role;
//!
//! # fn main() {
//! let contents = vec![
//!     Content::from_text("What's in this image?"),
//!     Content::from_image_file("path/to/image.png"),
//! ];
//! let message = Message::from_message_array(Role::User, contents);
//! # }
//! ```
//!
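//! ### Responding to a Tool Call
//!
//! A minimal sketch of replying to a tool call with [`Message::from_tool_call_response`]
//! (the tool call ID shown is a placeholder):
//!
//! ```rust,no_run
//! use openai_tools::common::message::Message;
//!
//! # fn main() {
//! let response = Message::from_tool_call_response("The weather in Tokyo is 25°C and sunny", "tool_call_123");
//! # }
//! ```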

use crate::common::{function::Function, role::Role};
use base64::prelude::*;
use serde::{ser::SerializeStruct, Deserialize, Serialize};

/// Represents a tool call made by an OpenAI model.
///
/// Tool calls are generated when an OpenAI model decides to invoke a function
/// or tool as part of its response. This structure contains the metadata
/// necessary to identify and execute the requested function.
///
/// # Fields
///
/// * `id` - Unique identifier for this tool call
/// * `type_name` - The type of tool call (typically "function")
/// * `function` - The function details including name and arguments
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::ToolCall;
///
/// // Tool calls are received from OpenAI API responses rather than built
/// // by hand; inspect the id and requested function before executing it.
/// fn handle(call: &ToolCall) {
///     println!("tool call {} requests {:?}", call.id, call.function);
/// }
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ToolCall {
    /// Unique identifier for this tool call
    pub id: String,
    /// The type of tool call (e.g., "function")
    #[serde(rename = "type")]
    pub type_name: String,
    /// The function to be called with its arguments
    pub function: Function,
}

/// Represents different types of content that can be included in a message.
///
/// Content can be either text or images, supporting multi-modal interactions
/// with OpenAI models. Images can be provided as URLs or loaded from local files
/// and are automatically encoded as base64 data URLs.
///
/// # Supported Image Formats
///
/// * PNG
/// * JPEG/JPG
/// * GIF
///
/// # Fields
///
/// * `type_name` - The type of content ("input_text" or "input_image")
/// * `text` - Optional text content
/// * `image_url` - Optional image URL or base64 data URL
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::Content;
///
/// // Create text content
/// let text_content = Content::from_text("Hello, world!");
///
/// // Create image content from URL
/// let image_content = Content::from_image_url("https://example.com/image.png");
///
/// // Create image content from local file
/// let file_content = Content::from_image_file("path/to/image.png");
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Content {
    /// The type of content ("input_text" or "input_image")
    #[serde(rename = "type")]
    pub type_name: String,
    /// Optional text content
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    /// Optional image URL or base64 data URL
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
}

impl Content {
    /// Creates a new Content instance with text content.
    ///
    /// # Arguments
    ///
    /// * `text` - The text content to include
    ///
    /// # Returns
    ///
    /// A new Content instance with type "input_text"
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::Content;
    ///
    /// let content = Content::from_text("Hello, world!");
    /// assert_eq!(content.type_name, "input_text");
    /// ```
    pub fn from_text<T: AsRef<str>>(text: T) -> Self {
        Self { type_name: "input_text".to_string(), text: Some(text.as_ref().to_string()), image_url: None }
    }

    /// Creates a new Content instance with an image URL.
    ///
    /// # Arguments
    ///
    /// * `image_url` - The URL of the image
    ///
    /// # Returns
    ///
    /// A new Content instance with type "input_image"
    ///
    /// # Examples
    ///
    /// ```rust
    /// use openai_tools::common::message::Content;
    ///
    /// let content = Content::from_image_url("https://example.com/image.png");
    /// assert_eq!(content.type_name, "input_image");
    /// ```
    pub fn from_image_url<T: AsRef<str>>(image_url: T) -> Self {
        Self { type_name: "input_image".to_string(), text: None, image_url: Some(image_url.as_ref().to_string()) }
    }

    /// Creates a new Content instance from a local image file.
    ///
    /// This method reads an image file from the filesystem, encodes it as base64,
    /// and creates a data URL suitable for use with OpenAI APIs.
    ///
    /// # Arguments
    ///
    /// * `file_path` - Path to the image file
    ///
    /// # Returns
    ///
    /// A new Content instance with type "input_image" and base64-encoded image data
    ///
    /// # Panics
    ///
    /// Panics if:
    /// - The file path has no extension
    /// - The file cannot be opened
    /// - The image cannot be decoded
    /// - The image format is unsupported
    /// - The image cannot be encoded to the buffer
    ///
    /// # Supported Formats
    ///
    /// * PNG
    /// * JPEG/JPG
    /// * GIF
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::Content;
    ///
    /// let content = Content::from_image_file("path/to/image.png");
    /// assert_eq!(content.type_name, "input_image");
    /// ```
    pub fn from_image_file<T: AsRef<str>>(file_path: T) -> Self {
        // Determine the target format (and the correct MIME type) from the file extension.
        let ext = std::path::Path::new(file_path.as_ref()).extension().and_then(|s| s.to_str()).expect("Image file path must have an extension");
        let (img_fmt, mime) = match ext {
            "png" => (image::ImageFormat::Png, "image/png"),
            "jpg" | "jpeg" => (image::ImageFormat::Jpeg, "image/jpeg"),
            "gif" => (image::ImageFormat::Gif, "image/gif"),
            _ => panic!("Unsupported image format: {ext}"),
        };
        let img = image::ImageReader::open(file_path.as_ref()).expect("Failed to open image file").decode().expect("Failed to decode image");
        // Re-encode the image into an in-memory buffer, then base64-encode it as a data URL.
        let mut buf = std::io::Cursor::new(Vec::new());
        img.write_to(&mut buf, img_fmt).expect("Failed to write image to buffer");
        let base64_string = BASE64_STANDARD.encode(buf.into_inner());
        let image_url = format!("data:{mime};base64,{base64_string}");
        Self { type_name: "input_image".to_string(), text: None, image_url: Some(image_url) }
    }
}

/// Represents a message in an OpenAI conversation.
///
/// Messages are the core communication unit between users and OpenAI models.
/// They can contain various types of content including text, images, tool calls,
/// and metadata like refusals and annotations.
///
/// # Content Types
///
/// A message can contain either:
/// - Single content (`content` field) - for simple text messages
/// - Multiple content items (`content_list` field) - for multi-modal messages
///
/// # Fields
///
/// * `role` - The role of the message sender (User, Assistant, System, etc.)
/// * `content` - Optional single content item
/// * `content_list` - Optional list of content items for multi-modal messages
/// * `tool_calls` - Optional list of tool calls made by the assistant
/// * `tool_call_id` - Optional tool call ID for tracking specific tool calls
/// * `refusal` - Optional refusal message if the model declined to respond
/// * `annotations` - Optional list of annotations or metadata
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::{Message, Content};
/// use openai_tools::common::role::Role;
///
/// // Simple text message
/// let message = Message::from_string(Role::User, "Hello!");
///
/// // Multi-modal message with text and image
/// let contents = vec![
///     Content::from_text("What's in this image?"),
///     Content::from_image_url("https://example.com/image.png"),
/// ];
/// let message = Message::from_message_array(Role::User, contents);
/// ```
#[derive(Debug, Clone)]
pub struct Message {
    /// The role of the message sender
    pub role: Role,
    /// Optional single content item
    pub content: Option<Content>,
    /// Optional list of content items for multi-modal messages
    pub content_list: Option<Vec<Content>>,
    /// Optional list of tool calls made by the assistant
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Optional refusal message if the model declined to respond
    pub refusal: Option<String>,
    /// Optional tool call ID for tracking specific tool calls
    pub tool_call_id: Option<String>,
    /// Optional list of annotations or metadata
    pub annotations: Option<Vec<String>>,
}

/// Custom serialization implementation for Message.
///
/// This implementation ensures that messages are serialized correctly for the OpenAI API,
/// handling the mutually exclusive nature of `content` and `content_list` fields.
/// Either `content` or `content_list` must be present, but not both.
/// Additionally, it handles optional fields like `tool_call_id` for tool call responses.
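///
/// # Examples
///
/// A minimal sketch of the expected wire shape (assuming `serde_json` is
/// available and that `Role` serializes as a lowercase string):
///
/// ```rust,no_run
/// use openai_tools::common::message::Message;
/// use openai_tools::common::role::Role;
///
/// let message = Message::from_string(Role::User, "Hello!");
/// // Expected shape: {"role":"user","content":"Hello!"}
/// let json = serde_json::to_string(&message).unwrap();
/// ```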
impl Serialize for Message {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let mut state = serializer.serialize_struct("Message", 3)?;
        state.serialize_field("role", &self.role)?;

        // Ensure that exactly one of `content` and `content_list` is present.
        // Assistant messages are exempt: they may carry neither (e.g. tool-call-only responses).
        if self.role != Role::Assistant {
            if (self.content.is_none() && self.content_list.is_none()) || (self.content.is_some() && self.content_list.is_some()) {
                return Err(serde::ser::Error::custom("Message must have either `content` or `content_list`, but not both"));
            }
        }

        // Serialize optional fields
        if let Some(content) = &self.content {
            state.serialize_field("content", &content.text)?;
        }
        if let Some(contents) = &self.content_list {
            state.serialize_field("content", contents)?;
        }
        if let Some(tool_call_id) = &self.tool_call_id {
            state.serialize_field("tool_call_id", tool_call_id)?;
        }
        if let Some(tool_calls) = &self.tool_calls {
            state.serialize_field("tool_calls", tool_calls)?;
        }

        state.end()
    }
}

/// Custom deserialization implementation for Message.
///
/// This implementation handles the deserialization of messages from OpenAI API responses,
/// converting string content to Content objects and handling optional fields including
/// `tool_call_id` for tool call tracking.
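///
/// # Examples
///
/// A minimal sketch of parsing a response-style message (assuming `serde_json`
/// is available; the JSON mirrors a Chat Completion response message):
///
/// ```rust,no_run
/// use openai_tools::common::message::Message;
///
/// let json = r#"{"role": "assistant", "content": "Hi there!"}"#;
/// let message: Message = serde_json::from_str(json).unwrap();
/// ```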
impl<'de> Deserialize<'de> for Message {
    fn deserialize<D>(deserializer: D) -> Result<Message, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        #[derive(Deserialize)]
        struct MessageData {
            role: Role,
            content: Option<String>,
            tool_calls: Option<Vec<ToolCall>>,
            refusal: Option<String>,
            annotations: Option<Vec<String>>,
        }

        let data = MessageData::deserialize(deserializer)?;
        // API responses carry plain string content; wrap it as text Content.
        let content = data.content.map(Content::from_text);

        Ok(Message {
            role: data.role,
            content,
            content_list: None,
            tool_calls: data.tool_calls,
            tool_call_id: None,
            refusal: data.refusal,
            annotations: data.annotations,
        })
    }
}

impl Message {
    /// Creates a new Message with a single text content.
    ///
    /// This is a convenience method for creating simple text messages.
    ///
    /// # Arguments
    ///
    /// * `role` - The role of the message sender
    /// * `message` - The text content of the message
    ///
    /// # Returns
    ///
    /// A new Message instance with the specified role and text content
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::Message;
    /// use openai_tools::common::role::Role;
    ///
    /// let message = Message::from_string(Role::User, "Hello, how are you?");
    /// ```
    pub fn from_string<T: AsRef<str>>(role: Role, message: T) -> Self {
        Self {
            role,
            content: Some(Content::from_text(message.as_ref())),
            content_list: None,
            tool_calls: None,
            tool_call_id: None,
            refusal: None,
            annotations: None,
        }
    }

    /// Creates a new Message with multiple content items.
    ///
    /// This method is used for multi-modal messages that contain multiple
    /// types of content such as text and images.
    ///
    /// # Arguments
    ///
    /// * `role` - The role of the message sender
    /// * `contents` - Vector of content items to include in the message
    ///
    /// # Returns
    ///
    /// A new Message instance with the specified role and content list
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::{Message, Content};
    /// use openai_tools::common::role::Role;
    ///
    /// let contents = vec![
    ///     Content::from_text("What's in this image?"),
    ///     Content::from_image_url("https://example.com/image.png"),
    /// ];
    /// let message = Message::from_message_array(Role::User, contents);
    /// ```
    pub fn from_message_array(role: Role, contents: Vec<Content>) -> Self {
        Self { role, content: None, content_list: Some(contents), tool_calls: None, tool_call_id: None, refusal: None, annotations: None }
    }

    /// Creates a new Message as a response to a specific tool call.
    ///
    /// This method is used to create messages that respond to tool calls made by
    /// OpenAI models. The message will have the Tool role and includes the
    /// tool call ID for tracking purposes.
    ///
    /// # Arguments
    ///
    /// * `tool_call_response` - The response content for the tool call
    /// * `tool_call_id` - The ID of the tool call this message is responding to
    ///
    /// # Returns
    ///
    /// A new Message instance with Tool role, response content, and tool call ID
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::Message;
    ///
    /// let response = Message::from_tool_call_response(
    ///     "The weather in Tokyo is 25°C and sunny",
    ///     "tool_call_123"
    /// );
    /// ```
    pub fn from_tool_call_response<T: AsRef<str>>(tool_call_response: T, tool_call_id: T) -> Self {
        Self {
            role: Role::Tool,
            content: Some(Content::from_text(tool_call_response.as_ref())),
            content_list: None,
            tool_calls: None,
            tool_call_id: Some(tool_call_id.as_ref().to_string()),
            refusal: None,
            annotations: None,
        }
    }

    /// Calculates the approximate token count for the message content.
    ///
    /// This method uses the tiktoken library to estimate the number of tokens
    /// that would be consumed by this message when sent to OpenAI's API.
    /// Only text content is counted; images are not included in the calculation.
    ///
    /// # Returns
    ///
    /// The estimated number of tokens for the text content in this message
    ///
    /// # Examples
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::Message;
    /// use openai_tools::common::role::Role;
    ///
    /// let message = Message::from_string(Role::User, "Hello, world!");
    /// let token_count = message.get_input_token_count();
    /// ```
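    ///
    /// Only the text parts of a multi-modal message contribute to the count,
    /// as in this sketch using the module's own constructors:
    ///
    /// ```rust,no_run
    /// use openai_tools::common::message::{Content, Message};
    /// use openai_tools::common::role::Role;
    ///
    /// let message = Message::from_message_array(Role::User, vec![
    ///     Content::from_text("Describe this image."),
    ///     Content::from_image_url("https://example.com/image.png"),
    /// ]);
    /// let tokens = message.get_input_token_count(); // counts only the text item
    /// ```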
    pub fn get_input_token_count(&self) -> usize {
        let bpe = tiktoken_rs::o200k_base().expect("Failed to load o200k_base tokenizer");
        if let Some(content) = &self.content {
            // Only text is counted; content without text (e.g. an image) contributes zero tokens.
            content.text.as_deref().map_or(0, |text| bpe.encode_with_special_tokens(text).len())
        } else if let Some(contents) = &self.content_list {
            let mut total_tokens = 0;
            for content in contents {
                if let Some(text) = &content.text {
                    total_tokens += bpe.encode_with_special_tokens(text).len();
                }
            }
            total_tokens
        } else {
            0 // No content to count tokens for
        }
    }
}
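
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch of a test for the serializer's content invariant,
    // assuming `serde_json` is available as a dev-dependency: a non-assistant
    // message must carry exactly one of `content` or `content_list`.
    #[test]
    fn user_message_with_both_content_fields_fails_to_serialize() {
        let mut message = Message::from_string(Role::User, "hello");
        message.content_list = Some(vec![Content::from_text("world")]);
        assert!(serde_json::to_string(&message).is_err());
    }
}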