// openai_tools/common/message.rs

1//! Message module for OpenAI tools.
2//!
3//! This module provides data structures and functionality for handling OpenAI API messages,
4//! including text content, images, tool calls, and multi-modal interactions. It serves as
5//! the foundation for communication between users and OpenAI models.
6//!
7//! ## Core Components
8//!
9//! - [`Message`] - The main message structure containing role, content, and metadata
10//! - [`Content`] - Represents different types of content (text, images, etc.)
11//! - [`ToolCall`] - Represents function calls made by OpenAI models
12//!
13//! ## Supported Content Types
14//!
15//! The module supports various content types for rich interactions:
16//!
17//! - **Text content**: Plain text messages
18//! - **Image content**: Images from URLs or local files (PNG, JPEG, GIF)
19//! - **Multi-modal content**: Combining text and images in a single message
20//!
21//! ## Usage in the Library
22//!
23//! This module is used throughout the OpenAI tools library:
24//!
25//! - In [`crate::chat::request`] - For Chat Completion API message handling
26//! - In [`crate::responses::request`] - For Responses API message processing
27//! - In [`crate::chat::response`] - For parsing OpenAI API responses
28//!
29//! ## Examples
30//!
31//! ### Basic Text Message
32//!
33//! ```rust,no_run
34//! use openai_tools::common::message::Message;
35//! use openai_tools::common::role::Role;
36//!
37//! # fn main() {
38//! let message = Message::from_string(Role::User, "Hello, how are you?");
39//! # }
40//! ```
41//!
42//! ### Multi-modal Message with Text and Image
43//!
44//! ```rust,no_run
45//! use openai_tools::common::message::{Message, Content};
46//! use openai_tools::common::role::Role;
47//!
48//! # fn main() {
49//! let contents = vec![
50//!     Content::from_text("What's in this image?"),
51//!     Content::from_image_file("path/to/image.png"),
52//! ];
53//! let message = Message::from_message_array(Role::User, contents);
54//! # }
55//! ```
56//!
57
58use crate::common::{function::Function, role::Role};
59use base64::prelude::*;
60use serde::{ser::SerializeStruct, Deserialize, Serialize};
61
/// Represents a tool call made by an OpenAI model.
///
/// Tool calls are generated when an OpenAI model decides to invoke a function
/// or tool as part of its response. This structure contains the metadata
/// necessary to identify and execute the requested function.
///
/// # Fields
///
/// * `id` - Unique identifier for this tool call
/// * `type_name` - The type of tool call (typically "function")
/// * `function` - The function details including name and arguments
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::ToolCall;
/// use openai_tools::common::function::Function;
///
/// // Tool calls are typically received from OpenAI API responses
/// // and contain function invocation details
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ToolCall {
    /// Unique identifier for this tool call, echoed back when submitting tool results
    pub id: String,
    /// The type of tool call (e.g., "function"); serialized as `type` on the wire
    #[serde(rename = "type")]
    pub type_name: String,
    /// The function to be called with its arguments
    pub function: Function,
}
93
/// Represents different types of content that can be included in a message.
///
/// Content can be either text or images, supporting multi-modal interactions
/// with OpenAI models. Images can be provided as URLs or loaded from local files
/// and are automatically encoded as base64 data URLs.
///
/// # Supported Image Formats
///
/// * PNG
/// * JPEG/JPG
/// * GIF
///
/// # Fields
///
/// * `type_name` - The type of content ("input_text" or "input_image")
/// * `text` - Optional text content
/// * `image_url` - Optional image URL or base64 data URL
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::Content;
///
/// // Create text content
/// let text_content = Content::from_text("Hello, world!");
///
/// // Create image content from URL
/// let image_content = Content::from_image_url("https://example.com/image.png");
///
/// // Create image content from local file
/// let file_content = Content::from_image_file("path/to/image.png");
/// ```
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct Content {
    /// The type of content ("input_text" or "input_image"); serialized as `type` on the wire
    #[serde(rename = "type")]
    pub type_name: String,
    /// Optional text content; omitted from the serialized form when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text: Option<String>,
    /// Optional image URL or base64 data URL; omitted from the serialized form when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_url: Option<String>,
}
138
139impl Content {
140    /// Creates a new Content instance with text content.
141    ///
142    /// # Arguments
143    ///
144    /// * `text` - The text content to include
145    ///
146    /// # Returns
147    ///
148    /// A new Content instance with type "input_text"
149    ///
150    /// # Examples
151    ///
152    /// ```rust,no_run
153    /// use openai_tools::common::message::Content;
154    ///
155    /// let content = Content::from_text("Hello, world!");
156    /// assert_eq!(content.type_name, "input_text");
157    /// ```
158    pub fn from_text<T: AsRef<str>>(text: T) -> Self {
159        Self { type_name: "input_text".to_string(), text: Some(text.as_ref().to_string()), image_url: None }
160    }
161
162    /// Creates a new Content instance with an image URL.
163    ///
164    /// # Arguments
165    ///
166    /// * `image_url` - The URL of the image
167    ///
168    /// # Returns
169    ///
170    /// A new Content instance with type "input_image"
171    ///
172    /// # Examples
173    ///
174    /// ```rust
175    /// use openai_tools::common::message::Content;
176    ///
177    /// let content = Content::from_image_url("https://example.com/image.png");
178    /// assert_eq!(content.type_name, "input_image");
179    /// ```
180    pub fn from_image_url<T: AsRef<str>>(image_url: T) -> Self {
181        Self { type_name: "input_image".to_string(), text: None, image_url: Some(image_url.as_ref().to_string()) }
182    }
183
184    /// Creates a new Content instance from a local image file.
185    ///
186    /// This method reads an image file from the filesystem, encodes it as base64,
187    /// and creates a data URL suitable for use with OpenAI APIs.
188    ///
189    /// # Arguments
190    ///
191    /// * `file_path` - Path to the image file
192    ///
193    /// # Returns
194    ///
195    /// A new Content instance with type "input_image" and base64-encoded image data
196    ///
197    /// # Panics
198    ///
199    /// Panics if:
200    /// - The file cannot be opened
201    /// - The image cannot be decoded
202    /// - The image format is unsupported
203    /// - The image cannot be encoded to the buffer
204    ///
205    /// # Supported Formats
206    ///
207    /// * PNG
208    /// * JPEG/JPG
209    /// * GIF
210    ///
211    /// # Examples
212    ///
213    /// ```rust,no_run
214    /// use openai_tools::common::message::Content;
215    ///
216    /// let content = Content::from_image_file("path/to/image.png");
217    /// assert_eq!(content.type_name, "input_image");
218    /// ```
219    pub fn from_image_file<T: AsRef<str>>(file_path: T) -> Self {
220        let ext = file_path.as_ref();
221        let ext = std::path::Path::new(&ext).extension().and_then(|s| s.to_str()).unwrap();
222        let img = image::ImageReader::open(file_path.as_ref()).expect("Failed to open image file").decode().expect("Failed to decode image");
223        let img_fmt = match ext {
224            "png" => image::ImageFormat::Png,
225            "jpg" | "jpeg" => image::ImageFormat::Jpeg,
226            "gif" => image::ImageFormat::Gif,
227            _ => panic!("Unsupported image format"),
228        };
229        let mut buf = std::io::Cursor::new(Vec::new());
230        img.write_to(&mut buf, img_fmt).expect("Failed to write image to buffer");
231        let base64_string = BASE64_STANDARD.encode(buf.into_inner());
232        let image_url = format!("data:image/{ext};base64,{base64_string}");
233        Self { type_name: "input_image".to_string(), text: None, image_url: Some(image_url) }
234    }
235}
236
/// Represents a message in an OpenAI conversation.
///
/// Messages are the core communication unit between users and OpenAI models.
/// They can contain various types of content including text, images, tool calls,
/// and metadata like refusals and annotations.
///
/// # Content Types
///
/// A message can contain either:
/// - Single content (`content` field) - for simple text messages
/// - Multiple content items (`content_list` field) - for multi-modal messages
///
/// The two are mutually exclusive: exactly one must be set or serialization fails.
///
/// # Fields
///
/// * `role` - The role of the message sender (User, Assistant, System, etc.)
/// * `content` - Optional single content item
/// * `content_list` - Optional list of content items for multi-modal messages
/// * `tool_calls` - Optional list of tool calls made by the assistant
/// * `refusal` - Optional refusal message if the model declined to respond
/// * `annotations` - Optional list of annotations or metadata
///
/// # Examples
///
/// ```rust,no_run
/// use openai_tools::common::message::{Message, Content};
/// use openai_tools::common::role::Role;
///
/// // Simple text message
/// let message = Message::from_string(Role::User, "Hello!");
///
/// // Multi-modal message with text and image
/// let contents = vec![
///     Content::from_text("What's in this image?"),
///     Content::from_image_url("https://example.com/image.png"),
/// ];
/// let message = Message::from_message_array(Role::User, contents);
/// ```
#[derive(Debug, Clone)]
pub struct Message {
    /// The role of the message sender
    pub role: Role,
    /// Optional single content item (mutually exclusive with `content_list`)
    pub content: Option<Content>,
    /// Optional list of content items for multi-modal messages (mutually exclusive with `content`)
    pub content_list: Option<Vec<Content>>,
    /// Optional list of tool calls made by the assistant
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Optional refusal message if the model declined to respond
    pub refusal: Option<String>,
    /// Optional list of annotations or metadata
    pub annotations: Option<Vec<String>>,
}
289
290/// Custom serialization implementation for Message.
291///
292/// This implementation ensures that messages are serialized correctly for the OpenAI API,
293/// handling the mutually exclusive nature of `content` and `content_list` fields.
294/// Either `content` or `content_list` must be present, but not both.
295impl Serialize for Message {
296    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
297    where
298        S: serde::Serializer,
299    {
300        let mut state = serializer.serialize_struct("Message", 3)?;
301        state.serialize_field("role", &self.role)?;
302
303        // Ensure that either content or contents is present, but not both
304        if (self.content.is_none() && self.content_list.is_none()) || (self.content.is_some() && self.content_list.is_some()) {
305            return Err(serde::ser::Error::custom("Message must have either content or contents"));
306        }
307
308        // Serialize content or contents based on which one is present
309        if let Some(content) = &self.content {
310            state.serialize_field("content", &content.text)?;
311        }
312        if let Some(contents) = &self.content_list {
313            state.serialize_field("content", contents)?;
314        }
315        state.end()
316    }
317}
318
319/// Custom deserialization implementation for Message.
320///
321/// This implementation handles the deserialization of messages from OpenAI API responses,
322/// converting string content to Content objects and handling optional fields.
323impl<'de> Deserialize<'de> for Message {
324    fn deserialize<D>(deserializer: D) -> Result<Message, D::Error>
325    where
326        D: serde::Deserializer<'de>,
327    {
328        #[derive(Deserialize)]
329        struct MessageData {
330            role: Role,
331            content: Option<String>,
332            tool_calls: Option<Vec<ToolCall>>,
333            refusal: Option<String>,
334            annotations: Option<Vec<String>>,
335        }
336
337        let data = MessageData::deserialize(deserializer)?;
338        let content = if let Some(text) = data.content { Some(Content::from_text(text)) } else { None };
339
340        Ok(Message {
341            role: data.role,
342            content,
343            content_list: None,
344            tool_calls: data.tool_calls,
345            refusal: data.refusal,
346            annotations: data.annotations,
347        })
348    }
349}
350
351impl Message {
352    /// Creates a new Message with a single text content.
353    ///
354    /// This is a convenience method for creating simple text messages.
355    ///
356    /// # Arguments
357    ///
358    /// * `role` - The role of the message sender
359    /// * `message` - The text content of the message
360    ///
361    /// # Returns
362    ///
363    /// A new Message instance with the specified role and text content
364    ///
365    /// # Examples
366    ///
367    /// ```rust,no_run
368    /// use openai_tools::common::message::Message;
369    /// use openai_tools::common::role::Role;
370    ///
371    /// let message = Message::from_string(Role::User, "Hello, how are you?");
372    /// ```
373    pub fn from_string<T: AsRef<str>>(role: Role, message: T) -> Self {
374        Self { role, content: Some(Content::from_text(message.as_ref())), content_list: None, tool_calls: None, refusal: None, annotations: None }
375    }
376
377    /// Creates a new Message with multiple content items.
378    ///
379    /// This method is used for multi-modal messages that contain multiple
380    /// types of content such as text and images.
381    ///
382    /// # Arguments
383    ///
384    /// * `role` - The role of the message sender
385    /// * `contents` - Vector of content items to include in the message
386    ///
387    /// # Returns
388    ///
389    /// A new Message instance with the specified role and content list
390    ///
391    /// # Examples
392    ///
393    /// ```rust,no_run
394    /// use openai_tools::common::message::{Message, Content};
395    /// use openai_tools::common::role::Role;
396    ///
397    /// let contents = vec![
398    ///     Content::from_text("What's in this image?"),
399    ///     Content::from_image_url("https://example.com/image.png"),
400    /// ];
401    /// let message = Message::from_message_array(Role::User, contents);
402    /// ```
403    pub fn from_message_array(role: Role, contents: Vec<Content>) -> Self {
404        Self { role, content: None, content_list: Some(contents), tool_calls: None, refusal: None, annotations: None }
405    }
406
407    /// Calculates the approximate token count for the message content.
408    ///
409    /// This method uses the tiktoken library to estimate the number of tokens
410    /// that would be consumed by this message when sent to OpenAI's API.
411    /// Only text content is counted; images are not included in the calculation.
412    ///
413    /// # Returns
414    ///
415    /// The estimated number of tokens for the text content in this message
416    ///
417    /// # Examples
418    ///
419    /// ```rust,no_run
420    /// use openai_tools::common::message::Message;
421    /// use openai_tools::common::role::Role;
422    ///
423    /// let message = Message::from_string(Role::User, "Hello, world!");
424    /// let token_count = message.get_input_token_count();
425    /// ```
426    pub fn get_input_token_count(&self) -> usize {
427        let bpe = tiktoken_rs::o200k_base().unwrap();
428        if let Some(content) = &self.content {
429            bpe.encode_with_special_tokens(&content.clone().text.unwrap()).len()
430        } else if let Some(contents) = &self.content_list {
431            let mut total_tokens = 0;
432            for content in contents {
433                if let Some(text) = &content.text {
434                    total_tokens += bpe.encode_with_special_tokens(text).len();
435                }
436            }
437            total_tokens
438        } else {
439            0 // No content to count tokens for
440        }
441    }
442}