// openai_tools/embedding/mod.rs

1//! # Embedding Module
2//!
3//! This module provides functionality for interacting with the OpenAI Embeddings API.
4//! It allows you to convert text into numerical vector representations (embeddings)
5//! that capture semantic meaning, enabling various NLP tasks such as semantic search,
6//! clustering, and similarity comparison.
7//!
8//! ## Key Features
9//!
10//! - **Text Embedding Generation**: Convert single or multiple texts into vector embeddings
11//! - **Multiple Input Formats**: Support for single text strings or arrays of texts
12//! - **Flexible Encoding**: Support for both `float` and `base64` encoding formats
13//! - **Various Model Support**: Compatible with OpenAI's embedding models (e.g., `text-embedding-3-small`, `text-embedding-3-large`)
14//! - **Multi-dimensional Output**: Support for 1D, 2D, and 3D embedding vectors
15//!
16//! ## Quick Start
17//!
18//! ```rust,no_run
19//! use openai_tools::embedding::request::Embedding;
20//!
21//! #[tokio::main]
22//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
23//!     // Initialize the embedding client
24//!     let mut embedding = Embedding::new()?;
25//!     
26//!     // Configure the model and input text
27//!     embedding
28//!         .model("text-embedding-3-small")
29//!         .input_text("Hello, world!");
30//!     
31//!     // Generate embedding
32//!     let response = embedding.embed().await?;
33//!     
34//!     // Access the embedding vector
35//!     let vector = response.data[0].embedding.as_1d().unwrap();
36//!     println!("Embedding dimension: {}", vector.len());
37//!     Ok(())
38//! }
39//! ```
40//!
41//! ## Usage Examples
42//!
43//! ### Single Text Embedding
44//!
45//! ```rust,no_run
46//! use openai_tools::embedding::request::Embedding;
47//!
48//! #[tokio::main]
49//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
50//!     let mut embedding = Embedding::new()?;
51//!     
52//!     embedding
53//!         .model("text-embedding-3-small")
54//!         .input_text("The quick brown fox jumps over the lazy dog.");
55//!     
56//!     let response = embedding.embed().await?;
57//!     
58//!     // The response contains embedding data
59//!     assert_eq!(response.object, "list");
60//!     assert_eq!(response.data.len(), 1);
61//!     
62//!     let vector = response.data[0].embedding.as_1d().unwrap();
63//!     println!("Generated embedding with {} dimensions", vector.len());
64//!     Ok(())
65//! }
66//! ```
67//!
68//! ### Batch Text Embedding
69//!
70//! ```rust,no_run
71//! use openai_tools::embedding::request::Embedding;
72//!
73//! #[tokio::main]
74//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
75//!     let mut embedding = Embedding::new()?;
76//!     
77//!     // Embed multiple texts at once
78//!     let texts = vec![
79//!         "Hello, world!",
80//!         "こんにちは、世界!",
81//!         "Bonjour le monde!",
82//!     ];
83//!     
84//!     embedding
85//!         .model("text-embedding-3-small")
86//!         .input_text_array(texts);
87//!     
88//!     let response = embedding.embed().await?;
89//!     
90//!     // Each input text gets its own embedding
91//!     for (i, data) in response.data.iter().enumerate() {
92//!         let vector = data.embedding.as_1d().unwrap();
93//!         println!("Text {}: {} dimensions", i, vector.len());
94//!     }
95//!     Ok(())
96//! }
97//! ```
98//!
99//! ### Using Different Encoding Formats
100//!
101//! ```rust,no_run
102//! use openai_tools::embedding::request::Embedding;
103//!
104//! #[tokio::main]
105//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
106//!     let mut embedding = Embedding::new()?;
107//!     
108//!     embedding
109//!         .model("text-embedding-3-small")
110//!         .input_text("Sample text for embedding")
111//!         .encoding_format("float"); // or "base64"
112//!     
113//!     let response = embedding.embed().await?;
114//!     println!("Model used: {}", response.model);
115//!     println!("Token usage: {:?}", response.usage);
116//!     Ok(())
117//! }
118//! ```
119//!
120//! ## Supported Models
121//!
122//! | Model | Dimensions | Description |
123//! |-------|------------|-------------|
124//! | `text-embedding-3-small` | 1536 | Efficient model for most use cases |
125//! | `text-embedding-3-large` | 3072 | Higher quality embeddings for demanding tasks |
126//! | `text-embedding-ada-002` | 1536 | Legacy model (still supported) |
127//!
128//! ## Response Structure
129//!
130//! The embedding response contains:
131//! - `object`: Always "list" for embedding responses
132//! - `data`: Array of embedding objects, each containing:
133//!   - `object`: Type identifier ("embedding")
134//!   - `embedding`: The vector representation (1D, 2D, or 3D)
135//!   - `index`: Position in the input array
136//! - `model`: The model used for embedding
137//! - `usage`: Token usage information
138
/// Request builder for the OpenAI Embeddings API (`Embedding` client type).
pub mod request;
/// Response types for the Embeddings API (embedding vectors, usage, metadata).
pub mod response;
141
#[cfg(test)]
mod tests {
    use crate::common::errors::OpenAIToolError;
    use crate::embedding::request::Embedding;
    use std::sync::Once;
    use tracing_subscriber::EnvFilter;

    static TRACING_INIT: Once = Once::new();

    /// Installs a tracing subscriber exactly once for the whole test binary.
    ///
    /// Honors `RUST_LOG` when set, otherwise defaults to the `info` level.
    /// `try_init` is used so this never panics if another subscriber (e.g.
    /// the one installed by `test_log`) got there first.
    fn init_tracing() {
        TRACING_INIT.call_once(|| {
            let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
            let _ = tracing_subscriber::fmt().with_env_filter(filter).with_test_writer().try_init();
        });
    }

    // NOTE: Do not stack `#[tokio::test]` and `#[test_log::test]` on the
    // same function — each macro expands to its own `#[test]` attribute,
    // yielding a duplicate test attribute. The documented way to combine
    // them is `#[test_log::test(tokio::test)]`, which lets test_log wrap
    // the tokio runtime itself.
    #[test_log::test(tokio::test)]
    async fn test_embedding_with_text() {
        init_tracing();

        let mut embedding = Embedding::new().expect("Embedding initialization should succeed");
        embedding.model("text-embedding-3-small").input_text("Hello, world!");

        // Retry up to 3 times on transient network/request errors only;
        // any other error kind fails the test immediately.
        let mut counter = 3;
        loop {
            match embedding.embed().await {
                Ok(response) => {
                    tracing::info!("Embedding model: {}", &response.model);
                    tracing::info!("Embedding data count: {}", response.data.len());
                    tracing::info!("Embedding usage: {:?}", &response.usage);

                    assert_eq!(response.object, "list");
                    assert_eq!(response.data.len(), 1);
                    assert!(response.data[0].embedding.is_1d());

                    let embedding_vec = response.data[0].embedding.as_1d().expect("Embedding should be 1D");
                    tracing::info!("Embedding dimension: {}", embedding_vec.len());
                    assert_eq!(embedding_vec.len(), 1536); // text-embedding-3-small outputs 1536 dimensions

                    break;
                }
                Err(e) => match e {
                    OpenAIToolError::RequestError(e) => {
                        tracing::warn!("Request error: {} (retrying... {})", e, counter);
                        counter -= 1;
                        if counter == 0 {
                            panic!("Embedding request failed (retry limit reached)");
                        }
                        continue;
                    }
                    _ => {
                        tracing::error!("Error: {}", e);
                        panic!("Embedding request failed: {}", e);
                    }
                },
            };
        }
    }

    #[test_log::test(tokio::test)]
    async fn test_embedding_with_text_array() {
        init_tracing();

        let mut embedding = Embedding::new().expect("Embedding initialization should succeed");
        let texts = vec!["Hello, world!", "こんにちは、世界!", "Bonjour le monde!"];
        embedding.model("text-embedding-3-small").input_text_array(texts.clone());

        // Same retry policy as the single-text test: 3 attempts for
        // transient request errors, immediate failure otherwise.
        let mut counter = 3;
        loop {
            match embedding.embed().await {
                Ok(response) => {
                    tracing::info!("Embedding model: {}", &response.model);
                    tracing::info!("Embedding data count: {}", response.data.len());
                    tracing::info!("Embedding usage: {:?}", &response.usage);

                    assert_eq!(response.object, "list");
                    // One embedding per input text, in input order.
                    assert_eq!(response.data.len(), texts.len());

                    for (i, data) in response.data.iter().enumerate() {
                        assert!(data.embedding.is_1d());
                        let embedding_vec = data.embedding.as_1d().expect("Embedding should be 1D");
                        tracing::info!("Embedding[{}] dimension: {}", i, embedding_vec.len());
                        assert_eq!(embedding_vec.len(), 1536); // text-embedding-3-small outputs 1536 dimensions
                        assert_eq!(data.index, i);
                    }

                    break;
                }
                Err(e) => match e {
                    OpenAIToolError::RequestError(e) => {
                        tracing::warn!("Request error: {} (retrying... {})", e, counter);
                        counter -= 1;
                        if counter == 0 {
                            panic!("Embedding request failed (retry limit reached)");
                        }
                        continue;
                    }
                    _ => {
                        tracing::error!("Error: {}", e);
                        panic!("Embedding request failed: {}", e);
                    }
                },
            };
        }
    }
}