// openai_tools/embedding/mod.rs
//! # Embedding Module
//!
//! This module provides functionality for interacting with the OpenAI Embeddings API.
//! It allows you to convert text into numerical vector representations (embeddings)
//! that capture semantic meaning, enabling various NLP tasks such as semantic search,
//! clustering, and similarity comparison.
//!
//! ## Key Features
//!
//! - **Text Embedding Generation**: Convert single or multiple texts into vector embeddings
//! - **Multiple Input Formats**: Support for single text strings or arrays of texts
//! - **Flexible Encoding**: Support for both `float` and `base64` encoding formats
//! - **Various Model Support**: Compatible with OpenAI's embedding models (e.g., `text-embedding-3-small`, `text-embedding-3-large`)
//! - **Multi-dimensional Output**: Support for 1D, 2D, and 3D embedding vectors
//!
//! ## Quick Start
//!
//! ```rust,no_run
//! use openai_tools::embedding::request::Embedding;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Initialize the embedding client
//!     let mut embedding = Embedding::new()?;
//!
//!     // Configure the model and input text
//!     embedding
//!         .model("text-embedding-3-small")
//!         .input_text("Hello, world!");
//!
//!     // Generate embedding
//!     let response = embedding.embed().await?;
//!
//!     // Access the embedding vector
//!     let vector = response.data[0].embedding.as_1d().unwrap();
//!     println!("Embedding dimension: {}", vector.len());
//!     Ok(())
//! }
//! ```
//!
//! ## Usage Examples
//!
//! ### Single Text Embedding
//!
//! ```rust,no_run
//! use openai_tools::embedding::request::Embedding;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let mut embedding = Embedding::new()?;
//!
//!     embedding
//!         .model("text-embedding-3-small")
//!         .input_text("The quick brown fox jumps over the lazy dog.");
//!
//!     let response = embedding.embed().await?;
//!
//!     // The response contains embedding data
//!     assert_eq!(response.object, "list");
//!     assert_eq!(response.data.len(), 1);
//!
//!     let vector = response.data[0].embedding.as_1d().unwrap();
//!     println!("Generated embedding with {} dimensions", vector.len());
//!     Ok(())
//! }
//! ```
//!
//! ### Batch Text Embedding
//!
//! ```rust,no_run
//! use openai_tools::embedding::request::Embedding;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let mut embedding = Embedding::new()?;
//!
//!     // Embed multiple texts at once
//!     let texts = vec![
//!         "Hello, world!",
//!         "こんにちは、世界!",
//!         "Bonjour le monde!",
//!     ];
//!
//!     embedding
//!         .model("text-embedding-3-small")
//!         .input_text_array(texts);
//!
//!     let response = embedding.embed().await?;
//!
//!     // Each input text gets its own embedding
//!     for (i, data) in response.data.iter().enumerate() {
//!         let vector = data.embedding.as_1d().unwrap();
//!         println!("Text {}: {} dimensions", i, vector.len());
//!     }
//!     Ok(())
//! }
//! ```
//!
//! ### Using Different Encoding Formats
//!
//! ```rust,no_run
//! use openai_tools::embedding::request::Embedding;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let mut embedding = Embedding::new()?;
//!
//!     embedding
//!         .model("text-embedding-3-small")
//!         .input_text("Sample text for embedding")
//!         .encoding_format("float"); // or "base64"
//!
//!     let response = embedding.embed().await?;
//!     println!("Model used: {}", response.model);
//!     println!("Token usage: {:?}", response.usage);
//!     Ok(())
//! }
//! ```
//!
//! ## Supported Models
//!
//! | Model | Dimensions | Description |
//! |-------|------------|-------------|
//! | `text-embedding-3-small` | 1536 | Efficient model for most use cases |
//! | `text-embedding-3-large` | 3072 | Higher quality embeddings for demanding tasks |
//! | `text-embedding-ada-002` | 1536 | Legacy model (still supported) |
//!
//! ## Response Structure
//!
//! The embedding response contains:
//! - `object`: Always "list" for embedding responses
//! - `data`: Array of embedding objects, each containing:
//!   - `object`: Type identifier ("embedding")
//!   - `embedding`: The vector representation (1D, 2D, or 3D)
//!   - `index`: Position in the input array
//! - `model`: The model used for embedding
//! - `usage`: Token usage information

139pub mod request;
140pub mod response;
141
#[cfg(test)]
mod tests {
    use crate::common::errors::OpenAIToolError;
    use crate::embedding::request::Embedding;
    use std::sync::Once;
    use tracing_subscriber::EnvFilter;

    static TRACING_INIT: Once = Once::new();

    /// Installs a global tracing subscriber exactly once per test binary.
    ///
    /// The filter is taken from `RUST_LOG` when set and valid, otherwise it
    /// falls back to `info`. `try_init` is used (and its error discarded) so
    /// that a subscriber already installed elsewhere is tolerated.
    fn init_tracing() {
        TRACING_INIT.call_once(|| {
            let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
            let _ = tracing_subscriber::fmt().with_env_filter(filter).with_test_writer().try_init();
        });
    }

    // NOTE(review): the original stacked `#[tokio::test]` with
    // `#[test_log::test]`. Both attribute macros emit their own `#[test]`
    // harness attribute, producing a duplicate-attribute conflict; the
    // supported combination is `#[test_log::test(tokio::test)]`. Since this
    // module already initializes tracing manually via `init_tracing`, the
    // redundant `test_log` attribute is dropped instead.

    /// Embeds a single text and checks the response invariants: a "list"
    /// object holding exactly one 1-D, 1536-dimensional vector.
    ///
    /// Transient request (network) errors are retried up to 3 times with a
    /// short pause between attempts; any other error fails the test at once.
    #[tokio::test]
    async fn test_embedding_with_text() {
        init_tracing();

        let mut embedding = Embedding::new().expect("Embedding initialization should succeed");
        embedding.model("text-embedding-3-small").input_text("Hello, world!");

        let mut counter = 3;
        loop {
            match embedding.embed().await {
                Ok(response) => {
                    tracing::info!("Embedding model: {}", &response.model);
                    tracing::info!("Embedding data count: {}", response.data.len());
                    tracing::info!("Embedding usage: {:?}", &response.usage);

                    assert_eq!(response.object, "list");
                    assert_eq!(response.data.len(), 1);
                    assert!(response.data[0].embedding.is_1d());

                    let embedding_vec = response.data[0].embedding.as_1d().expect("Embedding should be 1D");
                    tracing::info!("Embedding dimension: {}", embedding_vec.len());
                    assert_eq!(embedding_vec.len(), 1536); // text-embedding-3-small outputs 1536 dimensions

                    break;
                }
                Err(e) => match e {
                    OpenAIToolError::RequestError(e) => {
                        tracing::warn!("Request error: {} (retrying... {})", e, counter);
                        counter -= 1;
                        if counter == 0 {
                            panic!("Embedding request failed (retry limit reached)");
                        }
                        // Give transient network hiccups a chance to clear
                        // before the next attempt.
                        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                        continue;
                    }
                    _ => {
                        tracing::error!("Error: {}", e);
                        panic!("Embedding request failed: {}", e);
                    }
                },
            };
        }
    }

    /// Embeds a batch of texts and checks that every input yields its own
    /// 1-D, 1536-dimensional vector whose `index` matches its input position.
    ///
    /// Transient request (network) errors are retried up to 3 times with a
    /// short pause between attempts; any other error fails the test at once.
    #[tokio::test]
    async fn test_embedding_with_text_array() {
        init_tracing();

        let mut embedding = Embedding::new().expect("Embedding initialization should succeed");
        let texts = vec!["Hello, world!", "こんにちは、世界!", "Bonjour le monde!"];
        embedding.model("text-embedding-3-small").input_text_array(texts.clone());

        let mut counter = 3;
        loop {
            match embedding.embed().await {
                Ok(response) => {
                    tracing::info!("Embedding model: {}", &response.model);
                    tracing::info!("Embedding data count: {}", response.data.len());
                    tracing::info!("Embedding usage: {:?}", &response.usage);

                    assert_eq!(response.object, "list");
                    assert_eq!(response.data.len(), texts.len());

                    for (i, data) in response.data.iter().enumerate() {
                        assert!(data.embedding.is_1d());
                        let embedding_vec = data.embedding.as_1d().expect("Embedding should be 1D");
                        tracing::info!("Embedding[{}] dimension: {}", i, embedding_vec.len());
                        assert_eq!(embedding_vec.len(), 1536); // text-embedding-3-small outputs 1536 dimensions
                        assert_eq!(data.index, i);
                    }

                    break;
                }
                Err(e) => match e {
                    OpenAIToolError::RequestError(e) => {
                        tracing::warn!("Request error: {} (retrying... {})", e, counter);
                        counter -= 1;
                        if counter == 0 {
                            panic!("Embedding request failed (retry limit reached)");
                        }
                        // Give transient network hiccups a chance to clear
                        // before the next attempt.
                        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                        continue;
                    }
                    _ => {
                        tracing::error!("Error: {}", e);
                        panic!("Embedding request failed: {}", e);
                    }
                },
            };
        }
    }
}
249}