ai_lib/api/chat.rs

//! Chat API module
//!
//! Generic chat API interface.

use crate::types::{AiLibError, ChatCompletionRequest, ChatCompletionResponse};
use async_trait::async_trait;
use futures::stream::Stream;

/// This trait defines the core capabilities that all AI services should have,
/// without depending on any specific model implementation details.
#[async_trait]
pub trait ChatApi: Send + Sync {
    /// Send chat completion request
    ///
    /// # Arguments
    /// * `request` - Generic chat completion request
    ///
    /// # Returns
    /// * `Result<ChatCompletionResponse, AiLibError>` - Returns response on success, error on failure
    async fn chat_completion(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<ChatCompletionResponse, AiLibError>;

    /// Streaming chat completion request
    ///
    /// # Arguments
    /// * `request` - Generic chat completion request
    ///
    /// # Returns
    /// * `Result<Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>, AiLibError>` - Returns a boxed stream of response chunks on success
    async fn chat_completion_stream(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<
        Box<dyn Stream<Item = Result<ChatCompletionChunk, AiLibError>> + Send + Unpin>,
        AiLibError,
    >;

    /// Get list of supported models
    ///
    /// # Returns
    /// * `Result<Vec<String>, AiLibError>` - Returns model list on success, error on failure
    async fn list_models(&self) -> Result<Vec<String>, AiLibError>;

    /// Get model information
    ///
    /// # Arguments
    /// * `model_id` - Model ID
    ///
    /// # Returns
    /// * `Result<ModelInfo, AiLibError>` - Returns model information on success, error on failure
    async fn get_model_info(&self, model_id: &str) -> Result<ModelInfo, AiLibError>;

    /// Batch chat completion requests
    ///
    /// # Arguments
    /// * `requests` - Vector of chat completion requests
    /// * `concurrency_limit` - Optional concurrency limit for concurrent processing
    ///
    /// # Returns
    /// * `Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError>` - Returns vector of results
    async fn chat_completion_batch(
        &self,
        requests: Vec<ChatCompletionRequest>,
        concurrency_limit: Option<usize>,
    ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
        batch_utils::process_batch_concurrent(self, requests, concurrency_limit).await
    }
}
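
// Illustrative usage sketch (not part of the trait itself): drain the boxed
// stream returned by `chat_completion_stream` and concatenate the text deltas.
// It only relies on items defined in this module; which type implements
// `ChatApi` and how the request is built is left to the caller.
#[allow(dead_code)]
async fn collect_stream_text<T: ChatApi + ?Sized>(
    api: &T,
    request: ChatCompletionRequest,
) -> Result<String, AiLibError> {
    use futures::stream::StreamExt;

    let mut stream = api.chat_completion_stream(request).await?;
    let mut text = String::new();
    while let Some(chunk) = stream.next().await {
        // Each chunk may carry several choices; append any content deltas.
        for choice in chunk?.choices {
            if let Some(content) = choice.delta.content {
                text.push_str(&content);
            }
        }
    }
    Ok(text)
}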

/// Streaming response data chunk
#[derive(Debug, Clone)]
pub struct ChatCompletionChunk {
    pub id: String,
    pub object: String,
    pub created: u64,
    pub model: String,
    pub choices: Vec<ChoiceDelta>,
}

/// Streaming response choice delta
#[derive(Debug, Clone)]
pub struct ChoiceDelta {
    pub index: u32,
    pub delta: MessageDelta,
    pub finish_reason: Option<String>,
}

/// Message delta
#[derive(Debug, Clone)]
pub struct MessageDelta {
    pub role: Option<Role>,
    pub content: Option<String>,
}
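
// Illustrative sketch of how a provider adapter might emit one streamed delta.
// All field values are placeholders (no real provider is implied); the role is
// left as `None` here because a role is typically only sent on the first chunk.
#[allow(dead_code)]
fn example_text_delta(content: &str) -> ChatCompletionChunk {
    ChatCompletionChunk {
        id: "chatcmpl-example".to_string(),
        object: "chat.completion.chunk".to_string(),
        created: 0,
        model: "example-model".to_string(),
        choices: vec![ChoiceDelta {
            index: 0,
            delta: MessageDelta {
                role: None,
                content: Some(content.to_string()),
            },
            // `finish_reason` stays empty until the final chunk of the stream.
            finish_reason: None,
        }],
    }
}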

/// Model information
///
/// Describes a single model available from a provider
#[derive(Debug, Clone)]
pub struct ModelInfo {
    pub id: String,
    pub object: String,
    pub created: u64,
    pub owned_by: String,
    pub permission: Vec<ModelPermission>,
}

/// Model permission
#[derive(Debug, Clone)]
pub struct ModelPermission {
    pub id: String,
    pub object: String,
    pub created: u64,
    pub allow_create_engine: bool,
    pub allow_sampling: bool,
    pub allow_logprobs: bool,
    pub allow_search_indices: bool,
    pub allow_view: bool,
    pub allow_fine_tuning: bool,
    pub organization: String,
    pub group: Option<String>,
    pub is_blocking: bool,
}

// Import the Role type as it's also needed in the streaming response types above
use crate::types::Role;

/// Batch processing result containing successful and failed responses
#[derive(Debug)]
pub struct BatchResult {
    pub successful: Vec<ChatCompletionResponse>,
    pub failed: Vec<(usize, AiLibError)>,
    pub total_requests: usize,
    pub total_successful: usize,
    pub total_failed: usize,
}

impl BatchResult {
    /// Create a new batch result
    pub fn new(total_requests: usize) -> Self {
        Self {
            successful: Vec::new(),
            failed: Vec::new(),
            total_requests,
            total_successful: 0,
            total_failed: 0,
        }
    }

    /// Add a successful response
    pub fn add_success(&mut self, response: ChatCompletionResponse) {
        self.successful.push(response);
        self.total_successful += 1;
    }

    /// Add a failed response with index
    pub fn add_failure(&mut self, index: usize, error: AiLibError) {
        self.failed.push((index, error));
        self.total_failed += 1;
    }

    /// Check if all requests were successful
    pub fn all_successful(&self) -> bool {
        self.total_failed == 0
    }

    /// Get success rate as a percentage
    pub fn success_rate(&self) -> f64 {
        if self.total_requests == 0 {
            0.0
        } else {
            (self.total_successful as f64 / self.total_requests as f64) * 100.0
        }
    }
}
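
// Illustrative sketch (an assumption of this rewrite, not an existing API):
// fold the ordered per-request results returned by `chat_completion_batch`
// into a `BatchResult` summary using the methods defined above.
#[allow(dead_code)]
fn summarize_batch(results: Vec<Result<ChatCompletionResponse, AiLibError>>) -> BatchResult {
    let mut summary = BatchResult::new(results.len());
    for (index, result) in results.into_iter().enumerate() {
        match result {
            Ok(response) => summary.add_success(response),
            Err(error) => summary.add_failure(index, error),
        }
    }
    summary
}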

/// Batch processing utility functions
pub mod batch_utils {
    use super::*;
    use futures::stream::{self, StreamExt};
    use std::sync::Arc;
    use tokio::sync::Semaphore;

    /// Default implementation for concurrent batch processing
    pub async fn process_batch_concurrent<T: ChatApi + ?Sized>(
        api: &T,
        requests: Vec<ChatCompletionRequest>,
        concurrency_limit: Option<usize>,
    ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
        if requests.is_empty() {
            return Ok(Vec::new());
        }

        let semaphore = concurrency_limit.map(|limit| Arc::new(Semaphore::new(limit)));

        let futures = requests.into_iter().enumerate().map(|(index, request)| {
            let api_ref = api;
            let semaphore_ref = semaphore.clone();

            async move {
                // Acquire permit if concurrency limit is set
                let _permit = if let Some(sem) = &semaphore_ref {
                    match sem.acquire().await {
                        Ok(permit) => Some(permit),
                        Err(_) => {
                            return (
                                index,
                                Err(AiLibError::ProviderError(
                                    "Failed to acquire semaphore permit".to_string(),
                                )),
                            )
                        }
                    }
                } else {
                    None
                };

                // Process the request
                let result = api_ref.chat_completion(request).await;

                // Return result with index for ordering
                (index, result)
            }
        });

        // Execute all futures concurrently
        let results: Vec<_> = stream::iter(futures)
            .buffer_unordered(concurrency_limit.unwrap_or(usize::MAX))
            .collect()
            .await;

        // Sort results by original index to maintain order
        let mut sorted_results = Vec::with_capacity(results.len());
        sorted_results.resize_with(results.len(), || {
            Err(AiLibError::ProviderError("Placeholder".to_string()))
        });
        for (index, result) in results {
            sorted_results[index] = result;
        }

        Ok(sorted_results)
    }

    /// Sequential batch processing implementation
    pub async fn process_batch_sequential<T: ChatApi + ?Sized>(
        api: &T,
        requests: Vec<ChatCompletionRequest>,
    ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
        let mut results = Vec::with_capacity(requests.len());

        for request in requests {
            let result = api.chat_completion(request).await;
            results.push(result);
        }

        Ok(results)
    }

    /// Smart batch processing: automatically choose a processing strategy based on batch size
    pub async fn process_batch_smart<T: ChatApi + ?Sized>(
        api: &T,
        requests: Vec<ChatCompletionRequest>,
        concurrency_limit: Option<usize>,
    ) -> Result<Vec<Result<ChatCompletionResponse, AiLibError>>, AiLibError> {
        let request_count = requests.len();

        // For small batches, use sequential processing
        if request_count <= 3 {
            return process_batch_sequential(api, requests).await;
        }

        // For larger batches, use concurrent processing
        process_batch_concurrent(api, requests, concurrency_limit).await
    }
}
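
// Usage sketch (hedged assumption): `client` stands in for any concrete type
// that implements `ChatApi`, and `requests` for a prepared
// `Vec<ChatCompletionRequest>`; neither exists in this module. Passing
// `Some(8)` caps the number of in-flight requests at eight.
//
// let results = client.chat_completion_batch(requests, Some(8)).await?;
// for (i, result) in results.iter().enumerate() {
//     match result {
//         Ok(_) => println!("request {i} succeeded"),
//         Err(err) => eprintln!("request {i} failed: {err:?}"),
//     }
// }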