ultrafast_models_sdk/lib.rs
//! # Ultrafast Models SDK
//!
//! A high-performance Rust SDK for interacting with multiple AI/LLM providers
//! through a unified interface. The SDK provides seamless integration with
//! various AI services including OpenAI, Anthropic, Google, and more.
//!
//! ## Overview
//!
//! The Ultrafast Models SDK provides:
//! - **Unified Interface**: Single API for multiple AI providers
//! - **Intelligent Routing**: Automatic provider selection and load balancing
//! - **Circuit Breakers**: Automatic failover and recovery mechanisms
//! - **Caching Layer**: Built-in response caching for performance
//! - **Rate Limiting**: Per-provider rate limiting and throttling
//! - **Error Handling**: Comprehensive error handling and retry logic
//! - **Metrics Collection**: Performance monitoring and analytics
//!
//! ## Supported Providers
//!
//! The SDK supports a wide range of AI providers:
//!
//! - **OpenAI**: GPT-4, GPT-3.5, and other OpenAI models
//! - **Anthropic**: Claude-3, Claude-2, and Claude Instant
//! - **Google**: Gemini Pro, Gemini Pro Vision, and PaLM
//! - **Azure OpenAI**: Azure-hosted OpenAI models
//! - **Ollama**: Local and remote Ollama instances
//! - **Mistral AI**: Mistral 7B, Mixtral, and other models
//! - **Cohere**: Command, Command R, and other Cohere models
//! - **Custom Providers**: Extensible provider system (see the sketch below)
//!
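//! Custom providers are described by a [`ProviderConfig`]. A minimal sketch
//! using only the constructor and defaults exercised by this crate's unit
//! tests; how the config is then registered is left to the [`providers`]
//! module:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::ProviderConfig;
//!
//! // Defaults per the unit tests: 30s timeout, 3 retries, enabled.
//! let config = ProviderConfig::new("my-custom-provider", "my-api-key");
//! assert!(config.enabled);
//! ```
//!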
//! ## Quick Start
//!
//! ```rust,no_run
//! use ultrafast_models_sdk::{UltrafastClient, ChatRequest, Message};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Create a client with multiple providers
//!     let client = UltrafastClient::standalone()
//!         .with_openai("your-openai-key")
//!         .with_anthropic("your-anthropic-key")
//!         .with_ollama("http://localhost:11434")
//!         .build()?;
//!
//!     // Create a chat request
//!     let request = ChatRequest {
//!         model: "gpt-4".to_string(),
//!         messages: vec![Message::user("Hello, world!")],
//!         temperature: Some(0.7),
//!         max_tokens: Some(100),
//!         stream: Some(false),
//!         ..Default::default()
//!     };
//!
//!     // Send the request
//!     let response = client.chat_completion(request).await?;
//!     println!("Response: {}", response.choices[0].message.content);
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Client Modes
//!
//! The SDK supports two client modes:
//!
//! ### Standalone Mode
//!
//! Direct provider communication without a gateway:
//!
//! ```rust,ignore
//! let client = UltrafastClient::standalone()
//!     .with_openai("your-key")
//!     .with_anthropic("your-key")
//!     .build()?;
//! ```
//!
//! ### Gateway Mode
//!
//! Communication through the Ultrafast Gateway:
//!
//! ```rust,ignore
//! let client = UltrafastClient::gateway("http://localhost:3000")
//!     .with_api_key("your-gateway-key")
//!     .build()?;
//! ```
//!
//! ## Routing Strategies
//!
//! The SDK provides multiple routing strategies:
//!
//! - **Single Provider**: Route all requests to one provider
//! - **Load Balancing**: Distribute requests across providers
//! - **Fallback**: Primary provider with automatic failover
//! - **Conditional Routing**: Route based on request characteristics
//! - **A/B Testing**: Route requests for testing different providers (see the
//!   sketch after the examples below)
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::routing::RoutingStrategy;
//!
//! // Load balancing with custom weights
//! let client = UltrafastClient::standalone()
//!     .with_openai("openai-key")
//!     .with_anthropic("anthropic-key")
//!     .with_routing_strategy(RoutingStrategy::LoadBalance {
//!         weights: vec![0.6, 0.4], // 60% OpenAI, 40% Anthropic
//!     })
//!     .build()?;
//!
//! // Fallback strategy
//! let client = UltrafastClient::standalone()
//!     .with_openai("primary-key")
//!     .with_anthropic("fallback-key")
//!     .with_routing_strategy(RoutingStrategy::Fallback)
//!     .build()?;
//! ```
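//!
//! The remaining variants are constructed the same way. A sketch of an A/B
//! split and an (empty) conditional rule set, mirroring this crate's unit
//! tests; interpreting `split` as the fraction of traffic sent to the first
//! provider is an assumption, not verified against the routing implementation:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::routing::RoutingStrategy;
//!
//! // Send roughly half of the requests to each configured provider.
//! let client = UltrafastClient::standalone()
//!     .with_openai("openai-key")
//!     .with_anthropic("anthropic-key")
//!     .with_routing_strategy(RoutingStrategy::ABTesting { split: 0.5 })
//!     .build()?;
//!
//! // Conditional routing takes a list of rules; see the `routing` module
//! // for the `RoutingRule` and `Condition` types.
//! let strategy = RoutingStrategy::Conditional { rules: vec![] };
//! ```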
//!
//! ## Advanced Features
//!
//! ### Circuit Breakers
//!
//! Automatic failover and recovery:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::circuit_breaker::CircuitBreakerConfig;
//! use std::time::Duration;
//!
//! let client = UltrafastClient::standalone()
//!     .with_openai("your-key")
//!     .with_circuit_breaker_config(CircuitBreakerConfig {
//!         failure_threshold: 5,
//!         recovery_timeout: Duration::from_secs(60),
//!         request_timeout: Duration::from_secs(30),
//!         half_open_max_calls: 3,
//!     })
//!     .build()?;
//! ```
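//!
//! The breaker's state is exposed as a [`CircuitState`]. A sketch of reacting
//! to it; the `Closed`/`Open`/`HalfOpen` variant names are assumed from the
//! conventional circuit-breaker pattern rather than confirmed here:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::CircuitState;
//!
//! fn describe(state: CircuitState) -> &'static str {
//!     match state {
//!         CircuitState::Closed => "healthy: requests flow normally",
//!         CircuitState::Open => "tripped: requests fail fast",
//!         CircuitState::HalfOpen => "probing: limited trial requests allowed",
//!     }
//! }
//! ```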
//!
//! ### Caching
//!
//! Built-in response caching:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::cache::CacheConfig;
//! use std::time::Duration;
//!
//! let client = UltrafastClient::standalone()
//!     .with_openai("your-key")
//!     .with_cache_config(CacheConfig {
//!         enabled: true,
//!         ttl: Duration::from_secs(60 * 60), // one hour
//!         max_size: 1000,
//!     })
//!     .build()?;
//! ```
//!
//! ### Rate Limiting
//!
//! Per-provider rate limiting:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::rate_limiting::RateLimitConfig;
//!
//! let client = UltrafastClient::standalone()
//!     .with_openai("your-key")
//!     .with_rate_limit_config(RateLimitConfig {
//!         requests_per_minute: 100,
//!         tokens_per_minute: 10000,
//!         burst_size: 10,
//!     })
//!     .build()?;
//! ```
//!
//! ## API Examples
//!
//! ### Chat Completions
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::{ChatRequest, Message};
//!
//! let request = ChatRequest {
//!     model: "gpt-4".to_string(),
//!     messages: vec![
//!         Message::system("You are a helpful assistant."),
//!         Message::user("What is the capital of France?"),
//!     ],
//!     temperature: Some(0.7),
//!     max_tokens: Some(150),
//!     stream: Some(false),
//!     ..Default::default()
//! };
//!
//! let response = client.chat_completion(request).await?;
//! println!("Response: {}", response.choices[0].message.content);
//! ```
//!
//! ### Streaming Responses
//!
//! ```rust,ignore
//! use futures::StreamExt;
//!
//! let mut stream = client
//!     .stream_chat_completion(ChatRequest {
//!         model: "gpt-4".to_string(),
//!         messages: vec![Message::user("Tell me a story")],
//!         stream: Some(true),
//!         ..Default::default()
//!     })
//!     .await?;
//!
//! while let Some(chunk) = stream.next().await {
//!     match chunk {
//!         Ok(chunk) => {
//!             if let Some(content) = &chunk.choices[0].delta.content {
//!                 print!("{}", content);
//!             }
//!         }
//!         Err(e) => eprintln!("Error: {:?}", e),
//!     }
//! }
//! ```
//!
//! ### Embeddings
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::{EmbeddingRequest, EmbeddingInput};
//!
//! let request = EmbeddingRequest {
//!     model: "text-embedding-ada-002".to_string(),
//!     input: EmbeddingInput::String("This is a test sentence.".to_string()),
//!     ..Default::default()
//! };
//!
//! let response = client.embedding(request).await?;
//! println!("Embedding dimensions: {}", response.data[0].embedding.len());
//! ```
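//!
//! Embedding vectors are typically compared by cosine similarity. A
//! self-contained sketch, assuming each returned `embedding` is a `Vec<f32>`:
//!
//! ```rust
//! fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
//!     let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
//!     let norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
//!     dot / (norm(a) * norm(b))
//! }
//!
//! // Identical directions score 1.0; orthogonal directions score 0.0.
//! assert!((cosine_similarity(&[1.0, 0.0], &[1.0, 0.0]) - 1.0).abs() < 1e-6);
//! assert!(cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]).abs() < 1e-6);
//! ```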
//!
//! ### Image Generation
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::ImageRequest;
//!
//! let request = ImageRequest {
//!     model: "dall-e-3".to_string(),
//!     prompt: "A beautiful sunset over the ocean".to_string(),
//!     n: Some(1),
//!     size: Some("1024x1024".to_string()),
//!     ..Default::default()
//! };
//!
//! let response = client.generate_image(request).await?;
//! println!("Image URL: {}", response.data[0].url);
//! ```
//!
//! ## Error Handling
//!
//! Comprehensive error handling with specific error types:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::error::ClientError;
//!
//! match client.chat_completion(request).await {
//!     Ok(response) => println!("Success: {:?}", response),
//!     Err(ClientError::AuthenticationError { .. }) => {
//!         eprintln!("Authentication failed");
//!     }
//!     Err(ClientError::RateLimitExceeded { retry_after, .. }) => {
//!         eprintln!("Rate limit exceeded, retry after: {:?}", retry_after);
//!     }
//!     Err(ClientError::ProviderError { provider, message, .. }) => {
//!         eprintln!("Provider {} error: {}", provider, message);
//!     }
//!     Err(e) => eprintln!("Other error: {:?}", e),
//! }
//! ```
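//!
//! A sketch of honoring `retry_after` with a single retry. This assumes
//! `retry_after` is an `Option<Duration>` and that the request type is
//! `Clone`; both are assumptions, not confirmed by the error module:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::error::ClientError;
//!
//! let response = match client.chat_completion(request.clone()).await {
//!     Err(ClientError::RateLimitExceeded { retry_after, .. }) => {
//!         // Wait out the provider's hint (or a fixed fallback), then retry once.
//!         let delay = retry_after.unwrap_or(std::time::Duration::from_secs(1));
//!         tokio::time::sleep(delay).await;
//!         client.chat_completion(request).await?
//!     }
//!     other => other?,
//! };
//! ```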
//!
//! ## Configuration
//!
//! Advanced client configuration:
//!
//! ```rust,ignore
//! use ultrafast_models_sdk::{UltrafastClient, ClientConfig};
//! use std::time::Duration;
//!
//! let config = ClientConfig {
//!     timeout: Duration::from_secs(30),
//!     max_retries: 3,
//!     retry_delay: Duration::from_secs(1),
//!     user_agent: Some("MyApp/1.0".to_string()),
//!     ..Default::default()
//! };
//!
//! let client = UltrafastClient::standalone()
//!     .with_config(config)
//!     .with_openai("your-key")
//!     .build()?;
//! ```
//!
//! ## Testing
//!
//! The SDK can be exercised with standard async tests:
//!
//! ```rust,ignore
//! #[cfg(test)]
//! mod tests {
//!     use super::*;
//!
//!     #[tokio::test]
//!     async fn test_chat_completion() {
//!         let client = UltrafastClient::standalone()
//!             .with_openai("test-key")
//!             .build()
//!             .unwrap();
//!
//!         let request = ChatRequest {
//!             model: "gpt-4".to_string(),
//!             messages: vec![Message::user("Hello")],
//!             ..Default::default()
//!         };
//!
//!         let result = client.chat_completion(request).await;
//!         assert!(result.is_ok());
//!     }
//! }
//! ```
//!
//! ## Performance Optimization
//!
//! Tips for optimal performance:
//!
//! ```rust,ignore
//! use std::time::Duration;
//!
//! // Use connection pooling
//! let client = UltrafastClient::standalone()
//!     .with_connection_pool_size(10)
//!     .with_openai("your-key")
//!     .build()?;
//!
//! // Enable compression
//! let client = UltrafastClient::standalone()
//!     .with_compression(true)
//!     .with_openai("your-key")
//!     .build()?;
//!
//! // Configure timeouts
//! let client = UltrafastClient::standalone()
//!     .with_timeout(Duration::from_secs(15))
//!     .with_openai("your-key")
//!     .build()?;
//! ```
//!
//! ## Migration Guide
//!
//! ### From OpenAI SDK
//!
//! ```rust,ignore
//! // Before (OpenAI SDK)
//! use openai::Client;
//! let client = Client::new("your-key");
//! let response = client.chat().create(request).await?;
//!
//! // After (Ultrafast SDK)
//! use ultrafast_models_sdk::UltrafastClient;
//! let client = UltrafastClient::standalone()
//!     .with_openai("your-key")
//!     .build()?;
//! let response = client.chat_completion(request).await?;
//! ```
//!
//! ### From Anthropic SDK
//!
//! ```rust,ignore
//! // Before (Anthropic SDK)
//! use anthropic::Client;
//! let client = Client::new("your-key");
//! let response = client.messages().create(request).await?;
//!
//! // After (Ultrafast SDK)
//! use ultrafast_models_sdk::UltrafastClient;
//! let client = UltrafastClient::standalone()
//!     .with_anthropic("your-key")
//!     .build()?;
//! let response = client.chat_completion(request).await?;
//! ```
//!
//! ## Contributing
//!
//! We welcome contributions! Please see our contributing guide for details on:
//!
//! - Code style and formatting
//! - Testing requirements
//! - Documentation standards
//! - Pull request process
//!
//! ## License
//!
//! This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
//!
//! ## Support
//!
//! For support and questions:
//!
//! - **Issues**: [GitHub Issues](https://github.com/techgopal/ultrafast-ai-gateway/issues)
//! - **Discussions**: [GitHub Discussions](https://github.com/techgopal/ultrafast-ai-gateway/discussions)
//! - **Documentation**: [Project Wiki](https://github.com/techgopal/ultrafast-ai-gateway/wiki)

pub mod cache;
pub mod circuit_breaker;
pub mod client;
pub mod common;
pub mod error;
pub mod models;
pub mod providers;
pub mod routing;

pub use circuit_breaker::{CircuitBreaker, CircuitBreakerConfig, CircuitState};
pub use client::{ClientMode, UltrafastClient, UltrafastClientBuilder};
pub use error::{ClientError, ProviderError};
pub use models::{
    AudioRequest, AudioResponse, ChatRequest, ChatResponse, Choice, EmbeddingRequest,
    EmbeddingResponse, ImageRequest, ImageResponse, Message, Role, SpeechRequest, SpeechResponse,
    Usage,
};
pub use providers::{
    create_provider_with_circuit_breaker, Provider, ProviderConfig, ProviderMetrics,
};
pub use routing::{Condition, RoutingRule, RoutingStrategy};

/// Result type for SDK operations.
///
/// This is a convenience type alias for SDK operations that can fail.
/// It uses `ClientError` as the error type.
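///
/// A sketch of a helper that returns the alias directly; the request shape
/// follows the examples above:
///
/// ```rust,ignore
/// use ultrafast_models_sdk::{ChatRequest, ChatResponse, Message, Result, UltrafastClient};
///
/// async fn ask(client: &UltrafastClient, prompt: &str) -> Result<ChatResponse> {
///     client
///         .chat_completion(ChatRequest {
///             model: "gpt-4".to_string(),
///             messages: vec![Message::user(prompt)],
///             ..Default::default()
///         })
///         .await
/// }
/// ```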
pub type Result<T> = std::result::Result<T, ClientError>;

#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{ChatRequest, Message, Role};

    #[test]
    fn test_message_creation() {
        let user_msg = Message::user("Hello, world!");
        assert_eq!(user_msg.role, Role::User);
        assert_eq!(user_msg.content, "Hello, world!");

        let assistant_msg = Message::assistant("Hi there!");
        assert_eq!(assistant_msg.role, Role::Assistant);
        assert_eq!(assistant_msg.content, "Hi there!");

        let system_msg = Message::system("You are a helpful assistant.");
        assert_eq!(system_msg.role, Role::System);
        assert_eq!(system_msg.content, "You are a helpful assistant.");
    }

    #[test]
    fn test_chat_request_default() {
        let request = ChatRequest::default();
        assert_eq!(request.model, "");
        assert_eq!(request.messages.len(), 0);
        assert_eq!(request.temperature, None);
        assert_eq!(request.max_tokens, None);
        assert_eq!(request.stream, None);
    }

    #[test]
    fn test_provider_config_creation() {
        let config = ProviderConfig::new("test-provider", "test-key");
        assert_eq!(config.name, "test-provider");
        assert_eq!(config.api_key, "test-key");
        assert_eq!(config.timeout.as_secs(), 30);
        assert_eq!(config.max_retries, 3);
        assert!(config.enabled);
    }

    #[test]
    fn test_routing_strategy_creation() {
        let single = RoutingStrategy::Single;
        let fallback = RoutingStrategy::Fallback;
        let load_balance = RoutingStrategy::LoadBalance {
            weights: vec![0.5, 0.5],
        };
        let conditional = RoutingStrategy::Conditional { rules: vec![] };
        let ab_testing = RoutingStrategy::ABTesting { split: 0.5 };

        assert!(matches!(single, RoutingStrategy::Single));
        assert!(matches!(fallback, RoutingStrategy::Fallback));
        assert!(matches!(load_balance, RoutingStrategy::LoadBalance { .. }));
        assert!(matches!(conditional, RoutingStrategy::Conditional { .. }));
        assert!(matches!(ab_testing, RoutingStrategy::ABTesting { .. }));
    }

    #[test]
    fn test_condition_matching() {
        let context = routing::RoutingContext {
            model: Some("gpt-4".to_string()),
            user_region: Some("us-east-1".to_string()),
            request_size: 1000,
            estimated_tokens: 500,
            user_id: Some("user123".to_string()),
            metadata: std::collections::HashMap::new(),
        };

        let model_condition = Condition::ModelName("gpt-4".to_string());
        assert!(model_condition.matches(&context));

        let region_condition = Condition::UserRegion("us-east-1".to_string());
        assert!(region_condition.matches(&context));

        let size_condition = Condition::RequestSize(500);
        assert!(size_condition.matches(&context));

        let token_condition = Condition::TokenCount(300);
        assert!(token_condition.matches(&context));
    }
}
519}