pub struct ModelArray {
pub name: String,
pub endpoints: Vec<ModelEndpoint>,
pub strategy: LoadBalancingStrategy,
pub health_check: HealthCheckConfig,
}
Model array for load balancing and A/B testing.
This struct groups multiple model endpoints under a single name and routes requests among them according to a configurable load balancing strategy and health-check configuration.
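A minimal construction sketch (not taken from the crate's shipped examples; the import path is an assumption, e.g. that the types are re-exported at the crate root): build an array, register an endpoint, and pick one per request.

use ai_lib::{LoadBalancingStrategy, ModelArray, ModelEndpoint}; // import path is an assumption

fn build_array() -> ModelArray {
    // Builder-style setup: name the array and choose a strategy.
    let mut array = ModelArray::new("my-production-array")
        .with_strategy(LoadBalancingStrategy::RoundRobin);

    // Register an endpoint; field names follow the repository example below.
    array.add_endpoint(ModelEndpoint {
        name: "primary".to_string(),
        model_name: "llama3-8b-8192".to_string(),
        url: "https://api.groq.com/openai/v1".to_string(),
        weight: 1.0,
        healthy: true,
        connection_count: 0,
    });
    array
}

fn route_one(array: &mut ModelArray) {
    // select_endpoint applies the configured strategy; None means nothing was selected.
    if let Some(endpoint) = array.select_endpoint() {
        println!("routing to {} ({})", endpoint.name, endpoint.url);
        endpoint.connection_count += 1;
    }
}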
Fields
name: String
Array name/identifier
endpoints: Vec<ModelEndpoint>
Model endpoints in the array
strategy: LoadBalancingStrategy
Load balancing strategy
health_check: HealthCheckConfig
Health check configuration
Implementations
impl ModelArray
pub fn new(name: &str) -> Self
Creates a new model array with the given name.
Examples found in repository
examples/model_management.rs (line 113)
8async fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("🚀 AI-lib Model Management Tools Example");
10 println!("======================================");
11
12 // Example 1: Create a custom model manager for Groq
13 println!("\n📋 Example 1: Custom Model Manager for Groq");
14 println!(" Building a model manager with multiple models and selection strategies");
15
16 let mut groq_manager =
17 CustomModelManager::new("groq").with_strategy(ModelSelectionStrategy::PerformanceBased);
18
19 // Add different Groq models with their capabilities
20 let llama3_8b = ModelInfo {
21 name: "llama3-8b-8192".to_string(),
22 display_name: "Llama 3 8B".to_string(),
23 description: "Fast and cost-effective model for general tasks".to_string(),
24 capabilities: ModelCapabilities::new()
25 .with_chat()
26 .with_code_generation()
27 .with_context_window(8192),
28 pricing: PricingInfo::new(0.05, 0.10), // $0.05/1K input, $0.10/1K output
29 performance: PerformanceMetrics::new()
30 .with_speed(SpeedTier::Fast)
31 .with_quality(QualityTier::Good)
32 .with_avg_response_time(Duration::from_millis(500)),
33 metadata: std::collections::HashMap::new(),
34 };
35
36 let llama3_70b = ModelInfo {
37 name: "llama3-70b-8192".to_string(),
38 display_name: "Llama 3 70B".to_string(),
39 description: "High-performance model for complex tasks".to_string(),
40 capabilities: ModelCapabilities::new()
41 .with_chat()
42 .with_code_generation()
43 .with_function_calling()
44 .with_context_window(8192),
45 pricing: PricingInfo::new(0.59, 1.99), // $0.59/1K input, $1.99/1K output
46 performance: PerformanceMetrics::new()
47 .with_speed(SpeedTier::Slow)
48 .with_quality(QualityTier::Excellent)
49 .with_avg_response_time(Duration::from_secs(3)),
50 metadata: std::collections::HashMap::new(),
51 };
52
53 let mixtral = ModelInfo {
54 name: "mixtral-8x7b-32768".to_string(),
55 display_name: "Mixtral 8x7B".to_string(),
56 description: "Balanced performance and cost model".to_string(),
57 capabilities: ModelCapabilities::new()
58 .with_chat()
59 .with_code_generation()
60 .with_context_window(32768),
61 pricing: PricingInfo::new(0.14, 0.42), // $0.14/1K input, $0.42/1K output
62 performance: PerformanceMetrics::new()
63 .with_speed(SpeedTier::Balanced)
64 .with_quality(QualityTier::Good)
65 .with_avg_response_time(Duration::from_secs(1)),
66 metadata: std::collections::HashMap::new(),
67 };
68
69 // Add models to the manager
70 groq_manager.add_model(llama3_8b);
71 groq_manager.add_model(llama3_70b);
72 groq_manager.add_model(mixtral);
73
74 println!(
75 "✅ Added {} models to Groq manager",
76 groq_manager.models.len()
77 );
78
79 // Demonstrate model selection
80 if let Some(selected_model) = groq_manager.select_model() {
81 println!(
82 "🎯 Selected model: {} ({})",
83 selected_model.display_name, selected_model.name
84 );
85 println!(
86 " Cost: ${:.3}/1K input, ${:.3}/1K output",
87 selected_model.pricing.input_cost_per_1k, selected_model.pricing.output_cost_per_1k
88 );
89 }
90
91 // Example 2: Model recommendation for specific use cases
92 println!("\n📋 Example 2: Model Recommendation for Use Cases");
93
94 if let Some(recommended_model) = groq_manager.recommend_for("chat") {
95 println!(
96 "💬 Chat recommendation: {} ({})",
97 recommended_model.display_name, recommended_model.name
98 );
99 }
100
101 if let Some(recommended_model) = groq_manager.recommend_for("code_generation") {
102 println!(
103 "💻 Code generation recommendation: {} ({})",
104 recommended_model.display_name, recommended_model.name
105 );
106 }
107
108 // Example 3: Create a model array for load balancing
109 println!("\n📋 Example 3: Model Array for Load Balancing");
110 println!(" Building a load-balanced array of model endpoints");
111
112 let mut groq_array =
113 ModelArray::new("groq-production").with_strategy(LoadBalancingStrategy::RoundRobin);
114
115 // Add multiple endpoints for the same model
116 let endpoint1 = ModelEndpoint {
117 name: "groq-us-east-1".to_string(),
118 model_name: "llama3-8b-8192".to_string(),
119 url: "https://api.groq.com/openai/v1".to_string(),
120 weight: 1.0,
121 healthy: true,
122 connection_count: 0,
123 };
124
125 let endpoint2 = ModelEndpoint {
126 name: "groq-us-west-1".to_string(),
127 model_name: "llama3-8b-8192".to_string(),
128 url: "https://api-west.groq.com/openai/v1".to_string(),
129 weight: 1.0,
130 healthy: true,
131 connection_count: 0,
132 };
133
134 let endpoint3 = ModelEndpoint {
135 name: "groq-eu-west-1".to_string(),
136 model_name: "llama3-8b-8192".to_string(),
137 url: "https://api-eu.groq.com/openai/v1".to_string(),
138 weight: 0.8, // Slightly lower weight for EU region
139 healthy: true,
140 connection_count: 0,
141 };
142
143 groq_array.add_endpoint(endpoint1);
144 groq_array.add_endpoint(endpoint2);
145 groq_array.add_endpoint(endpoint3);
146
147 println!(
148 "✅ Added {} endpoints to Groq array",
149 groq_array.endpoints.len()
150 );
151
152 // Demonstrate load balancing
153 for i in 0..5 {
154 if let Some(endpoint) = groq_array.select_endpoint() {
155 println!(
156 "🔄 Request {} routed to: {} ({})",
157 i + 1,
158 endpoint.name,
159 endpoint.url
160 );
161
162 // Simulate connection tracking
163 endpoint.connection_count += 1;
164 }
165 }
166
167 // Example 4: Cost analysis and comparison
168 println!("\n📋 Example 4: Cost Analysis and Comparison");
169
170 let test_input_tokens = 1000;
171 let test_output_tokens = 500;
172
173 println!(
174 "💰 Cost comparison for {} input + {} output tokens:",
175 test_input_tokens, test_output_tokens
176 );
177
178 for model in groq_manager.list_models() {
179 let cost = model
180 .pricing
181 .calculate_cost(test_input_tokens, test_output_tokens);
182 println!(" {}: ${:.4}", model.display_name, cost);
183 }
184
185 // Example 5: Performance-based model selection
186 println!("\n📋 Example 5: Performance-Based Model Selection");
187
188 let mut performance_manager = groq_manager.clone();
189 performance_manager =
190 performance_manager.with_strategy(ModelSelectionStrategy::PerformanceBased);
191
192 if let Some(best_model) = performance_manager.select_model() {
193 println!(
194 "🏆 Best performance model: {} ({})",
195 best_model.display_name, best_model.name
196 );
197 println!(
198 " Speed: {:?}, Quality: {:?}",
199 best_model.performance.speed, best_model.performance.quality
200 );
201 }
202
203 // Example 6: Cost-based model selection
204 println!("\n📋 Example 6: Cost-Based Model Selection");
205
206 let mut cost_manager = groq_manager.clone();
207 cost_manager = cost_manager.with_strategy(ModelSelectionStrategy::CostBased);
208
209 if let Some(cheapest_model) = cost_manager.select_model() {
210 println!(
211 "💸 Most cost-effective model: {} ({})",
212 cheapest_model.display_name, cheapest_model.name
213 );
214 println!(
215 " Cost per 1K tokens: ${:.3}",
216 cheapest_model.pricing.input_cost_per_1k + cheapest_model.pricing.output_cost_per_1k
217 );
218 }
219
220 println!("\n🎉 Model management examples completed successfully!");
221 println!("\n💡 Key benefits of these tools:");
222 println!(" • Build custom model managers for any provider");
223 println!(" • Implement sophisticated model selection strategies");
224 println!(" • Create load-balanced model arrays");
225 println!(" • Analyze costs and performance metrics");
226 println!(" • Recommend models for specific use cases");
227
228 Ok(())
229}
pub fn add_endpoint(&mut self, endpoint: ModelEndpoint)
Adds an endpoint to the array.
Examples found in repository
examples/model_management.rs (line 143) — the same listing as shown under new() above.
pub fn with_strategy(self, strategy: LoadBalancingStrategy) -> Self
Sets the load balancing strategy and returns the array (builder-style).
Examples found in repository
examples/model_management.rs (line 113) — the same listing as shown under new() above.
pub fn with_health_check(self, config: HealthCheckConfig) -> Self
Sets the health-check configuration and returns the array (builder-style).
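A hedged sketch of enabling health checks; HealthCheckConfig::default() is an assumption rather than a documented constructor, so substitute whatever construction the crate actually provides.

// Assumes HealthCheckConfig implements Default; adjust to the crate's real constructor.
let array = ModelArray::new("my-production-array")
    .with_strategy(LoadBalancingStrategy::RoundRobin)
    .with_health_check(HealthCheckConfig::default());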
pub fn select_endpoint(&mut self) -> Option<&mut ModelEndpoint>
Selects the next endpoint according to the configured load balancing strategy.
Examples found in repository
examples/model_management.rs (line 154) — the same listing as shown under new() above.
pub fn mark_unhealthy(&mut self, endpoint_name: &str)
Marks the named endpoint as unhealthy.
pub fn mark_healthy(&mut self, endpoint_name: &str)
Marks the named endpoint as healthy.
pub fn is_healthy(&self) -> bool
Returns the overall health status of the array.
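A small application-side sketch (not from the crate) combining mark_unhealthy, mark_healthy, and is_healthy for basic failover bookkeeping:

// Take a failing endpoint out of rotation and report if the array is exhausted.
fn on_endpoint_failure(array: &mut ModelArray, endpoint_name: &str) {
    array.mark_unhealthy(endpoint_name);
    if !array.is_healthy() {
        eprintln!("model array '{}' has no healthy capacity left", array.name);
    }
}

// Put an endpoint back into rotation once a later probe succeeds.
fn on_endpoint_recovery(array: &mut ModelArray, endpoint_name: &str) {
    array.mark_healthy(endpoint_name);
}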
Auto Trait Implementations
impl Freeze for ModelArray
impl RefUnwindSafe for ModelArray
impl Send for ModelArray
impl Sync for ModelArray
impl Unpin for ModelArray
impl UnwindSafe for ModelArray
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.