pub struct ModelArray {
pub name: String,
pub endpoints: Vec<ModelEndpoint>,
pub strategy: LoadBalancingStrategy,
pub health_check: HealthCheckConfig,
}
Model array for load balancing and A/B testing.
This struct groups multiple model endpoints under a single name and routes requests among them according to a configurable load balancing strategy and health-check configuration.
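A minimal construction sketch (not taken from the crate's shipped examples; the import path is an assumption, e.g. that the types are re-exported at the crate root): build an array, register an endpoint, and pick one per request.

use ai_lib::{LoadBalancingStrategy, ModelArray, ModelEndpoint}; // import path is an assumption

fn build_array() -> ModelArray {
    // Builder-style setup: name the array and choose a strategy.
    let mut array = ModelArray::new("my-production-array")
        .with_strategy(LoadBalancingStrategy::RoundRobin);

    // Register an endpoint; field names follow the repository example below.
    array.add_endpoint(ModelEndpoint {
        name: "primary".to_string(),
        model_name: "llama3-8b-8192".to_string(),
        url: "https://api.groq.com/openai/v1".to_string(),
        weight: 1.0,
        healthy: true,
        connection_count: 0,
    });
    array
}

fn route_one(array: &mut ModelArray) {
    // select_endpoint applies the configured strategy; None means nothing was selected.
    if let Some(endpoint) = array.select_endpoint() {
        println!("routing to {} ({})", endpoint.name, endpoint.url);
        endpoint.connection_count += 1;
    }
}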
Fields
name: String
Array name/identifier
endpoints: Vec<ModelEndpoint>
Model endpoints in the array
strategy: LoadBalancingStrategy
Load balancing strategy
health_check: HealthCheckConfig
Health check configuration
Implementations
impl ModelArray
pub fn new(name: &str) -> Self
Creates a new model array with the given name.
Examples found in repository
examples/model_management.rs (line 113)
8async fn main() -> Result<(), Box<dyn std::error::Error>> {
9 println!("🚀 AI-lib Model Management Tools Example");
10 println!("======================================");
11
12 // Example 1: Create a custom model manager for Groq
13 println!("\n📋 Example 1: Custom Model Manager for Groq");
14 println!(" Building a model manager with multiple models and selection strategies");
15
16 let mut groq_manager =
17 CustomModelManager::new("groq").with_strategy(ModelSelectionStrategy::PerformanceBased);
18
19 // Add different Groq models with their capabilities
20 let llama3_8b = ModelInfo {
21 name: "llama3-8b-8192".to_string(),
22 display_name: "Llama 3 8B".to_string(),
23 description: "Fast and cost-effective model for general tasks".to_string(),
24 capabilities: ModelCapabilities::new()
25 .with_chat()
26 .with_code_generation()
27 .with_context_window(8192),
28 pricing: PricingInfo::new(0.05, 0.10), // $0.05/1K input, $0.10/1K output
29 performance: PerformanceMetrics::new()
30 .with_speed(SpeedTier::Fast)
31 .with_quality(QualityTier::Good)
32 .with_avg_response_time(Duration::from_millis(500)),
33 metadata: std::collections::HashMap::new(),
34 };
35
36 let llama3_70b = ModelInfo {
37 name: "llama3-70b-8192".to_string(),
38 display_name: "Llama 3 70B".to_string(),
39 description: "High-performance model for complex tasks".to_string(),
40 capabilities: ModelCapabilities::new()
41 .with_chat()
42 .with_code_generation()
43 .with_function_calling()
44 .with_context_window(8192),
45 pricing: PricingInfo::new(0.59, 1.99), // $0.59/1K input, $1.99/1K output
46 performance: PerformanceMetrics::new()
47 .with_speed(SpeedTier::Slow)
48 .with_quality(QualityTier::Excellent)
49 .with_avg_response_time(Duration::from_secs(3)),
50 metadata: std::collections::HashMap::new(),
51 };
52
53 let mixtral = ModelInfo {
54 name: "mixtral-8x7b-32768".to_string(),
55 display_name: "Mixtral 8x7B".to_string(),
56 description: "Balanced performance and cost model".to_string(),
57 capabilities: ModelCapabilities::new()
58 .with_chat()
59 .with_code_generation()
60 .with_context_window(32768),
61 pricing: PricingInfo::new(0.14, 0.42), // $0.14/1K input, $0.42/1K output
62 performance: PerformanceMetrics::new()
63 .with_speed(SpeedTier::Balanced)
64 .with_quality(QualityTier::Good)
65 .with_avg_response_time(Duration::from_secs(1)),
66 metadata: std::collections::HashMap::new(),
67 };
68
69 // Add models to the manager
70 groq_manager.add_model(llama3_8b);
71 groq_manager.add_model(llama3_70b);
72 groq_manager.add_model(mixtral);
73
74 println!(
75 "✅ Added {} models to Groq manager",
76 groq_manager.models.len()
77 );
78
79 // Demonstrate model selection
80 if let Some(selected_model) = groq_manager.select_model() {
81 println!(
82 "🎯 Selected model: {} ({})",
83 selected_model.display_name, selected_model.name
84 );
85 println!(
86 " Cost: ${:.3}/1K input, ${:.3}/1K output",
87 selected_model.pricing.input_cost_per_1k, selected_model.pricing.output_cost_per_1k
88 );
89 }
90
91 // Example 2: Model recommendation for specific use cases
92 println!("\n📋 Example 2: Model Recommendation for Use Cases");
93
94 if let Some(recommended_model) = groq_manager.recommend_for("chat") {
95 println!(
96 "💬 Chat recommendation: {} ({})",
97 recommended_model.display_name, recommended_model.name
98 );
99 }
100
101 if let Some(recommended_model) = groq_manager.recommend_for("code_generation") {
102 println!(
103 "💻 Code generation recommendation: {} ({})",
104 recommended_model.display_name, recommended_model.name
105 );
106 }
107
108 // Example 3: Create a model array for load balancing
109 println!("\n📋 Example 3: Model Array for Load Balancing");
110 println!(" Building a load-balanced array of model endpoints");
111
112 let mut groq_array =
113 ModelArray::new("groq-production").with_strategy(LoadBalancingStrategy::RoundRobin);
114
115 // Add multiple endpoints for the same model
116 let endpoint1 = ModelEndpoint {
117 name: "groq-us-east-1".to_string(),
118 model_name: "llama3-8b-8192".to_string(),
119 url: "https://api.groq.com/openai/v1".to_string(),
120 weight: 1.0,
121 healthy: true,
122 connection_count: 0,
123 };
124
125 let endpoint2 = ModelEndpoint {
126 name: "groq-us-west-1".to_string(),
127 model_name: "llama3-8b-8192".to_string(),
128 url: "https://api-west.groq.com/openai/v1".to_string(),
129 weight: 1.0,
130 healthy: true,
131 connection_count: 0,
132 };
133
134 let endpoint3 = ModelEndpoint {
135 name: "groq-eu-west-1".to_string(),
136 model_name: "llama3-8b-8192".to_string(),
137 url: "https://api-eu.groq.com/openai/v1".to_string(),
138 weight: 0.8, // Slightly lower weight for EU region
139 healthy: true,
140 connection_count: 0,
141 };
142
143 groq_array.add_endpoint(endpoint1);
144 groq_array.add_endpoint(endpoint2);
145 groq_array.add_endpoint(endpoint3);
146
147 println!(
148 "✅ Added {} endpoints to Groq array",
149 groq_array.endpoints.len()
150 );
151
152 // Demonstrate load balancing
153 for i in 0..5 {
154 if let Some(endpoint) = groq_array.select_endpoint() {
155 println!(
156 "🔄 Request {} routed to: {} ({})",
157 i + 1,
158 endpoint.name,
159 endpoint.url
160 );
161
162 // Simulate connection tracking
163 endpoint.connection_count += 1;
164 }
165 }
166
167 // Example 4: Cost analysis and comparison
168 println!("\n📋 Example 4: Cost Analysis and Comparison");
169
170 let test_input_tokens = 1000;
171 let test_output_tokens = 500;
172
173 println!(
174 "💰 Cost comparison for {} input + {} output tokens:",
175 test_input_tokens, test_output_tokens
176 );
177
178 for model in groq_manager.list_models() {
179 let cost = model
180 .pricing
181 .calculate_cost(test_input_tokens, test_output_tokens);
182 println!(" {}: ${:.4}", model.display_name, cost);
183 }
184
185 // Example 5: Performance-based model selection
186 println!("\n📋 Example 5: Performance-Based Model Selection");
187
188 let mut performance_manager = groq_manager.clone();
189 performance_manager =
190 performance_manager.with_strategy(ModelSelectionStrategy::PerformanceBased);
191
192 if let Some(best_model) = performance_manager.select_model() {
193 println!(
194 "🏆 Best performance model: {} ({})",
195 best_model.display_name, best_model.name
196 );
197 println!(
198 " Speed: {:?}, Quality: {:?}",
199 best_model.performance.speed, best_model.performance.quality
200 );
201 }
202
203 // Example 6: Cost-based model selection
204 println!("\n📋 Example 6: Cost-Based Model Selection");
205
206 let mut cost_manager = groq_manager.clone();
207 cost_manager = cost_manager.with_strategy(ModelSelectionStrategy::CostBased);
208
209 if let Some(cheapest_model) = cost_manager.select_model() {
210 println!(
211 "💸 Most cost-effective model: {} ({})",
212 cheapest_model.display_name, cheapest_model.name
213 );
214 println!(
215 " Cost per 1K tokens: ${:.3}",
216 cheapest_model.pricing.input_cost_per_1k + cheapest_model.pricing.output_cost_per_1k
217 );
218 }
219
220 println!("\n🎉 Model management examples completed successfully!");
221 println!("\n💡 Key benefits of these tools:");
222 println!(" • Build custom model managers for any provider");
223 println!(" • Implement sophisticated model selection strategies");
224 println!(" • Create load-balanced model arrays");
225 println!(" • Analyze costs and performance metrics");
226 println!(" • Recommend models for specific use cases");
227
228 Ok(())
229}
pub fn add_endpoint(&mut self, endpoint: ModelEndpoint)
Adds an endpoint to the array.
Examples found in repository
examples/model_management.rs (line 143) — the same listing as shown under new() above.
pub fn with_strategy(self, strategy: LoadBalancingStrategy) -> Self
Sets the load balancing strategy and returns the array (builder-style).
Examples found in repository
examples/model_management.rs (line 113) — the same listing as shown under new() above.
pub fn with_health_check(self, config: HealthCheckConfig) -> Self
Sets the health-check configuration and returns the array (builder-style).
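A hedged sketch of enabling health checks; HealthCheckConfig::default() is an assumption rather than a documented constructor, so substitute whatever construction the crate actually provides.

// Assumes HealthCheckConfig implements Default; adjust to the crate's real constructor.
let array = ModelArray::new("my-production-array")
    .with_strategy(LoadBalancingStrategy::RoundRobin)
    .with_health_check(HealthCheckConfig::default());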
pub fn select_endpoint(&mut self) -> Option<&mut ModelEndpoint>
Selects the next endpoint according to the configured load balancing strategy.
Examples found in repository
examples/model_management.rs (line 154) — the same listing as shown under new() above.
pub fn mark_unhealthy(&mut self, endpoint_name: &str)
Marks the named endpoint as unhealthy.
pub fn mark_healthy(&mut self, endpoint_name: &str)
Marks the named endpoint as healthy.
pub fn is_healthy(&self) -> bool
Returns the overall health status of the array.
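A small application-side sketch (not from the crate) combining mark_unhealthy, mark_healthy, and is_healthy for basic failover bookkeeping:

// Take a failing endpoint out of rotation and report if the array is exhausted.
fn on_endpoint_failure(array: &mut ModelArray, endpoint_name: &str) {
    array.mark_unhealthy(endpoint_name);
    if !array.is_healthy() {
        eprintln!("model array '{}' has no healthy capacity left", array.name);
    }
}

// Put an endpoint back into rotation once a later probe succeeds.
fn on_endpoint_recovery(array: &mut ModelArray, endpoint_name: &str) {
    array.mark_healthy(endpoint_name);
}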
Auto Trait Implementations
impl Freeze for ModelArray
impl RefUnwindSafe for ModelArray
impl Send for ModelArray
impl Sync for ModelArray
impl Unpin for ModelArray
impl UnwindSafe for ModelArray
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.