1use crate::error::Result;
5
/// Stateless namespace type for the cost-optimization examples.
///
/// It holds no data; the examples are exposed as associated functions on this type.
#[derive(Debug, Clone, Copy, Default)]
pub struct CostOptimizationExample;
11
12impl CostOptimizationExample {
13 #[allow(dead_code)]
15 pub async fn model_tier_selection() -> Result<()> {
16 println!("=== Model Tier Selection for Cost Optimization ===");
17 println!();
18
19 println!("Task-based routing:");
20 println!();
21 println!("Simple tasks (sentiment, classification):");
22 println!(" -> Gemini 1.5 Flash or DeepSeek Chat");
23 println!(" -> Cost: ~$0.14/M tokens");
24 println!(" -> Use when: Fast, cheap answers needed");
25 println!();
26 println!("Medium tasks (code review, summarization):");
27 println!(" -> Claude 3.5 Sonnet or GPT-4 Turbo");
28 println!(" -> Cost: ~$3-10/M tokens");
29 println!(" -> Use when: Quality matters, moderate complexity");
30 println!();
31 println!("Complex tasks (reasoning, critical decisions):");
32 println!(" -> Claude 3 Opus or GPT-4");
33 println!(" -> Cost: ~$15-30/M tokens");
34 println!(" -> Use when: Highest quality required");
35 println!();
36 println!("Development/testing:");
37 println!(" -> Ollama (local, free)");
38 println!(" -> Cost: $0");
39 println!(" -> Use when: Prototyping, no API costs desired");
40
41 Ok(())
42 }
43
44 #[allow(dead_code)]
46 pub async fn complexity_based_routing(openai_key: &str, anthropic_key: &str) -> Result<()> {
47 use crate::llm::LlmClientBuilder;
48
49 println!("=== Automatic Complexity-Based Routing ===");
50 println!();
51
52 let _client = LlmClientBuilder::new()
53 .openai_api_key(openai_key)
54 .anthropic_api_key(anthropic_key)
55 .build()
56 .expect("Failed to build LLM client");
57
58 println!("Automatic routing:");
59 println!(" Simple task -> gemini-1.5-flash");
60 println!(" Medium task -> claude-3.5-sonnet");
61 println!(" Complex task -> claude-3-opus");
62 println!();
63 println!("Estimated savings: 70-90% vs using GPT-4 for everything");
64
65 Ok(())
66 }
67
68 #[allow(dead_code)]
70 pub async fn caching_example(_api_key: &str) -> Result<()> {
71 println!("=== Response Caching for Cost Reduction ===");
72 println!();
73
74 println!("Note: Caching example (conceptual):");
75
76 println!("Cache configuration:");
77 println!(" Max size: 1000 entries");
78 println!(" TTL: 1 hour");
79 println!(" Semantic matching: Enabled (95% threshold)");
80 println!();
81 println!("Example:");
82 println!(" First request: 'What is blockchain?' -> API call ($)");
83 println!(" Second request: 'What is blockchain?' -> Cache hit (FREE)");
84 println!(" Similar request: 'Explain blockchain' -> Cache hit (FREE)");
85 println!();
86 println!("Typical savings: 60-80% for repeated queries");
87
88 Ok(())
89 }
90
91 #[allow(dead_code)]
93 pub async fn batching_example(_api_key: &str) -> Result<()> {
94 println!("=== Request Batching for Cost Efficiency ===");
95 println!();
96
97 println!("Note: Batch processing example (conceptual):");
98
99 println!("Batch configuration:");
100 println!(" Max concurrency: 5 requests in parallel");
101 println!(" Delay: 100ms between batches");
102 println!();
103 println!("Benefits:");
104 println!(" * Shared prompt overhead");
105 println!(" * Better rate limit utilization");
106 println!(" * Reduced API call overhead");
107 println!();
108 println!("Example: Evaluating 100 code submissions");
109 println!(" Without batching: 100 API calls");
110 println!(" With batching (20 per batch): 5 API calls");
111 println!(" Cost reduction: ~80%");
112
113 Ok(())
114 }
115
116 #[allow(dead_code)]
118 pub async fn cost_tracking_example() -> Result<()> {
119 use crate::llm::MetricsCollector;
120
121 println!("=== Cost Tracking and Monitoring ===");
122 println!();
123
124 let _metrics = MetricsCollector::new();
125
126 println!("Track costs in real-time:");
127 println!(" * Total API calls made");
128 println!(" * Tokens used (input + output)");
129 println!(" * Estimated cost per provider");
130 println!(" * Cost per operation type");
131 println!();
132 println!("Example metrics:");
133 println!(" Total requests: 1,250");
134 println!(" Total tokens: 5.2M");
135 println!(" Estimated cost: $12.50");
136 println!(" Average per request: $0.01");
137 println!();
138 println!("Set budgets and alerts:");
139 println!(" * Daily budget: $100");
140 println!(" * Warning at 75% ($75)");
141 println!(" * Stop at 100% ($100)");
142
143 Ok(())
144 }
145
146 #[allow(dead_code)]
148 pub async fn cost_optimization_case_study() -> Result<()> {
149 println!("=== Cost Optimization Case Study ===");
150 println!();
151
152 println!("Scenario: Code review platform (10,000 reviews/month)");
153 println!();
154 println!("BEFORE optimization:");
155 println!(" Model: GPT-4 Turbo for all reviews");
156 println!(" Avg tokens: 4,000 per review (2K in, 2K out)");
157 println!(" Cost per review: $0.54");
158 println!(" Monthly cost: $5,400");
159 println!();
160 println!("AFTER optimization:");
161 println!(" 70% simple reviews -> Gemini Flash ($0.04 each)");
162 println!(" 25% medium reviews -> Claude Sonnet ($0.24 each)");
163 println!(" 5% complex reviews -> GPT-4 ($0.54 each)");
164 println!(" 50% cache hits -> FREE");
165 println!();
166 println!("New monthly cost:");
167 println!(" Simple (3,500 reviews): $140");
168 println!(" Medium (1,250 reviews): $300");
169 println!(" Complex (250 reviews): $135");
170 println!(" Cache savings: -$125");
171 println!(" Total: $450/month");
172 println!();
173 println!("SAVINGS: $4,950/month (92% reduction!)");
174
175 Ok(())
176 }
177}
178
/// Stateless namespace type for the resilience examples (circuit breaker, retry, health
/// monitoring, rate limiting).
///
/// It holds no data; the examples are exposed as associated functions on this type.
#[derive(Debug, Clone, Copy, Default)]
pub struct ResilienceExample;
184
185impl ResilienceExample {
186 #[allow(dead_code)]
188 pub async fn circuit_breaker_example() -> Result<()> {
189 use crate::llm::{CircuitBreaker, CircuitBreakerConfig};
190
191 println!("=== Circuit Breaker Pattern ===");
192 println!();
193
194 let config = CircuitBreakerConfig {
195 failure_threshold: 5,
196 success_threshold: 2,
197 timeout: std::time::Duration::from_secs(60),
198 failure_window: std::time::Duration::from_secs(60),
199 };
200
201 let _breaker = CircuitBreaker::new("openai".to_string(), config);
202
203 println!("Circuit breaker configuration:");
204 println!(" Failure threshold: 5 consecutive failures");
205 println!(" Success threshold: 2 successes to recover");
206 println!(" Timeout: 60 seconds in open state");
207 println!();
208 println!("States:");
209 println!(" CLOSED: Normal operation, all requests pass");
210 println!(" OPEN: Too many failures, reject requests");
211 println!(" HALF-OPEN: Testing recovery, limited requests");
212 println!();
213 println!("Benefits:");
214 println!(" * Prevent cascading failures");
215 println!(" * Fail fast when service is down");
216 println!(" * Automatic recovery detection");
217 println!(" * Protect downstream services");
218
219 Ok(())
220 }
221
222 #[allow(dead_code)]
224 pub async fn retry_example() -> Result<()> {
225 use crate::llm::RetryConfig;
226
227 println!("=== Retry Logic with Exponential Backoff ===");
228 println!();
229
230 let _config = RetryConfig {
231 max_attempts: 3,
232 initial_delay: std::time::Duration::from_millis(1000),
233 max_delay: std::time::Duration::from_millis(10000),
234 backoff_multiplier: 2.0,
235 use_jitter: true,
236 };
237
238 println!("Retry configuration:");
241 println!(" Max retries: 3");
242 println!(" Initial delay: 1 second");
243 println!(" Backoff multiplier: 2.0");
244 println!(" Max delay: 10 seconds");
245 println!(" Jitter: Enabled");
246 println!();
247 println!("Retry timeline:");
248 println!(" Attempt 1: Immediate");
249 println!(" Attempt 2: Wait 1s (+jitter)");
250 println!(" Attempt 3: Wait 2s (+jitter)");
251 println!(" Attempt 4: Wait 4s (+jitter)");
252 println!();
253 println!("Retryable errors:");
254 println!(" * Rate limit exceeded (429)");
255 println!(" * Service unavailable (503)");
256 println!(" * Gateway timeout (504)");
257 println!(" * Network errors");
258
259 Ok(())
260 }
261
262 #[allow(dead_code)]
264 pub async fn health_monitoring_example() -> Result<()> {
265 println!("=== Health Monitoring and Failover ===");
266 println!();
267
268 println!("Note: Health monitoring example (conceptual):");
269 println!("In production, you would:");
270 println!(" 1. Create HealthMonitor with config");
271 println!(" 2. Register providers");
272 println!(" 3. Monitor health metrics");
273
274 println!("Health monitoring configuration:");
275 println!(" Check interval: 60 seconds");
276 println!(" Unhealthy threshold: 3 failures");
277 println!(" Healthy threshold: 2 successes");
278 println!();
279 println!("Tracked metrics:");
280 println!(" * Success rate");
281 println!(" * Average response time");
282 println!(" * Consecutive failures");
283 println!(" * Health score (0-100)");
284 println!();
285 println!("Automatic failover:");
286 println!(" If OpenAI unhealthy -> Use Anthropic");
287 println!(" If Anthropic unhealthy -> Use Gemini");
288 println!(" Select healthiest provider automatically");
289
290 Ok(())
291 }
292
293 #[allow(dead_code)]
295 pub async fn rate_limiting_example() -> Result<()> {
296 println!("=== Rate Limiting ===");
297 println!();
298
299 println!("Rate limiter configuration:");
302 println!(" Requests per second: 10");
303 println!(" Burst size: 20");
304 println!();
305 println!("Token bucket algorithm:");
306 println!(" * Bucket holds 20 tokens (burst)");
307 println!(" * Refills at 10 tokens/second");
308 println!(" * Each request consumes 1 token");
309 println!();
310 println!("Example:");
311 println!(" Burst: Process 20 requests immediately");
312 println!(" Sustained: 10 requests/second max");
313 println!(" Over limit: Wait for tokens to refill");
314 println!();
315 println!("Benefits:");
316 println!(" * Prevent API rate limit errors");
317 println!(" * Smooth traffic spikes");
318 println!(" * Predictable throughput");
319
320 Ok(())
321 }
322
323 #[allow(dead_code)]
325 pub async fn complete_resilience_stack(_api_key: &str) -> Result<()> {
326 println!("=== Complete Resilience Stack ===");
327 println!();
328
329 println!("Layered resilience approach:");
330 println!();
331 println!("1. Rate Limiting (outermost)");
332 println!(" -> Prevent overwhelming APIs");
333 println!();
334 println!("2. Circuit Breaker");
335 println!(" -> Fail fast when service is down");
336 println!();
337 println!("3. Retry Logic");
338 println!(" -> Handle transient failures");
339 println!();
340 println!("4. Health Monitoring");
341 println!(" -> Track provider availability");
342 println!();
343 println!("5. Multi-Provider Fallback");
344 println!(" -> Switch to backup provider");
345 println!();
346 println!("Result:");
347 println!(" * 99.9% success rate (vs 95% without)");
348 println!(" * Mean time to recovery: <1 minute");
349 println!(" * Zero manual intervention");
350 println!(" * Cost-efficient failover");
351
352 Ok(())
353 }
354}
355
/// Stateless namespace type for the budget-management examples.
///
/// It holds no data; the examples are exposed as associated functions on this type.
#[derive(Debug, Clone, Copy, Default)]
pub struct BudgetManagementExample;
360
361impl BudgetManagementExample {
362 #[allow(dead_code)]
364 pub async fn budget_configuration() -> Result<()> {
365 use crate::llm::{BudgetConfig, BudgetManager, BudgetPeriod};
366
367 println!("=== Budget Configuration ===");
368 println!();
369
370 let mut config = BudgetConfig::default();
371 config.set_limit(BudgetPeriod::Hourly, 10.0);
372 config.set_limit(BudgetPeriod::Daily, 100.0);
373 config.set_limit(BudgetPeriod::Weekly, 500.0);
374 config.set_limit(BudgetPeriod::Monthly, 2000.0);
375
376 let _manager = BudgetManager::new(config);
377
378 println!("Budget limits configured:");
379 println!(" Hourly: $10");
380 println!(" Daily: $100");
381 println!(" Weekly: $500");
382 println!(" Monthly: $2,000");
383 println!();
384 println!("Auto-reset:");
385 println!(" * Hourly budget resets every hour");
386 println!(" * Daily budget resets at midnight");
387 println!(" * Weekly budget resets on Monday");
388 println!(" * Monthly budget resets on 1st");
389
390 Ok(())
391 }
392
393 #[allow(dead_code)]
395 pub async fn usage_tracking() -> Result<()> {
396 use crate::llm::{BudgetConfig, BudgetManager};
397
398 println!("=== Usage Tracking ===");
399 println!();
400
401 let manager = BudgetManager::new(BudgetConfig::default());
402
403 manager.record_cost(15.50).await?;
405 manager.record_cost(8.25).await?;
406 manager.record_cost(12.00).await?;
407
408 let daily_usage = manager.get_usage(crate::llm::BudgetPeriod::Daily).await;
409 let daily = daily_usage.map_or(0.0, |u| u.total_cost);
410
411 println!("Current usage:");
412 println!(" Today: ${daily:.2}");
413 println!();
414 println!("Tracked metrics:");
415 println!(" * Number of requests");
416 println!(" * Tokens consumed");
417 println!(" * Cost per provider");
418 println!(" * Cost per period");
419
420 Ok(())
421 }
422
423 #[allow(dead_code)]
425 pub async fn alert_system() -> Result<()> {
426 println!("=== Budget Alert System ===");
427 println!();
428
429 println!("Alert levels:");
430 println!();
431 println!("INFO (50%):");
432 println!(" * Budget: $100, Used: $50");
433 println!(" * Action: Log for awareness");
434 println!();
435 println!("WARNING (75%):");
436 println!(" * Budget: $100, Used: $75");
437 println!(" * Action: Send notification");
438 println!();
439 println!("CRITICAL (90%):");
440 println!(" * Budget: $100, Used: $90");
441 println!(" * Action: Alert on-call team");
442 println!();
443 println!("EXCEEDED (100%):");
444 println!(" * Budget: $100, Used: $100+");
445 println!(" * Action: Block new requests (optional)");
446 println!();
447 println!("Custom alert handlers:");
448 println!(" manager.on_alert(|alert| {{");
449 println!(" match alert.level {{");
450 println!(" AlertLevel::Warning => send_email(),");
451 println!(" AlertLevel::Critical => send_sms(),");
452 println!(" AlertLevel::Exceeded => pause_service(),");
453 println!(" _ => log_info(),");
454 println!(" }}");
455 println!(" }});");
456
457 Ok(())
458 }
459
460 #[allow(dead_code)]
462 pub async fn monitoring_and_reporting() -> Result<()> {
463 use crate::llm::{BudgetConfig, BudgetManager, BudgetPeriod};
464
465 println!("=== Budget Monitoring and Reporting ===");
466 println!();
467
468 let mut config = BudgetConfig::default();
469 config.set_limit(BudgetPeriod::Daily, 100.0);
470
471 let manager = BudgetManager::new(config);
472
473 let remaining = manager
474 .get_remaining(BudgetPeriod::Daily)
475 .await
476 .unwrap_or(100.0);
477 let utilization = manager
478 .get_utilization(BudgetPeriod::Daily)
479 .await
480 .unwrap_or(0.0);
481
482 println!("Daily budget report:");
483 println!(" Limit: $100.00");
484 println!(" Used: $67.50");
485 println!(" Remaining: ${remaining:.2}");
486 println!(" Utilization: {:.1}%", utilization * 100.0);
487 println!();
488 println!("Trend analysis:");
489 println!(" * Average daily spend: $65");
490 println!(" * Projected monthly: $1,950");
491 println!(" * vs Monthly budget: $2,000 (check)");
492 println!();
493 println!("Recommendations:");
494 println!(" * Current pace is sustainable");
495 println!(" * Consider caching to reduce costs");
496 println!(" * Peak usage: 2-4pm UTC");
497
498 Ok(())
499 }
500
501 #[allow(dead_code)]
503 pub async fn cost_projection() -> Result<()> {
504 println!("=== Cost Projection and Optimization ===");
505 println!();
506
507 println!("Current usage pattern:");
508 println!(" Week 1: $450");
509 println!(" Week 2: $480");
510 println!(" Week 3: $520");
511 println!(" Week 4: $550");
512 println!();
513 println!("Projection:");
514 println!(" Month total: ~$2,000");
515 println!(" Next month: ~$2,200 (10% growth)");
516 println!();
517 println!("Optimization opportunities:");
518 println!(" 1. Enable caching -> Save $400/month (20%)");
519 println!(" 2. Use Gemini Flash for simple tasks -> Save $300/month (15%)");
520 println!(" 3. Batch requests -> Save $200/month (10%)");
521 println!();
522 println!("Optimized projection:");
523 println!(" Current: $2,000/month");
524 println!(" Optimized: $1,100/month");
525 println!(" Savings: $900/month (45%)");
526
527 Ok(())
528 }
529}
530
/// Stateless namespace type for the token-gated access-control examples.
///
/// It holds no data; the examples are exposed as associated functions on this type.
#[derive(Debug, Clone, Copy, Default)]
pub struct AccessControlExample;
536
537impl AccessControlExample {
538 #[allow(dead_code)]
540 pub async fn access_tiers() -> Result<()> {
541 println!("=== Token-Gated AI Access Tiers ===");
542 println!();
543
544 println!("Access tiers based on token holdings:");
545 println!();
546 println!("FREE (0-99 tokens):");
547 println!(" * Basic code evaluation");
548 println!(" * 10 requests/day");
549 println!(" * No custom agents");
550 println!();
551 println!("BRONZE (100-999 tokens):");
552 println!(" * Code + content evaluation");
553 println!(" * 100 requests/day");
554 println!(" * 1 custom agent");
555 println!();
556 println!("SILVER (1,000-9,999 tokens):");
557 println!(" * All evaluations + fraud detection");
558 println!(" * 1,000 requests/day");
559 println!(" * 3 custom agents");
560 println!();
561 println!("GOLD (10,000-99,999 tokens):");
562 println!(" * All features + batch processing");
563 println!(" * 10,000 requests/day");
564 println!(" * 10 custom agents");
565 println!();
566 println!("PLATINUM (100,000+ tokens):");
567 println!(" * Unlimited access");
568 println!(" * Priority processing");
569 println!(" * Unlimited custom agents");
570
571 Ok(())
572 }
573
574 #[allow(dead_code)]
576 pub async fn feature_quotas() -> Result<()> {
577 use crate::access_control::{AccessControlManager, AiFeature, TokenHolder};
578 use rust_decimal::Decimal;
579 use uuid::Uuid;
580
581 println!("=== Tier-Based Feature Quotas ===");
582 println!();
583
584 let holder = TokenHolder {
585 user_id: Uuid::new_v4(),
586 token_id: Uuid::new_v4(),
587 balance: Decimal::from(5000),
588 tier: crate::access_control::AccessTier::Silver,
589 };
590
591 let manager = AccessControlManager::new();
592
593 let can_evaluate = manager.can_access_feature(&holder, AiFeature::CodeEvaluation)?;
595 let can_fraud = manager.can_access_feature(&holder, AiFeature::FraudDetection)?;
596
597 println!("User: {} (SILVER tier, 5,000 tokens)", holder.user_id);
598 println!();
599 println!("Feature access:");
600 println!(
601 " Code evaluation: {}",
602 if can_evaluate { "Allowed" } else { "Denied" }
603 );
604 println!(
605 " Fraud detection: {}",
606 if can_fraud { "Allowed" } else { "Denied" }
607 );
608 println!();
609 println!("Daily quotas:");
610 println!(" Code evaluations: 1,000/day");
611 println!(" Fraud checks: 500/day");
612 println!(" Custom agent calls: 2,000/day");
613
614 Ok(())
615 }
616
617 #[allow(dead_code)]
619 pub async fn custom_agents(_api_key: &str) -> Result<()> {
620 use crate::access_control::{AccessTier, CustomAgentConfig};
621 use uuid::Uuid;
622
623 println!("=== Custom AI Agents ===");
624 println!();
625
626 let agent = CustomAgentConfig {
627 agent_id: Uuid::new_v4(),
628 token_id: Uuid::new_v4(),
629 name: "My Coding Assistant".to_string(),
630 description: Some("Specialized in Rust code review".to_string()),
631 system_prompt: "You are an expert Rust developer...".to_string(),
632 model: "gpt-4-turbo-preview".to_string(),
633 temperature: 0.3,
634 is_active: true,
635 min_tier: AccessTier::Silver,
636 custom_endpoint: None,
637 personalization: None,
638 };
639
640 println!("Custom agent created:");
641 println!(" Name: {}", agent.name);
642 println!(" Model: {}", agent.model);
643 println!(" Specialization: {}", agent.description.as_ref().unwrap());
644 println!(" Min tier: {:?}", agent.min_tier);
645 println!();
646 println!("Usage:");
647 println!(" let client = LlmClientBuilder::new()");
648 println!(" .openai_api_key(api_key)");
649 println!(" .openai_model(&agent.model)");
650 println!(" .build();");
651 println!();
652 println!("Benefits:");
653 println!(" * Tailored to your domain");
654 println!(" * Consistent responses");
655 println!(" * Custom instructions");
656 println!(" * Token holder exclusive");
657
658 Ok(())
659 }
660
661 #[allow(dead_code)]
663 pub async fn usage_monitoring() -> Result<()> {
664 use crate::access_control::AccessControlManager;
665
666 println!("=== Usage Monitoring Per Tier ===");
667 println!();
668
669 let _manager = AccessControlManager::new();
670
671 println!("Track usage by tier:");
672 println!();
673 println!("FREE tier:");
674 println!(" Active users: 1,250");
675 println!(" Avg requests/user: 8/day");
676 println!(" Total requests: 10,000/day");
677 println!();
678 println!("SILVER tier:");
679 println!(" Active users: 150");
680 println!(" Avg requests/user: 450/day");
681 println!(" Total requests: 67,500/day");
682 println!();
683 println!("PLATINUM tier:");
684 println!(" Active users: 10");
685 println!(" Avg requests/user: 5,000/day");
686 println!(" Total requests: 50,000/day");
687 println!();
688 println!("Insights:");
689 println!(" * PLATINUM users drive 40% of usage");
690 println!(" * Most common: Code evaluation (65%)");
691 println!(" * Peak hours: 9am-5pm UTC");
692
693 Ok(())
694 }
695
696 #[allow(dead_code)]
698 pub async fn network_effects() -> Result<()> {
699 println!("=== Access Control Economics ===");
700 println!();
701
702 println!("Token utility:");
703 println!(" * Gate premium AI features");
704 println!(" * Create custom AI agents");
705 println!(" * Priority processing queue");
706 println!(" * Access to advanced models");
707 println!();
708 println!("Network effects:");
709 println!(" * More users -> More token demand");
710 println!(" * Higher tier -> Better features");
711 println!(" * Custom agents -> Unique value");
712 println!(" * Exclusive access -> Premium pricing");
713 println!();
714 println!("Economics:");
715 println!(" * Free tier: Low-cost models (Gemini Flash)");
716 println!(" * Paid tiers: Premium models (GPT-4, Claude Opus)");
717 println!(" * Cost covered by: Token purchase requirements");
718 println!(" * Revenue model: Token sales + transaction fees");
719
720 Ok(())
721 }
722}