// kaccy_ai/examples/integrations.rs

//! LLM provider integration examples: Google Gemini, DeepSeek, and Ollama.

use crate::ai_evaluator::{AiEvaluator, EvaluatorConfig};
use crate::error::Result;
use crate::evaluator::QualityEvaluator;
use crate::llm::{
    DeepSeekClient, GeminiClient, LlmClient, LlmClientBuilder, ModelTier, RoutingConfig,
};

/// Example: Google Gemini integration workflow
///
/// Demonstrates how to:
/// - Use Google Gemini as an LLM provider
/// - Leverage cost-effective Gemini models
/// - Set up multi-provider fallback with Gemini
/// - Use Gemini for cost optimization
pub struct GeminiIntegrationExample;

19impl GeminiIntegrationExample {
20    /// Run the Gemini integration example with basic usage
21    #[allow(dead_code)]
22    pub async fn run_basic(api_key: &str) -> Result<()> {
23        // 1. Create a Gemini client (Flash for cost efficiency)
24        let gemini = GeminiClient::with_flash(api_key);
25        let llm_client = LlmClient::new(Box::new(gemini));
26
27        // 2. Create an evaluator
28        let evaluator = AiEvaluator::with_config(llm_client, EvaluatorConfig::default());
29
30        // 3. Evaluate code with Gemini
31        let code = r"
32            fn fibonacci(n: u32) -> u32 {
33                match n {
34                    0 => 0,
35                    1 => 1,
36                    _ => fibonacci(n - 1) + fibonacci(n - 2),
37                }
38            }
39        ";
40
41        let result = evaluator.evaluate_code(code, "rust").await?;
42
43        println!("Gemini Flash Evaluation:");
44        println!("  Quality: {}", result.quality_score);
45        println!("  Complexity: {}", result.complexity_score);
46        println!("  Feedback: {}", result.feedback);
47
48        Ok(())
49    }
50
51    /// Run example with multi-provider setup (Gemini + fallback)
52    #[allow(dead_code)]
53    pub async fn run_with_fallback(gemini_key: &str, openai_key: &str) -> Result<()> {
54        // 1. Set up multi-provider client with Gemini as primary
55        let client = LlmClientBuilder::new()
56            .gemini_api_key(gemini_key)
57            .gemini_model("gemini-2.0-flash-exp") // Free experimental model
58            .openai_api_key(openai_key)
59            .prefer_gemini() // Use Gemini first
60            .build()
61            .ok_or_else(|| crate::error::AiError::Configuration("No API keys provided".into()))?;
62
63        // 2. Create evaluator
64        let evaluator = AiEvaluator::with_config(client, EvaluatorConfig::default());
65
66        // 3. Evaluate (will use Gemini, fallback to OpenAI if needed)
67        let content = "Building a distributed system with microservices architecture...";
68
69        let result = evaluator.evaluate_content(content, "technical").await?;
70
71        println!("Multi-provider Evaluation:");
72        println!("  Quality: {}", result.quality_score);
73        println!("  (Using Gemini with OpenAI fallback)");
74
75        Ok(())
76    }
77
78    /// Run example with cost-optimized routing using Gemini
79    #[allow(dead_code)]
80    pub async fn run_cost_optimized(_api_key: &str) -> Result<()> {
81        // 1. Create cost-optimized routing config with Gemini models
82        let routing_config = RoutingConfig {
83            tiers: vec![
84                ModelTier::gemini_2_0_flash(), // Free tier for simple tasks
85                ModelTier::gemini_1_5_flash(), // Very cheap for medium tasks
86                ModelTier::gemini_1_5_pro(),   // Cost-effective for complex tasks
87            ],
88            auto_escalate: true,
89            escalation_threshold: 70.0,
90        };
91
92        println!("Cost-Optimized Gemini Routing:");
93        println!("  Simple tasks -> Gemini 2.0 Flash (Free)");
94        println!("  Medium tasks -> Gemini 1.5 Flash ($0.075/M input)");
95        println!("  Complex tasks -> Gemini 1.5 Pro ($1.25/M input)");
96        println!();
97
98        // 2. Estimate costs for different scenarios
99        let simple_cost = routing_config.estimate_cost("gemini-2.0-flash-exp", 1000, 500);
100        let medium_cost = routing_config.estimate_cost("gemini-1.5-flash", 2000, 1000);
101        let complex_cost = routing_config.estimate_cost("gemini-1.5-pro", 5000, 2000);
102
103        println!("Estimated costs:");
104        println!("  Simple task (1K in, 500 out): ${simple_cost:.6}");
105        println!("  Medium task (2K in, 1K out): ${medium_cost:.6}");
106        println!("  Complex task (5K in, 2K out): ${complex_cost:.6}");
107
108        Ok(())
109    }
110
111    /// Run example showing all Gemini model options
112    #[allow(dead_code)]
113    pub async fn run_model_comparison(api_key: &str) -> Result<()> {
114        println!("Google Gemini Model Comparison:");
115        println!();
116
117        // Gemini 2.0 Flash (Experimental)
118        println!("1. Gemini 2.0 Flash (Experimental)");
119        println!("   - Context: 1M tokens");
120        println!("   - Cost: Free (experimental)");
121        println!("   - Best for: Testing, development, simple tasks");
122        println!();
123
124        // Gemini 1.5 Flash
125        println!("2. Gemini 1.5 Flash");
126        println!("   - Context: 1M tokens");
127        println!("   - Cost: $0.075/M input, $0.30/M output");
128        println!("   - Best for: High-volume, cost-sensitive workloads");
129        println!();
130
131        // Gemini 1.5 Pro
132        println!("3. Gemini 1.5 Pro");
133        println!("   - Context: 1M tokens");
134        println!("   - Cost: $1.25/M input, $5.00/M output");
135        println!("   - Best for: Complex reasoning, code generation");
136        println!();
137
138        // Example: Create clients for each
139        let _flash_exp = GeminiClient::with_2_0_flash(api_key);
140        let _flash = GeminiClient::with_flash(api_key);
141        let _pro = GeminiClient::with_default_model(api_key);
142
143        println!("Created clients: Flash Exp, Flash, Pro");
144
145        Ok(())
146    }
147}
148
/// Example: `DeepSeek` LLM integration
///
/// Demonstrates:
/// - Using `DeepSeek`'s cost-effective AI models
/// - Specialized models (Chat, Coder, Reasoner)
/// - Multi-provider setup with `DeepSeek`
/// - Cost optimization with `DeepSeek`
pub struct DeepSeekIntegrationExample;

158impl DeepSeekIntegrationExample {
159    /// Run the `DeepSeek` integration example with basic usage
160    #[allow(dead_code)]
161    pub async fn run_basic(api_key: &str) -> Result<()> {
162        // 1. Create a DeepSeek client (Chat model for general use)
163        let deepseek = DeepSeekClient::with_default_model(api_key);
164        let llm_client = LlmClient::new(Box::new(deepseek));
165
166        // 2. Create an evaluator
167        let evaluator = AiEvaluator::with_config(llm_client, EvaluatorConfig::default());
168
169        // 3. Evaluate code with DeepSeek
170        let code = r"
171            fn binary_search<T: Ord>(arr: &[T], target: &T) -> Option<usize> {
172                let mut left = 0;
173                let mut right = arr.len();
174
175                while left < right {
176                    let mid = left + (right - left) / 2;
177                    match arr[mid].cmp(target) {
178                        std::cmp::Ordering::Equal => return Some(mid),
179                        std::cmp::Ordering::Less => left = mid + 1,
180                        std::cmp::Ordering::Greater => right = mid,
181                    }
182                }
183                None
184            }
185        ";
186
187        let result = evaluator.evaluate_code(code, "rust").await?;
188
189        println!("DeepSeek Chat Evaluation:");
190        println!("  Quality: {}", result.quality_score);
191        println!("  Complexity: {}", result.complexity_score);
192        println!("  Feedback: {}", result.feedback);
193
194        Ok(())
195    }
196
197    /// Run example with `DeepSeek` Coder (optimized for code tasks)
198    #[allow(dead_code)]
199    pub async fn run_with_coder(api_key: &str) -> Result<()> {
200        // 1. Create DeepSeek Coder client (specialized for code)
201        let deepseek_coder = DeepSeekClient::with_coder_model(api_key);
202        let llm_client = LlmClient::new(Box::new(deepseek_coder));
203
204        // 2. Create evaluator
205        let evaluator = AiEvaluator::with_config(llm_client, EvaluatorConfig::default());
206
207        // 3. Evaluate code with specialized model
208        let code = r#"
209            class RedBlackTree:
210                def __init__(self):
211                    self.nil = Node(None, "BLACK")
212                    self.root = self.nil
213
214                def insert(self, key):
215                    node = Node(key)
216                    node.left = node.right = self.nil
217                    # ... implementation
218        "#;
219
220        let result = evaluator.evaluate_code(code, "python").await?;
221
222        println!("DeepSeek Coder Evaluation:");
223        println!("  Quality: {}", result.quality_score);
224        println!("  Complexity: {}", result.complexity_score);
225        println!("  (Using specialized code model)");
226
227        Ok(())
228    }
229
230    /// Run example with multi-provider setup (`DeepSeek` + fallback)
231    #[allow(dead_code)]
232    pub async fn run_with_fallback(deepseek_key: &str, openai_key: &str) -> Result<()> {
233        // 1. Set up multi-provider client with DeepSeek as primary
234        let client = LlmClientBuilder::new()
235            .deepseek_api_key(deepseek_key)
236            .deepseek_model("deepseek-chat")
237            .openai_api_key(openai_key)
238            .prefer_deepseek() // Use DeepSeek first (most cost-effective)
239            .build()
240            .ok_or_else(|| crate::error::AiError::Configuration("No API keys provided".into()))?;
241
242        // 2. Create evaluator
243        let evaluator = AiEvaluator::with_config(client, EvaluatorConfig::default());
244
245        // 3. Evaluate (will use DeepSeek, fallback to OpenAI if needed)
246        let content =
247            "Implementing a high-performance distributed cache with consistent hashing...";
248
249        let result = evaluator.evaluate_content(content, "technical").await?;
250
251        println!("Multi-provider Evaluation:");
252        println!("  Quality: {}", result.quality_score);
253        println!("  (Using DeepSeek with OpenAI fallback)");
254
255        Ok(())
256    }
257
258    /// Run example with cost-optimized routing using `DeepSeek`
259    #[allow(dead_code)]
260    pub async fn run_cost_optimized(_api_key: &str) -> Result<()> {
261        // 1. Create ultra-cost-optimized routing config with DeepSeek models
262        let routing_config = RoutingConfig {
263            tiers: vec![
264                ModelTier::deepseek_chat(),     // Very cheap for simple-medium tasks
265                ModelTier::deepseek_coder(),    // Code-optimized at same low price
266                ModelTier::deepseek_reasoner(), // Advanced reasoning, still cheap
267            ],
268            auto_escalate: true,
269            escalation_threshold: 70.0,
270        };
271
272        println!("Ultra-Cost-Optimized DeepSeek Routing:");
273        println!("  Simple-Medium tasks -> DeepSeek Chat ($0.14/M input)");
274        println!("  Code tasks -> DeepSeek Coder ($0.14/M input)");
275        println!("  Complex reasoning -> DeepSeek Reasoner ($0.55/M input)");
276        println!();
277
278        // 2. Estimate costs for different scenarios
279        let simple_cost = routing_config.estimate_cost("deepseek-chat", 1000, 500);
280        let code_cost = routing_config.estimate_cost("deepseek-coder", 2000, 1000);
281        let complex_cost = routing_config.estimate_cost("deepseek-reasoner", 5000, 2000);
282
283        println!("Estimated costs:");
284        println!("  Simple task (1K in, 500 out): ${simple_cost:.6}");
285        println!("  Code task (2K in, 1K out): ${code_cost:.6}");
286        println!("  Complex task (5K in, 2K out): ${complex_cost:.6}");
287        println!();
288        println!("Note: DeepSeek is ~100x cheaper than GPT-4, ~50x cheaper than Claude Opus!");
289
290        Ok(())
291    }
292
293    /// Run example showing all `DeepSeek` model options
294    #[allow(dead_code)]
295    pub async fn run_model_comparison(api_key: &str) -> Result<()> {
296        println!("DeepSeek Model Comparison:");
297        println!();
298
299        // DeepSeek Chat
300        println!("1. DeepSeek Chat");
301        println!("   - Context: 32K tokens");
302        println!("   - Cost: $0.14/M input, $0.28/M output");
303        println!("   - Best for: General purpose, cost-sensitive workloads");
304        println!();
305
306        // DeepSeek Coder
307        println!("2. DeepSeek Coder");
308        println!("   - Context: 32K tokens");
309        println!("   - Cost: $0.14/M input, $0.28/M output");
310        println!("   - Best for: Code generation, analysis, refactoring");
311        println!();
312
313        // DeepSeek Reasoner
314        println!("3. DeepSeek Reasoner");
315        println!("   - Context: 64K tokens");
316        println!("   - Cost: $0.55/M input, $2.19/M output");
317        println!("   - Best for: Complex reasoning, math, logic problems");
318        println!();
319
320        // Example: Create clients for each
321        let _chat = DeepSeekClient::with_default_model(api_key);
322        let _coder = DeepSeekClient::with_coder_model(api_key);
323        let _reasoner = DeepSeekClient::with_reasoner_model(api_key);
324
325        println!("Created clients: Chat, Coder, Reasoner");
326        println!();
327        println!("Tip: DeepSeek offers the best price/performance ratio!");
328        println!("   Compare: GPT-4 Turbo costs $10/M vs DeepSeek Chat $0.14/M");
329
330        Ok(())
331    }
332
333    /// Run example comparing cost across all providers
334    #[allow(dead_code)]
335    pub async fn run_cost_comparison() -> Result<()> {
336        println!("LLM Provider Cost Comparison (per 1M tokens):");
337        println!();
338        println!("Input costs:");
339        println!("  DeepSeek Chat:     $0.14   Best value");
340        println!("  Gemini 1.5 Flash:  $0.075  Google's cheapest");
341        println!("  DeepSeek Reasoner: $0.55");
342        println!("  Gemini 1.5 Pro:    $1.25");
343        println!("  Claude 3.5 Sonnet: $3.00");
344        println!("  GPT-4 Turbo:       $10.00");
345        println!("  Claude 3 Opus:     $15.00");
346        println!();
347        println!("Output costs:");
348        println!("  DeepSeek Chat:     $0.28   Best value");
349        println!("  Gemini 1.5 Flash:  $0.30   Google's cheapest");
350        println!("  DeepSeek Reasoner: $2.19");
351        println!("  Gemini 1.5 Pro:    $5.00");
352        println!("  Claude 3.5 Sonnet: $15.00");
353        println!("  GPT-4 Turbo:       $30.00");
354        println!("  Claude 3 Opus:     $75.00");
355        println!();
356        println!("Cost savings example (1M input + 1M output tokens):");
357        println!("   GPT-4 Turbo: $40.00");
358        println!("   DeepSeek Chat: $0.42 (95% savings!)");
359
360        Ok(())
361    }
362}
363
/// Example: Ollama Integration (Local LLM Execution)
///
/// This example demonstrates how to use Ollama for local LLM execution,
/// including model selection, hybrid setups, and cost comparison.
pub struct OllamaIntegrationExample;

370impl OllamaIntegrationExample {
371    /// Run basic Ollama usage example
372    #[allow(dead_code)]
373    pub async fn run_basic_usage() -> Result<()> {
374        println!("=== Ollama Integration Example ===");
375        println!();
376
377        // Create Ollama client (assumes Ollama is running locally on port 11434)
378        // let _ollama = crate::llm::OllamaClient::new("http://localhost:11434", "llama2");
379
380        println!("Connected to Ollama at http://localhost:11434");
381        println!("  Model: llama2");
382        println!();
383
384        // Note: OllamaClient would be used for making requests
385        println!("Sending request to Ollama...");
386        println!("Example: 'Explain what a blockchain is in one sentence.'");
387        println!();
388        println!("Note: In production code, you would:");
389        println!("  1. Create a completion request");
390        println!("  2. Call ollama.complete(request).await");
391        println!("  3. Process the response");
392
393        Ok(())
394    }
395
396    /// Model selection guide
397    #[allow(dead_code)]
398    pub async fn model_selection_guide() -> Result<()> {
399        println!("=== Ollama Model Selection Guide ===");
400        println!();
401
402        println!("Recommended models:");
403        println!();
404        println!("1. Llama 2 (7B)");
405        println!("   - Use case: General purpose, fast responses");
406        println!("   - RAM: ~8GB");
407        println!("   - Quality: Good");
408        println!();
409        println!("2. CodeLlama (7B)");
410        println!("   - Use case: Code generation and analysis");
411        println!("   - RAM: ~8GB");
412        println!("   - Quality: Excellent for code");
413        println!();
414        println!("3. Mistral (7B)");
415        println!("   - Use case: High quality, balanced");
416        println!("   - RAM: ~8GB");
417        println!("   - Quality: Excellent");
418        println!();
419        println!("4. Llama 2 (70B)");
420        println!("   - Use case: Complex reasoning, highest quality");
421        println!("   - RAM: ~40GB");
422        println!("   - Quality: Best");
423        println!();
424        println!("Start Ollama: ollama run llama2");
425
426        Ok(())
427    }
428
429    /// Hybrid setup with cloud fallback
430    #[allow(dead_code)]
431    pub async fn hybrid_setup_example(_openai_key: &str) -> Result<()> {
432        println!("=== Hybrid Setup: Ollama + Cloud Fallback ===");
433        println!();
434
435        // Note: For hybrid setups, you would create separate clients
436        // and implement fallback logic in your application layer
437
438        println!("Hybrid Setup Concept:");
439        println!("  Primary: Ollama (llama2) - FREE, private, local");
440        println!("  Fallback: OpenAI (gpt-4-turbo) - if Ollama unavailable");
441        println!();
442        println!("Implementation:");
443        println!("  1. Create OllamaClient for local requests");
444        println!("  2. Create OpenAI client for fallback");
445        println!("  3. Try Ollama first, fall back to OpenAI on error");
446        println!();
447        println!("Benefits:");
448        println!("  * Free development and testing with Ollama");
449        println!("  * Privacy: sensitive data stays local");
450        println!("  * Reliability: cloud fallback for production");
451
452        Ok(())
453    }
454
455    /// Cost comparison: Ollama vs Cloud
456    #[allow(dead_code)]
457    pub async fn cost_comparison() -> Result<()> {
458        println!("=== Cost Comparison: Ollama vs Cloud Providers ===");
459        println!();
460
461        println!("Monthly cost for 1M tokens (input + output):");
462        println!();
463        println!("  Ollama (local):        $0.00    FREE!");
464        println!("  DeepSeek Chat:         $0.42");
465        println!("  Gemini 1.5 Flash:      $0.38");
466        println!("  GPT-4 Turbo:           $40.00");
467        println!("  Claude 3 Opus:         $90.00");
468        println!();
469        println!("Additional Ollama benefits:");
470        println!("  * No API rate limits");
471        println!("  * Complete privacy (data never leaves your server)");
472        println!("  * No internet required");
473        println!("  * Predictable costs (hardware only)");
474        println!();
475        println!("Recommended strategy:");
476        println!("  1. Use Ollama for development/testing (free)");
477        println!("  2. Use DeepSeek/Gemini for production (cheap)");
478        println!("  3. Use GPT-4/Claude for critical tasks (expensive but best quality)");
479
480        Ok(())
481    }
482
483    /// Installation and setup guide
484    #[allow(dead_code)]
485    pub async fn installation_guide() -> Result<()> {
486        println!("=== Ollama Installation Guide ===");
487        println!();
488
489        println!("1. Install Ollama:");
490        println!("   curl -fsSL https://ollama.com/install.sh | sh");
491        println!();
492        println!("2. Pull a model:");
493        println!("   ollama pull llama2        # 7B model (~4GB download)");
494        println!("   ollama pull codellama     # Code-specialized");
495        println!("   ollama pull mistral       # High quality 7B");
496        println!();
497        println!("3. Start Ollama service:");
498        println!("   ollama serve");
499        println!();
500        println!("4. Configure kaccy-ai:");
501        println!("   export OLLAMA_BASE_URL=http://localhost:11434");
502        println!("   export OLLAMA_MODEL=llama2");
503        println!();
504        println!("5. Test integration:");
505        println!("   let client = OllamaClient::from_env();");
506        println!();
507        println!("Docs: https://ollama.com/");
508
509        Ok(())
510    }
511
512    /// Environment-based configuration
513    #[allow(dead_code)]
514    pub async fn environment_config_example() -> Result<()> {
515        println!("=== Environment-Based Configuration ===");
516        println!();
517
518        println!("Set environment variables:");
519        println!("  export OLLAMA_BASE_URL=http://localhost:11434");
520        println!("  export OLLAMA_MODEL=llama2");
521        println!();
522
523        // Load from environment
524        // let _client = crate::llm::OllamaClient::from_env();
525
526        println!("Client created from environment variables");
527        println!();
528        println!("This allows easy configuration across environments:");
529        println!("  * Development: OLLAMA_MODEL=llama2 (fast, local)");
530        println!("  * Production: OLLAMA_MODEL=llama2:70b (higher quality)");
531
532        Ok(())
533    }
534}