// kaccy-ai 0.2.0 — AI-powered intelligence for Kaccy Protocol:
// forecasting, optimization, and insights.
// NOTE(review): removed a docs.rs line-number gutter left over from a
// web-page scrape; it was not part of the source.
//! LLM provider integration examples: Google Gemini, DeepSeek, and Ollama.

use crate::ai_evaluator::{AiEvaluator, EvaluatorConfig};
use crate::error::Result;
use crate::evaluator::QualityEvaluator;
use crate::llm::{
    DeepSeekClient, GeminiClient, LlmClient, LlmClientBuilder, ModelTier, RoutingConfig,
};

/// Example: Google Gemini integration workflow
///
/// Unit struct used purely as a namespace for the example runners in the
/// `impl` block below; it carries no state.
///
/// Demonstrates how to:
/// - Use Google Gemini as an LLM provider
/// - Leverage cost-effective Gemini models
/// - Set up multi-provider fallback with Gemini
/// - Use Gemini for cost optimization
pub struct GeminiIntegrationExample;

impl GeminiIntegrationExample {
    /// Basic Gemini usage: evaluate a Rust snippet with the Flash model.
    ///
    /// # Errors
    /// Propagates any error from the underlying evaluation call.
    #[allow(dead_code)]
    pub async fn run_basic(api_key: &str) -> Result<()> {
        // Flash is the cost-efficient Gemini tier; wrap it in the generic client.
        let provider = GeminiClient::with_flash(api_key);
        let client = LlmClient::new(Box::new(provider));

        // Default evaluator configuration is sufficient for a demo.
        let evaluator = AiEvaluator::with_config(client, EvaluatorConfig::default());

        // Sample code under evaluation (naive recursive Fibonacci).
        let code = r"
            fn fibonacci(n: u32) -> u32 {
                match n {
                    0 => 0,
                    1 => 1,
                    _ => fibonacci(n - 1) + fibonacci(n - 2),
                }
            }
        ";

        let outcome = evaluator.evaluate_code(code, "rust").await?;

        println!("Gemini Flash Evaluation:");
        println!("  Quality: {}", outcome.quality_score);
        println!("  Complexity: {}", outcome.complexity_score);
        println!("  Feedback: {}", outcome.feedback);

        Ok(())
    }

    /// Gemini-first multi-provider setup with OpenAI as the fallback.
    ///
    /// # Errors
    /// Returns a configuration error if the builder produces no client, and
    /// propagates evaluation failures.
    #[allow(dead_code)]
    pub async fn run_with_fallback(gemini_key: &str, openai_key: &str) -> Result<()> {
        // The builder yields `None` when no provider key is usable; surface
        // that as a configuration error instead of panicking.
        let llm = LlmClientBuilder::new()
            .gemini_api_key(gemini_key)
            .gemini_model("gemini-2.0-flash-exp") // Free experimental model
            .openai_api_key(openai_key)
            .prefer_gemini() // Use Gemini first
            .build()
            .ok_or_else(|| crate::error::AiError::Configuration("No API keys provided".into()))?;

        let evaluator = AiEvaluator::with_config(llm, EvaluatorConfig::default());

        // Requests go to Gemini first; OpenAI only serves as a backup.
        let content = "Building a distributed system with microservices architecture...";
        let outcome = evaluator.evaluate_content(content, "technical").await?;

        println!("Multi-provider Evaluation:");
        println!("  Quality: {}", outcome.quality_score);
        println!("  (Using Gemini with OpenAI fallback)");

        Ok(())
    }

    /// Tiered, cost-aware routing built entirely from Gemini models.
    ///
    /// The `_api_key` parameter is unused here because no request is issued;
    /// only cost estimates are computed.
    #[allow(dead_code)]
    pub async fn run_cost_optimized(_api_key: &str) -> Result<()> {
        // Cheapest-first tier list; tasks escalate automatically once the
        // quality score falls below the threshold.
        let config = RoutingConfig {
            tiers: vec![
                ModelTier::gemini_2_0_flash(), // Free tier for simple tasks
                ModelTier::gemini_1_5_flash(), // Very cheap for medium tasks
                ModelTier::gemini_1_5_pro(),   // Cost-effective for complex tasks
            ],
            auto_escalate: true,
            escalation_threshold: 70.0,
        };

        println!("Cost-Optimized Gemini Routing:");
        println!("  Simple tasks -> Gemini 2.0 Flash (Free)");
        println!("  Medium tasks -> Gemini 1.5 Flash ($0.075/M input)");
        println!("  Complex tasks -> Gemini 1.5 Pro ($1.25/M input)");
        println!();

        // Price out three representative workloads against the tier table.
        let cheap = config.estimate_cost("gemini-2.0-flash-exp", 1000, 500);
        let mid = config.estimate_cost("gemini-1.5-flash", 2000, 1000);
        let heavy = config.estimate_cost("gemini-1.5-pro", 5000, 2000);

        println!("Estimated costs:");
        println!("  Simple task (1K in, 500 out): ${cheap:.6}");
        println!("  Medium task (2K in, 1K out): ${mid:.6}");
        println!("  Complex task (5K in, 2K out): ${heavy:.6}");

        Ok(())
    }

    /// Print a side-by-side overview of the available Gemini models and
    /// instantiate one client per tier.
    #[allow(dead_code)]
    pub async fn run_model_comparison(api_key: &str) -> Result<()> {
        println!("Google Gemini Model Comparison:");
        println!();

        // (heading, context window, pricing, recommended use) per model.
        let catalog = [
            (
                "1. Gemini 2.0 Flash (Experimental)",
                "1M tokens",
                "Free (experimental)",
                "Testing, development, simple tasks",
            ),
            (
                "2. Gemini 1.5 Flash",
                "1M tokens",
                "$0.075/M input, $0.30/M output",
                "High-volume, cost-sensitive workloads",
            ),
            (
                "3. Gemini 1.5 Pro",
                "1M tokens",
                "$1.25/M input, $5.00/M output",
                "Complex reasoning, code generation",
            ),
        ];

        for (heading, context, cost, best_for) in catalog {
            println!("{heading}");
            println!("   - Context: {context}");
            println!("   - Cost: {cost}");
            println!("   - Best for: {best_for}");
            println!();
        }

        // Show the constructor surface by building a client per tier.
        let _flash_exp = GeminiClient::with_2_0_flash(api_key);
        let _flash = GeminiClient::with_flash(api_key);
        let _pro = GeminiClient::with_default_model(api_key);

        println!("Created clients: Flash Exp, Flash, Pro");

        Ok(())
    }
}

/// Example: `DeepSeek` LLM integration
///
/// Unit struct used purely as a namespace for the example runners in the
/// `impl` block below; it carries no state.
///
/// Demonstrates:
/// - Using `DeepSeek`'s cost-effective AI models
/// - Specialized models (Chat, Coder, Reasoner)
/// - Multi-provider setup with `DeepSeek`
/// - Cost optimization with `DeepSeek`
pub struct DeepSeekIntegrationExample;

impl DeepSeekIntegrationExample {
    /// Basic `DeepSeek` usage: evaluate a Rust snippet with the Chat model.
    ///
    /// # Errors
    /// Propagates any error from the underlying evaluation call.
    #[allow(dead_code)]
    pub async fn run_basic(api_key: &str) -> Result<()> {
        // Chat is the general-purpose DeepSeek model; wrap it in the client.
        let provider = DeepSeekClient::with_default_model(api_key);
        let client = LlmClient::new(Box::new(provider));

        // Default evaluator configuration is sufficient for a demo.
        let evaluator = AiEvaluator::with_config(client, EvaluatorConfig::default());

        // Sample code under evaluation (generic binary search).
        let code = r"
            fn binary_search<T: Ord>(arr: &[T], target: &T) -> Option<usize> {
                let mut left = 0;
                let mut right = arr.len();

                while left < right {
                    let mid = left + (right - left) / 2;
                    match arr[mid].cmp(target) {
                        std::cmp::Ordering::Equal => return Some(mid),
                        std::cmp::Ordering::Less => left = mid + 1,
                        std::cmp::Ordering::Greater => right = mid,
                    }
                }
                None
            }
        ";

        let outcome = evaluator.evaluate_code(code, "rust").await?;

        println!("DeepSeek Chat Evaluation:");
        println!("  Quality: {}", outcome.quality_score);
        println!("  Complexity: {}", outcome.complexity_score);
        println!("  Feedback: {}", outcome.feedback);

        Ok(())
    }

    /// Evaluate code with the `DeepSeek` Coder model, which is specialized
    /// for code tasks.
    ///
    /// # Errors
    /// Propagates any error from the underlying evaluation call.
    #[allow(dead_code)]
    pub async fn run_with_coder(api_key: &str) -> Result<()> {
        // Coder shares Chat's pricing but is tuned for source code.
        let provider = DeepSeekClient::with_coder_model(api_key);
        let client = LlmClient::new(Box::new(provider));

        let evaluator = AiEvaluator::with_config(client, EvaluatorConfig::default());

        // Sample under evaluation: a partial Python red-black tree.
        let code = r#"
            class RedBlackTree:
                def __init__(self):
                    self.nil = Node(None, "BLACK")
                    self.root = self.nil

                def insert(self, key):
                    node = Node(key)
                    node.left = node.right = self.nil
                    # ... implementation
        "#;

        let outcome = evaluator.evaluate_code(code, "python").await?;

        println!("DeepSeek Coder Evaluation:");
        println!("  Quality: {}", outcome.quality_score);
        println!("  Complexity: {}", outcome.complexity_score);
        println!("  (Using specialized code model)");

        Ok(())
    }

    /// `DeepSeek`-first multi-provider setup with OpenAI as the fallback.
    ///
    /// # Errors
    /// Returns a configuration error if the builder produces no client, and
    /// propagates evaluation failures.
    #[allow(dead_code)]
    pub async fn run_with_fallback(deepseek_key: &str, openai_key: &str) -> Result<()> {
        // The builder yields `None` when no provider key is usable; surface
        // that as a configuration error instead of panicking.
        let llm = LlmClientBuilder::new()
            .deepseek_api_key(deepseek_key)
            .deepseek_model("deepseek-chat")
            .openai_api_key(openai_key)
            .prefer_deepseek() // Use DeepSeek first (most cost-effective)
            .build()
            .ok_or_else(|| crate::error::AiError::Configuration("No API keys provided".into()))?;

        let evaluator = AiEvaluator::with_config(llm, EvaluatorConfig::default());

        // Requests go to DeepSeek first; OpenAI only serves as a backup.
        let content =
            "Implementing a high-performance distributed cache with consistent hashing...";
        let outcome = evaluator.evaluate_content(content, "technical").await?;

        println!("Multi-provider Evaluation:");
        println!("  Quality: {}", outcome.quality_score);
        println!("  (Using DeepSeek with OpenAI fallback)");

        Ok(())
    }

    /// Tiered, cost-aware routing built entirely from `DeepSeek` models.
    ///
    /// The `_api_key` parameter is unused here because no request is issued;
    /// only cost estimates are computed.
    #[allow(dead_code)]
    pub async fn run_cost_optimized(_api_key: &str) -> Result<()> {
        // Cheapest-first tier list; tasks escalate automatically once the
        // quality score falls below the threshold.
        let config = RoutingConfig {
            tiers: vec![
                ModelTier::deepseek_chat(),     // Very cheap for simple-medium tasks
                ModelTier::deepseek_coder(),    // Code-optimized at same low price
                ModelTier::deepseek_reasoner(), // Advanced reasoning, still cheap
            ],
            auto_escalate: true,
            escalation_threshold: 70.0,
        };

        println!("Ultra-Cost-Optimized DeepSeek Routing:");
        println!("  Simple-Medium tasks -> DeepSeek Chat ($0.14/M input)");
        println!("  Code tasks -> DeepSeek Coder ($0.14/M input)");
        println!("  Complex reasoning -> DeepSeek Reasoner ($0.55/M input)");
        println!();

        // Price out three representative workloads against the tier table.
        let cheap = config.estimate_cost("deepseek-chat", 1000, 500);
        let coding = config.estimate_cost("deepseek-coder", 2000, 1000);
        let heavy = config.estimate_cost("deepseek-reasoner", 5000, 2000);

        println!("Estimated costs:");
        println!("  Simple task (1K in, 500 out): ${cheap:.6}");
        println!("  Code task (2K in, 1K out): ${coding:.6}");
        println!("  Complex task (5K in, 2K out): ${heavy:.6}");
        println!();
        println!("Note: DeepSeek is ~100x cheaper than GPT-4, ~50x cheaper than Claude Opus!");

        Ok(())
    }

    /// Print a side-by-side overview of the available `DeepSeek` models and
    /// instantiate one client per model.
    #[allow(dead_code)]
    pub async fn run_model_comparison(api_key: &str) -> Result<()> {
        println!("DeepSeek Model Comparison:");
        println!();

        // (heading, context window, pricing, recommended use) per model.
        let catalog = [
            (
                "1. DeepSeek Chat",
                "32K tokens",
                "$0.14/M input, $0.28/M output",
                "General purpose, cost-sensitive workloads",
            ),
            (
                "2. DeepSeek Coder",
                "32K tokens",
                "$0.14/M input, $0.28/M output",
                "Code generation, analysis, refactoring",
            ),
            (
                "3. DeepSeek Reasoner",
                "64K tokens",
                "$0.55/M input, $2.19/M output",
                "Complex reasoning, math, logic problems",
            ),
        ];

        for (heading, context, cost, best_for) in catalog {
            println!("{heading}");
            println!("   - Context: {context}");
            println!("   - Cost: {cost}");
            println!("   - Best for: {best_for}");
            println!();
        }

        // Show the constructor surface by building a client per model.
        let _chat = DeepSeekClient::with_default_model(api_key);
        let _coder = DeepSeekClient::with_coder_model(api_key);
        let _reasoner = DeepSeekClient::with_reasoner_model(api_key);

        println!("Created clients: Chat, Coder, Reasoner");
        println!();
        println!("Tip: DeepSeek offers the best price/performance ratio!");
        println!("   Compare: GPT-4 Turbo costs $10/M vs DeepSeek Chat $0.14/M");

        Ok(())
    }

    /// Print a static cost comparison across all supported providers.
    #[allow(dead_code)]
    pub async fn run_cost_comparison() -> Result<()> {
        println!("LLM Provider Cost Comparison (per 1M tokens):");
        println!();

        // Pre-formatted rows; padding is deliberate to align the columns.
        println!("Input costs:");
        let input_rows = [
            "  DeepSeek Chat:     $0.14   Best value",
            "  Gemini 1.5 Flash:  $0.075  Google's cheapest",
            "  DeepSeek Reasoner: $0.55",
            "  Gemini 1.5 Pro:    $1.25",
            "  Claude 3.5 Sonnet: $3.00",
            "  GPT-4 Turbo:       $10.00",
            "  Claude 3 Opus:     $15.00",
        ];
        for row in input_rows {
            println!("{row}");
        }
        println!();

        println!("Output costs:");
        let output_rows = [
            "  DeepSeek Chat:     $0.28   Best value",
            "  Gemini 1.5 Flash:  $0.30   Google's cheapest",
            "  DeepSeek Reasoner: $2.19",
            "  Gemini 1.5 Pro:    $5.00",
            "  Claude 3.5 Sonnet: $15.00",
            "  GPT-4 Turbo:       $30.00",
            "  Claude 3 Opus:     $75.00",
        ];
        for row in output_rows {
            println!("{row}");
        }
        println!();

        println!("Cost savings example (1M input + 1M output tokens):");
        println!("   GPT-4 Turbo: $40.00");
        println!("   DeepSeek Chat: $0.42 (95% savings!)");

        Ok(())
    }
}

/// Example: Ollama Integration (Local LLM Execution)
///
/// Unit struct used purely as a namespace for the example runners in the
/// `impl` block below; it carries no state.
///
/// This example demonstrates how to use Ollama for local LLM execution,
/// including model selection, hybrid setups, and cost comparison.
pub struct OllamaIntegrationExample;

impl OllamaIntegrationExample {
    /// Print the `=== title ===` section banner followed by a blank line.
    fn banner(title: &str) {
        println!("=== {title} ===");
        println!();
    }

    /// Walk through basic Ollama usage against a local server.
    #[allow(dead_code)]
    pub async fn run_basic_usage() -> Result<()> {
        Self::banner("Ollama Integration Example");

        // Create Ollama client (assumes Ollama is running locally on port 11434)
        // let _ollama = crate::llm::OllamaClient::new("http://localhost:11434", "llama2");

        println!("Connected to Ollama at http://localhost:11434");
        println!("  Model: llama2");
        println!();

        // OllamaClient would be used for making the actual requests.
        println!("Sending request to Ollama...");
        println!("Example: 'Explain what a blockchain is in one sentence.'");
        println!();
        println!("Note: In production code, you would:");
        for step in [
            "  1. Create a completion request",
            "  2. Call ollama.complete(request).await",
            "  3. Process the response",
        ] {
            println!("{step}");
        }

        Ok(())
    }

    /// Print a model-selection guide for common Ollama models.
    #[allow(dead_code)]
    pub async fn model_selection_guide() -> Result<()> {
        Self::banner("Ollama Model Selection Guide");

        println!("Recommended models:");
        println!();

        // (name, use case, RAM footprint, quality) per model.
        let models = [
            ("1. Llama 2 (7B)", "General purpose, fast responses", "~8GB", "Good"),
            ("2. CodeLlama (7B)", "Code generation and analysis", "~8GB", "Excellent for code"),
            ("3. Mistral (7B)", "High quality, balanced", "~8GB", "Excellent"),
            ("4. Llama 2 (70B)", "Complex reasoning, highest quality", "~40GB", "Best"),
        ];
        for (name, use_case, ram, quality) in models {
            println!("{name}");
            println!("   - Use case: {use_case}");
            println!("   - RAM: {ram}");
            println!("   - Quality: {quality}");
            println!();
        }

        println!("Start Ollama: ollama run llama2");

        Ok(())
    }

    /// Describe a hybrid setup: local Ollama first, cloud as fallback.
    ///
    /// The `_openai_key` parameter is unused because no client is built here.
    #[allow(dead_code)]
    pub async fn hybrid_setup_example(_openai_key: &str) -> Result<()> {
        Self::banner("Hybrid Setup: Ollama + Cloud Fallback");

        // For hybrid setups you would create separate clients and implement
        // the fallback logic in your application layer.

        println!("Hybrid Setup Concept:");
        println!("  Primary: Ollama (llama2) - FREE, private, local");
        println!("  Fallback: OpenAI (gpt-4-turbo) - if Ollama unavailable");
        println!();
        println!("Implementation:");
        for step in [
            "  1. Create OllamaClient for local requests",
            "  2. Create OpenAI client for fallback",
            "  3. Try Ollama first, fall back to OpenAI on error",
        ] {
            println!("{step}");
        }
        println!();
        println!("Benefits:");
        for benefit in [
            "  * Free development and testing with Ollama",
            "  * Privacy: sensitive data stays local",
            "  * Reliability: cloud fallback for production",
        ] {
            println!("{benefit}");
        }

        Ok(())
    }

    /// Print a cost comparison between local Ollama and cloud providers.
    #[allow(dead_code)]
    pub async fn cost_comparison() -> Result<()> {
        Self::banner("Cost Comparison: Ollama vs Cloud Providers");

        println!("Monthly cost for 1M tokens (input + output):");
        println!();

        // Pre-formatted rows; padding is deliberate to align the columns.
        for row in [
            "  Ollama (local):        $0.00    FREE!",
            "  DeepSeek Chat:         $0.42",
            "  Gemini 1.5 Flash:      $0.38",
            "  GPT-4 Turbo:           $40.00",
            "  Claude 3 Opus:         $90.00",
        ] {
            println!("{row}");
        }
        println!();

        println!("Additional Ollama benefits:");
        for benefit in [
            "  * No API rate limits",
            "  * Complete privacy (data never leaves your server)",
            "  * No internet required",
            "  * Predictable costs (hardware only)",
        ] {
            println!("{benefit}");
        }
        println!();

        println!("Recommended strategy:");
        for step in [
            "  1. Use Ollama for development/testing (free)",
            "  2. Use DeepSeek/Gemini for production (cheap)",
            "  3. Use GPT-4/Claude for critical tasks (expensive but best quality)",
        ] {
            println!("{step}");
        }

        Ok(())
    }

    /// Print the Ollama installation and setup guide.
    #[allow(dead_code)]
    pub async fn installation_guide() -> Result<()> {
        Self::banner("Ollama Installation Guide");

        println!("1. Install Ollama:");
        println!("   curl -fsSL https://ollama.com/install.sh | sh");
        println!();
        println!("2. Pull a model:");
        for pull in [
            "   ollama pull llama2        # 7B model (~4GB download)",
            "   ollama pull codellama     # Code-specialized",
            "   ollama pull mistral       # High quality 7B",
        ] {
            println!("{pull}");
        }
        println!();
        println!("3. Start Ollama service:");
        println!("   ollama serve");
        println!();
        println!("4. Configure kaccy-ai:");
        println!("   export OLLAMA_BASE_URL=http://localhost:11434");
        println!("   export OLLAMA_MODEL=llama2");
        println!();
        println!("5. Test integration:");
        println!("   let client = OllamaClient::from_env();");
        println!();
        println!("Docs: https://ollama.com/");

        Ok(())
    }

    /// Show how Ollama settings are driven from environment variables.
    #[allow(dead_code)]
    pub async fn environment_config_example() -> Result<()> {
        Self::banner("Environment-Based Configuration");

        println!("Set environment variables:");
        println!("  export OLLAMA_BASE_URL=http://localhost:11434");
        println!("  export OLLAMA_MODEL=llama2");
        println!();

        // Load from environment
        // let _client = crate::llm::OllamaClient::from_env();

        println!("Client created from environment variables");
        println!();
        println!("This allows easy configuration across environments:");
        println!("  * Development: OLLAMA_MODEL=llama2 (fast, local)");
        println!("  * Production: OLLAMA_MODEL=llama2:70b (higher quality)");

        Ok(())
    }
}