siumai 0.10.3

A unified LLM interface library for Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
//! 🛡️ Error Handling - Production-ready error management
//!
//! This example demonstrates robust error handling patterns:
//! - Different error types and their handling
//! - Retry strategies for transient errors
//! - Rate limit handling and backoff
//! - Graceful degradation patterns
//! - Error logging and monitoring
//!
//! Before running, set your API keys:
//! ```bash
//! export OPENAI_API_KEY="your-key"
//! export ANTHROPIC_API_KEY="your-key"
//! ```
//!
//! Run with:
//! ```bash
//! cargo run --example error_handling
//! ```

use siumai::models;
use siumai::prelude::*;
use std::time::Duration;
use tokio::time::sleep;

/// Entry point: runs every error-handling demonstration in sequence.
///
/// None of the demonstration functions return a value; each prints its own
/// outcome. This function therefore always finishes with `Ok(())`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("🛡️ Error Handling - Production-ready error management\n");

    // Demonstrate different error handling patterns, one after another
    demonstrate_error_types().await;
    demonstrate_retry_strategies().await;
    demonstrate_rate_limit_handling().await;
    demonstrate_graceful_degradation().await;
    demonstrate_error_classification().await;

    println!("\n✅ Error handling examples completed!");
    Ok(())
}

/// Demonstrate different types of errors and their characteristics
async fn demonstrate_error_types() {
    println!("🔍 Error Types and Classification:\n");

    // Test different error scenarios
    println!("   Testing Invalid API Key:");
    match test_invalid_api_key().await {
        Ok(_) => println!("      ❌ Expected error but got success"),
        Err(e) => {
            println!("      ✅ Got expected error: {e}");
            demonstrate_error_handling(&e);
        }
    }

    println!("\n   Testing Invalid Model:");
    match test_invalid_model().await {
        Ok(_) => println!("      ❌ Expected error but got success"),
        Err(e) => {
            println!("      ✅ Got expected error: {e}");
            demonstrate_error_handling(&e);
        }
    }

    println!("\n   Testing Network Timeout:");
    match test_network_timeout().await {
        Ok(_) => println!("      ❌ Expected error but got success"),
        Err(e) => {
            println!("      ✅ Got expected error: {e}");
            demonstrate_error_handling(&e);
        }
    }

    println!("\n   Testing Rate Limit:");
    match test_rate_limit().await {
        Ok(_) => println!("      ❌ Expected error but got success"),
        Err(e) => {
            println!("      ✅ Got expected error: {e}");
            demonstrate_error_handling(&e);
        }
    }

    println!("\n   Testing Invalid Request:");
    match test_invalid_request().await {
        Ok(_) => println!("      ❌ Expected error but got success"),
        Err(e) => {
            println!("      ✅ Got expected error: {e}");
            demonstrate_error_handling(&e);
        }
    }
}

/// Demonstrate retry strategies for transient errors.
///
/// Sends the same prompt through `retry_with_exponential_backoff` and
/// `retry_with_jitter` (three attempts each) and prints either a short
/// preview of the reply or the final error.
async fn demonstrate_retry_strategies() {
    println!("🔄 Retry Strategies:\n");

    let message = "Hello! This is a test message.";

    // Strategy 1: Simple retry with exponential backoff
    println!("   Strategy 1: Exponential Backoff");
    match retry_with_exponential_backoff(message, 3).await {
        Ok(response) => {
            println!("      ✅ Success after retries");
            if let Some(text) = response.content_text() {
                // Truncate by characters rather than bytes: slicing a `str`
                // at a byte offset inside a multi-byte character panics.
                let preview: String = text.chars().take(50).collect();
                println!("      Response: {preview}");
            }
        }
        Err(e) => {
            println!("      ❌ Failed after all retries: {e}");
        }
    }

    // Strategy 2: Retry with jitter
    println!("\n   Strategy 2: Retry with Jitter");
    match retry_with_jitter(message, 3).await {
        Ok(response) => {
            println!("      ✅ Success with jitter strategy");
            if let Some(text) = response.content_text() {
                // Same character-based truncation as above.
                let preview: String = text.chars().take(50).collect();
                println!("      Response: {preview}");
            }
        }
        Err(e) => {
            println!("      ❌ Failed with jitter strategy: {e}");
        }
    }

    println!();
}

/// Demonstrate rate limit handling.
///
/// Sends one request through `handle_rate_limits` and prints a preview of the
/// reply (at most 100 characters) or the error that was returned.
async fn demonstrate_rate_limit_handling() {
    println!("⏱️ Rate Limit Handling:\n");

    // Simulate rate limit scenario
    println!("   Testing rate limit detection and handling...");

    match handle_rate_limits("Test rate limit handling").await {
        Ok(response) => {
            println!("   ✅ Successfully handled rate limits");
            if let Some(text) = response.content_text() {
                // Truncate by characters rather than bytes: slicing a `str`
                // at a byte offset inside a multi-byte character panics.
                let preview: String = text.chars().take(100).collect();
                println!("   Response: {preview}");
            }
        }
        Err(e) => {
            println!("   ❌ Rate limit handling failed: {e}");
        }
    }

    println!();
}

/// Demonstrate graceful degradation.
///
/// Asks `chat_with_graceful_degradation` for an answer and reports which
/// provider served it, together with a preview of the reply (at most 100
/// characters). If every provider fails, the error is printed instead.
async fn demonstrate_graceful_degradation() {
    println!("🎭 Graceful Degradation:\n");

    let user_message = "Explain quantum computing";

    match chat_with_graceful_degradation(user_message).await {
        Ok((provider, response)) => {
            println!("   ✅ Successfully used provider: {provider}");
            if let Some(text) = response.content_text() {
                // Truncate by characters rather than bytes: slicing a `str`
                // at a byte offset inside a multi-byte character panics.
                let preview: String = text.chars().take(100).collect();
                println!("   Response: {preview}");
            }
        }
        Err(e) => {
            println!("   ❌ All degradation strategies failed: {e}");
            println!("   💡 In production, you might return a cached response or error message");
        }
    }

    println!();
}

/// Demonstrate error classification for monitoring
async fn demonstrate_error_classification() {
    println!("📊 Error Classification for Monitoring:\n");

    // Simulate various errors and classify them
    let test_errors = vec![
        LlmError::AuthenticationError("Invalid API key".to_string()),
        LlmError::RateLimitError("Rate limit exceeded".to_string()),
        LlmError::TimeoutError("Request timed out".to_string()),
        LlmError::ModelNotSupported("gpt-5".to_string()),
        LlmError::InternalError("Network error".to_string()),
    ];

    for error in test_errors {
        println!("   Error: {error}");

        let classification = classify_error_for_monitoring(&error);
        println!("      Classification: {classification:?}");
        println!("      Action: {}", get_recommended_action(&classification));
        println!();
    }
}

/// Scenario: call OpenAI with a key that is deliberately bogus.
///
/// Returns whatever the client build or the chat call produces.
async fn test_invalid_api_key() -> Result<ChatResponse, LlmError> {
    let builder = LlmBuilder::new()
        .openai()
        .api_key("invalid-key-12345")
        .model(models::openai::GPT_4O_MINI);
    let client = builder.build().await?;

    client.chat(vec![user!("Hello")]).await
}

/// Scenario: call OpenAI with a model name that does not exist.
///
/// Needs `OPENAI_API_KEY` in the environment; without it an
/// `AuthenticationError` is returned and no request is made.
async fn test_invalid_model() -> Result<ChatResponse, LlmError> {
    let api_key = match std::env::var("OPENAI_API_KEY") {
        Ok(key) => key,
        Err(_) => return Err(LlmError::AuthenticationError("No API key".to_string())),
    };

    let client = LlmBuilder::new()
        .openai()
        .api_key(&api_key)
        .model("gpt-nonexistent-model")
        .build()
        .await?;

    client.chat(vec![user!("Hello")]).await
}

/// Scenario: network timeout.
///
/// A real timeout would need a client configured with a very short timeout,
/// so for the demo the error is fabricated and returned directly.
async fn test_network_timeout() -> Result<ChatResponse, LlmError> {
    let simulated = LlmError::TimeoutError(String::from("Simulated timeout"));
    Err(simulated)
}

/// Scenario: rate limiting.
///
/// No request is made; a fabricated `RateLimitError` is returned.
async fn test_rate_limit() -> Result<ChatResponse, LlmError> {
    let simulated = LlmError::RateLimitError(String::from("Rate limit exceeded"));
    Err(simulated)
}

/// Scenario: invalid request.
///
/// No request is made; a fabricated `InternalError` is returned.
async fn test_invalid_request() -> Result<ChatResponse, LlmError> {
    let simulated = LlmError::InternalError(String::from("Invalid request format"));
    Err(simulated)
}

/// Send `message`, retrying retryable failures with a doubling delay.
///
/// `max_retries` is the total number of attempts. The wait starts at 100 ms
/// and doubles after every failed attempt. A non-retryable error, or a
/// failure on the last attempt, is returned to the caller unchanged.
async fn retry_with_exponential_backoff(
    message: &str,
    max_retries: u32,
) -> Result<ChatResponse, LlmError> {
    let mut delay = Duration::from_millis(100);

    for attempt in 1..=max_retries {
        let failure = match try_chat_request(message).await {
            Ok(response) => {
                println!("      ✅ Success on attempt {attempt}");
                return Ok(response);
            }
            Err(e) => e,
        };

        let attempts_left = attempt < max_retries;
        if attempts_left && is_retryable_error(&failure) {
            println!("      ⏳ Attempt {attempt} failed, retrying in {delay:?}");
            sleep(delay).await;
            delay *= 2; // Exponential backoff
        } else {
            println!("      ❌ Non-retryable error or max retries reached: {failure}");
            return Err(failure);
        }
    }

    // Only reachable when `max_retries` is 0 and the loop body never ran.
    Err(LlmError::InternalError("Max retries exceeded".to_string()))
}

/// Retry with jitter to avoid thundering herd.
///
/// `max_retries` is the total number of attempts. After a retryable failure
/// the wait is an exponential base (100 ms, 200 ms, 400 ms, ...) plus a random
/// jitter of up to half that base. A non-retryable error, or a failure on the
/// last attempt, is returned to the caller unchanged.
async fn retry_with_jitter(message: &str, max_retries: u32) -> Result<ChatResponse, LlmError> {
    use rand::Rng;

    for attempt in 1..=max_retries {
        match try_chat_request(message).await {
            Ok(response) => {
                println!("      ✅ Success on attempt {attempt} with jitter");
                return Ok(response);
            }
            Err(e) if is_retryable_error(&e) && attempt < max_retries => {
                // Exponential base in milliseconds. The shift and the multiply
                // saturate instead of overflowing for very large attempt counts.
                let base_delay: u64 = 1u64
                    .checked_shl(attempt - 1)
                    .map_or(u64::MAX, |factor| factor.saturating_mul(100));

                // Take the thread-local RNG only for this one statement. Keeping
                // a `ThreadRng` alive across an `.await` makes the future `!Send`.
                let jitter = rand::thread_rng().gen_range(0..=base_delay / 2); // Add jitter
                let delay = Duration::from_millis(base_delay.saturating_add(jitter));

                println!("      ⏳ Attempt {attempt} failed, retrying in {delay:?} (with jitter)");
                sleep(delay).await;
            }
            Err(e) => {
                return Err(e);
            }
        }
    }

    // Only reachable when `max_retries` is 0 and the loop body never ran.
    Err(LlmError::InternalError("Max retries exceeded".to_string()))
}

/// Send `message`; on a rate-limit error, wait 60 seconds and try once more.
///
/// Any other outcome of the first attempt, success or error, is returned
/// unchanged. The result of the second attempt is returned as-is.
async fn handle_rate_limits(message: &str) -> Result<ChatResponse, LlmError> {
    let first_attempt = try_chat_request(message).await;

    // Anything that is not a rate-limit error is final.
    if !matches!(first_attempt, Err(LlmError::RateLimitError(_))) {
        return first_attempt;
    }

    println!("   ⏳ Rate limit detected, waiting 60 seconds...");
    sleep(Duration::from_secs(60)).await;

    // Retry after rate limit wait
    try_chat_request(message).await
}

/// Answer `message` using the first provider that works.
///
/// Order: the primary provider (via `try_chat_request`), then a local Ollama
/// instance. On success the tuple holds a label for the provider used and its
/// response. Errors from individual providers are discarded; if none succeeds
/// a single `InternalError` is returned.
async fn chat_with_graceful_degradation(message: &str) -> Result<(String, ChatResponse), LlmError> {
    // Try primary provider
    let primary = try_chat_request(message).await;
    if let Ok(response) = primary {
        return Ok((String::from("Primary"), response));
    }

    // Try Ollama as fallback
    let ollama = LlmBuilder::new()
        .ollama()
        .base_url("http://localhost:11434")
        .model("llama3.2")
        .build()
        .await;
    if let Ok(client) = ollama {
        let reply = client.chat(vec![user!(message)]).await;
        if let Ok(response) = reply {
            return Ok((String::from("Ollama Fallback"), response));
        }
    }

    // Final fallback: return a helpful error message
    let exhausted = LlmError::InternalError(String::from("All providers unavailable"));
    Err(exhausted)
}

/// Send `message` as a single user turn to OpenAI (`GPT_4O_MINI`).
///
/// The API key is read from `OPENAI_API_KEY`; if the variable cannot be read
/// an `AuthenticationError` is returned without making a request.
async fn try_chat_request(message: &str) -> Result<ChatResponse, LlmError> {
    let api_key = match std::env::var("OPENAI_API_KEY") {
        Ok(key) => key,
        Err(_) => {
            return Err(LlmError::AuthenticationError(
                "No API key available".to_string(),
            ));
        }
    };

    let client = LlmBuilder::new()
        .openai()
        .api_key(&api_key)
        .model(models::openai::GPT_4O_MINI)
        .build()
        .await?;

    client.chat(vec![user!(message)]).await
}

/// Returns `true` for errors this example retries: timeouts, rate limits and
/// internal errors.
const fn is_retryable_error(error: &LlmError) -> bool {
    match error {
        LlmError::TimeoutError(_) => true,
        LlmError::RateLimitError(_) => true,
        LlmError::InternalError(_) => true,
        _ => false,
    }
}

/// Returns `true` when `error` is an `AuthenticationError`.
const fn is_auth_error(error: &LlmError) -> bool {
    match error {
        LlmError::AuthenticationError(_) => true,
        _ => false,
    }
}

/// Returns `true` when `error` is a `RateLimitError`.
const fn is_rate_limit_error(error: &LlmError) -> bool {
    match error {
        LlmError::RateLimitError(_) => true,
        _ => false,
    }
}

/// Returns `true` for errors treated as client-side (4xx) here:
/// authentication failures and unsupported models.
const fn is_client_error(error: &LlmError) -> bool {
    match error {
        LlmError::AuthenticationError(_) => true,
        LlmError::ModelNotSupported(_) => true,
        _ => false,
    }
}

/// Error classification for monitoring
///
/// Coarse buckets produced by `classify_error_for_monitoring`; each bucket
/// maps to one recommended action in `get_recommended_action`.
#[derive(Debug)]
enum ErrorClassification {
    /// Temporary issues, retry
    Transient,
    /// Auth problems, check credentials
    Authentication,
    /// Rate limiting, backoff
    RateLimit,
    /// Client-side issues, fix request
    ClientError,
    /// Server-side issues, contact support
    ServerError,
}

/// Classify error for monitoring and alerting
const fn classify_error_for_monitoring(error: &LlmError) -> ErrorClassification {
    match error {
        LlmError::AuthenticationError(_) => ErrorClassification::Authentication,
        LlmError::RateLimitError(_) => ErrorClassification::RateLimit,
        LlmError::TimeoutError(_) => ErrorClassification::Transient,
        LlmError::ModelNotSupported(_) => ErrorClassification::ClientError,
        LlmError::InternalError(_) => ErrorClassification::ServerError,
        _ => ErrorClassification::ServerError, // Default for other error types
    }
}

/// Get recommended action for error classification
const fn get_recommended_action(classification: &ErrorClassification) -> &'static str {
    match classification {
        ErrorClassification::Transient => "Retry with exponential backoff",
        ErrorClassification::Authentication => "Check API credentials",
        ErrorClassification::RateLimit => "Implement rate limiting and backoff",
        ErrorClassification::ClientError => "Fix request parameters",
        ErrorClassification::ServerError => "Monitor and escalate if persistent",
    }
}

/// Demonstrate error handling for a specific error
fn demonstrate_error_handling(error: &LlmError) {
    println!("      📊 Error Analysis:");
    println!("         - Retryable: {}", is_retryable_error(error));
    println!("         - Auth error: {}", is_auth_error(error));
    println!("         - Rate limit: {}", is_rate_limit_error(error));
    println!("         - Client error: {}", is_client_error(error));

    let classification = classify_error_for_monitoring(error);
    println!("         - Classification: {classification:?}");
    println!(
        "         - Recommended action: {}",
        get_recommended_action(&classification)
    );
}

/*
🎯 Key Error Handling Concepts:

Error Types:
- Authentication: Invalid API keys, expired tokens
- Rate Limits: Too many requests, quota exceeded
- Timeouts: Network issues, slow responses
- Client Errors: Invalid requests, unsupported models
- Server Errors: Provider outages, internal errors

Retry Strategies:
- Exponential backoff: Increase delay between retries
- Jitter: Add randomness to prevent thundering herd
- Circuit breaker: Stop retrying after threshold
- Selective retry: Only retry transient errors

Best Practices:
1. Classify errors appropriately
2. Implement proper retry logic
3. Use graceful degradation
4. Log errors for monitoring
5. Provide meaningful user feedback
6. Set reasonable timeouts
7. Monitor error rates and patterns

Production Considerations:
- Error tracking and alerting
- Graceful degradation strategies
- User experience during failures
- Cost implications of retries
- Provider SLA monitoring

Next Steps:
- capability_detection.rs: Feature detection patterns
- ../03_advanced_features/: Advanced error handling
- ../05_use_cases/: Production error handling examples
*/