rrag 0.1.0-alpha.2

High-performance Rust framework for Retrieval-Augmented Generation with pluggable components, async-first design, and comprehensive observability
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
//! # Cache Metrics and Monitoring
//!
//! Performance metrics and monitoring for the caching layer.

use super::{CacheStats, OverallCacheMetrics};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::{Duration, SystemTime};

/// Cache metrics collector
///
/// Central bookkeeping object for the caching layer: it accumulates per-cache
/// hit/miss/eviction counters, per-operation latency statistics and memory
/// usage, and derives snapshots and performance reports from them.
pub struct MetricsCollector {
    /// Per-cache metrics, keyed by the cache name passed to
    /// `record_operation` / `update_memory`
    cache_metrics: HashMap<String, CacheStats>,

    /// Operation timings (aggregated across all caches, one bucket per operation kind)
    operation_timings: OperationTimings,

    /// Memory tracking (current/peak usage and logged pressure events)
    memory_tracker: MemoryTracker,

    /// Performance analyzer (time-series histories; created empty by `new`)
    analyzer: PerformanceAnalyzer,

    /// Metrics history (`new` creates it with room for 1000 snapshots and a
    /// 60-second interval)
    history: MetricsHistory,
}

/// Operation timing statistics
///
/// One [`TimingStats`] bucket per kind of cache operation. The buckets are
/// shared by all caches; they are not broken down per cache name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OperationTimings {
    /// Get operation timings (hits and misses alike)
    pub get_timings: TimingStats,

    /// Put operation timings
    pub put_timings: TimingStats,

    /// Remove operation timings
    pub remove_timings: TimingStats,

    /// Eviction timings
    pub eviction_timings: TimingStats,

    /// Compression timings
    // NOTE(review): no `Operation` variant in this file records into this
    // bucket; it stays at its default unless written to directly.
    pub compression_timings: TimingStats,
}

/// Timing statistics for an operation
///
/// Running aggregates updated by `TimingStats::record`; individual samples are
/// not retained.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimingStats {
    /// Total operations
    pub count: u64,

    /// Total time in microseconds
    pub total_us: u64,

    /// Average time in microseconds
    pub avg_us: f32,

    /// Minimum time in microseconds; the `Default` value is `u64::MAX` so that
    /// the first recorded sample always replaces it
    pub min_us: u64,

    /// Maximum time in microseconds
    pub max_us: u64,

    /// 50th percentile in microseconds (`record` derives this from the running
    /// average; it is an estimate, not a true percentile)
    pub p50_us: u64,

    /// 95th percentile in microseconds (estimate derived from the running average)
    pub p95_us: u64,

    /// 99th percentile in microseconds (estimate derived from the running average)
    pub p99_us: u64,
}

/// Memory usage tracker
///
/// Plain data holder; all fields are public and are updated by
/// `MetricsCollector::update_memory` or directly by the owner.
#[derive(Debug, Clone)]
pub struct MemoryTracker {
    /// Current memory usage in bytes (the value most recently passed to
    /// `MetricsCollector::update_memory`)
    pub current_bytes: usize,

    /// Peak memory usage in bytes (highest value seen so far)
    pub peak_bytes: usize,

    /// Memory saved through compression, in bytes; added to
    /// `deduplication_saved_bytes` to form `OverallCacheMetrics::memory_saved`
    pub compression_saved_bytes: usize,

    /// Memory saved through deduplication, in bytes
    pub deduplication_saved_bytes: usize,

    /// Memory pressure events, in the order they were logged
    pub pressure_events: Vec<MemoryPressureEvent>,
}

/// Memory pressure event
///
/// Logged by `MetricsCollector::update_memory` when the computed pressure
/// exceeds 0.8.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryPressureEvent {
    /// When the event occurred
    pub timestamp: SystemTime,

    /// Memory usage at time of event, in bytes
    pub memory_bytes: usize,

    /// Pressure level (0.0 to 1.0)
    pub pressure_level: f32,

    /// Action taken (`update_memory` always records `PressureAction::None`)
    pub action: PressureAction,

    /// Memory freed, in bytes (`update_memory` always records 0)
    pub freed_bytes: usize,
}

/// Actions taken under memory pressure
///
/// Stored in [`MemoryPressureEvent::action`] to describe how the cache reacted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PressureAction {
    /// Evicted entries (`count` is presumably the number of entries evicted)
    Eviction { count: usize },

    /// Compressed entries (`count` is presumably the number of entries compressed)
    Compression { count: usize },

    /// Cleared entire cache
    ClearCache,

    /// No action needed; this is the only variant produced within this file
    None,
}

/// Performance analyzer
///
/// Holds time series as `(timestamp, value)` pairs. `PerformanceAnalyzer::new`
/// creates all of them empty.
// NOTE(review): nothing in this file appends to these series; confirm whether
// they are populated elsewhere.
#[derive(Debug, Clone)]
pub struct PerformanceAnalyzer {
    /// Hit rate over time
    pub hit_rate_history: Vec<(SystemTime, f32)>,

    /// Operations per second history
    pub ops_history: Vec<(SystemTime, f32)>,

    /// Latency percentiles over time
    pub latency_history: Vec<(SystemTime, LatencySnapshot)>,

    /// Efficiency score history
    pub efficiency_history: Vec<(SystemTime, f32)>,
}

/// Latency snapshot at a point in time
///
/// Field names mirror the percentile/maximum fields of [`TimingStats`]; values
/// are in microseconds, as indicated by the `_us` suffix.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencySnapshot {
    /// 50th percentile latency
    pub p50_us: u64,
    /// 95th percentile latency
    pub p95_us: u64,
    /// 99th percentile latency
    pub p99_us: u64,
    /// Maximum latency
    pub max_us: u64,
}

/// Metrics history for trend analysis
///
/// A list of snapshots ordered oldest-first; `add_snapshot` appends at the end
/// and discards from the front once `max_size` is exceeded.
#[derive(Debug, Clone)]
pub struct MetricsHistory {
    /// Historical snapshots (oldest first)
    pub snapshots: Vec<MetricsSnapshot>,

    /// Maximum history size (number of snapshots retained)
    pub max_size: usize,

    /// Snapshot interval
    // NOTE(review): stored by `new` but not consulted by any code in this file.
    pub interval: Duration,

    /// Last snapshot time (set at construction and on every `add_snapshot`)
    pub last_snapshot: SystemTime,
}

/// Point-in-time metrics snapshot
///
/// Produced by `MetricsCollector::snapshot`; owns a copy of the per-cache
/// statistics as they were when the snapshot was taken.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    /// Snapshot timestamp
    pub timestamp: SystemTime,

    /// Overall metrics
    pub overall: OverallCacheMetrics,

    /// Individual cache stats, keyed by cache name
    pub cache_stats: HashMap<String, CacheStats>,

    /// Memory usage in bytes
    pub memory_bytes: usize,

    /// Active operations (`MetricsCollector::snapshot` currently always stores 0)
    pub active_operations: u32,
}

impl MetricsCollector {
    /// Upper bound on the number of retained memory-pressure events.
    ///
    /// Once the log is full the oldest events are discarded first, so sustained
    /// pressure cannot make the log itself grow without bound.
    const MAX_PRESSURE_EVENTS: usize = 1000;

    /// Create new metrics collector
    ///
    /// Starts with no registered caches, zeroed timings and memory counters,
    /// and a history sized for 1000 snapshots at a 60-second interval.
    pub fn new() -> Self {
        Self {
            cache_metrics: HashMap::new(),
            operation_timings: OperationTimings::default(),
            memory_tracker: MemoryTracker::new(),
            analyzer: PerformanceAnalyzer::new(),
            history: MetricsHistory::new(1000, Duration::from_secs(60)),
        }
    }

    /// Record cache operation
    ///
    /// The duration is always added to the timing bucket for `operation`.
    /// Hit/miss/eviction counters are only updated when `cache` already has an
    /// entry in the per-cache metrics map; unknown cache names are ignored for
    /// counting purposes.
    pub fn record_operation(&mut self, cache: &str, operation: Operation, duration: Duration) {
        // `as_micros` returns u128; saturate rather than letting the cast
        // silently keep only the low 64 bits of an oversized duration.
        let duration_us = duration.as_micros().min(u128::from(u64::MAX)) as u64;

        match operation {
            Operation::Get { hit } => {
                self.operation_timings.get_timings.record(duration_us);

                if let Some(stats) = self.cache_metrics.get_mut(cache) {
                    if hit {
                        stats.hits += 1;
                    } else {
                        stats.misses += 1;
                    }
                    // One of the counters was just incremented, so the
                    // denominator is never zero here.
                    stats.hit_rate = stats.hits as f32 / (stats.hits + stats.misses) as f32;
                }
            }
            Operation::Put => {
                self.operation_timings.put_timings.record(duration_us);
            }
            Operation::Remove => {
                self.operation_timings.remove_timings.record(duration_us);
            }
            Operation::Evict => {
                self.operation_timings.eviction_timings.record(duration_us);

                if let Some(stats) = self.cache_metrics.get_mut(cache) {
                    stats.evictions += 1;
                }
            }
        }
    }

    /// Update memory usage
    ///
    /// Stores `bytes` as the current usage, raises the peak if needed, copies
    /// the value into the stats of `cache` when that cache is registered, and
    /// logs a [`MemoryPressureEvent`] when the resulting pressure exceeds 0.8.
    // NOTE(review): `current_bytes` is overwritten with the per-cache value
    // rather than a sum across caches - confirm this is intended when several
    // caches report through the same collector.
    pub fn update_memory(&mut self, cache: &str, bytes: usize) {
        self.memory_tracker.current_bytes = bytes;
        self.memory_tracker.peak_bytes = self.memory_tracker.peak_bytes.max(bytes);

        if let Some(stats) = self.cache_metrics.get_mut(cache) {
            stats.memory_usage = bytes;
        }

        // Check for memory pressure
        let pressure = self.calculate_memory_pressure();
        if pressure > 0.8 {
            let events = &mut self.memory_tracker.pressure_events;

            // Make room first so the log never exceeds its cap, even if it was
            // already over the limit when we got here.
            if events.len() >= Self::MAX_PRESSURE_EVENTS {
                let excess = events.len() + 1 - Self::MAX_PRESSURE_EVENTS;
                events.drain(..excess);
            }

            events.push(MemoryPressureEvent {
                timestamp: SystemTime::now(),
                memory_bytes: bytes,
                pressure_level: pressure,
                action: PressureAction::None,
                freed_bytes: 0,
            });
        }
    }

    /// Calculate memory pressure (0.0 to 1.0)
    fn calculate_memory_pressure(&self) -> f32 {
        // Simplified - would use system memory in real implementation
        const MAX_MEMORY: usize = 1024 * 1024 * 1024; // 1GB
        (self.memory_tracker.current_bytes as f32 / MAX_MEMORY as f32).min(1.0)
    }

    /// Take metrics snapshot
    ///
    /// Returns a copy of the current state; the collector itself is left
    /// unchanged.
    // NOTE(review): the snapshot is not appended to `self.history` even though
    // the receiver is `&mut self` - confirm whether callers are expected to
    // store it themselves.
    pub fn snapshot(&mut self) -> MetricsSnapshot {
        let overall = self.calculate_overall_metrics();

        MetricsSnapshot {
            timestamp: SystemTime::now(),
            overall,
            cache_stats: self.cache_metrics.clone(),
            memory_bytes: self.memory_tracker.current_bytes,
            active_operations: 0, // Would track active operations
        }
    }

    /// Sum hits and misses across every registered cache.
    fn hit_miss_totals(&self) -> (u64, u64) {
        let hits = self.cache_metrics.values().map(|s| s.hits).sum();
        let misses = self.cache_metrics.values().map(|s| s.misses).sum();
        (hits, misses)
    }

    /// Hit rate for the given counters; 0.0 when there were no lookups at all.
    fn hit_rate(hits: u64, misses: u64) -> f32 {
        let lookups = hits + misses;
        if lookups > 0 {
            hits as f32 / lookups as f32
        } else {
            0.0
        }
    }

    /// Calculate overall metrics
    fn calculate_overall_metrics(&self) -> OverallCacheMetrics {
        let (total_hits, total_misses) = self.hit_miss_totals();
        let hit_rate = Self::hit_rate(total_hits, total_misses);
        let memory_pressure = self.calculate_memory_pressure();

        // Calculate time saved (estimated)
        let avg_cache_time = self.operation_timings.get_timings.avg_us;
        let avg_miss_time = avg_cache_time * 10.0; // Assume cache is 10x faster
        let time_saved_ms = (total_hits as f32 * (avg_miss_time - avg_cache_time)) / 1000.0;

        // Calculate efficiency score: 40% hit rate, 30% memory headroom,
        // 30% time saved (saturating at one second saved).
        let efficiency_score = hit_rate * 0.4
            + (1.0 - memory_pressure) * 0.3
            + (time_saved_ms / 1000.0).min(1.0) * 0.3;

        OverallCacheMetrics {
            memory_saved: self.memory_tracker.compression_saved_bytes
                + self.memory_tracker.deduplication_saved_bytes,
            time_saved_ms,
            efficiency_score,
            memory_pressure,
            ops_per_second: self.calculate_ops_per_second(),
        }
    }

    /// Calculate operations per second
    fn calculate_ops_per_second(&self) -> f32 {
        // Would calculate based on recent operations
        100.0 // Placeholder
    }

    /// Get performance report
    ///
    /// The summary is computed once and shared by the recommendation and alert
    /// generators so that all three parts of the report agree.
    pub fn get_report(&self) -> PerformanceReport {
        let summary = self.get_summary();
        let recommendations = self.generate_recommendations(&summary);
        let alerts = self.generate_alerts(&summary);

        PerformanceReport {
            summary,
            recommendations,
            alerts,
            trends: self.analyze_trends(),
        }
    }

    /// Get summary statistics
    fn get_summary(&self) -> SummaryStats {
        let (total_hits, total_misses) = self.hit_miss_totals();

        SummaryStats {
            total_operations: total_hits + total_misses,
            overall_hit_rate: Self::hit_rate(total_hits, total_misses),
            memory_usage_mb: self.memory_tracker.current_bytes as f32 / (1024.0 * 1024.0),
            avg_latency_us: self.operation_timings.get_timings.avg_us,
            efficiency_score: self.calculate_overall_metrics().efficiency_score,
        }
    }

    /// Generate performance recommendations
    ///
    /// Emits one message each for: overall hit rate below 0.5, memory pressure
    /// above 0.8, and p99 get latency above 1000 microseconds.
    fn generate_recommendations(&self, summary: &SummaryStats) -> Vec<String> {
        let mut recommendations = Vec::new();

        // Check hit rate
        if summary.overall_hit_rate < 0.5 {
            recommendations.push(
                "Low hit rate detected. Consider increasing cache size or adjusting eviction policy.".to_string()
            );
        }

        // Check memory pressure
        if self.calculate_memory_pressure() > 0.8 {
            recommendations
                .push("High memory pressure. Enable compression or reduce cache size.".to_string());
        }

        // Check latency
        if self.operation_timings.get_timings.p99_us > 1000 {
            recommendations.push(
                "High cache latency detected. Consider optimizing data structures.".to_string(),
            );
        }

        recommendations
    }

    /// Generate alerts for issues
    ///
    /// Critical when memory pressure exceeds 0.9; warning when the overall hit
    /// rate is below 0.3.
    fn generate_alerts(&self, summary: &SummaryStats) -> Vec<Alert> {
        let mut alerts = Vec::new();

        if self.calculate_memory_pressure() > 0.9 {
            alerts.push(Alert {
                severity: AlertSeverity::Critical,
                message: "Critical memory pressure - cache may start dropping entries".to_string(),
                timestamp: SystemTime::now(),
            });
        }

        if summary.overall_hit_rate < 0.3 {
            alerts.push(Alert {
                severity: AlertSeverity::Warning,
                message: "Very low cache hit rate - cache may not be effective".to_string(),
                timestamp: SystemTime::now(),
            });
        }

        alerts
    }

    /// Analyze performance trends
    // NOTE(review): returns fixed values; no history is consulted yet.
    fn analyze_trends(&self) -> TrendAnalysis {
        TrendAnalysis {
            hit_rate_trend: Trend::Stable,
            memory_trend: Trend::Increasing,
            latency_trend: Trend::Stable,
            efficiency_trend: Trend::Stable,
        }
    }
}

impl Default for MetricsCollector {
    /// Equivalent to [`MetricsCollector::new`].
    fn default() -> Self {
        Self::new()
    }
}

/// Cache operation types
///
/// Passed to `MetricsCollector::record_operation` to select which timing
/// bucket and which per-cache counters are updated.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operation {
    /// Lookup; `hit` says whether the entry was found in the cache
    Get { hit: bool },
    /// Insertion of an entry
    Put,
    /// Explicit removal of an entry
    Remove,
    /// Removal of an entry by the eviction policy
    Evict,
}

/// Performance report
///
/// Returned by `MetricsCollector::get_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceReport {
    /// Headline numbers at the time the report was built
    pub summary: SummaryStats,
    /// Human-readable tuning suggestions (empty when no threshold is crossed)
    pub recommendations: Vec<String>,
    /// Alerts raised for threshold violations
    pub alerts: Vec<Alert>,
    /// Direction of change for the tracked metrics
    pub trends: TrendAnalysis,
}

/// Summary statistics
///
/// Aggregated over every cache registered with the collector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryStats {
    /// Total hits plus misses across all caches
    pub total_operations: u64,
    /// Hits divided by total operations; 0.0 when there were none
    pub overall_hit_rate: f32,
    /// Current tracked memory usage in MiB (bytes / 1024 / 1024)
    pub memory_usage_mb: f32,
    /// Average latency of get operations, in microseconds
    pub avg_latency_us: f32,
    /// Efficiency score taken from the overall cache metrics
    pub efficiency_score: f32,
}

/// Performance alert
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alert {
    /// How serious the condition is
    pub severity: AlertSeverity,
    /// Human-readable description of the condition
    pub message: String,
    /// When the alert was generated
    pub timestamp: SystemTime,
}

/// Alert severity levels
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertSeverity {
    /// Informational (not emitted by any code in this file)
    Info,
    /// Used for an overall hit rate below 0.3
    Warning,
    /// Used for memory pressure above 0.9
    Critical,
}

/// Trend analysis results
///
/// One [`Trend`] per tracked metric.
// NOTE(review): `MetricsCollector::analyze_trends` currently fills this with
// fixed values rather than values derived from history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TrendAnalysis {
    /// Direction of the cache hit rate
    pub hit_rate_trend: Trend,
    /// Direction of memory usage
    pub memory_trend: Trend,
    /// Direction of operation latency
    pub latency_trend: Trend,
    /// Direction of the efficiency score
    pub efficiency_trend: Trend,
}

/// Trend direction
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Trend {
    /// The metric is rising
    Increasing,
    /// The metric is falling
    Decreasing,
    /// The metric shows no significant change
    Stable,
    /// The metric fluctuates without a clear direction
    Volatile,
}

// Implementations

impl Default for OperationTimings {
    fn default() -> Self {
        Self {
            get_timings: TimingStats::default(),
            put_timings: TimingStats::default(),
            remove_timings: TimingStats::default(),
            eviction_timings: TimingStats::default(),
            compression_timings: TimingStats::default(),
        }
    }
}

impl Default for TimingStats {
    fn default() -> Self {
        Self {
            count: 0,
            total_us: 0,
            avg_us: 0.0,
            min_us: u64::MAX,
            max_us: 0,
            p50_us: 0,
            p95_us: 0,
            p99_us: 0,
        }
    }
}

impl TimingStats {
    /// Record a timing measurement
    ///
    /// Updates the count, total, average, minimum and maximum, then refreshes
    /// the percentile estimates. `duration_us` is the sample in microseconds.
    pub fn record(&mut self, duration_us: u64) {
        self.count += 1;
        // Saturate instead of overflowing on pathological totals.
        self.total_us = self.total_us.saturating_add(duration_us);
        self.avg_us = self.total_us as f32 / self.count as f32;
        self.min_us = self.min_us.min(duration_us);
        self.max_us = self.max_us.max(duration_us);

        // Update percentiles (simplified - would use proper algorithm).
        // The estimates are scaled from the average, so keep them inside the
        // observed range: a percentile can never exceed the largest sample or
        // fall below the smallest one.
        let (lowest, highest) = (self.min_us, self.max_us);
        let bounded = |estimate: f32| (estimate as u64).max(lowest).min(highest);

        self.p50_us = bounded(self.avg_us);
        self.p95_us = bounded(self.avg_us * 1.5);
        self.p99_us = bounded(self.avg_us * 2.0);
    }
}

impl MemoryTracker {
    pub fn new() -> Self {
        Self {
            current_bytes: 0,
            peak_bytes: 0,
            compression_saved_bytes: 0,
            deduplication_saved_bytes: 0,
            pressure_events: Vec::new(),
        }
    }
}

impl PerformanceAnalyzer {
    pub fn new() -> Self {
        Self {
            hit_rate_history: Vec::new(),
            ops_history: Vec::new(),
            latency_history: Vec::new(),
            efficiency_history: Vec::new(),
        }
    }
}

impl MetricsHistory {
    /// Create an empty history.
    ///
    /// `max_size` is the number of snapshots to retain and `interval` is
    /// stored as the snapshot interval. The last-snapshot time starts at the
    /// moment of construction.
    pub fn new(max_size: usize, interval: Duration) -> Self {
        Self {
            snapshots: Vec::new(),
            max_size,
            interval,
            last_snapshot: SystemTime::now(),
        }
    }

    /// Append a snapshot and discard the oldest ones beyond `max_size`.
    ///
    /// All excess entries are dropped in one pass, so the limit is also
    /// restored when the public `max_size` field was lowered after snapshots
    /// had already accumulated.
    pub fn add_snapshot(&mut self, snapshot: MetricsSnapshot) {
        self.snapshots.push(snapshot);

        let excess = self.snapshots.len().saturating_sub(self.max_size);
        if excess > 0 {
            self.snapshots.drain(..excess);
        }

        self.last_snapshot = SystemTime::now();
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Running aggregates after three samples.
    #[test]
    fn test_timing_stats() {
        let mut stats = TimingStats::default();

        stats.record(100);
        stats.record(200);
        stats.record(150);

        assert_eq!(stats.count, 3);
        assert_eq!(stats.total_us, 450);
        assert_eq!(stats.avg_us, 150.0);
        assert_eq!(stats.min_us, 100);
        assert_eq!(stats.max_us, 200);
    }

    /// A recorded hit must show up in the per-cache counters and in the report.
    #[test]
    fn test_metrics_collector() {
        let mut collector = MetricsCollector::new();

        collector.cache_metrics.insert(
            "test".to_string(),
            CacheStats {
                total_entries: 100,
                hits: 80,
                misses: 20,
                hit_rate: 0.8,
                memory_usage: 1024,
                avg_access_time_us: 10.0,
                evictions: 5,
                last_cleanup: SystemTime::now(),
            },
        );

        collector.record_operation(
            "test",
            Operation::Get { hit: true },
            Duration::from_micros(10),
        );

        // Operations on a cache that was never registered only affect timings.
        collector.record_operation(
            "unregistered",
            Operation::Get { hit: false },
            Duration::from_micros(10),
        );

        let stats = &collector.cache_metrics["test"];
        assert_eq!(stats.hits, 81);
        assert_eq!(stats.misses, 20);
        assert_eq!(collector.operation_timings.get_timings.count, 2);

        let report = collector.get_report();
        assert_eq!(report.summary.total_operations, 101);
        assert!((report.summary.overall_hit_rate - 81.0 / 101.0).abs() < 1e-6);
    }

    /// Current usage follows the latest update while the peak only rises.
    #[test]
    fn test_memory_tracker() {
        let tracker = MemoryTracker::new();
        assert_eq!(tracker.current_bytes, 0);
        assert_eq!(tracker.peak_bytes, 0);
        assert!(tracker.pressure_events.is_empty());

        let mut collector = MetricsCollector::new();
        collector.update_memory("test", 2048);
        collector.update_memory("test", 1024);

        assert_eq!(collector.memory_tracker.current_bytes, 1024);
        assert_eq!(collector.memory_tracker.peak_bytes, 2048);
        // Far below the 0.8 pressure threshold, so nothing is logged.
        assert!(collector.memory_tracker.pressure_events.is_empty());
    }
}