ruvllm 2.2.0

LLM serving runtime with Ruvector integration - Paged attention, KV cache, and SONA learning
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
//! ReasoningBank - Production-grade learning from Claude trajectories
//!
//! This module implements a complete system for learning from Claude (and other LLM)
//! trajectories, enabling continuous improvement through:
//!
//! - **Trajectory Recording**: Real-time capture of execution paths with quality metrics
//! - **Pattern Storage**: HNSW-indexed pattern storage for fast similarity search (150x faster)
//! - **Verdict Analysis**: Enhanced verdict system for failure analysis and root cause detection
//! - **Memory Consolidation**: EWC++ style consolidation to prevent catastrophic forgetting
//! - **Memory Distillation**: Compress old trajectories while preserving key lessons
//!
//! ## Architecture
//!
//! ```text
//! ┌─────────────────────────────────────────────────────────────────────┐
//! │                         ReasoningBank                               │
//! ├─────────────────────────────────────────────────────────────────────┤
//! │  ┌─────────────┐   ┌─────────────┐   ┌─────────────┐              │
//! │  │ Trajectory  │──>│   Pattern   │──>│  Verdict    │              │
//! │  │  Recorder   │   │   Store     │   │  Analyzer   │              │
//! │  └─────────────┘   └─────────────┘   └─────────────┘              │
//! │        │                  │                  │                     │
//! │        v                  v                  v                     │
//! │  ┌─────────────────────────────────────────────────────┐         │
//! │  │              HNSW Index (ruvector-core)             │         │
//! │  │            ef_construction=200, M=32                │         │
//! │  └─────────────────────────────────────────────────────┘         │
//! │        │                  │                  │                     │
//! │        v                  v                  v                     │
//! │  ┌─────────────┐   ┌─────────────┐   ┌─────────────┐              │
//! │  │ Consolidator│   │ Distiller   │   │   Export    │              │
//! │  │   (EWC++)   │   │             │   │             │              │
//! │  └─────────────┘   └─────────────┘   └─────────────┘              │
//! └─────────────────────────────────────────────────────────────────────┘
//! ```
//!
//! ## Example Usage
//!
//! ```rust,ignore
//! use ruvllm::reasoning_bank::{
//!     ReasoningBank, ReasoningBankConfig,
//!     TrajectoryRecorder, Verdict,
//! };
//!
//! // Create the reasoning bank
//! let config = ReasoningBankConfig::default();
//! let bank = ReasoningBank::new(config)?;
//!
//! // Start recording a trajectory (takes the query embedding as a `Vec<f32>`)
//! let mut recorder = bank.start_trajectory(query_embedding.clone());
//! recorder.add_step(action, rationale, outcome, confidence);
//! recorder.add_step(action2, rationale2, outcome2, confidence2);
//!
//! // Complete with a verdict
//! let trajectory = recorder.complete(Verdict::Success);
//!
//! // Store for learning
//! bank.store_trajectory(trajectory)?;
//!
//! // Search for similar patterns
//! let similar = bank.search_similar(&query_embedding, 10)?;
//!
//! // Periodic consolidation
//! bank.consolidate()?;
//! ```

pub mod consolidation;
pub mod distillation;
pub mod pattern_store;
pub mod trajectory;
pub mod verdicts;

// Re-exports for convenience
pub use consolidation::{
    ConsolidationConfig, ConsolidationResult, FisherInformation, ImportanceScore,
    PatternConsolidator,
};
pub use distillation::{
    CompressedTrajectory, DistillationConfig, DistillationResult, KeyLesson, MemoryDistiller,
};
pub use pattern_store::{
    Pattern, PatternCategory, PatternSearchResult, PatternStats, PatternStore, PatternStoreConfig,
};
pub use trajectory::{
    StepOutcome, Trajectory, TrajectoryId, TrajectoryMetadata, TrajectoryRecorder, TrajectoryStep,
};
pub use verdicts::{
    FailurePattern, RecoveryStrategy, RootCause, Verdict, VerdictAnalysis, VerdictAnalyzer,
};

use crate::error::Result;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::sync::Arc;

/// Configuration for the ReasoningBank
///
/// `ReasoningBank::new` builds its components from the three nested
/// sub-configurations (`pattern_config`, `consolidation_config`,
/// `distillation_config`). Of the top-level fields, the code in this module
/// reads only `max_trajectories` and `min_quality_threshold`.
///
/// NOTE(review): the remaining top-level fields (`storage_path`,
/// `embedding_dim`, the HNSW parameters, `consolidation_interval_secs`,
/// `auto_consolidate`) are not read anywhere in this module; presumably they
/// are consumed elsewhere or mirror values in `pattern_config` — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReasoningBankConfig {
    /// Storage path for persistent data (default: `.ruvllm/reasoning_bank`)
    pub storage_path: String,
    /// Embedding dimension for vectors (default: 768)
    pub embedding_dim: usize,
    /// HNSW ef_construction parameter (default: 200)
    pub ef_construction: usize,
    /// HNSW ef_search parameter (default: 100)
    pub ef_search: usize,
    /// HNSW M parameter (default: 32)
    pub m: usize,
    /// Maximum trajectories to store before compression (default: 100_000).
    ///
    /// `store_trajectory` triggers `distill` once the in-memory buffer holds
    /// more than this many trajectories.
    pub max_trajectories: usize,
    /// Minimum quality threshold for pattern extraction (default: 0.3).
    ///
    /// A stored trajectory yields a pattern only when its `quality` is greater
    /// than or equal to this value.
    pub min_quality_threshold: f32,
    /// Consolidation interval in seconds (default: 3600, i.e. one hour)
    pub consolidation_interval_secs: u64,
    /// Enable automatic consolidation (default: `true`)
    pub auto_consolidate: bool,
    /// Pattern store configuration, passed to `PatternStore::new`
    pub pattern_config: PatternStoreConfig,
    /// Consolidation configuration, passed to `PatternConsolidator::new`
    pub consolidation_config: ConsolidationConfig,
    /// Distillation configuration, passed to `MemoryDistiller::new`
    pub distillation_config: DistillationConfig,
}

impl Default for ReasoningBankConfig {
    fn default() -> Self {
        Self {
            storage_path: ".ruvllm/reasoning_bank".to_string(),
            embedding_dim: 768,
            ef_construction: 200,
            ef_search: 100,
            m: 32,
            max_trajectories: 100_000,
            min_quality_threshold: 0.3,
            consolidation_interval_secs: 3600, // 1 hour
            auto_consolidate: true,
            pattern_config: PatternStoreConfig::default(),
            consolidation_config: ConsolidationConfig::default(),
            distillation_config: DistillationConfig::default(),
        }
    }
}

/// Main ReasoningBank for learning from Claude trajectories
///
/// The ReasoningBank provides a unified interface for:
/// - Recording trajectories during Claude interactions
/// - Storing and indexing patterns with HNSW
/// - Analyzing verdicts and extracting lessons
/// - Consolidating patterns to prevent forgetting
/// - Distilling old trajectories to preserve key insights
///
/// All methods take `&self`; the mutable parts (pattern store, trajectory
/// buffer, statistics) each sit behind their own `Arc<RwLock<..>>`
/// (`parking_lot::RwLock`).
pub struct ReasoningBank {
    /// Configuration the bank was created with (exposed via `config()`)
    config: ReasoningBankConfig,
    /// Pattern store with HNSW index, built from `config.pattern_config`
    pattern_store: Arc<RwLock<PatternStore>>,
    /// Verdict analyzer used by `analyze_verdict`
    verdict_analyzer: VerdictAnalyzer,
    /// Pattern consolidator used by `consolidate`
    consolidator: PatternConsolidator,
    /// Memory distiller used by `distill`
    distiller: MemoryDistiller,
    /// Trajectory storage (in-memory buffer).
    ///
    /// `store_trajectory` appends to it; `distill` empties it and refills it
    /// with trajectories rebuilt from their compressed form.
    trajectories: Arc<RwLock<Vec<Trajectory>>>,
    /// Running statistics; `stats()` returns a cloned snapshot
    stats: Arc<RwLock<ReasoningBankStats>>,
}

/// Statistics for the ReasoningBank
///
/// Every field starts at zero (derived `Default`). `ReasoningBank::stats`
/// hands out a cloned snapshot of these values.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ReasoningBankStats {
    /// Total trajectories recorded (incremented on every `store_trajectory` call)
    pub total_trajectories: u64,
    /// Total patterns stored.
    ///
    /// Cumulative counter bumped when the bank stores a pattern; nothing in
    /// this module decrements it when patterns are later removed.
    pub total_patterns: u64,
    /// Successful trajectories (`Verdict::Success`)
    pub success_count: u64,
    /// Failed trajectories (`Verdict::Failure`)
    pub failure_count: u64,
    /// Recovered via reflection (`Verdict::RecoveredViaReflection`)
    pub recovered_count: u64,
    /// Consolidations performed
    pub consolidation_count: u64,
    /// Distillations performed
    pub distillation_count: u64,
    /// Average quality score: running mean of `quality` over all recorded trajectories
    pub avg_quality: f32,
    /// Last consolidation timestamp (Unix seconds); 0 until the first consolidation
    pub last_consolidation: u64,
    /// Last distillation timestamp (Unix seconds); 0 until the first distillation
    pub last_distillation: u64,
}

impl ReasoningBank {
    /// Create a new ReasoningBank with the given configuration.
    ///
    /// The pattern store, consolidator and distiller are built from
    /// `config.pattern_config`, `config.consolidation_config` and
    /// `config.distillation_config` respectively. The trajectory buffer starts
    /// empty and all statistics start at zero.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `PatternStore::new`.
    pub fn new(config: ReasoningBankConfig) -> Result<Self> {
        let pattern_store = PatternStore::new(config.pattern_config.clone())?;
        let verdict_analyzer = VerdictAnalyzer::new();
        let consolidator = PatternConsolidator::new(config.consolidation_config.clone());
        let distiller = MemoryDistiller::new(config.distillation_config.clone());

        Ok(Self {
            config,
            pattern_store: Arc::new(RwLock::new(pattern_store)),
            verdict_analyzer,
            consolidator,
            distiller,
            trajectories: Arc::new(RwLock::new(Vec::new())),
            stats: Arc::new(RwLock::new(ReasoningBankStats::default())),
        })
    }

    /// Start recording a new trajectory.
    ///
    /// Returns a fresh `TrajectoryRecorder` seeded with `query_embedding`.
    /// The bank itself is not modified until the finished trajectory is
    /// handed to [`store_trajectory`](Self::store_trajectory).
    pub fn start_trajectory(&self, query_embedding: Vec<f32>) -> TrajectoryRecorder {
        TrajectoryRecorder::new(query_embedding)
    }

    /// Store a completed trajectory.
    ///
    /// In order, this:
    /// 1. updates the statistics (totals, per-verdict counters, running mean
    ///    of `quality`);
    /// 2. appends the trajectory to the in-memory buffer and, if the buffer
    ///    now holds more than `max_trajectories` entries, runs
    ///    [`distill`](Self::distill);
    /// 3. stores a pattern derived from the trajectory when its quality is at
    ///    least `min_quality_threshold`.
    ///
    /// # Errors
    ///
    /// Propagates errors from `distill` and from `PatternStore::store_pattern`.
    /// Statistics updated in step 1 and the buffered trajectory from step 2
    /// are not rolled back when a later step fails.
    pub fn store_trajectory(&self, trajectory: Trajectory) -> Result<()> {
        // Update statistics
        {
            let mut stats = self.stats.write();
            stats.total_trajectories += 1;

            match &trajectory.verdict {
                Verdict::Success => stats.success_count += 1,
                Verdict::Failure(_) => stats.failure_count += 1,
                Verdict::RecoveredViaReflection { .. } => stats.recovered_count += 1,
                // Other verdicts only contribute to the total and the average.
                _ => {}
            }

            // Update rolling average quality
            let n = stats.total_trajectories as f32;
            stats.avg_quality = stats.avg_quality * ((n - 1.0) / n) + trajectory.quality / n;
        }

        // Derive the pattern while we still own the trajectory, so the
        // trajectory can be moved into the buffer instead of deep-cloned.
        let pattern = (trajectory.quality >= self.config.min_quality_threshold)
            .then(|| Pattern::from_trajectory(&trajectory));

        // Store trajectory; the write guard is released before distilling
        // because `distill` needs to lock the buffer itself.
        let over_capacity = {
            let mut buffer = self.trajectories.write();
            buffer.push(trajectory);
            buffer.len() > self.config.max_trajectories
        };
        if over_capacity {
            self.distill()?;
        }

        // Extract pattern if quality is above threshold
        if let Some(pattern) = pattern {
            self.pattern_store.write().store_pattern(pattern)?;
            self.stats.write().total_patterns += 1;
        }

        Ok(())
    }

    /// Analyze a trajectory verdict and extract lessons.
    ///
    /// Delegates to the bank's `VerdictAnalyzer`.
    pub fn analyze_verdict(&self, trajectory: &Trajectory) -> VerdictAnalysis {
        self.verdict_analyzer.analyze(trajectory)
    }

    /// Search for similar patterns by embedding.
    ///
    /// Delegates to `PatternStore::search_similar` under a read lock, asking
    /// for at most `limit` results.
    ///
    /// # Errors
    ///
    /// Propagates any error from the pattern store.
    pub fn search_similar(
        &self,
        query_embedding: &[f32],
        limit: usize,
    ) -> Result<Vec<PatternSearchResult>> {
        let store = self.pattern_store.read();
        store.search_similar(query_embedding, limit)
    }

    /// Search patterns by category.
    ///
    /// Delegates to `PatternStore::get_by_category` under a read lock.
    ///
    /// # Errors
    ///
    /// Propagates any error from the pattern store.
    pub fn search_by_category(
        &self,
        category: PatternCategory,
        limit: usize,
    ) -> Result<Vec<Pattern>> {
        let store = self.pattern_store.read();
        store.get_by_category(category, limit)
    }

    /// Consolidate patterns to prevent forgetting.
    ///
    /// Runs the consolidator over every stored pattern, then removes the
    /// patterns it reports as merged and as pruned. The pattern store stays
    /// write-locked for the whole operation. On success the consolidation
    /// counter and timestamp are updated.
    ///
    /// # Errors
    ///
    /// Propagates errors from reading the patterns, from the consolidator and
    /// from `PatternStore::remove_pattern`. A failure part-way through the
    /// removals leaves the earlier removals in place.
    pub fn consolidate(&self) -> Result<ConsolidationResult> {
        let result = {
            let mut store = self.pattern_store.write();
            let patterns = store.get_all_patterns()?;

            let result = self.consolidator.consolidate_patterns(&patterns)?;

            // Apply consolidation results
            for pattern_id in &result.merged_pattern_ids {
                store.remove_pattern(*pattern_id)?;
            }

            for pattern_id in &result.pruned_pattern_ids {
                store.remove_pattern(*pattern_id)?;
            }

            result
        };

        // Update stats
        {
            let mut stats = self.stats.write();
            stats.consolidation_count += 1;
            stats.last_consolidation = Self::unix_timestamp_secs();
        }

        Ok(result)
    }

    /// Distill old trajectories to preserve key lessons.
    ///
    /// Takes every buffered trajectory, asks the distiller for key lessons,
    /// puts trajectories rebuilt from the compressed results back into the
    /// buffer, stores each lesson as a pattern, and finally updates the
    /// distillation counter and timestamp.
    ///
    /// # Errors
    ///
    /// Propagates errors from the distiller and from
    /// `PatternStore::store_pattern`. If the distiller fails, the original
    /// trajectories are restored to the buffer (ahead of any that arrived in
    /// the meantime) so that an error does not discard recorded data.
    pub fn distill(&self) -> Result<DistillationResult> {
        // Take the whole buffer so the lock is not held while distilling.
        let pending = std::mem::take(&mut *self.trajectories.write());

        let outcome = self.distiller.extract_key_lessons(&pending);
        if outcome.is_err() {
            // Nothing was distilled: hand the untouched trajectories back,
            // keeping them in front of anything stored concurrently.
            let mut buffer = self.trajectories.write();
            let arrived_meanwhile = std::mem::replace(&mut *buffer, pending);
            buffer.extend(arrived_meanwhile);
        }
        // `?` (rather than an explicit `return Err(..)`) keeps the original
        // error conversion.
        let result = outcome?;

        // Store compressed trajectories back
        {
            let mut buffer = self.trajectories.write();
            for compressed in &result.compressed_trajectories {
                // Reconstruct minimal trajectory from compressed form
                buffer.push(Trajectory::from_compressed(compressed));
            }
        }

        // Store extracted lessons as patterns
        {
            let mut store = self.pattern_store.write();
            for lesson in &result.key_lessons {
                store.store_pattern(Pattern::from_lesson(lesson))?;
                // Count lesson patterns like every other pattern the bank
                // stores, so `total_patterns` stays consistent.
                self.stats.write().total_patterns += 1;
            }
        }

        // Update stats
        {
            let mut stats = self.stats.write();
            stats.distillation_count += 1;
            stats.last_distillation = Self::unix_timestamp_secs();
        }

        Ok(result)
    }

    /// Prune low-quality patterns.
    ///
    /// Delegates to `PatternStore::prune_low_quality` under a write lock and
    /// returns its count.
    ///
    /// # Errors
    ///
    /// Propagates any error from the pattern store.
    pub fn prune_low_quality(&self, min_quality: f32) -> Result<usize> {
        let mut store = self.pattern_store.write();
        store.prune_low_quality(min_quality)
    }

    /// Merge similar patterns.
    ///
    /// Delegates to `PatternStore::merge_similar` under a write lock and
    /// returns its count.
    ///
    /// # Errors
    ///
    /// Propagates any error from the pattern store.
    pub fn merge_similar_patterns(&self, similarity_threshold: f32) -> Result<usize> {
        let mut store = self.pattern_store.write();
        store.merge_similar(similarity_threshold)
    }

    /// Get statistics (a cloned snapshot of the current counters).
    pub fn stats(&self) -> ReasoningBankStats {
        self.stats.read().clone()
    }

    /// Get pattern store statistics.
    pub fn pattern_stats(&self) -> PatternStats {
        self.pattern_store.read().stats()
    }

    /// Get configuration.
    pub fn config(&self) -> &ReasoningBankConfig {
        &self.config
    }

    /// Export all patterns for transfer learning.
    ///
    /// # Errors
    ///
    /// Propagates any error from `PatternStore::get_all_patterns`.
    pub fn export_patterns(&self) -> Result<Vec<Pattern>> {
        let store = self.pattern_store.read();
        store.get_all_patterns()
    }

    /// Import patterns from another ReasoningBank.
    ///
    /// This is best-effort: patterns the store rejects are skipped rather
    /// than aborting the import. Returns how many patterns were stored, and
    /// adds that number to `total_patterns`.
    pub fn import_patterns(&self, patterns: Vec<Pattern>) -> Result<usize> {
        let imported = {
            let mut store = self.pattern_store.write();
            patterns
                .into_iter()
                .filter_map(|pattern| store.store_pattern(pattern).ok())
                .count()
        };

        self.stats.write().total_patterns += imported as u64;

        Ok(imported)
    }

    /// Seconds since the Unix epoch, or 0 if the system clock reads earlier
    /// than the epoch.
    fn unix_timestamp_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a default configuration whose pattern store lives under `dir`.
    ///
    /// The default `.reasoning_bank_patterns` path is shared and triggers
    /// "Database already open. Cannot acquire lock." when nextest runs tests
    /// concurrently, so every test that constructs a bank must point the
    /// pattern store at its own directory. Setting the top-level
    /// `storage_path` is not enough: `ReasoningBank::new` builds the pattern
    /// store from `pattern_config`.
    fn isolated_config(dir: &std::path::Path) -> ReasoningBankConfig {
        let mut config = ReasoningBankConfig::default();
        config.pattern_config.storage_path =
            Some(dir.join("pat").to_string_lossy().into_owned());
        config
    }

    #[test]
    fn test_reasoning_bank_config_default() {
        let config = ReasoningBankConfig::default();
        assert_eq!(config.embedding_dim, 768);
        assert_eq!(config.ef_construction, 200);
        assert_eq!(config.ef_search, 100);
        assert_eq!(config.m, 32);
    }

    #[test]
    fn test_reasoning_bank_creation() {
        // `tmp` is declared before the bank so it is dropped (and the
        // directory removed) only after the bank has been dropped.
        let tmp = tempfile::tempdir().unwrap();
        let config = ReasoningBankConfig {
            storage_path: "/tmp/test_reasoning_bank".to_string(),
            ..isolated_config(tmp.path())
        };
        let bank = ReasoningBank::new(config);
        assert!(bank.is_ok());
    }

    #[test]
    fn test_trajectory_recording() {
        let tmp = tempfile::tempdir().unwrap();
        let bank = ReasoningBank::new(isolated_config(tmp.path())).unwrap();

        let mut recorder = bank.start_trajectory(vec![0.1; 768]);
        recorder.add_step(
            "analyze".to_string(),
            "Need to understand the problem".to_string(),
            StepOutcome::Success,
            0.9,
        );

        let trajectory = recorder.complete(Verdict::Success);
        assert!(!trajectory.steps.is_empty());
    }

    #[test]
    fn test_stats_tracking() {
        let tmp = tempfile::tempdir().unwrap();
        let bank = ReasoningBank::new(isolated_config(tmp.path())).unwrap();

        let stats = bank.stats();
        assert_eq!(stats.total_trajectories, 0);
    }
}