Skip to main content

offline_intelligence/cache_management/
cache_config.rs

1//! Configuration for the KV cache management system
2
3use serde::{Deserialize, Serialize};
4
5/// Configuration for the KV cache management system
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct KVCacheConfig {
8    /// Whether cache management is enabled
9    pub enabled: bool,
10    
11    /// Whether retrieval is enabled
12    pub retrieval_enabled: bool,
13    
14    /// Number of conversations before clearing cache
15    pub clear_after_conversations: usize,
16    
17    /// Memory threshold percentage (0.6 = 60%) for clearing
18    pub memory_threshold_percent: f32,
19    
20    /// Whether to create bridging sentences between cached and retrieved content
21    pub bridge_enabled: bool,
22    
23    /// Maximum entries to keep in KV cache after clearing
24    pub max_cache_entries: usize,
25    
26    /// Minimum importance score to preserve entries during clearing
27    pub min_importance_to_preserve: f32,
28    
29    /// Whether to generate embeddings for cache retrieval
30    pub generate_cache_embeddings: bool,
31    
32    /// Retrieval strategy to use
33    pub retrieval_strategy: RetrievalStrategy,
34    
35    /// Whether to preserve system prompts in cache
36    pub preserve_system_prompts: bool,
37    
38    /// Whether to preserve code-related KV entries
39    pub preserve_code_entries: bool,
40    
41    /// Snapshot strategy to use
42    pub snapshot_strategy: SnapshotStrategy,
43}
44
45impl Default for KVCacheConfig {
46    fn default() -> Self {
47        Self {
48            enabled: true,
49            retrieval_enabled: true,
50            clear_after_conversations: 16,  // Clear after 16 conversations
51            memory_threshold_percent: 0.6,  // 60% memory usage
52            bridge_enabled: true,
53            max_cache_entries: 1000,
54            min_importance_to_preserve: 0.7,
55            generate_cache_embeddings: true,
56            retrieval_strategy: RetrievalStrategy::KeywordThenSemantic,
57            preserve_system_prompts: true,
58            preserve_code_entries: true,
59            snapshot_strategy: SnapshotStrategy::Incremental {
60                interval_conversations: 4,  // Snapshot every 4 conversations
61                max_snapshots: 4,           // Keep last 4 snapshots
62            },
63        }
64    }
65}
66
67/// Different retrieval strategies
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum RetrievalStrategy {
70    /// Keyword matching only
71    KeywordOnly,
72    /// Semantic search only
73    SemanticOnly,
74    /// Keyword then semantic as fallback
75    KeywordThenSemantic,
76    /// Semantic then keyword as fallback  
77    SemanticThenKeyword,
78    /// Hybrid approach
79    Hybrid {
80        keyword_weight: f32,
81        semantic_weight: f32,
82    },
83}
84
85/// Different snapshot strategies
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub enum SnapshotStrategy {
88    /// No snapshots
89    None,
90    /// Full snapshot every N conversations
91    Full {
92        interval_conversations: usize,
93    },
94    /// Incremental snapshots
95    Incremental {
96        interval_conversations: usize,
97        max_snapshots: usize,
98    },
99    /// Adaptive based on importance
100    Adaptive {
101        min_importance_threshold: f32,
102        max_snapshots: usize,
103    },
104}
105
106/// Configuration for cache entry preservation
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct CachePreservationConfig {
109    /// Preserve attention keys
110    pub preserve_attention_keys: bool,
111    
112    /// Preserve attention values
113    pub preserve_attention_values: bool,
114    
115    /// Preserve FFN keys
116    pub preserve_ffn_keys: bool,
117    
118    /// Preserve FFN values
119    pub preserve_ffn_values: bool,
120    
121    /// Preserve entries from early layers
122    pub preserve_early_layers: bool,
123    
124    /// Preserve entries from late layers
125    pub preserve_late_layers: bool,
126    
127    /// Custom patterns to preserve (regex for key matching)
128    pub custom_patterns: Vec<String>,
129}
130
131impl Default for CachePreservationConfig {
132    fn default() -> Self {
133        Self {
134            preserve_attention_keys: true,
135            preserve_attention_values: true,
136            preserve_ffn_keys: false,
137            preserve_ffn_values: false,
138            preserve_early_layers: true,
139            preserve_late_layers: false,
140            custom_patterns: Vec::new(),
141        }
142    }
143}