// offline_intelligence/cache_management/cache_config.rs

//! Configuration for the KV cache management system

use serde::{Deserialize, Serialize};

/// Configuration for the KV cache management system.
///
/// Constructed via [`Default`] or [`KVCacheConfig::from_ctx_size`]; all
/// thresholds expressed as fractions are relative to the model's context
/// window in tokens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KVCacheConfig {
    /// Whether cache management is enabled at all.
    pub enabled: bool,

    /// Whether retrieval from previously cleared cache content is enabled.
    pub retrieval_enabled: bool,

    /// Number of conversations to accumulate before clearing the cache.
    pub clear_after_conversations: usize,

    /// Fraction of the model's context window (0.6 = 60%) at which a
    /// cache clear is triggered; see `clear_threshold_tokens`.
    pub memory_threshold_percent: f32,

    /// Whether to create bridging sentences between cached and retrieved content.
    pub bridge_enabled: bool,

    /// Maximum entries to keep in the KV cache after clearing.
    pub max_cache_entries: usize,

    /// Minimum importance score an entry needs to be preserved during clearing.
    pub min_importance_to_preserve: f32,

    /// Whether to generate embeddings for cache retrieval.
    pub generate_cache_embeddings: bool,

    /// Retrieval strategy to use when pulling cleared content back in.
    pub retrieval_strategy: RetrievalStrategy,

    /// Whether to preserve system prompts in the cache across clears.
    pub preserve_system_prompts: bool,

    /// Whether to preserve code-related KV entries across clears.
    pub preserve_code_entries: bool,

    /// Snapshot strategy governing periodic cache snapshots.
    pub snapshot_strategy: SnapshotStrategy,
}

45impl Default for KVCacheConfig {
46    fn default() -> Self {
47        Self {
48            enabled: true,
49            retrieval_enabled: true,
50            clear_after_conversations: 16,  // Clear after 16 conversations
51            memory_threshold_percent: 0.6,  // 60% of model context window
52            bridge_enabled: true,
53            max_cache_entries: 1000,
54            min_importance_to_preserve: 0.7,
55            generate_cache_embeddings: true,
56            retrieval_strategy: RetrievalStrategy::KeywordThenSemantic,
57            preserve_system_prompts: true,
58            preserve_code_entries: true,
59            snapshot_strategy: SnapshotStrategy::Incremental {
60                interval_conversations: 4,  // Snapshot every 4 conversations
61                max_snapshots: 4,           // Keep last 4 snapshots
62            },
63        }
64    }
65}
66
67impl KVCacheConfig {
68    /// Build config with token threshold derived from the model's context window size.
69    /// `ctx_size` is the model's total context window in tokens (from Config.ctx_size).
70    /// The clear threshold is 60% of that — the cache is cleared before the window fills.
71    pub fn from_ctx_size(ctx_size: u32) -> Self {
72        let mut config = Self::default();
73        // Express 60% as the threshold; callers convert token count using this ratio
74        // against ctx_size. The ratio is kept so it remains meaningful regardless of model.
75        config.memory_threshold_percent = 0.6;
76        // Max entries: rough heuristic — allow ~4 tokens of KV state per context token
77        config.max_cache_entries = (ctx_size as usize).saturating_mul(4).max(1000);
78        config
79    }
80
81    /// Return the token count at which a cache clear should be triggered.
82    /// This is 60% of the model's context window.
83    pub fn clear_threshold_tokens(&self, ctx_size: u32) -> usize {
84        (ctx_size as f32 * self.memory_threshold_percent) as usize
85    }
86}
87
88/// Different retrieval strategies
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub enum RetrievalStrategy {
91    /// Keyword matching only
92    KeywordOnly,
93    /// Semantic search only
94    SemanticOnly,
95    /// Keyword then semantic as fallback
96    KeywordThenSemantic,
97    /// Semantic then keyword as fallback  
98    SemanticThenKeyword,
99    /// Hybrid approach
100    Hybrid {
101        keyword_weight: f32,
102        semantic_weight: f32,
103    },
104}
105
106/// Different snapshot strategies
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub enum SnapshotStrategy {
109    /// No snapshots
110    None,
111    /// Full snapshot every N conversations
112    Full {
113        interval_conversations: usize,
114    },
115    /// Incremental snapshots
116    Incremental {
117        interval_conversations: usize,
118        max_snapshots: usize,
119    },
120    /// Adaptive based on importance
121    Adaptive {
122        min_importance_threshold: f32,
123        max_snapshots: usize,
124    },
125}
126
127/// Configuration for cache entry preservation
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct CachePreservationConfig {
130    /// Preserve attention keys
131    pub preserve_attention_keys: bool,
132    
133    /// Preserve attention values
134    pub preserve_attention_values: bool,
135    
136    /// Preserve FFN keys
137    pub preserve_ffn_keys: bool,
138    
139    /// Preserve FFN values
140    pub preserve_ffn_values: bool,
141    
142    /// Preserve entries from early layers
143    pub preserve_early_layers: bool,
144    
145    /// Preserve entries from late layers
146    pub preserve_late_layers: bool,
147    
148    /// Custom patterns to preserve (regex for key matching)
149    pub custom_patterns: Vec<String>,
150}
151
152impl Default for CachePreservationConfig {
153    fn default() -> Self {
154        Self {
155            preserve_attention_keys: true,
156            preserve_attention_values: true,
157            preserve_ffn_keys: false,
158            preserve_ffn_values: false,
159            preserve_early_layers: true,
160            preserve_late_layers: false,
161            custom_patterns: Vec::new(),
162        }
163    }
164}