offline_intelligence/cache_management/cache_config.rs
//! Configuration for the KV cache management system

use serde::{Deserialize, Serialize};
4
5/// Configuration for the KV cache management system
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct KVCacheConfig {
8 /// Whether cache management is enabled
9 pub enabled: bool,
10
11 /// Whether retrieval is enabled
12 pub retrieval_enabled: bool,
13
14 /// Number of conversations before clearing cache
15 pub clear_after_conversations: usize,
16
17 /// Memory threshold percentage (0.6 = 60%) for clearing
18 pub memory_threshold_percent: f32,
19
20 /// Whether to create bridging sentences between cached and retrieved content
21 pub bridge_enabled: bool,
22
23 /// Maximum entries to keep in KV cache after clearing
24 pub max_cache_entries: usize,
25
26 /// Minimum importance score to preserve entries during clearing
27 pub min_importance_to_preserve: f32,
28
29 /// Whether to generate embeddings for cache retrieval
30 pub generate_cache_embeddings: bool,
31
32 /// Retrieval strategy to use
33 pub retrieval_strategy: RetrievalStrategy,
34
35 /// Whether to preserve system prompts in cache
36 pub preserve_system_prompts: bool,
37
38 /// Whether to preserve code-related KV entries
39 pub preserve_code_entries: bool,
40
41 /// Snapshot strategy to use
42 pub snapshot_strategy: SnapshotStrategy,
43}
44
45impl Default for KVCacheConfig {
46 fn default() -> Self {
47 Self {
48 enabled: true,
49 retrieval_enabled: true,
50 clear_after_conversations: 16, // Clear after 16 conversations
51 memory_threshold_percent: 0.6, // 60% of model context window
52 bridge_enabled: true,
53 max_cache_entries: 1000,
54 min_importance_to_preserve: 0.7,
55 generate_cache_embeddings: true,
56 retrieval_strategy: RetrievalStrategy::KeywordThenSemantic,
57 preserve_system_prompts: true,
58 preserve_code_entries: true,
59 snapshot_strategy: SnapshotStrategy::Incremental {
60 interval_conversations: 4, // Snapshot every 4 conversations
61 max_snapshots: 4, // Keep last 4 snapshots
62 },
63 }
64 }
65}
66
67impl KVCacheConfig {
68 /// Build config with token threshold derived from the model's context window size.
69 /// `ctx_size` is the model's total context window in tokens (from Config.ctx_size).
70 /// The clear threshold is 60% of that — the cache is cleared before the window fills.
71 pub fn from_ctx_size(ctx_size: u32) -> Self {
72 let mut config = Self::default();
73 // Express 60% as the threshold; callers convert token count using this ratio
74 // against ctx_size. The ratio is kept so it remains meaningful regardless of model.
75 config.memory_threshold_percent = 0.6;
76 // Max entries: rough heuristic — allow ~4 tokens of KV state per context token
77 config.max_cache_entries = (ctx_size as usize).saturating_mul(4).max(1000);
78 config
79 }
80
81 /// Return the token count at which a cache clear should be triggered.
82 /// This is 60% of the model's context window.
83 pub fn clear_threshold_tokens(&self, ctx_size: u32) -> usize {
84 (ctx_size as f32 * self.memory_threshold_percent) as usize
85 }
86}
87
88/// Different retrieval strategies
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub enum RetrievalStrategy {
91 /// Keyword matching only
92 KeywordOnly,
93 /// Semantic search only
94 SemanticOnly,
95 /// Keyword then semantic as fallback
96 KeywordThenSemantic,
97 /// Semantic then keyword as fallback
98 SemanticThenKeyword,
99 /// Hybrid approach
100 Hybrid {
101 keyword_weight: f32,
102 semantic_weight: f32,
103 },
104}
105
106/// Different snapshot strategies
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub enum SnapshotStrategy {
109 /// No snapshots
110 None,
111 /// Full snapshot every N conversations
112 Full {
113 interval_conversations: usize,
114 },
115 /// Incremental snapshots
116 Incremental {
117 interval_conversations: usize,
118 max_snapshots: usize,
119 },
120 /// Adaptive based on importance
121 Adaptive {
122 min_importance_threshold: f32,
123 max_snapshots: usize,
124 },
125}
126
127/// Configuration for cache entry preservation
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct CachePreservationConfig {
130 /// Preserve attention keys
131 pub preserve_attention_keys: bool,
132
133 /// Preserve attention values
134 pub preserve_attention_values: bool,
135
136 /// Preserve FFN keys
137 pub preserve_ffn_keys: bool,
138
139 /// Preserve FFN values
140 pub preserve_ffn_values: bool,
141
142 /// Preserve entries from early layers
143 pub preserve_early_layers: bool,
144
145 /// Preserve entries from late layers
146 pub preserve_late_layers: bool,
147
148 /// Custom patterns to preserve (regex for key matching)
149 pub custom_patterns: Vec<String>,
150}
151
152impl Default for CachePreservationConfig {
153 fn default() -> Self {
154 Self {
155 preserve_attention_keys: true,
156 preserve_attention_values: true,
157 preserve_ffn_keys: false,
158 preserve_ffn_values: false,
159 preserve_early_layers: true,
160 preserve_late_layers: false,
161 custom_patterns: Vec::new(),
162 }
163 }
164}