zeptoclaw 0.7.3

Ultra-lightweight personal AI assistant
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
//! LLM response cache with TTL expiry and LRU eviction.
//!
//! Persists to `~/.zeptoclaw/cache/responses.json`. Cache key is a SHA-256
//! digest of `(model, system_prompt, user_prompt)`. Entries expire after a
//! configurable TTL and are evicted LRU when the store reaches capacity.

use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use tracing::{debug, warn};

/// A single cached LLM response.
///
/// Stored as a value in the cache map and serialized as part of the JSON
/// cache file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    /// The LLM response text.
    pub response: String,
    /// Estimated token count of the response, as supplied by the caller of `put`.
    pub token_count: u32,
    /// Unix timestamp (seconds) when the entry was created. TTL expiry is
    /// measured from this value.
    pub created_at: u64,
    /// Unix timestamp (seconds) when the entry was last accessed. LRU eviction
    /// removes the entry with the smallest value.
    pub accessed_at: u64,
    /// Number of cache hits for this entry (incremented with saturation on each hit).
    pub hit_count: u32,
}

/// Persistent store serialized to JSON.
///
/// This is the exact shape written to and read from the cache file; the
/// `Default` value is an empty store.
#[derive(Debug, Serialize, Deserialize, Default)]
struct CacheStore {
    // Cached responses keyed by the cache-key string passed to `put`/`get`.
    entries: HashMap<String, CacheEntry>,
}

/// LLM response cache with TTL expiry, LRU eviction, and JSON persistence.
pub struct ResponseCache {
    // In-memory entries; mirrored to `path` on `put()` and `clear()`.
    store: CacheStore,
    // Location of the JSON cache file on disk.
    path: PathBuf,
    // Maximum entry age in seconds, measured from `created_at`.
    ttl_secs: u64,
    // Capacity limit; treated as at least 1 when inserting.
    max_entries: usize,
}

impl ResponseCache {
    /// Build a cache backed by `~/.zeptoclaw/cache/responses.json`.
    ///
    /// Any entries already present in that file are loaded immediately. When
    /// no home directory can be determined the path is resolved relative to
    /// the current directory instead. A `max_entries` of zero is raised to 1
    /// so the eviction loop in `put()` can always terminate.
    pub fn new(ttl_secs: u64, max_entries: usize) -> Self {
        let mut path = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
        path.push(".zeptoclaw");
        path.push("cache");
        path.push("responses.json");

        let capacity = max_entries.max(1);
        let store = Self::load_from_disk(&path);

        Self {
            store,
            path,
            ttl_secs,
            max_entries: capacity,
        }
    }

    /// Derive the deterministic cache key for a request.
    ///
    /// The key is the lowercase hex SHA-256 digest of `model`,
    /// `system_prompt` and `user_prompt`, fed to the hasher in that order.
    /// Each field is preceded by its byte length as a little-endian `u64`, so
    /// moving characters between adjacent fields (e.g. `model="a|b"` vs
    /// `model="a", system="|b"`) always yields a different key.
    pub fn cache_key(model: &str, system_prompt: &str, user_prompt: &str) -> String {
        let mut hasher = Sha256::new();
        for field in [model, system_prompt, user_prompt] {
            let len_prefix = (field.len() as u64).to_le_bytes();
            hasher.update(len_prefix);
            hasher.update(field.as_bytes());
        }
        let digest = hasher.finalize();
        format!("{:x}", digest)
    }

    /// Look up a cached response. Returns `None` if the key is absent or expired.
    ///
    /// On hit, updates `accessed_at` and increments `hit_count` in memory.
    /// Does NOT persist to disk on hit — bookkeeping fields are flushed on
    /// the next `put()` or `clear()` call, avoiding O(n) disk writes per read.
    ///
    /// An expired entry is removed from memory as a side effect; that removal
    /// is likewise only persisted by the next `put()` or `clear()`.
    ///
    /// `key` may be any string (not only a hex digest from `cache_key`); this
    /// method never panics on non-ASCII keys.
    pub fn get(&mut self, key: &str) -> Option<String> {
        let now = Self::now_secs();
        let ttl_secs = self.ttl_secs;

        // Single lookup; a missing key is a plain miss.
        let entry = self.store.entries.get_mut(key)?;

        if now.saturating_sub(entry.created_at) > ttl_secs {
            // Log only the first 8 characters. Cut on a character boundary:
            // slicing at a fixed byte offset would panic for keys whose 8th
            // byte falls inside a multi-byte UTF-8 sequence.
            let prefix = key
                .char_indices()
                .nth(8)
                .map_or(key, |(end, _)| &key[..end]);
            debug!(key = %prefix, "Cache entry expired, removing");
            self.store.entries.remove(key);
            // Deferred disk write — flushed on next put() or clear()
            return None;
        }

        entry.accessed_at = now;
        entry.hit_count = entry.hit_count.saturating_add(1);
        Some(entry.response.clone())
    }

    /// Store a response in the cache under `key` and persist the cache to disk.
    ///
    /// Expired entries are purged first. Least-recently-used entries are then
    /// evicted until the cache would not exceed its capacity after the
    /// insert. Overwriting a key that is already present does not grow the
    /// cache, so it does not force out an unrelated entry.
    ///
    /// The stored entry starts with `created_at == accessed_at == now` and a
    /// `hit_count` of zero, even when it replaces an existing entry.
    pub fn put(&mut self, key: String, response: String, token_count: u32) {
        let now = Self::now_secs();
        // Evict expired entries first
        self.evict_expired(now);

        // Guard max_entries=0 (possible via direct struct construction) so the
        // loop below always terminates.
        let capacity = self.max_entries.max(1);

        // Projected size after the insert: a new key adds one entry, an
        // existing key is replaced in place. `contains_key` is re-evaluated
        // each round because eviction may remove `key` itself.
        while self.store.entries.len() + usize::from(!self.store.entries.contains_key(&key))
            > capacity
        {
            self.evict_lru();
        }

        self.store.entries.insert(
            key,
            CacheEntry {
                response,
                token_count,
                created_at: now,
                accessed_at: now,
                hit_count: 0,
            },
        );
        self.save_to_disk();
    }

    /// Return aggregate statistics about the cache.
    ///
    /// `total_hits` is the sum of every entry's `hit_count`;
    /// `total_tokens_saved` is the sum of `hit_count * token_count` per entry.
    /// Both totals saturate at `u64::MAX` instead of overflowing, so extreme
    /// counters (for example from a hand-edited cache file) cannot cause a
    /// panic or a wrapped result.
    pub fn stats(&self) -> CacheStats {
        // One pass over the entries accumulates both totals.
        let (total_hits, total_tokens_saved) = self.store.entries.values().fold(
            (0u64, 0u64),
            |(hits, tokens), entry| {
                let entry_hits = u64::from(entry.hit_count);
                // u32 * u32 always fits in u64; only the running sums can overflow.
                let entry_tokens = entry_hits * u64::from(entry.token_count);
                (
                    hits.saturating_add(entry_hits),
                    tokens.saturating_add(entry_tokens),
                )
            },
        );
        CacheStats {
            total_entries: self.store.entries.len(),
            total_hits,
            total_tokens_saved,
        }
    }

    /// Remove all entries from the cache.
    ///
    /// The now-empty store is written to disk immediately.
    pub fn clear(&mut self) {
        self.store.entries.clear();
        self.save_to_disk();
    }

    /// Return the number of entries currently in the cache.
    ///
    /// Counts every stored entry, including any that have expired but have
    /// not yet been removed by a `get()` or `put()` call.
    pub fn len(&self) -> usize {
        self.store.entries.len()
    }

    /// Return `true` if the cache contains no entries.
    ///
    /// Like `len()`, this does not check entries for expiry.
    pub fn is_empty(&self) -> bool {
        self.store.entries.is_empty()
    }

    // -- private helpers ---------------------------------------------------

    fn evict_expired(&mut self, now: u64) {
        let ttl = self.ttl_secs;
        self.store
            .entries
            .retain(|_, e| now.saturating_sub(e.created_at) <= ttl);
    }

    /// Remove the entry with the smallest `accessed_at` timestamp.
    ///
    /// Does nothing when the cache is empty. When several entries share the
    /// smallest timestamp, which one is removed depends on map iteration
    /// order.
    fn evict_lru(&mut self) {
        // The key is cloned so the immutable borrow from the scan ends before
        // the map is mutated.
        if let Some(lru_key) = self
            .store
            .entries
            .iter()
            .min_by_key(|(_, e)| e.accessed_at)
            .map(|(k, _)| k.clone())
        {
            // Log only the first 8 characters, cut on a character boundary.
            // Slicing at a fixed byte offset would panic for keys whose 8th
            // byte falls inside a multi-byte UTF-8 sequence.
            let prefix = lru_key
                .char_indices()
                .nth(8)
                .map_or(lru_key.as_str(), |(end, _)| &lru_key[..end]);
            debug!(key = %prefix, "Evicting LRU cache entry");
            self.store.entries.remove(&lru_key);
        }
    }

    /// Current wall-clock time as whole seconds since the Unix epoch.
    ///
    /// Yields 0 when the system clock reports a time before the epoch.
    fn now_secs() -> u64 {
        match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(since_epoch) => since_epoch.as_secs(),
            Err(_) => 0,
        }
    }

    /// Read the cache store from `path`, falling back to an empty store.
    ///
    /// A missing file is the normal first-run case and is silent. Any other
    /// read error, or a file that does not parse as a cache store, is logged
    /// as a warning and also yields an empty store.
    fn load_from_disk(path: &Path) -> CacheStore {
        let raw = match std::fs::read_to_string(path) {
            Ok(raw) => raw,
            Err(read_err) => {
                if read_err.kind() != std::io::ErrorKind::NotFound {
                    warn!("Failed to read response cache, starting empty: {}", read_err);
                }
                return CacheStore::default();
            }
        };

        serde_json::from_str(&raw).unwrap_or_else(|parse_err| {
            warn!(
                "Response cache file is corrupt, starting empty: {}",
                parse_err
            );
            CacheStore::default()
        })
    }

    /// Persist the current store to `self.path` as pretty-printed JSON.
    ///
    /// Best-effort: every failure (creating the parent directory, serializing,
    /// writing, or renaming) is logged as a warning and otherwise ignored, so
    /// callers never see an error.
    ///
    /// The data is first written to a sibling temporary file and then renamed
    /// over the target, so an interrupted write does not leave a truncated
    /// cache file behind.
    fn save_to_disk(&self) {
        if let Some(parent) = self.path.parent() {
            if let Err(e) = std::fs::create_dir_all(parent) {
                warn!("Failed to create response cache directory: {}", e);
                return;
            }
        }

        let data = match serde_json::to_string_pretty(&self.store) {
            Ok(data) => data,
            Err(e) => {
                warn!("Failed to serialize response cache: {}", e);
                return;
            }
        };

        // Sibling temp file: same directory (so the rename stays on one
        // filesystem) and tagged with the process id to avoid clashing with
        // another process saving at the same time.
        let mut tmp_name = self
            .path
            .file_name()
            .map(|name| name.to_os_string())
            .unwrap_or_default();
        tmp_name.push(format!(".{}.tmp", std::process::id()));
        let tmp_path = self.path.with_file_name(tmp_name);

        let written = std::fs::write(&tmp_path, data)
            .and_then(|()| std::fs::rename(&tmp_path, &self.path));
        if let Err(e) = written {
            warn!("Failed to save response cache: {}", e);
            // Do not leave a stray temp file behind; ignore cleanup errors.
            let _ = std::fs::remove_file(&tmp_path);
        }
    }
}

/// Aggregate cache statistics.
///
/// A point-in-time snapshot produced by `ResponseCache::stats`. The `Default`
/// value has every counter at zero, and snapshots can be compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CacheStats {
    /// Number of entries currently in the cache.
    pub total_entries: usize,
    /// Cumulative number of cache hits across all entries.
    pub total_hits: u64,
    /// Estimated total tokens saved by cache hits.
    pub total_tokens_saved: u64,
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Create a test cache with a unique temp path so parallel tests don't collide.
    ///
    /// The file lives in the platform temp directory (not a hard-coded `/tmp`)
    /// and its name combines the process id with a per-process counter, so
    /// neither parallel tests nor concurrent test runs share a file.
    fn test_cache() -> ResponseCache {
        use std::sync::atomic::{AtomicU64, Ordering};
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let id = COUNTER.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        ResponseCache {
            store: CacheStore::default(),
            path: std::env::temp_dir().join(format!("zeptoclaw-test-cache-{pid}-{id}.json")),
            ttl_secs: 3600,
            max_entries: 5,
        }
    }

    #[test]
    fn test_cache_key_deterministic() {
        let k1 = ResponseCache::cache_key("gpt-4", "sys", "hello");
        let k2 = ResponseCache::cache_key("gpt-4", "sys", "hello");
        assert_eq!(k1, k2);
    }

    #[test]
    fn test_cache_key_model_aware() {
        let k1 = ResponseCache::cache_key("gpt-4", "sys", "hello");
        let k2 = ResponseCache::cache_key("claude", "sys", "hello");
        assert_ne!(k1, k2);
    }

    #[test]
    fn test_cache_key_prompt_aware() {
        let k1 = ResponseCache::cache_key("gpt-4", "sys", "hello");
        let k2 = ResponseCache::cache_key("gpt-4", "sys", "goodbye");
        assert_ne!(k1, k2);
    }

    #[test]
    fn test_cache_key_system_prompt_aware() {
        let k1 = ResponseCache::cache_key("gpt-4", "system A", "hello");
        let k2 = ResponseCache::cache_key("gpt-4", "system B", "hello");
        assert_ne!(k1, k2);
    }

    #[test]
    fn test_cache_hit_miss() {
        let mut cache = test_cache();
        let key = "test-key".to_string();
        assert!(cache.get(&key).is_none());
        cache.put(key.clone(), "response".into(), 100);
        assert_eq!(cache.get(&key), Some("response".into()));
    }

    #[test]
    fn test_cache_ttl_expiry() {
        let mut cache = test_cache();
        cache.ttl_secs = 0; // expire immediately
        cache.put("key".into(), "resp".into(), 10);
        // Backdate created_at by 1 second to guarantee expiry
        if let Some(entry) = cache.store.entries.get_mut("key") {
            entry.created_at -= 1;
        }
        assert!(cache.get("key").is_none());
    }

    #[test]
    fn test_cache_lru_eviction() {
        let mut cache = test_cache(); // max 5 entries
        for i in 0..5 {
            cache.put(format!("k{i}"), format!("v{i}"), 10);
        }
        // Manually set accessed_at to ensure deterministic ordering:
        // k0 = 1000 (most recent), k1 = 100 (oldest), k2-k4 = 500
        cache.store.entries.get_mut("k0").unwrap().accessed_at = 1000;
        cache.store.entries.get_mut("k1").unwrap().accessed_at = 100;
        for i in 2..5 {
            cache
                .store
                .entries
                .get_mut(&format!("k{i}"))
                .unwrap()
                .accessed_at = 500;
        }
        // Add k5 — should evict k1 (oldest accessed_at = 100)
        cache.put("k5".into(), "v5".into(), 10);
        assert!(
            cache.get("k0").is_some(),
            "k0 had most recent access, should survive LRU"
        );
        assert!(
            !cache.store.entries.contains_key("k1"),
            "k1 had oldest accessed_at, should be evicted"
        );
        assert_eq!(cache.store.entries.len(), 5, "should stay at max capacity");
    }

    #[test]
    fn test_cache_stats() {
        let mut cache = test_cache();
        cache.put("k1".into(), "r1".into(), 100);
        cache.put("k2".into(), "r2".into(), 200);
        let _ = cache.get("k1"); // 1 hit
        let _ = cache.get("k1"); // 2 hits
        let _ = cache.get("k2"); // 1 hit
        let stats = cache.stats();
        assert_eq!(stats.total_entries, 2);
        assert_eq!(stats.total_hits, 3);
        assert_eq!(stats.total_tokens_saved, 100 * 2 + 200);
    }

    #[test]
    fn test_cache_clear() {
        let mut cache = test_cache();
        cache.put("k1".into(), "r1".into(), 10);
        cache.clear();
        assert_eq!(cache.stats().total_entries, 0);
        assert!(cache.is_empty());
    }

    #[test]
    fn test_cache_hit_increments_count() {
        let mut cache = test_cache();
        cache.put("k".into(), "r".into(), 10);
        let _ = cache.get("k");
        let _ = cache.get("k");
        let entry = cache.store.entries.get("k").unwrap();
        assert_eq!(entry.hit_count, 2);
    }

    #[test]
    fn test_cache_len_and_is_empty() {
        let mut cache = test_cache();
        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);
        cache.put("a".into(), "b".into(), 1);
        assert!(!cache.is_empty());
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn test_cache_key_no_separator_collision() {
        // "a|b" as model with empty system should differ from "a" model with "b" system
        let k1 = ResponseCache::cache_key("a|b", "", "c");
        let k2 = ResponseCache::cache_key("a", "b", "c");
        assert_ne!(
            k1, k2,
            "length-prefixed encoding must prevent separator collisions"
        );
    }

    #[test]
    fn test_max_entries_zero_clamped() {
        // new() clamps a zero capacity to 1.
        let clamped = ResponseCache::new(3600, 0);
        assert_eq!(clamped.max_entries, 1);

        // Direct struct construction bypasses the clamp in new(), so put()
        // must guard on its own: it has to terminate and behave like a
        // capacity-1 cache.
        let mut cache = test_cache();
        cache.max_entries = 0;
        cache.put("a".into(), "1".into(), 1);
        cache.put("b".into(), "2".into(), 1);
        assert_eq!(cache.len(), 1, "zero capacity must behave like capacity 1");
        assert!(
            cache.store.entries.contains_key("b"),
            "the most recent insert should be the surviving entry"
        );
    }

    #[test]
    fn test_cache_config_defaults() {
        use crate::config::CacheConfig;
        let cfg = CacheConfig::default();
        assert!(!cfg.enabled);
        assert_eq!(cfg.ttl_secs, 3600);
        assert_eq!(cfg.max_entries, 500);
    }
}