use anyhow::Result;
use chrono::{Duration, Local};
use parking_lot::Mutex;
use rusqlite::{params, Connection};
use sha2::{Digest, Sha256};
use std::path::{Path, PathBuf};
/// SQLite-backed cache of model responses keyed by a prompt hash.
///
/// Entries expire after a time-to-live measured from their creation time, and
/// the table is trimmed to a maximum number of rows on every write.
pub struct ResponseCache {
/// Open SQLite connection; the mutex serializes every cache operation.
conn: Mutex<Connection>,
/// Location of the database file. Stored at construction but not read by
/// any method visible in this file, hence the `dead_code` allowance.
#[allow(dead_code)]
db_path: PathBuf,
/// Entry lifetime in minutes, counted from the entry's `created_at`.
ttl_minutes: i64,
/// Maximum number of rows kept in the table after a write.
max_entries: usize,
}
impl ResponseCache {
pub fn new(workspace_dir: &Path, ttl_minutes: u32, max_entries: usize) -> Result<Self> {
let db_dir = workspace_dir.join("memory");
std::fs::create_dir_all(&db_dir)?;
let db_path = db_dir.join("response_cache.db");
let conn = Connection::open(&db_path)?;
conn.execute_batch(
"PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA temp_store = MEMORY;",
)?;
conn.execute_batch(
"CREATE TABLE IF NOT EXISTS response_cache (
prompt_hash TEXT PRIMARY KEY,
model TEXT NOT NULL,
response TEXT NOT NULL,
token_count INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL,
accessed_at TEXT NOT NULL,
hit_count INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_rc_accessed ON response_cache(accessed_at);
CREATE INDEX IF NOT EXISTS idx_rc_created ON response_cache(created_at);",
)?;
Ok(Self {
conn: Mutex::new(conn),
db_path,
ttl_minutes: i64::from(ttl_minutes),
max_entries,
})
}
pub fn cache_key(model: &str, system_prompt: Option<&str>, user_prompt: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(model.as_bytes());
hasher.update(b"|");
if let Some(sys) = system_prompt {
hasher.update(sys.as_bytes());
}
hasher.update(b"|");
hasher.update(user_prompt.as_bytes());
let hash = hasher.finalize();
format!("{:064x}", hash)
}
pub fn get(&self, key: &str) -> Result<Option<String>> {
let conn = self.conn.lock();
let now = Local::now();
let cutoff = (now - Duration::minutes(self.ttl_minutes)).to_rfc3339();
let mut stmt = conn.prepare(
"SELECT response FROM response_cache
WHERE prompt_hash = ?1 AND created_at > ?2",
)?;
let result: Option<String> = stmt.query_row(params![key, cutoff], |row| row.get(0)).ok();
if result.is_some() {
let now_str = now.to_rfc3339();
conn.execute(
"UPDATE response_cache
SET accessed_at = ?1, hit_count = hit_count + 1
WHERE prompt_hash = ?2",
params![now_str, key],
)?;
}
Ok(result)
}
pub fn put(&self, key: &str, model: &str, response: &str, token_count: u32) -> Result<()> {
let conn = self.conn.lock();
let now = Local::now().to_rfc3339();
conn.execute(
"INSERT OR REPLACE INTO response_cache
(prompt_hash, model, response, token_count, created_at, accessed_at, hit_count)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, 0)",
params![key, model, response, token_count, now, now],
)?;
let cutoff = (Local::now() - Duration::minutes(self.ttl_minutes)).to_rfc3339();
conn.execute(
"DELETE FROM response_cache WHERE created_at <= ?1",
params![cutoff],
)?;
#[allow(clippy::cast_possible_wrap)]
let max = self.max_entries as i64;
conn.execute(
"DELETE FROM response_cache WHERE prompt_hash IN (
SELECT prompt_hash FROM response_cache
ORDER BY accessed_at ASC
LIMIT MAX(0, (SELECT COUNT(*) FROM response_cache) - ?1)
)",
params![max],
)?;
Ok(())
}
pub fn stats(&self) -> Result<(usize, u64, u64)> {
let conn = self.conn.lock();
let count: i64 =
conn.query_row("SELECT COUNT(*) FROM response_cache", [], |row| row.get(0))?;
let hits: i64 = conn.query_row(
"SELECT COALESCE(SUM(hit_count), 0) FROM response_cache",
[],
|row| row.get(0),
)?;
let tokens_saved: i64 = conn.query_row(
"SELECT COALESCE(SUM(token_count * hit_count), 0) FROM response_cache",
[],
|row| row.get(0),
)?;
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
Ok((count as usize, hits as u64, tokens_saved as u64))
}
pub fn clear(&self) -> Result<usize> {
let conn = self.conn.lock();
let affected = conn.execute("DELETE FROM response_cache", [])?;
Ok(affected)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates a cache with the given TTL and a 1000-entry cap inside a fresh
    /// temporary directory. The directory handle is returned so it outlives
    /// the cache.
    fn temp_cache(ttl_minutes: u32) -> (TempDir, ResponseCache) {
        let dir = TempDir::new().unwrap();
        let cache = ResponseCache::new(dir.path(), ttl_minutes, 1000).unwrap();
        (dir, cache)
    }

    /// Key for a `gpt-4` request without a system prompt.
    fn gpt4_key(prompt: &str) -> String {
        ResponseCache::cache_key("gpt-4", None, prompt)
    }

    /// Stores `response {i}` under the key for `prompt {i}`, for each index.
    fn fill(cache: &ResponseCache, indices: std::ops::Range<u32>) {
        for i in indices {
            let prompt = format!("prompt {i}");
            let response = format!("response {i}");
            cache
                .put(&gpt4_key(&prompt), "gpt-4", &response, 10)
                .unwrap();
        }
    }

    #[test]
    fn cache_key_deterministic() {
        let first = ResponseCache::cache_key("gpt-4", Some("sys"), "hello");
        let second = ResponseCache::cache_key("gpt-4", Some("sys"), "hello");
        assert_eq!(first, second);
        // SHA-256 rendered as hex is always 64 characters.
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn cache_key_varies_by_model() {
        assert_ne!(
            ResponseCache::cache_key("gpt-4", None, "hello"),
            ResponseCache::cache_key("claude-3", None, "hello")
        );
    }

    #[test]
    fn cache_key_varies_by_system_prompt() {
        assert_ne!(
            ResponseCache::cache_key("gpt-4", Some("You are helpful"), "hello"),
            ResponseCache::cache_key("gpt-4", Some("You are rude"), "hello")
        );
    }

    #[test]
    fn cache_key_varies_by_prompt() {
        assert_ne!(gpt4_key("hello"), gpt4_key("goodbye"));
    }

    #[test]
    fn put_and_get() {
        let (_dir, cache) = temp_cache(60);
        let key = gpt4_key("What is Rust?");
        let stored = "Rust is a systems programming language.";
        cache.put(&key, "gpt-4", stored, 25).unwrap();
        let fetched = cache.get(&key).unwrap();
        assert_eq!(fetched.as_deref(), Some(stored));
    }

    #[test]
    fn miss_returns_none() {
        let (_dir, cache) = temp_cache(60);
        assert!(cache.get("nonexistent_key").unwrap().is_none());
    }

    #[test]
    fn expired_entry_returns_none() {
        // A zero-minute TTL means an entry is already expired once written.
        let (_dir, cache) = temp_cache(0);
        let key = gpt4_key("test");
        cache.put(&key, "gpt-4", "response", 10).unwrap();
        assert!(cache.get(&key).unwrap().is_none());
    }

    #[test]
    fn hit_count_incremented() {
        let (_dir, cache) = temp_cache(60);
        let key = gpt4_key("hello");
        cache.put(&key, "gpt-4", "Hi!", 5).unwrap();
        (0..3).for_each(|_| {
            cache.get(&key).unwrap();
        });
        let total_hits = cache.stats().unwrap().1;
        assert_eq!(total_hits, 3);
    }

    #[test]
    fn tokens_saved_calculated() {
        let (_dir, cache) = temp_cache(60);
        let key = gpt4_key("explain rust");
        cache.put(&key, "gpt-4", "Rust is...", 100).unwrap();
        (0..5).for_each(|_| {
            cache.get(&key).unwrap();
        });
        // 100 tokens per response, served from cache five times.
        let tokens_saved = cache.stats().unwrap().2;
        assert_eq!(tokens_saved, 500);
    }

    #[test]
    fn lru_eviction() {
        let dir = TempDir::new().unwrap();
        let cache = ResponseCache::new(dir.path(), 60, 3).unwrap();
        fill(&cache, 0..5);
        let count = cache.stats().unwrap().0;
        assert!(count <= 3, "Should have at most 3 entries after eviction");
    }

    #[test]
    fn clear_wipes_all() {
        let (_dir, cache) = temp_cache(60);
        fill(&cache, 0..10);
        assert_eq!(cache.clear().unwrap(), 10);
        assert_eq!(cache.stats().unwrap().0, 0);
    }

    #[test]
    fn stats_empty_cache() {
        let (_dir, cache) = temp_cache(60);
        let (count, hits, tokens) = cache.stats().unwrap();
        assert_eq!(count, 0);
        assert_eq!(hits, 0);
        assert_eq!(tokens, 0);
    }

    #[test]
    fn overwrite_same_key() {
        let (_dir, cache) = temp_cache(60);
        let key = gpt4_key("question");
        for (answer, tokens) in [("answer v1", 20), ("answer v2", 25)] {
            cache.put(&key, "gpt-4", answer, tokens).unwrap();
        }
        let latest = cache.get(&key).unwrap();
        assert_eq!(latest.as_deref(), Some("answer v2"));
        assert_eq!(cache.stats().unwrap().0, 1);
    }

    #[test]
    fn unicode_prompt_handling() {
        let (_dir, cache) = temp_cache(60);
        let key = gpt4_key("日本語のテスト 🦀");
        let stored = "はい、Rustは素晴らしい";
        cache.put(&key, "gpt-4", stored, 30).unwrap();
        let fetched = cache.get(&key).unwrap();
        assert_eq!(fetched.as_deref(), Some("はい、Rustは素晴らしい"));
    }

    #[test]
    fn lru_eviction_keeps_most_recent() {
        let dir = TempDir::new().unwrap();
        let cache = ResponseCache::new(dir.path(), 60, 3).unwrap();
        fill(&cache, 0..3);

        // Touch the oldest entry so it becomes the most recently accessed.
        let touched = gpt4_key("prompt 0");
        cache.get(&touched).unwrap();

        // A fourth entry pushes the cache over its limit and forces eviction.
        let newcomer = gpt4_key("prompt 3");
        cache.put(&newcomer, "gpt-4", "response 3", 10).unwrap();

        let count = cache.stats().unwrap().0;
        assert!(count <= 3, "cache must not exceed max_entries");
        let survivor = cache.get(&touched).unwrap();
        assert!(
            survivor.is_some(),
            "recently accessed entry should survive LRU eviction"
        );
    }

    #[test]
    fn cache_handles_zero_max_entries() {
        let dir = TempDir::new().unwrap();
        let cache = ResponseCache::new(dir.path(), 60, 0).unwrap();
        cache
            .put(&gpt4_key("test"), "gpt-4", "response", 10)
            .unwrap();
        let count = cache.stats().unwrap().0;
        assert_eq!(count, 0, "cache with max_entries=0 should evict everything");
    }

    #[test]
    fn cache_concurrent_reads_no_panic() {
        let dir = TempDir::new().unwrap();
        let cache = std::sync::Arc::new(ResponseCache::new(dir.path(), 60, 100).unwrap());
        let key = gpt4_key("concurrent");
        cache.put(&key, "gpt-4", "response", 10).unwrap();

        // Spawn all readers first, then wait for every one of them.
        let readers: Vec<_> = (0..10)
            .map(|_| {
                let shared = std::sync::Arc::clone(&cache);
                let key = key.clone();
                std::thread::spawn(move || {
                    shared.get(&key).unwrap();
                })
            })
            .collect();
        readers
            .into_iter()
            .for_each(|reader| reader.join().unwrap());

        let hits = cache.stats().unwrap().1;
        assert_eq!(hits, 10, "all concurrent reads should register as hits");
    }
}