Skip to main content

ccboard_core/cache/
metadata_cache.rs

//! SQLite metadata cache for session files
//!
//! Caches session metadata with mtime-based invalidation for 90% startup speedup.
//!
//! Schema:
//! - cache_metadata table: stores the cache version under the `version` key
//! - session_metadata table: stores parsed metadata + mtime
//! - Indexes: project, mtime, session_id for fast queries
//!
//! Invalidation:
//! - File watcher detects modification → delete cache entry
//! - Startup: compare mtime → rescan if stale
//! - Startup: compare cache_version → auto-clear if mismatch
//!
//! Cache Version History:
//! - v1: Initial version (pre-TokenUsage fix)
//! - v2: Fixed TokenUsage::total() to include cache_read_tokens + cache_write_tokens
//! - v3: Added token breakdown fields (input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens)
//!       to SessionMetadata + real pricing calculation
//! - v4: Added branch field to SessionMetadata
19
20use crate::models::SessionMetadata;
21use anyhow::{Context, Result};
22use rusqlite::{params, Connection, OptionalExtension};
23use std::path::{Path, PathBuf};
24use std::sync::Mutex;
25use std::time::SystemTime;
26use tracing::{debug, warn};
27
/// Version stamp of the cached metadata format.
///
/// **IMPORTANT**: bump this number whenever the way metadata is computed or
/// laid out changes, for example:
/// - fields are added to or removed from `TokenUsage`
/// - the structure of `SessionMetadata` changes
/// - parsing logic changes (e.g. how tokens are accumulated)
///
/// On startup the stored version is compared against this constant and the
/// cached sessions are discarded when they differ, so stale entries are never
/// served after an upgrade.
///
/// Version History:
/// - v1: Initial version
/// - v2: Fixed TokenUsage::total() calculation
/// - v3: Added token breakdown fields
/// - v4: Added branch field to SessionMetadata
const CACHE_VERSION: i32 = 4;
43
44/// SQLite-based metadata cache (thread-safe)
45pub struct MetadataCache {
46    conn: Mutex<Connection>,
47    #[allow(dead_code)]
48    cache_path: PathBuf,
49}
50
51impl MetadataCache {
52    /// Create or open cache database
53    pub fn new(cache_dir: &Path) -> Result<Self> {
54        std::fs::create_dir_all(cache_dir).with_context(|| {
55            format!("Failed to create cache directory: {}", cache_dir.display())
56        })?;
57
58        let cache_path = cache_dir.join("session-metadata.db");
59        let conn = Connection::open(&cache_path)
60            .with_context(|| format!("Failed to open cache database: {}", cache_path.display()))?;
61
62        // Enable WAL mode for better concurrency
63        conn.pragma_update(None, "journal_mode", "WAL")
64            .context("Failed to enable WAL mode")?;
65
66        // Initialize schema
67        conn.execute_batch(
68            r#"
69            CREATE TABLE IF NOT EXISTS cache_metadata (
70                key TEXT PRIMARY KEY,
71                value INTEGER NOT NULL
72            );
73
74            CREATE TABLE IF NOT EXISTS session_metadata (
75                path TEXT PRIMARY KEY,
76                mtime INTEGER NOT NULL,
77                project TEXT NOT NULL,
78                session_id TEXT NOT NULL,
79                first_timestamp TEXT,
80                last_timestamp TEXT,
81                message_count INTEGER NOT NULL,
82                total_tokens INTEGER NOT NULL,
83                models_used TEXT NOT NULL,
84                has_subagents INTEGER NOT NULL,
85                first_user_message TEXT,
86                data BLOB NOT NULL
87            );
88
89            CREATE INDEX IF NOT EXISTS idx_project ON session_metadata(project);
90            CREATE INDEX IF NOT EXISTS idx_mtime ON session_metadata(mtime);
91            CREATE INDEX IF NOT EXISTS idx_session_id ON session_metadata(session_id);
92            "#,
93        )
94        .context("Failed to create schema")?;
95
96        // Check cache version and auto-invalidate if mismatch
97        let stored_version: Option<i32> = conn
98            .query_row(
99                "SELECT value FROM cache_metadata WHERE key = 'version'",
100                [],
101                |row| row.get(0),
102            )
103            .optional()
104            .context("Failed to query cache version")?;
105
106        match stored_version {
107            Some(v) if v != CACHE_VERSION => {
108                warn!(
109                    stored = v,
110                    current = CACHE_VERSION,
111                    "Cache version mismatch detected, clearing stale cache"
112                );
113
114                // Clear all session entries
115                conn.execute("DELETE FROM session_metadata", [])
116                    .context("Failed to clear stale cache")?;
117
118                // Update version
119                conn.execute(
120                    "INSERT OR REPLACE INTO cache_metadata (key, value) VALUES ('version', ?)",
121                    params![CACHE_VERSION],
122                )
123                .context("Failed to update cache version")?;
124
125                debug!("Cache cleared and version updated to {}", CACHE_VERSION);
126            }
127            None => {
128                // First run, set version
129                conn.execute(
130                    "INSERT INTO cache_metadata (key, value) VALUES ('version', ?)",
131                    params![CACHE_VERSION],
132                )
133                .context("Failed to initialize cache version")?;
134
135                debug!("Cache version initialized to {}", CACHE_VERSION);
136            }
137            Some(_) => {
138                debug!("Cache version {} matches current", CACHE_VERSION);
139            }
140        }
141
142        let cache = Self {
143            conn: Mutex::new(conn),
144            cache_path: cache_path.clone(),
145        };
146
147        debug!(path = %cache_path.display(), "Metadata cache initialized");
148
149        Ok(cache)
150    }
151
152    /// Get cached metadata if fresh, otherwise None
153    pub fn get(&self, path: &Path, current_mtime: SystemTime) -> Result<Option<SessionMetadata>> {
154        let path_str = path.to_string_lossy();
155        let mtime_secs = current_mtime
156            .duration_since(SystemTime::UNIX_EPOCH)
157            .context("Invalid mtime")?
158            .as_secs();
159
160        let conn = self
161            .conn
162            .lock()
163            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
164
165        let result: Option<Vec<u8>> = conn
166            .query_row(
167                "SELECT data FROM session_metadata WHERE path = ? AND mtime = ?",
168                params![path_str.as_ref(), mtime_secs as i64],
169                |row| row.get(0),
170            )
171            .optional()
172            .context("Failed to query cache")?;
173
174        match result {
175            Some(bytes) => {
176                let meta: SessionMetadata = bincode::deserialize(&bytes)
177                    .context("Failed to deserialize cached metadata")?;
178                debug!(path = %path.display(), "Cache hit");
179                Ok(Some(meta))
180            }
181            None => {
182                debug!(path = %path.display(), "Cache miss");
183                Ok(None)
184            }
185        }
186    }
187
188    /// Store metadata in cache
189    pub fn put(&self, path: &Path, meta: &SessionMetadata, mtime: SystemTime) -> Result<()> {
190        let path_str = path.to_string_lossy();
191        let mtime_secs = mtime
192            .duration_since(SystemTime::UNIX_EPOCH)
193            .context("Invalid mtime")?
194            .as_secs();
195
196        let data = bincode::serialize(meta).context("Failed to serialize metadata")?;
197
198        // Extract searchable fields
199        let models_used =
200            serde_json::to_string(&meta.models_used).context("Failed to serialize models")?;
201
202        let conn = self
203            .conn
204            .lock()
205            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
206
207        conn.execute(
208            r#"
209                INSERT OR REPLACE INTO session_metadata
210                (path, mtime, project, session_id, first_timestamp, last_timestamp,
211                 message_count, total_tokens, models_used, has_subagents, first_user_message, data)
212                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
213                "#,
214            params![
215                path_str.as_ref(),
216                mtime_secs as i64,
217                meta.project_path.as_str(),
218                meta.id.as_str(),
219                meta.first_timestamp.as_ref().map(|t| t.to_rfc3339()),
220                meta.last_timestamp.as_ref().map(|t| t.to_rfc3339()),
221                meta.message_count as i64,
222                meta.total_tokens as i64,
223                models_used,
224                if meta.has_subagents { 1 } else { 0 },
225                &meta.first_user_message,
226                &data,
227            ],
228        )
229        .context("Failed to insert metadata")?;
230
231        debug!(path = %path.display(), "Metadata cached");
232        Ok(())
233    }
234
235    /// Invalidate cache entry for a path
236    pub fn invalidate(&self, path: &Path) -> Result<()> {
237        let path_str = path.to_string_lossy();
238
239        let conn = self
240            .conn
241            .lock()
242            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
243
244        conn.execute(
245            "DELETE FROM session_metadata WHERE path = ?",
246            params![path_str.as_ref()],
247        )
248        .context("Failed to delete cache entry")?;
249
250        debug!(path = %path.display(), "Cache entry invalidated");
251        Ok(())
252    }
253
254    /// Get all cached paths for a project
255    pub fn get_project_paths(&self, project: &str) -> Result<Vec<PathBuf>> {
256        let conn = self
257            .conn
258            .lock()
259            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
260
261        let mut stmt = conn
262            .prepare("SELECT path FROM session_metadata WHERE project = ?")
263            .context("Failed to prepare query")?;
264
265        let rows = stmt
266            .query_map(params![project], |row| {
267                let path_str: String = row.get(0)?;
268                Ok(PathBuf::from(path_str))
269            })
270            .context("Failed to query project paths")?;
271
272        let mut paths = Vec::new();
273        for row in rows {
274            paths.push(row.context("Failed to read row")?);
275        }
276
277        Ok(paths)
278    }
279
280    /// Get cache statistics
281    pub fn stats(&self) -> Result<CacheStats> {
282        let conn = self
283            .conn
284            .lock()
285            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
286
287        let total_entries: i64 = conn
288            .query_row("SELECT COUNT(*) FROM session_metadata", [], |row| {
289                row.get(0)
290            })
291            .context("Failed to count entries")?;
292
293        let total_size: i64 = conn
294            .query_row(
295                "SELECT SUM(LENGTH(data)) FROM session_metadata",
296                [],
297                |row| row.get(0),
298            )
299            .unwrap_or(0);
300
301        let project_count: i64 = conn
302            .query_row(
303                "SELECT COUNT(DISTINCT project) FROM session_metadata",
304                [],
305                |row| row.get(0),
306            )
307            .context("Failed to count projects")?;
308
309        Ok(CacheStats {
310            total_entries: total_entries as usize,
311            total_size_bytes: total_size as usize,
312            project_count: project_count as usize,
313        })
314    }
315
316    /// Clear all cache entries (for testing or rebuild)
317    pub fn clear(&self) -> Result<()> {
318        let conn = self
319            .conn
320            .lock()
321            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
322
323        conn.execute("DELETE FROM session_metadata", [])
324            .context("Failed to clear cache")?;
325
326        debug!("Cache cleared");
327        Ok(())
328    }
329
330    /// Vacuum database to reclaim space
331    pub fn vacuum(&self) -> Result<()> {
332        let conn = self
333            .conn
334            .lock()
335            .map_err(|e| anyhow::anyhow!("Metadata cache lock poisoned: {}", e))?;
336
337        conn.execute("VACUUM", []).context("Failed to vacuum")?;
338
339        debug!("Database vacuumed");
340        Ok(())
341    }
342}
343
344impl Drop for MetadataCache {
345    fn drop(&mut self) {
346        // WAL checkpoint on drop to ensure all data is flushed to main database file
347        // and WAL file doesn't grow unbounded across restarts
348        if let Ok(conn) = self.conn.lock() {
349            if let Err(e) = conn.pragma_update(None, "wal_checkpoint", "TRUNCATE") {
350                warn!("Failed to checkpoint WAL on MetadataCache drop: {}", e);
351            } else {
352                debug!("WAL checkpoint completed on MetadataCache drop");
353            }
354        }
355    }
356}
357
/// Summary figures describing the contents of the cache.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of cached session entries.
    pub total_entries: usize,
    /// Combined size, in bytes, of the stored metadata blobs.
    pub total_size_bytes: usize,
    /// Number of distinct projects among the cached entries.
    pub project_count: usize,
}

impl CacheStats {
    /// Ratio of cached entries to `scanned` items.
    ///
    /// Returns `0.0` when `scanned` is zero. The value is not clamped, so it
    /// exceeds `1.0` when the cache holds more entries than were scanned.
    pub fn hit_rate(&self, scanned: usize) -> f64 {
        match scanned {
            0 => 0.0,
            n => self.total_entries as f64 / n as f64,
        }
    }
}
374
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::SessionMetadata;
    use chrono::Utc;
    use tempfile::tempdir;

    /// Caches default metadata for `file` under `project` and returns the path used.
    fn put_default(
        cache: &MetadataCache,
        file: &str,
        project: &'static str,
        mtime: SystemTime,
    ) -> PathBuf {
        let path = PathBuf::from(file);
        let meta = SessionMetadata::from_path(path.clone(), project.into());
        cache.put(&path, &meta, mtime).unwrap();
        path
    }

    /// A freshly created cache reports zero entries.
    #[test]
    fn test_cache_creation() {
        let dir = tempdir().unwrap();
        let cache = MetadataCache::new(dir.path()).unwrap();

        assert_eq!(cache.stats().unwrap().total_entries, 0);
    }

    /// A stored entry is returned for the same mtime and missed for another.
    #[test]
    fn test_cache_put_get() {
        let dir = tempdir().unwrap();
        let cache = MetadataCache::new(dir.path()).unwrap();

        let path = PathBuf::from("/tmp/test.jsonl");
        let mut meta = SessionMetadata::from_path(path.clone(), "/test".into());
        meta.id = "test-123".into();
        meta.message_count = 42;
        meta.total_tokens = 1000;
        meta.models_used = std::iter::once("sonnet".to_string()).collect();
        meta.first_timestamp = Some(Utc::now());

        let mtime = SystemTime::now();
        cache.put(&path, &meta, mtime).unwrap();

        // Same mtime: hit
        let hit = cache.get(&path, mtime).unwrap();
        assert!(hit.is_some());
        let hit = hit.unwrap();
        assert_eq!(hit.id, "test-123");
        assert_eq!(hit.message_count, 42);

        // Different mtime: miss
        let an_hour_earlier = mtime - std::time::Duration::from_secs(3600);
        let miss = cache.get(&path, an_hour_earlier).unwrap();
        assert!(miss.is_none());
    }

    /// Invalidating a path removes its entry.
    #[test]
    fn test_cache_invalidate() {
        let dir = tempdir().unwrap();
        let cache = MetadataCache::new(dir.path()).unwrap();

        let mtime = SystemTime::now();
        let path = put_default(&cache, "/tmp/test.jsonl", "/test", mtime);

        cache.invalidate(&path).unwrap();

        let after = cache.get(&path, mtime).unwrap();
        assert!(after.is_none());
    }

    /// Paths are returned per project.
    #[test]
    fn test_cache_project_paths() {
        let dir = tempdir().unwrap();
        let cache = MetadataCache::new(dir.path()).unwrap();

        let mtime = SystemTime::now();

        // Three sessions in project1, two in project2
        for i in 0..3 {
            let file = format!("/tmp/project1/session{}.jsonl", i);
            put_default(&cache, &file, "/project1", mtime);
        }
        for i in 0..2 {
            let file = format!("/tmp/project2/session{}.jsonl", i);
            put_default(&cache, &file, "/project2", mtime);
        }

        let first = cache.get_project_paths("/project1").unwrap();
        assert_eq!(first.len(), 3);

        let second = cache.get_project_paths("/project2").unwrap();
        assert_eq!(second.len(), 2);
    }

    /// Stats reflect the number of entries, their size, and the project count.
    #[test]
    fn test_cache_stats() {
        let dir = tempdir().unwrap();
        let cache = MetadataCache::new(dir.path()).unwrap();

        let mtime = SystemTime::now();

        for i in 0..10 {
            let file = format!("/tmp/session{}.jsonl", i);
            put_default(&cache, &file, "/test", mtime);
        }

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_entries, 10);
        assert!(stats.total_size_bytes > 0);
        assert_eq!(stats.project_count, 1);
    }

    /// Clearing the cache removes every entry.
    #[test]
    fn test_cache_clear() {
        let dir = tempdir().unwrap();
        let cache = MetadataCache::new(dir.path()).unwrap();

        put_default(&cache, "/tmp/test.jsonl", "/test", SystemTime::now());
        assert_eq!(cache.stats().unwrap().total_entries, 1);

        cache.clear().unwrap();
        assert_eq!(cache.stats().unwrap().total_entries, 0);
    }
}