// sqz_engine/session_store.rs

1use std::path::Path;
2use std::path::PathBuf;
3
4use chrono::{DateTime, Utc};
5use rusqlite::{params, Connection, OpenFlags};
6use serde::{Deserialize, Serialize};
7
8use crate::error::{Result, SqzError};
9use crate::types::{CompressedContent, SessionId, SessionState};
10
/// A lightweight summary of a session for search results. Doesn't include
/// the full conversation — just enough to identify and filter sessions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionSummary {
    /// Session identifier (the `id` column of the `sessions` table).
    pub id: SessionId,
    /// Project directory recorded for the session.
    pub project_dir: PathBuf,
    /// Compressed summary text; one of the columns indexed by the FTS5 table.
    pub compressed_summary: String,
    /// Creation timestamp, parsed from the stored `created_at` text column.
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp, parsed from the stored `updated_at` text column.
    pub updated_at: DateTime<Utc>,
}
21
/// SQLite-backed persistent session and cache store with FTS5 full-text search.
///
/// Stores sessions, cache entries, compression logs, and known files in a
/// single SQLite database. Uses WAL mode for concurrent read access.
///
/// The path in the example below is illustrative only: `Path` does not
/// expand `~`, so real callers should pass an already-resolved path.
///
/// ```rust,no_run
/// use sqz_engine::SessionStore;
/// use std::path::Path;
///
/// let store = SessionStore::open_or_create(Path::new("~/.sqz/sessions.db")).unwrap();
/// let results = store.search("authentication refactor").unwrap();
/// for session in &results {
///     println!("{}: {}", session.id, session.compressed_summary);
/// }
/// ```
pub struct SessionStore {
    /// The underlying SQLite connection; every method runs its SQL through it.
    db: Connection,
}
40
41// ── Schema ────────────────────────────────────────────────────────────────────
42
/// Full database schema, applied as one batch every time a connection is
/// opened (see `apply_schema`).
///
/// Every `CREATE` statement uses `IF NOT EXISTS`, so re-applying the batch to
/// an existing database is harmless. The batch:
///
/// * switches the journal to WAL mode;
/// * creates `sessions` plus the external-content FTS5 index `sessions_fts`
///   over `id`, `project_dir` and `compressed_summary`, kept in sync by the
///   `sessions_ai` / `sessions_ad` / `sessions_au` triggers;
/// * creates `cache_entries`, `compression_log`, `known_files` and the
///   key/value `metadata` table.
const SCHEMA: &str = r#"
PRAGMA journal_mode = WAL;

CREATE TABLE IF NOT EXISTS sessions (
    id               TEXT PRIMARY KEY,
    project_dir      TEXT NOT NULL,
    compressed_summary TEXT NOT NULL,
    created_at       TEXT NOT NULL,
    updated_at       TEXT NOT NULL,
    data             BLOB NOT NULL
);

CREATE VIRTUAL TABLE IF NOT EXISTS sessions_fts USING fts5(
    id,
    project_dir,
    compressed_summary,
    content='sessions',
    content_rowid='rowid',
    tokenize='porter ascii'
);

CREATE TRIGGER IF NOT EXISTS sessions_ai AFTER INSERT ON sessions BEGIN
    INSERT INTO sessions_fts(rowid, id, project_dir, compressed_summary)
    VALUES (new.rowid, new.id, new.project_dir, new.compressed_summary);
END;

CREATE TRIGGER IF NOT EXISTS sessions_ad AFTER DELETE ON sessions BEGIN
    INSERT INTO sessions_fts(sessions_fts, rowid, id, project_dir, compressed_summary)
    VALUES ('delete', old.rowid, old.id, old.project_dir, old.compressed_summary);
END;

CREATE TRIGGER IF NOT EXISTS sessions_au AFTER UPDATE ON sessions BEGIN
    INSERT INTO sessions_fts(sessions_fts, rowid, id, project_dir, compressed_summary)
    VALUES ('delete', old.rowid, old.id, old.project_dir, old.compressed_summary);
    INSERT INTO sessions_fts(rowid, id, project_dir, compressed_summary)
    VALUES (new.rowid, new.id, new.project_dir, new.compressed_summary);
END;

CREATE TABLE IF NOT EXISTS cache_entries (
    hash        TEXT PRIMARY KEY,
    data        TEXT NOT NULL,
    accessed_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS compression_log (
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    tokens_original  INTEGER NOT NULL,
    tokens_compressed INTEGER NOT NULL,
    stages_applied   TEXT NOT NULL,
    mode             TEXT NOT NULL DEFAULT 'auto',
    created_at       TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS known_files (
    path        TEXT PRIMARY KEY,
    added_at    TEXT NOT NULL
);

-- Small key/value store for engine-wide state that needs to persist across
-- short-lived sqz processes (each shell-hook invocation is a new process).
-- Initially used only for the last_compaction_at marker: cache entries with
-- `accessed_at < last_compaction_at` are treated as stale even if still
-- within the normal TTL. See cache_manager.rs for the freshness model.
CREATE TABLE IF NOT EXISTS metadata (
    key         TEXT PRIMARY KEY,
    value       TEXT NOT NULL
);
"#;
111
112// ── Helpers ───────────────────────────────────────────────────────────────────
113
114pub(crate) fn apply_schema(conn: &Connection) -> rusqlite::Result<()> {
115    conn.execute_batch(SCHEMA)
116}
117
118fn open_connection(path: &Path) -> rusqlite::Result<Connection> {
119    let conn = Connection::open(path)?;
120    apply_schema(&conn)?;
121    Ok(conn)
122}
123
124fn row_to_summary(
125    id: String,
126    project_dir: String,
127    compressed_summary: String,
128    created_at: String,
129    updated_at: String,
130) -> Result<SessionSummary> {
131    let created_at = created_at
132        .parse::<DateTime<Utc>>()
133        .map_err(|e| SqzError::Other(format!("invalid created_at timestamp: {e}")))?;
134    let updated_at = updated_at
135        .parse::<DateTime<Utc>>()
136        .map_err(|e| SqzError::Other(format!("invalid updated_at timestamp: {e}")))?;
137    Ok(SessionSummary {
138        id,
139        project_dir: PathBuf::from(project_dir),
140        compressed_summary,
141        created_at,
142        updated_at,
143    })
144}
145
146// ── SessionStore ──────────────────────────────────────────────────────────────
147
148impl SessionStore {
149    /// Construct a `SessionStore` from an already-open `Connection`.
150    /// Intended for testing (e.g., in-memory databases).
151    #[cfg(test)]
152    pub(crate) fn from_connection(conn: Connection) -> Self {
153        Self { db: conn }
154    }
155
156    /// Open an existing database at `path`. Returns an error if the file does
157    /// not exist or cannot be opened.
158    pub fn open(path: &Path) -> Result<Self> {
159        let conn = Connection::open_with_flags(path, OpenFlags::SQLITE_OPEN_READ_WRITE)?;
160        apply_schema(&conn)?;
161        Ok(Self { db: conn })
162    }
163
164    /// Open the database at `path`, creating it if it does not exist.
165    /// If the database is corrupted, a fresh database is created at the same
166    /// path and a warning is logged to stderr.
167    pub fn open_or_create(path: &Path) -> Result<Self> {
168        match open_connection(path) {
169            Ok(conn) => Ok(Self { db: conn }),
170            Err(e) => {
171                eprintln!(
172                    "sqz warning: session store at '{}' is corrupted or inaccessible ({e}). \
173                     Creating a new database. Prior session data has been lost.",
174                    path.display()
175                );
176                // Remove the corrupted file so we can start fresh.
177                if path.exists() {
178                    let _ = std::fs::remove_file(path);
179                }
180                let conn = open_connection(path)
181                    .map_err(|e2| SqzError::Other(format!("failed to create new session store: {e2}")))?;
182                Ok(Self { db: conn })
183            }
184        }
185    }
186
187    // ── Session CRUD ──────────────────────────────────────────────────────────
188
189    /// Persist a session. Returns the session id.
190    pub fn save_session(&self, session: &SessionState) -> Result<SessionId> {
191        let data = serde_json::to_vec(session)?;
192        let project_dir = session.project_dir.to_string_lossy().to_string();
193        let created_at = session.created_at.to_rfc3339();
194        let updated_at = session.updated_at.to_rfc3339();
195
196        self.db.execute(
197            r#"INSERT INTO sessions (id, project_dir, compressed_summary, created_at, updated_at, data)
198               VALUES (?1, ?2, ?3, ?4, ?5, ?6)
199               ON CONFLICT(id) DO UPDATE SET
200                   project_dir        = excluded.project_dir,
201                   compressed_summary = excluded.compressed_summary,
202                   created_at         = excluded.created_at,
203                   updated_at         = excluded.updated_at,
204                   data               = excluded.data"#,
205            params![
206                session.id,
207                project_dir,
208                session.compressed_summary,
209                created_at,
210                updated_at,
211                data,
212            ],
213        )?;
214
215        Ok(session.id.clone())
216    }
217
218    /// Load a session by id.
219    pub fn load_session(&self, id: SessionId) -> Result<SessionState> {
220        let data: Vec<u8> = self.db.query_row(
221            "SELECT data FROM sessions WHERE id = ?1",
222            params![id],
223            |row| row.get(0),
224        )?;
225        let session: SessionState = serde_json::from_slice(&data)?;
226        Ok(session)
227    }
228
229    // ── Search ────────────────────────────────────────────────────────────────
230
231    /// Full-text search using FTS5 (porter stemmer, ASCII tokenizer).
232    pub fn search(&self, query: &str) -> Result<Vec<SessionSummary>> {
233        let mut stmt = self.db.prepare(
234            r#"SELECT s.id, s.project_dir, s.compressed_summary, s.created_at, s.updated_at
235               FROM sessions s
236               JOIN sessions_fts f ON s.rowid = f.rowid
237               WHERE sessions_fts MATCH ?1
238               ORDER BY rank"#,
239        )?;
240
241        let rows = stmt.query_map(params![query], |row| {
242            Ok((
243                row.get::<_, String>(0)?,
244                row.get::<_, String>(1)?,
245                row.get::<_, String>(2)?,
246                row.get::<_, String>(3)?,
247                row.get::<_, String>(4)?,
248            ))
249        })?;
250
251        let mut results = Vec::new();
252        for row in rows {
253            let (id, project_dir, compressed_summary, created_at, updated_at) = row?;
254            results.push(row_to_summary(id, project_dir, compressed_summary, created_at, updated_at)?);
255        }
256        Ok(results)
257    }
258
259    /// Query sessions whose `updated_at` falls within `[from, to]`.
260    pub fn search_by_date(
261        &self,
262        from: DateTime<Utc>,
263        to: DateTime<Utc>,
264    ) -> Result<Vec<SessionSummary>> {
265        let mut stmt = self.db.prepare(
266            r#"SELECT id, project_dir, compressed_summary, created_at, updated_at
267               FROM sessions
268               WHERE updated_at >= ?1 AND updated_at <= ?2
269               ORDER BY updated_at DESC"#,
270        )?;
271
272        let rows = stmt.query_map(params![from.to_rfc3339(), to.to_rfc3339()], |row| {
273            Ok((
274                row.get::<_, String>(0)?,
275                row.get::<_, String>(1)?,
276                row.get::<_, String>(2)?,
277                row.get::<_, String>(3)?,
278                row.get::<_, String>(4)?,
279            ))
280        })?;
281
282        let mut results = Vec::new();
283        for row in rows {
284            let (id, project_dir, compressed_summary, created_at, updated_at) = row?;
285            results.push(row_to_summary(id, project_dir, compressed_summary, created_at, updated_at)?);
286        }
287        Ok(results)
288    }
289
290    /// Return the most recently updated session, or `None` if no sessions exist.
291    pub fn latest_session(&self) -> Result<Option<SessionSummary>> {
292        let mut stmt = self.db.prepare(
293            r#"SELECT id, project_dir, compressed_summary, created_at, updated_at
294               FROM sessions
295               ORDER BY updated_at DESC
296               LIMIT 1"#,
297        ).map_err(SqzError::SessionStore)?;
298
299        let rows = stmt.query_map([], |row| {
300            Ok((
301                row.get::<_, String>(0)?,
302                row.get::<_, String>(1)?,
303                row.get::<_, String>(2)?,
304                row.get::<_, String>(3)?,
305                row.get::<_, String>(4)?,
306            ))
307        }).map_err(SqzError::SessionStore)?;
308
309        for row in rows {
310            let (id, project_dir, compressed_summary, created_at, updated_at) =
311                row.map_err(SqzError::SessionStore)?;
312            return Ok(Some(row_to_summary(id, project_dir, compressed_summary, created_at, updated_at)?));
313        }
314        Ok(None)
315    }
316
317    /// Query sessions whose `project_dir` matches `dir` exactly.
318    pub fn search_by_project(&self, dir: &Path) -> Result<Vec<SessionSummary>> {
319        let dir_str = dir.to_string_lossy().to_string();
320        let mut stmt = self.db.prepare(
321            r#"SELECT id, project_dir, compressed_summary, created_at, updated_at
322               FROM sessions
323               WHERE project_dir = ?1
324               ORDER BY updated_at DESC"#,
325        )?;
326
327        let rows = stmt.query_map(params![dir_str], |row| {
328            Ok((
329                row.get::<_, String>(0)?,
330                row.get::<_, String>(1)?,
331                row.get::<_, String>(2)?,
332                row.get::<_, String>(3)?,
333                row.get::<_, String>(4)?,
334            ))
335        })?;
336
337        let mut results = Vec::new();
338        for row in rows {
339            let (id, project_dir, compressed_summary, created_at, updated_at) = row?;
340            results.push(row_to_summary(id, project_dir, compressed_summary, created_at, updated_at)?);
341        }
342        Ok(results)
343    }
344
345    // ── Cache entries ─────────────────────────────────────────────────────────
346
347    /// Persist a cache entry keyed by content hash.
348    pub fn save_cache_entry(&self, hash: &str, compressed: &CompressedContent) -> Result<()> {
349        let data = serde_json::to_string(compressed)?;
350        let now = Utc::now().to_rfc3339();
351        self.db.execute(
352            r#"INSERT INTO cache_entries (hash, data, accessed_at)
353               VALUES (?1, ?2, ?3)
354               ON CONFLICT(hash) DO UPDATE SET data = excluded.data, accessed_at = excluded.accessed_at"#,
355            params![hash, data, now],
356        )?;
357        Ok(())
358    }
359
360    /// Delete a cache entry by content hash.
361    pub fn delete_cache_entry(&self, hash: &str) -> Result<()> {
362        self.db.execute(
363            "DELETE FROM cache_entries WHERE hash = ?1",
364            params![hash],
365        )?;
366        Ok(())
367    }
368
369    /// Return all cache entries ordered by `accessed_at` ASC (oldest first),
370    /// as `(hash, size_bytes)` pairs where `size_bytes` is the byte length of
371    /// the stored JSON data.
372    pub fn list_cache_entries_lru(&self) -> Result<Vec<(String, u64)>> {
373        let mut stmt = self.db.prepare(
374            "SELECT hash, length(data) FROM cache_entries ORDER BY accessed_at ASC",
375        )?;
376        let rows = stmt.query_map([], |row| {
377            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
378        })?;
379        let mut entries = Vec::new();
380        for row in rows {
381            let (hash, size) = row?;
382            entries.push((hash, size as u64));
383        }
384        Ok(entries)
385    }
386
387    /// Retrieve a cache entry by content hash, updating `accessed_at`.
388    pub fn get_cache_entry(&self, hash: &str) -> Result<Option<CompressedContent>> {
389        let result: rusqlite::Result<String> = self.db.query_row(
390            "SELECT data FROM cache_entries WHERE hash = ?1",
391            params![hash],
392            |row| row.get(0),
393        );
394
395        match result {
396            Ok(data) => {
397                // Touch accessed_at for LRU tracking.
398                let now = Utc::now().to_rfc3339();
399                let _ = self.db.execute(
400                    "UPDATE cache_entries SET accessed_at = ?1 WHERE hash = ?2",
401                    params![now, hash],
402                );
403                let entry: CompressedContent = serde_json::from_str(&data)?;
404                Ok(Some(entry))
405            }
406            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
407            Err(e) => Err(SqzError::SessionStore(e)),
408        }
409    }
410
411    /// Read the `accessed_at` timestamp for a cached hash without updating
412    /// it. Returns `None` if the hash is not cached.
413    ///
414    /// Used by the dedup freshness check: if `accessed_at` is recent, the
415    /// LLM likely still has the original content in its context window, so
416    /// returning a ref is safe. If it's old, re-send the full content.
417    pub fn get_cache_entry_accessed_at(&self, hash: &str) -> Result<Option<DateTime<Utc>>> {
418        let result: rusqlite::Result<String> = self.db.query_row(
419            "SELECT accessed_at FROM cache_entries WHERE hash = ?1",
420            params![hash],
421            |row| row.get(0),
422        );
423        match result {
424            Ok(s) => {
425                let ts = s
426                    .parse::<DateTime<Utc>>()
427                    .map_err(|e| SqzError::Other(format!("invalid accessed_at: {e}")))?;
428                Ok(Some(ts))
429            }
430            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
431            Err(e) => Err(SqzError::SessionStore(e)),
432        }
433    }
434
435    /// Check if a cache entry exists without updating `accessed_at`.
436    pub fn cache_entry_exists(&self, hash: &str) -> Result<bool> {
437        let result: rusqlite::Result<i64> = self.db.query_row(
438            "SELECT 1 FROM cache_entries WHERE hash = ?1",
439            params![hash],
440            |row| row.get(0),
441        );
442        match result {
443            Ok(_) => Ok(true),
444            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(false),
445            Err(e) => Err(SqzError::SessionStore(e)),
446        }
447    }
448
449    /// Update `accessed_at` for a cached hash to the current time. Called by
450    /// the cache manager when a ref is served so the next staleness check
451    /// sees the recent send.
452    pub fn touch_cache_entry(&self, hash: &str) -> Result<()> {
453        let now = Utc::now().to_rfc3339();
454        self.db.execute(
455            "UPDATE cache_entries SET accessed_at = ?1 WHERE hash = ?2",
456            params![now, hash],
457        )?;
458        Ok(())
459    }
460
461    /// Set a metadata key/value. Persists across sqz process boundaries
462    /// (each shell-hook invocation is a short-lived process).
463    pub fn set_metadata(&self, key: &str, value: &str) -> Result<()> {
464        self.db.execute(
465            "INSERT INTO metadata (key, value) VALUES (?1, ?2)
466             ON CONFLICT(key) DO UPDATE SET value = excluded.value",
467            params![key, value],
468        )?;
469        Ok(())
470    }
471
472    /// Get a metadata value. Returns `None` if the key has never been set.
473    pub fn get_metadata(&self, key: &str) -> Result<Option<String>> {
474        let result: rusqlite::Result<String> = self.db.query_row(
475            "SELECT value FROM metadata WHERE key = ?1",
476            params![key],
477            |row| row.get(0),
478        );
479        match result {
480            Ok(v) => Ok(Some(v)),
481            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
482            Err(e) => Err(SqzError::SessionStore(e)),
483        }
484    }
485
486    /// Log a compression event for cumulative stats tracking.
487    pub fn log_compression(
488        &self,
489        tokens_original: u32,
490        tokens_compressed: u32,
491        stages: &[String],
492        mode: &str,
493    ) -> Result<()> {
494        let now = Utc::now().to_rfc3339();
495        let stages_str = stages.join(",");
496        self.db.execute(
497            "INSERT INTO compression_log (tokens_original, tokens_compressed, stages_applied, mode, created_at) VALUES (?1, ?2, ?3, ?4, ?5)",
498            params![tokens_original, tokens_compressed, stages_str, mode, now],
499        ).map_err(SqzError::SessionStore)?;
500        Ok(())
501    }
502
503    /// Get cumulative compression stats from the log.
504    pub fn compression_stats(&self) -> Result<CompressionStats> {
505        let mut stmt = self.db.prepare(
506            "SELECT COUNT(*), COALESCE(SUM(tokens_original), 0), COALESCE(SUM(tokens_compressed), 0) FROM compression_log",
507        ).map_err(SqzError::SessionStore)?;
508
509        let stats = stmt.query_row([], |row| {
510            Ok(CompressionStats {
511                total_compressions: row.get::<_, u32>(0)?,
512                total_tokens_in: row.get::<_, u64>(1)?,
513                total_tokens_out: row.get::<_, u64>(2)?,
514            })
515        }).map_err(SqzError::SessionStore)?;
516
517        Ok(stats)
518    }
519
520    /// Get daily compression gains for the last N days.
521    pub fn daily_gains(&self, days: u32) -> Result<Vec<DailyGain>> {
522        let mut stmt = self.db.prepare(
523            "SELECT date(created_at) as d, COUNT(*), SUM(tokens_original), SUM(tokens_compressed) \
524             FROM compression_log \
525             WHERE created_at >= date('now', ?1) \
526             GROUP BY d ORDER BY d",
527        ).map_err(SqzError::SessionStore)?;
528
529        let offset = format!("-{days} days");
530        let rows = stmt.query_map(params![offset], |row| {
531            let tokens_in: u64 = row.get(2)?;
532            let tokens_out: u64 = row.get(3)?;
533            Ok(DailyGain {
534                date: row.get(0)?,
535                compressions: row.get(1)?,
536                tokens_in,
537                tokens_saved: tokens_in.saturating_sub(tokens_out),
538            })
539        }).map_err(SqzError::SessionStore)?;
540
541        let mut gains = Vec::new();
542        for row in rows {
543            gains.push(row.map_err(SqzError::SessionStore)?);
544        }
545        Ok(gains)
546    }
547
548    // ── Known files (persistent cross-command context tracking) ───────────
549
550    /// Record a file path as "known" (its content is in the dedup cache).
551    /// Used by cross-command context refs to annotate error messages.
552    pub fn add_known_file(&self, path: &str) -> Result<()> {
553        let now = Utc::now().to_rfc3339();
554        self.db.execute(
555            "INSERT OR REPLACE INTO known_files (path, added_at) VALUES (?1, ?2)",
556            params![path, now],
557        ).map_err(SqzError::SessionStore)?;
558        Ok(())
559    }
560
561    /// Load all known file paths from the persistent store.
562    pub fn known_files(&self) -> Result<Vec<String>> {
563        let mut stmt = self.db.prepare(
564            "SELECT path FROM known_files ORDER BY added_at DESC",
565        ).map_err(SqzError::SessionStore)?;
566
567        let rows = stmt.query_map([], |row| {
568            row.get::<_, String>(0)
569        }).map_err(SqzError::SessionStore)?;
570
571        let mut files = Vec::new();
572        for row in rows {
573            files.push(row.map_err(SqzError::SessionStore)?);
574        }
575        Ok(files)
576    }
577
578    /// Clear all known files (e.g. on session reset).
579    pub fn clear_known_files(&self) -> Result<()> {
580        self.db.execute("DELETE FROM known_files", [])
581            .map_err(SqzError::SessionStore)?;
582        Ok(())
583    }
584}
585
/// Running totals over every logged compression.
#[derive(Debug, Clone, Default)]
pub struct CompressionStats {
    /// Number of compression events counted.
    pub total_compressions: u32,
    /// Sum of token counts before compression.
    pub total_tokens_in: u64,
    /// Sum of token counts after compression.
    pub total_tokens_out: u64,
}

impl CompressionStats {
    /// Tokens removed overall; clamps to 0 when output exceeds input.
    pub fn tokens_saved(&self) -> u64 {
        self.total_tokens_in
            .checked_sub(self.total_tokens_out)
            .unwrap_or(0)
    }

    /// Percentage reduction from input to output tokens.
    ///
    /// Returns `0.0` when nothing has been counted yet; the value is negative
    /// when the output is larger than the input.
    pub fn reduction_pct(&self) -> f64 {
        match self.total_tokens_in {
            0 => 0.0,
            total_in => (1.0 - self.total_tokens_out as f64 / total_in as f64) * 100.0,
        }
    }
}
607
/// A single day's compression gain.
#[derive(Debug, Clone)]
pub struct DailyGain {
    /// The day, as produced by SQLite's `date(created_at)`.
    pub date: String,
    /// Number of compression log rows recorded on that day.
    pub compressions: u32,
    /// Original tokens minus compressed tokens for the day (saturating at 0).
    pub tokens_saved: u64,
    /// Sum of original (pre-compression) tokens for the day.
    pub tokens_in: u64,
}
616
617// ── Tests ─────────────────────────────────────────────────────────────────────
618
619#[cfg(test)]
620mod tests {
621    use super::*;
622    use crate::types::{BudgetState, CorrectionLog, ModelFamily, SessionState};
623    use chrono::Utc;
624    use proptest::prelude::*;
625    use std::path::PathBuf;
626
627    fn make_session(id: &str, project_dir: &str, summary: &str) -> SessionState {
628        let now = Utc::now();
629        SessionState {
630            id: id.to_string(),
631            project_dir: PathBuf::from(project_dir),
632            conversation: vec![],
633            corrections: CorrectionLog::default(),
634            pins: vec![],
635            learnings: vec![],
636            compressed_summary: summary.to_string(),
637            budget: BudgetState {
638                window_size: 200_000,
639                consumed: 0,
640                pinned: 0,
641                model_family: ModelFamily::AnthropicClaude,
642            },
643            tool_usage: vec![],
644            created_at: now,
645            updated_at: now,
646        }
647    }
648
649    fn in_memory_store() -> SessionStore {
650        let conn = Connection::open_in_memory().unwrap();
651        apply_schema(&conn).unwrap();
652        SessionStore { db: conn }
653    }
654
655    #[test]
656    fn test_save_and_load_session() {
657        let store = in_memory_store();
658        let session = make_session("sess-1", "/home/user/project", "REST API refactor");
659
660        let id = store.save_session(&session).unwrap();
661        assert_eq!(id, "sess-1");
662
663        let loaded = store.load_session("sess-1".to_string()).unwrap();
664        assert_eq!(loaded.id, session.id);
665        assert_eq!(loaded.compressed_summary, session.compressed_summary);
666        assert_eq!(loaded.project_dir, session.project_dir);
667    }
668
669    #[test]
670    fn test_save_session_upsert() {
671        let store = in_memory_store();
672        let mut session = make_session("sess-2", "/proj", "initial summary");
673        store.save_session(&session).unwrap();
674
675        session.compressed_summary = "updated summary".to_string();
676        store.save_session(&session).unwrap();
677
678        let loaded = store.load_session("sess-2".to_string()).unwrap();
679        assert_eq!(loaded.compressed_summary, "updated summary");
680    }
681
682    #[test]
683    fn test_load_nonexistent_session_errors() {
684        let store = in_memory_store();
685        let result = store.load_session("does-not-exist".to_string());
686        assert!(result.is_err());
687    }
688
689    #[test]
690    fn test_search_fts() {
691        let store = in_memory_store();
692        store.save_session(&make_session("s1", "/proj", "REST API refactor with authentication")).unwrap();
693        store.save_session(&make_session("s2", "/proj", "database migration postgres")).unwrap();
694
695        let results = store.search("authentication").unwrap();
696        assert_eq!(results.len(), 1);
697        assert_eq!(results[0].id, "s1");
698    }
699
700    #[test]
701    fn test_search_by_date() {
702        let store = in_memory_store();
703        let now = Utc::now();
704        let past = now - chrono::Duration::hours(2);
705        let future = now + chrono::Duration::hours(2);
706
707        store.save_session(&make_session("s1", "/proj", "recent session")).unwrap();
708
709        let results = store.search_by_date(past, future).unwrap();
710        assert!(!results.is_empty());
711        assert!(results.iter().any(|r| r.id == "s1"));
712    }
713
714    #[test]
715    fn test_search_by_project() {
716        let store = in_memory_store();
717        store.save_session(&make_session("s1", "/home/user/alpha", "alpha project")).unwrap();
718        store.save_session(&make_session("s2", "/home/user/beta", "beta project")).unwrap();
719
720        let results = store.search_by_project(Path::new("/home/user/alpha")).unwrap();
721        assert_eq!(results.len(), 1);
722        assert_eq!(results[0].id, "s1");
723    }
724
725    #[test]
726    fn test_cache_entry_round_trip() {
727        let store = in_memory_store();
728        let entry = CompressedContent {
729            data: "compressed data".to_string(),
730            tokens_compressed: 10,
731            tokens_original: 50,
732            stages_applied: vec!["strip_nulls".to_string()],
733            compression_ratio: 0.2,
734            provenance: crate::types::Provenance::default(),
735            verify: None,
736        };
737
738        store.save_cache_entry("abc123", &entry).unwrap();
739
740        let loaded = store.get_cache_entry("abc123").unwrap().unwrap();
741        assert_eq!(loaded.data, entry.data);
742        assert_eq!(loaded.tokens_compressed, entry.tokens_compressed);
743        assert_eq!(loaded.tokens_original, entry.tokens_original);
744    }
745
746    #[test]
747    fn test_get_cache_entry_missing_returns_none() {
748        let store = in_memory_store();
749        let result = store.get_cache_entry("nonexistent").unwrap();
750        assert!(result.is_none());
751    }
752
753    #[test]
754    fn test_open_or_create_corrupted_db() {
755        let dir = tempfile::tempdir().unwrap();
756        let path = dir.path().join("store.db");
757
758        // Write garbage bytes to simulate a corrupted database.
759        std::fs::write(&path, b"this is not a valid sqlite database").unwrap();
760
761        // Should succeed by creating a fresh database.
762        let store = SessionStore::open_or_create(&path).unwrap();
763        let session = make_session("s1", "/proj", "after corruption");
764        store.save_session(&session).unwrap();
765        let loaded = store.load_session("s1".to_string()).unwrap();
766        assert_eq!(loaded.id, "s1");
767    }
768
769    // ── Property-based tests ──────────────────────────────────────────────────
770
771    /// Build a `SessionState` with a specific `updated_at` timestamp.
772    fn make_session_at(id: &str, summary: &str, updated_at: DateTime<Utc>) -> SessionState {
773        let now = Utc::now();
774        SessionState {
775            id: id.to_string(),
776            project_dir: PathBuf::from("/proj"),
777            conversation: vec![],
778            corrections: CorrectionLog::default(),
779            pins: vec![],
780            learnings: vec![],
781            compressed_summary: summary.to_string(),
782            budget: BudgetState {
783                window_size: 200_000,
784                consumed: 0,
785                pinned: 0,
786                model_family: ModelFamily::AnthropicClaude,
787            },
788            tool_usage: vec![],
789            created_at: now,
790            updated_at,
791        }
792    }
793
794    // ── Property 26: Session store search correctness ─────────────────────────
795    // **Validates: Requirements 20.2, 20.3, 20.4**
796    //
797    // For any set of sessions saved to the store, a keyword search SHALL return
798    // all sessions whose compressed_summary contains the keyword, and no
799    // sessions that don't contain it.
800
801    proptest! {
802        /// **Validates: Requirements 20.2, 20.3, 20.4**
803        ///
804        /// For any set of sessions saved to the store, a keyword search SHALL
805        /// return all sessions whose `compressed_summary` contains the keyword,
806        /// and no sessions that don't contain it.
807        #[test]
808        fn prop_search_correctness(
809            // A simple ASCII keyword: 5-8 lowercase letters, no common English
810            // words that the porter stemmer might conflate with other terms.
811            keyword in "[b-df-hj-np-tv-z]{5,8}",
812            // 1-6 summaries that embed the keyword
813            matching_suffixes in proptest::collection::vec("[a-z ]{4,20}", 1..=6usize),
814            // 1-6 summaries that do NOT contain the keyword
815            non_matching in proptest::collection::vec("[a-z ]{8,30}", 1..=6usize),
816        ) {
817            // Ensure the keyword doesn't accidentally appear in non-matching summaries.
818            for s in &non_matching {
819                prop_assume!(!s.contains(keyword.as_str()));
820            }
821
822            let store = in_memory_store();
823
824            // Save matching sessions (summary = "<suffix> <keyword> <suffix>")
825            let mut matching_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
826            for (i, suffix) in matching_suffixes.iter().enumerate() {
827                let id = format!("match-{i}");
828                let summary = format!("{} {} end", suffix, keyword);
829                store.save_session(&make_session(&id, "/proj", &summary)).unwrap();
830                matching_ids.insert(id);
831            }
832
833            // Save non-matching sessions
834            let mut non_matching_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
835            for (i, summary) in non_matching.iter().enumerate() {
836                let id = format!("nomatch-{i}");
837                store.save_session(&make_session(&id, "/proj", summary)).unwrap();
838                non_matching_ids.insert(id);
839            }
840
841            let results = store.search(&keyword).unwrap();
842            let result_ids: std::collections::HashSet<String> =
843                results.iter().map(|r| r.id.clone()).collect();
844
845            // Every matching session must appear in results.
846            for id in &matching_ids {
847                prop_assert!(
848                    result_ids.contains(id),
849                    "matching session '{}' not found in search results for keyword '{}'",
850                    id, keyword
851                );
852            }
853
854            // No non-matching session may appear in results.
855            for id in &non_matching_ids {
856                prop_assert!(
857                    !result_ids.contains(id),
858                    "non-matching session '{}' incorrectly appeared in search results for keyword '{}'",
859                    id, keyword
860                );
861            }
862        }
863    }
864
865    // ── Property: search_by_date correctness ─────────────────────────────────
866    // **Validates: Requirements 20.4**
867    //
868    // For any set of sessions with different timestamps, searching by a date
869    // range SHALL return exactly the sessions whose `updated_at` falls within
870    // [from, to], and no sessions outside that range.
871
872    proptest! {
873        /// **Validates: Requirements 20.4**
874        ///
875        /// For any set of sessions with distinct timestamps, `search_by_date`
876        /// SHALL return exactly the sessions whose `updated_at` is within
877        /// `[from, to]`, and no sessions outside that range.
878        #[test]
879        fn prop_search_by_date_correctness(
880            // Generate 2-8 offsets in seconds from epoch (spread over a wide range)
881            offsets in proptest::collection::vec(0i64..=86400i64 * 365, 2..=8usize),
882            // The search window: start and end offsets (relative to the minimum offset)
883            window_start_delta in 0i64..=3600i64,
884            window_end_delta   in 3600i64..=7200i64,
885        ) {
886            use chrono::TimeZone;
887
888            // Deduplicate offsets so each session has a unique timestamp.
889            let mut unique_offsets: Vec<i64> = offsets.clone();
890            unique_offsets.sort_unstable();
891            unique_offsets.dedup();
892            prop_assume!(unique_offsets.len() >= 2);
893
894            let base_offset = unique_offsets[0];
895            let from_offset = base_offset + window_start_delta;
896            let to_offset   = base_offset + window_end_delta;
897
898            let from = Utc.timestamp_opt(from_offset, 0).unwrap();
899            let to   = Utc.timestamp_opt(to_offset,   0).unwrap();
900
901            let store = in_memory_store();
902
903            let mut in_range_ids:  std::collections::HashSet<String> = std::collections::HashSet::new();
904            let mut out_range_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
905
906            for (i, &offset) in unique_offsets.iter().enumerate() {
907                let ts = Utc.timestamp_opt(offset, 0).unwrap();
908                let id = format!("sess-{i}");
909                let session = make_session_at(&id, "some summary", ts);
910                store.save_session(&session).unwrap();
911
912                if ts >= from && ts <= to {
913                    in_range_ids.insert(id);
914                } else {
915                    out_range_ids.insert(id);
916                }
917            }
918
919            let results = store.search_by_date(from, to).unwrap();
920            let result_ids: std::collections::HashSet<String> =
921                results.iter().map(|r| r.id.clone()).collect();
922
923            // Every in-range session must appear.
924            for id in &in_range_ids {
925                prop_assert!(
926                    result_ids.contains(id),
927                    "in-range session '{}' missing from search_by_date results",
928                    id
929                );
930            }
931
932            // No out-of-range session may appear.
933            for id in &out_range_ids {
934                prop_assert!(
935                    !result_ids.contains(id),
936                    "out-of-range session '{}' incorrectly appeared in search_by_date results",
937                    id
938                );
939            }
940        }
941    }
942}