agtrace_index/
db.rs

1use anyhow::{Context, Result};
2use rusqlite::{Connection, OptionalExtension, params};
3use std::path::Path;
4
5// Schema version (increment when changing table definitions)
6const SCHEMA_VERSION: i32 = 1;
7
8// NOTE: Database Design Rationale (Pointer Edition)
9//
10// Why Schema-on-Read (not Schema-on-Write)?
11// - Provider logs change format frequently; parsing logic needs flexibility
12// - Event normalization is complex (Gemini unfold, Codex dedup, etc.)
13// - Raw logs are source of truth; DB is just an index for fast lookup
14// - Keeps DB lightweight and migration-free when schema evolves
15//
16// Why hash-based project identification?
17// - Gemini logs contain projectHash but not projectRoot path
18// - Hash allows cross-provider session grouping before path resolution
19// - Enables "same project" detection across Claude/Codex/Gemini
20//
21// Why soft delete (is_valid flag)?
22// - Avoid orphaned log_files entries when session is deleted
23// - Enable "undo" or audit trail without complex cascade logic
24// - Simplifies cleanup: UPDATE instead of multi-table DELETE transaction
25
26#[derive(Debug, Clone)]
27pub struct ProjectRecord {
28    pub hash: String,
29    pub root_path: Option<String>,
30    pub last_scanned_at: Option<String>,
31}
32
33#[derive(Debug, Clone)]
34pub struct SessionRecord {
35    pub id: String,
36    pub project_hash: String,
37    pub provider: String,
38    pub start_ts: Option<String>,
39    pub end_ts: Option<String>,
40    pub snippet: Option<String>,
41    pub is_valid: bool,
42}
43
44#[derive(Debug, Clone)]
45pub struct LogFileRecord {
46    pub path: String,
47    pub session_id: String,
48    pub role: String,
49    pub file_size: Option<i64>,
50    pub mod_time: Option<String>,
51}
52
53#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
54pub struct SessionSummary {
55    pub id: String,
56    pub provider: String,
57    pub project_hash: String,
58    pub start_ts: Option<String>,
59    pub snippet: Option<String>,
60}
61
62pub struct Database {
63    conn: Connection,
64}
65
66impl Database {
67    pub fn open(db_path: &Path) -> Result<Self> {
68        let conn = Connection::open(db_path)
69            .with_context(|| format!("Failed to open database: {}", db_path.display()))?;
70
71        let db = Self { conn };
72        db.init_schema()?;
73        Ok(db)
74    }
75
76    pub fn open_in_memory() -> Result<Self> {
77        let conn = Connection::open_in_memory()?;
78        let db = Self { conn };
79        db.init_schema()?;
80        Ok(db)
81    }
82
83    pub fn init_schema(&self) -> Result<()> {
84        let current_version: i32 = self
85            .conn
86            .query_row("PRAGMA user_version", [], |row| row.get(0))?;
87
88        if current_version != SCHEMA_VERSION {
89            self.drop_all_tables()?;
90        }
91
92        self.conn.execute_batch(
93            r#"
94            CREATE TABLE IF NOT EXISTS projects (
95                hash TEXT PRIMARY KEY,
96                root_path TEXT,
97                last_scanned_at TEXT
98            );
99
100            CREATE TABLE IF NOT EXISTS sessions (
101                id TEXT PRIMARY KEY,
102                project_hash TEXT NOT NULL,
103                provider TEXT NOT NULL,
104                start_ts TEXT,
105                end_ts TEXT,
106                snippet TEXT,
107                is_valid BOOLEAN DEFAULT 1,
108                FOREIGN KEY (project_hash) REFERENCES projects(hash)
109            );
110
111            CREATE TABLE IF NOT EXISTS log_files (
112                path TEXT PRIMARY KEY,
113                session_id TEXT NOT NULL,
114                role TEXT NOT NULL,
115                file_size INTEGER,
116                mod_time TEXT,
117                FOREIGN KEY (session_id) REFERENCES sessions(id)
118            );
119
120            CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_hash);
121            CREATE INDEX IF NOT EXISTS idx_sessions_ts ON sessions(start_ts DESC);
122            CREATE INDEX IF NOT EXISTS idx_files_session ON log_files(session_id);
123            "#,
124        )?;
125
126        self.conn
127            .execute(&format!("PRAGMA user_version = {}", SCHEMA_VERSION), [])?;
128
129        Ok(())
130    }
131
132    fn drop_all_tables(&self) -> Result<()> {
133        self.conn.execute_batch(
134            r#"
135            DROP TABLE IF EXISTS log_files;
136            DROP TABLE IF EXISTS sessions;
137            DROP TABLE IF EXISTS projects;
138            "#,
139        )?;
140        Ok(())
141    }
142
143    pub fn insert_or_update_project(&self, project: &ProjectRecord) -> Result<()> {
144        self.conn.execute(
145            r#"
146            INSERT INTO projects (hash, root_path, last_scanned_at)
147            VALUES (?1, ?2, ?3)
148            ON CONFLICT(hash) DO UPDATE SET
149                root_path = COALESCE(?2, root_path),
150                last_scanned_at = ?3
151            "#,
152            params![&project.hash, &project.root_path, &project.last_scanned_at],
153        )?;
154
155        Ok(())
156    }
157
158    pub fn insert_or_update_session(&self, session: &SessionRecord) -> Result<()> {
159        self.conn.execute(
160            r#"
161            INSERT INTO sessions (id, project_hash, provider, start_ts, end_ts, snippet, is_valid)
162            VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
163            ON CONFLICT(id) DO UPDATE SET
164                project_hash = ?2,
165                provider = ?3,
166                start_ts = COALESCE(?4, start_ts),
167                end_ts = COALESCE(?5, end_ts),
168                snippet = COALESCE(?6, snippet),
169                is_valid = ?7
170            "#,
171            params![
172                &session.id,
173                &session.project_hash,
174                &session.provider,
175                &session.start_ts,
176                &session.end_ts,
177                &session.snippet,
178                &session.is_valid
179            ],
180        )?;
181
182        Ok(())
183    }
184
185    pub fn insert_or_update_log_file(&self, log_file: &LogFileRecord) -> Result<()> {
186        self.conn.execute(
187            r#"
188            INSERT INTO log_files (path, session_id, role, file_size, mod_time)
189            VALUES (?1, ?2, ?3, ?4, ?5)
190            ON CONFLICT(path) DO UPDATE SET
191                session_id = ?2,
192                role = ?3,
193                file_size = ?4,
194                mod_time = ?5
195            "#,
196            params![
197                &log_file.path,
198                &log_file.session_id,
199                &log_file.role,
200                &log_file.file_size,
201                &log_file.mod_time
202            ],
203        )?;
204
205        Ok(())
206    }
207
208    pub fn get_session_by_id(&self, session_id: &str) -> Result<Option<SessionSummary>> {
209        let mut stmt = self.conn.prepare(
210            r#"
211            SELECT id, provider, project_hash, start_ts, snippet
212            FROM sessions
213            WHERE id = ?1 AND is_valid = 1
214            "#,
215        )?;
216
217        let mut rows = stmt.query([session_id])?;
218        if let Some(row) = rows.next()? {
219            Ok(Some(SessionSummary {
220                id: row.get(0)?,
221                provider: row.get(1)?,
222                project_hash: row.get(2)?,
223                start_ts: row.get(3)?,
224                snippet: row.get(4)?,
225            }))
226        } else {
227            Ok(None)
228        }
229    }
230
231    pub fn list_sessions(
232        &self,
233        project_hash: Option<&str>,
234        limit: usize,
235    ) -> Result<Vec<SessionSummary>> {
236        let query = if let Some(hash) = project_hash {
237            format!(
238                r#"
239                SELECT id, provider, project_hash, start_ts, snippet
240                FROM sessions
241                WHERE project_hash = '{}' AND is_valid = 1
242                ORDER BY start_ts DESC
243                LIMIT {}
244                "#,
245                hash, limit
246            )
247        } else {
248            format!(
249                r#"
250                SELECT id, provider, project_hash, start_ts, snippet
251                FROM sessions
252                WHERE is_valid = 1
253                ORDER BY start_ts DESC
254                LIMIT {}
255                "#,
256                limit
257            )
258        };
259
260        let mut stmt = self.conn.prepare(&query)?;
261        let sessions = stmt
262            .query_map([], |row| {
263                Ok(SessionSummary {
264                    id: row.get(0)?,
265                    provider: row.get(1)?,
266                    project_hash: row.get(2)?,
267                    start_ts: row.get(3)?,
268                    snippet: row.get(4)?,
269                })
270            })?
271            .collect::<Result<Vec<_>, _>>()?;
272
273        Ok(sessions)
274    }
275
276    pub fn get_session_files(&self, session_id: &str) -> Result<Vec<LogFileRecord>> {
277        let mut stmt = self.conn.prepare(
278            r#"
279            SELECT path, session_id, role, file_size, mod_time
280            FROM log_files
281            WHERE session_id = ?1
282            ORDER BY role
283            "#,
284        )?;
285
286        let files = stmt
287            .query_map([session_id], |row| {
288                Ok(LogFileRecord {
289                    path: row.get(0)?,
290                    session_id: row.get(1)?,
291                    role: row.get(2)?,
292                    file_size: row.get(3)?,
293                    mod_time: row.get(4)?,
294                })
295            })?
296            .collect::<Result<Vec<_>, _>>()?;
297
298        Ok(files)
299    }
300
301    /// Find session by ID prefix (supports short IDs like "7f2abd2d")
302    pub fn find_session_by_prefix(&self, prefix: &str) -> Result<Option<String>> {
303        let mut stmt = self.conn.prepare(
304            r#"
305            SELECT id
306            FROM sessions
307            WHERE id LIKE ?1
308            LIMIT 2
309            "#,
310        )?;
311
312        let pattern = format!("{}%", prefix);
313        let mut matches: Vec<String> = stmt
314            .query_map([&pattern], |row| row.get(0))?
315            .collect::<Result<Vec<_>, _>>()?;
316
317        match matches.len() {
318            0 => Ok(None),
319            1 => Ok(Some(matches.remove(0))),
320            _ => anyhow::bail!(
321                "Ambiguous session ID prefix '{}': multiple sessions match",
322                prefix
323            ),
324        }
325    }
326
327    pub fn get_project(&self, hash: &str) -> Result<Option<ProjectRecord>> {
328        let result = self
329            .conn
330            .query_row(
331                r#"
332            SELECT hash, root_path, last_scanned_at
333            FROM projects
334            WHERE hash = ?1
335            "#,
336                [hash],
337                |row| {
338                    Ok(ProjectRecord {
339                        hash: row.get(0)?,
340                        root_path: row.get(1)?,
341                        last_scanned_at: row.get(2)?,
342                    })
343                },
344            )
345            .optional()?;
346
347        Ok(result)
348    }
349
350    pub fn list_projects(&self) -> Result<Vec<ProjectRecord>> {
351        let mut stmt = self.conn.prepare(
352            r#"
353            SELECT hash, root_path, last_scanned_at
354            FROM projects
355            ORDER BY last_scanned_at DESC
356            "#,
357        )?;
358
359        let projects = stmt
360            .query_map([], |row| {
361                Ok(ProjectRecord {
362                    hash: row.get(0)?,
363                    root_path: row.get(1)?,
364                    last_scanned_at: row.get(2)?,
365                })
366            })?
367            .collect::<Result<Vec<_>, _>>()?;
368
369        Ok(projects)
370    }
371
372    pub fn count_sessions_for_project(&self, project_hash: &str) -> Result<usize> {
373        let count: i64 = self.conn.query_row(
374            r#"
375            SELECT COUNT(*)
376            FROM sessions
377            WHERE project_hash = ?1 AND is_valid = 1
378            "#,
379            [project_hash],
380            |row| row.get(0),
381        )?;
382
383        Ok(count as usize)
384    }
385
386    pub fn vacuum(&self) -> Result<()> {
387        self.conn.execute("VACUUM", [])?;
388        println!("Database vacuumed successfully");
389        Ok(())
390    }
391
392    pub fn get_all_log_files(&self) -> Result<Vec<LogFileRecord>> {
393        let mut stmt = self.conn.prepare(
394            r#"
395            SELECT path, session_id, role, file_size, mod_time
396            FROM log_files
397            ORDER BY path
398            "#,
399        )?;
400
401        let files = stmt
402            .query_map([], |row| {
403                Ok(LogFileRecord {
404                    path: row.get(0)?,
405                    session_id: row.get(1)?,
406                    role: row.get(2)?,
407                    file_size: row.get(3)?,
408                    mod_time: row.get(4)?,
409                })
410            })?
411            .collect::<Result<Vec<_>, _>>()?;
412
413        Ok(files)
414    }
415}
416
417#[cfg(test)]
418mod tests {
419    use super::*;
420
421    #[test]
422    fn test_schema_initialization() {
423        let db = Database::open_in_memory().unwrap();
424
425        let projects = db.list_projects().unwrap();
426        assert_eq!(projects.len(), 0);
427    }
428
429    #[test]
430    fn test_insert_project() {
431        let db = Database::open_in_memory().unwrap();
432
433        let project = ProjectRecord {
434            hash: "abc123".to_string(),
435            root_path: Some("/path/to/project".to_string()),
436            last_scanned_at: Some("2025-12-10T10:00:00Z".to_string()),
437        };
438
439        db.insert_or_update_project(&project).unwrap();
440
441        let retrieved = db.get_project("abc123").unwrap().unwrap();
442        assert_eq!(retrieved.hash, "abc123");
443        assert_eq!(retrieved.root_path, Some("/path/to/project".to_string()));
444    }
445
446    #[test]
447    fn test_insert_session_with_fk() {
448        let db = Database::open_in_memory().unwrap();
449
450        let project = ProjectRecord {
451            hash: "abc123".to_string(),
452            root_path: Some("/path/to/project".to_string()),
453            last_scanned_at: Some("2025-12-10T10:00:00Z".to_string()),
454        };
455        db.insert_or_update_project(&project).unwrap();
456
457        let session = SessionRecord {
458            id: "session-001".to_string(),
459            project_hash: "abc123".to_string(),
460            provider: "claude".to_string(),
461            start_ts: Some("2025-12-10T10:05:00Z".to_string()),
462            end_ts: Some("2025-12-10T10:15:00Z".to_string()),
463            snippet: Some("Test session".to_string()),
464            is_valid: true,
465        };
466
467        db.insert_or_update_session(&session).unwrap();
468
469        let sessions = db.list_sessions(Some("abc123"), 10).unwrap();
470        assert_eq!(sessions.len(), 1);
471        assert_eq!(sessions[0].id, "session-001");
472        assert_eq!(sessions[0].provider, "claude");
473    }
474
475    #[test]
476    fn test_insert_log_file() {
477        let db = Database::open_in_memory().unwrap();
478
479        let project = ProjectRecord {
480            hash: "abc123".to_string(),
481            root_path: Some("/path/to/project".to_string()),
482            last_scanned_at: Some("2025-12-10T10:00:00Z".to_string()),
483        };
484        db.insert_or_update_project(&project).unwrap();
485
486        let session = SessionRecord {
487            id: "session-001".to_string(),
488            project_hash: "abc123".to_string(),
489            provider: "claude".to_string(),
490            start_ts: Some("2025-12-10T10:05:00Z".to_string()),
491            end_ts: None,
492            snippet: None,
493            is_valid: true,
494        };
495        db.insert_or_update_session(&session).unwrap();
496
497        let log_file = LogFileRecord {
498            path: "/path/to/log.jsonl".to_string(),
499            session_id: "session-001".to_string(),
500            role: "main".to_string(),
501            file_size: Some(1024),
502            mod_time: Some("2025-12-10T10:05:00Z".to_string()),
503        };
504
505        db.insert_or_update_log_file(&log_file).unwrap();
506
507        let files = db.get_session_files("session-001").unwrap();
508        assert_eq!(files.len(), 1);
509        assert_eq!(files[0].path, "/path/to/log.jsonl");
510        assert_eq!(files[0].role, "main");
511    }
512
513    #[test]
514    fn test_list_sessions_query() {
515        let db = Database::open_in_memory().unwrap();
516
517        let project = ProjectRecord {
518            hash: "abc123".to_string(),
519            root_path: Some("/path/to/project".to_string()),
520            last_scanned_at: Some("2025-12-10T10:00:00Z".to_string()),
521        };
522        db.insert_or_update_project(&project).unwrap();
523
524        for i in 1..=5 {
525            let session = SessionRecord {
526                id: format!("session-{:03}", i),
527                project_hash: "abc123".to_string(),
528                provider: "claude".to_string(),
529                start_ts: Some(format!("2025-12-10T10:{:02}:00Z", i)),
530                end_ts: None,
531                snippet: Some(format!("Session {}", i)),
532                is_valid: true,
533            };
534            db.insert_or_update_session(&session).unwrap();
535        }
536
537        let sessions = db.list_sessions(Some("abc123"), 10).unwrap();
538        assert_eq!(sessions.len(), 5);
539
540        let sessions_limited = db.list_sessions(Some("abc123"), 3).unwrap();
541        assert_eq!(sessions_limited.len(), 3);
542    }
543
544    #[test]
545    fn test_count_sessions_for_project() {
546        let db = Database::open_in_memory().unwrap();
547
548        let project = ProjectRecord {
549            hash: "abc123".to_string(),
550            root_path: Some("/path/to/project".to_string()),
551            last_scanned_at: Some("2025-12-10T10:00:00Z".to_string()),
552        };
553        db.insert_or_update_project(&project).unwrap();
554
555        for i in 1..=3 {
556            let session = SessionRecord {
557                id: format!("session-{:03}", i),
558                project_hash: "abc123".to_string(),
559                provider: "claude".to_string(),
560                start_ts: Some(format!("2025-12-10T10:{:02}:00Z", i)),
561                end_ts: None,
562                snippet: None,
563                is_valid: true,
564            };
565            db.insert_or_update_session(&session).unwrap();
566        }
567
568        let count = db.count_sessions_for_project("abc123").unwrap();
569        assert_eq!(count, 3);
570    }
571
572    #[test]
573    fn test_schema_version_set_on_init() {
574        let db = Database::open_in_memory().unwrap();
575
576        let version: i32 = db
577            .conn
578            .query_row("PRAGMA user_version", [], |row| row.get(0))
579            .unwrap();
580
581        assert_eq!(version, SCHEMA_VERSION);
582    }
583
584    #[test]
585    fn test_schema_rebuild_on_version_mismatch() {
586        let conn = Connection::open_in_memory().unwrap();
587
588        conn.execute_batch(
589            r#"
590            CREATE TABLE projects (hash TEXT PRIMARY KEY);
591            CREATE TABLE sessions (id TEXT PRIMARY KEY);
592            PRAGMA user_version = 999;
593            "#,
594        )
595        .unwrap();
596
597        conn.execute(
598            "INSERT INTO projects (hash) VALUES (?1)",
599            params!["old_data"],
600        )
601        .unwrap();
602
603        let db = Database { conn };
604        db.init_schema().unwrap();
605
606        let version: i32 = db
607            .conn
608            .query_row("PRAGMA user_version", [], |row| row.get(0))
609            .unwrap();
610        assert_eq!(version, SCHEMA_VERSION);
611
612        let count: i64 = db
613            .conn
614            .query_row("SELECT COUNT(*) FROM projects", [], |row| row.get(0))
615            .unwrap();
616        assert_eq!(count, 0);
617    }
618
619    #[test]
620    fn test_schema_preserved_on_version_match() {
621        let db = Database::open_in_memory().unwrap();
622
623        let project = ProjectRecord {
624            hash: "abc123".to_string(),
625            root_path: Some("/path/to/project".to_string()),
626            last_scanned_at: Some("2025-12-10T10:00:00Z".to_string()),
627        };
628        db.insert_or_update_project(&project).unwrap();
629
630        db.init_schema().unwrap();
631
632        let retrieved = db.get_project("abc123").unwrap();
633        assert!(retrieved.is_some());
634        assert_eq!(retrieved.unwrap().hash, "abc123");
635    }
636}