// kardo_core/db/project_db.rs
1//! Project-level database operations.
2//!
3//! Each Git project has its own `SQLite` DB at `<project>/.kardo/kardo.db`.
4
5use std::path::{Path, PathBuf};
6
7use rusqlite::{params, Connection, OptionalExtension, Result as RusqliteResult};
8use serde::{Deserialize, Serialize};
9
10use super::{apply_pragmas, migrations, DbResult};
11
/// A scan history entry.
///
/// One row of the `scan_history` table. The trailing three fields were added
/// after the table first shipped and are `#[serde(default)]` so records
/// serialized before the schema grew still deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanHistoryRecord {
    /// SQLite rowid of the entry.
    pub id: i64,
    /// Overall score recorded for the scan.
    pub total_score: f64,
    /// Traffic-light status string for the scan.
    pub traffic_light: String,
    /// Number of files covered by the scan.
    pub files_scanned: i64,
    /// Wall-clock duration of the scan in milliseconds.
    pub duration_ms: i64,
    /// Per-component score breakdown as a JSON string, when recorded.
    pub component_scores: Option<String>,
    /// DB-assigned timestamp of when the scan ran.
    pub scanned_at: String,
    /// Git commit the scan ran against, when available.
    #[serde(default)]
    pub commit_sha: Option<String>,
    /// Git branch the scan ran against, when available.
    #[serde(default)]
    pub branch: Option<String>,
    /// Score change relative to the immediately preceding scan.
    #[serde(default)]
    pub delta_from_previous: Option<f64>,
}
29
/// A baseline snapshot for comparison.
///
/// Rows live in the `baselines` table; `ProjectDb::set_baseline` deactivates
/// all other rows when inserting, so at most one record is active at a time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BaselineRecord {
    /// SQLite rowid.
    pub id: i64,
    /// Human-readable baseline name (e.g. `"auto-bump"`).
    pub name: String,
    /// Total score captured when the baseline was created.
    pub total_score: f64,
    /// Git commit the baseline corresponds to, when known.
    pub commit_sha: Option<String>,
    /// Component score breakdown (JSON string), when captured.
    pub component_scores: Option<String>,
    /// Whether this is the baseline currently used for comparisons.
    pub is_active: bool,
    /// DB-assigned creation timestamp.
    pub created_at: String,
}
41
/// A full-text search result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path stored in the FTS table's `path` column.
    pub path: String,
    /// Excerpt around the match, with hits wrapped in `<mark>` tags.
    pub snippet: String,
    /// FTS5 relevance rank; queries order by it ascending (best match first).
    pub rank: f64,
}
49
/// A classification record from the DB.
///
/// One `classifications` row joined with the file's relative path
/// (see `ProjectDb::get_classifications`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassificationRecord {
    /// Rowid of the classification entry.
    pub id: i64,
    /// Rowid of the classified file in `files`.
    pub file_id: i64,
    /// The file's path relative to the project root.
    pub relative_path: String,
    /// Assigned document type.
    pub doc_type: String,
    /// Optional finer-grained category.
    pub subcategory: Option<String>,
    /// Classifier confidence; user corrections are stored at 1.0.
    pub confidence: f64,
    /// Origin of the classification (e.g. `'user'` for manual corrections).
    pub source: String,
    /// DB-assigned creation timestamp.
    pub created_at: String,
}
62
/// A tracked file record.
///
/// Mirrors one row of the `files` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    /// SQLite rowid.
    pub id: i64,
    /// Absolute path on disk.
    pub path: String,
    /// Path relative to the project root (conflict key for upserts).
    pub relative_path: String,
    /// Hash of the file contents, when computed.
    pub content_hash: Option<String>,
    /// Size in bytes, when known.
    pub size: Option<i64>,
    /// Last-modified timestamp string, when known.
    pub modified_at: Option<String>,
    /// File extension, when present.
    pub extension: Option<String>,
    /// Whether the file is treated as Markdown.
    pub is_markdown: bool,
    /// DB-assigned creation timestamp.
    pub created_at: String,
    /// DB-assigned last-update timestamp.
    pub updated_at: String,
}
77
/// Project database handle.
///
/// Owns the SQLite connection for one project's `.kardo/kardo.db` file.
pub struct ProjectDb {
    /// Open connection; `pub(crate)` so sibling db modules can run queries.
    pub(crate) conn: Connection,
    /// Path this DB was opened from (retained for diagnostics; unused today).
    #[allow(dead_code)]
    db_path: PathBuf,
}
84
85impl ProjectDb {
86    /// Open or create a project database at the given path.
87    pub fn open(db_path: &Path) -> DbResult<Self> {
88        if let Some(parent) = db_path.parent() {
89            std::fs::create_dir_all(parent)?;
90        }
91
92        let conn = Connection::open(db_path)?;
93        apply_pragmas(&conn)?;
94        migrations::run_project_migrations(&conn)?;
95
96        Ok(Self {
97            conn,
98            db_path: db_path.to_path_buf(),
99        })
100    }
101
102    /// Insert a new file record. Returns the row id.
103    pub fn insert_file(&self, file: &FileRecord) -> DbResult<i64> {
104        self.conn.execute(
105            "INSERT INTO files (path, relative_path, content_hash, size, modified_at, extension, is_markdown)
106             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
107            params![
108                file.path,
109                file.relative_path,
110                file.content_hash,
111                file.size,
112                file.modified_at,
113                file.extension,
114                file.is_markdown,
115            ],
116        )?;
117        Ok(self.conn.last_insert_rowid())
118    }
119
120    /// Get a file record by absolute path.
121    pub fn get_file_by_path(&self, path: &str) -> DbResult<Option<FileRecord>> {
122        let result = self
123            .conn
124            .query_row(
125                "SELECT id, path, relative_path, content_hash, size, modified_at, extension, is_markdown, created_at, updated_at
126                 FROM files WHERE path = ?1",
127                params![path],
128                |row| {
129                    Ok(FileRecord {
130                        id: row.get(0)?,
131                        path: row.get(1)?,
132                        relative_path: row.get(2)?,
133                        content_hash: row.get(3)?,
134                        size: row.get(4)?,
135                        modified_at: row.get(5)?,
136                        extension: row.get(6)?,
137                        is_markdown: row.get(7)?,
138                        created_at: row.get(8)?,
139                        updated_at: row.get(9)?,
140                    })
141                },
142            )
143            .optional()?;
144        Ok(result)
145    }
146
147    /// Update the content hash for a file by id.
148    pub fn update_file_hash(&self, id: i64, hash: &str) -> DbResult<()> {
149        self.conn.execute(
150            "UPDATE files SET content_hash = ?1, updated_at = datetime('now') WHERE id = ?2",
151            params![hash, id],
152        )?;
153        Ok(())
154    }
155
156    /// List all file records ordered by path.
157    pub fn list_files(&self) -> DbResult<Vec<FileRecord>> {
158        let mut stmt = self.conn.prepare(
159            "SELECT id, path, relative_path, content_hash, size, modified_at, extension, is_markdown, created_at, updated_at
160             FROM files ORDER BY path",
161        )?;
162        let rows = stmt.query_map([], |row| {
163            Ok(FileRecord {
164                id: row.get(0)?,
165                path: row.get(1)?,
166                relative_path: row.get(2)?,
167                content_hash: row.get(3)?,
168                size: row.get(4)?,
169                modified_at: row.get(5)?,
170                extension: row.get(6)?,
171                is_markdown: row.get(7)?,
172                created_at: row.get(8)?,
173                updated_at: row.get(9)?,
174            })
175        })?;
176
177        let mut files = Vec::new();
178        for row in rows {
179            files.push(row?);
180        }
181        Ok(files)
182    }
183
184    /// Get a project config value.
185    pub fn get_config(&self, key: &str) -> DbResult<Option<String>> {
186        let result = self
187            .conn
188            .query_row(
189                "SELECT value FROM project_config WHERE key = ?1",
190                params![key],
191                |row| row.get(0),
192            )
193            .optional()?;
194        Ok(result)
195    }
196
197    /// Set a project config value.
198    pub fn set_config(&self, key: &str, value: &str) -> DbResult<()> {
199        self.conn.execute(
200            "INSERT INTO project_config (key, value) VALUES (?1, ?2)
201             ON CONFLICT(key) DO UPDATE SET value = excluded.value",
202            params![key, value],
203        )?;
204        Ok(())
205    }
206
207    /// Insert a scan history record. Returns the row id.
208    pub fn insert_scan_history(
209        &self,
210        total_score: f64,
211        traffic_light: &str,
212        files_scanned: usize,
213        duration_ms: u64,
214        component_scores: &str,
215    ) -> DbResult<i64> {
216        self.conn.execute(
217            "INSERT INTO scan_history (total_score, traffic_light, files_scanned, duration_ms, component_scores)
218             VALUES (?1, ?2, ?3, ?4, ?5)",
219            params![total_score, traffic_light, files_scanned as i64, duration_ms as i64, component_scores],
220        )?;
221        Ok(self.conn.last_insert_rowid())
222    }
223
224    /// Get the previous scan's component_scores JSON (most recent scan).
225    pub fn get_previous_component_scores(&self) -> DbResult<Option<String>> {
226        let result: Option<String> = self.conn.query_row(
227            "SELECT component_scores FROM scan_history ORDER BY scanned_at DESC LIMIT 1",
228            [],
229            |row| row.get(0),
230        ).optional()?;
231        Ok(result)
232    }
233
234    /// Insert a scan history record with git context and auto-computed delta.
235    #[allow(clippy::too_many_arguments)]
236    pub fn insert_scan_history_v2(
237        &self,
238        total_score: f64,
239        traffic_light: &str,
240        files_scanned: usize,
241        duration_ms: u64,
242        component_scores: &str,
243        commit_sha: Option<&str>,
244        branch: Option<&str>,
245    ) -> DbResult<(i64, Option<f64>)> {
246        // Get previous scan score to compute delta
247        let prev_score: Option<f64> = self.conn.query_row(
248            "SELECT total_score FROM scan_history ORDER BY scanned_at DESC LIMIT 1",
249            [],
250            |row| row.get(0),
251        ).optional()?;
252
253        let delta = prev_score.map(|prev| total_score - prev);
254
255        self.conn.execute(
256            "INSERT INTO scan_history (total_score, traffic_light, files_scanned, duration_ms, component_scores, commit_sha, branch, delta_from_previous)
257             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
258            params![total_score, traffic_light, files_scanned as i64, duration_ms as i64, component_scores, commit_sha, branch, delta],
259        )?;
260        Ok((self.conn.last_insert_rowid(), delta))
261    }
262
263    /// Insert a quality issue record.
264    #[allow(clippy::too_many_arguments)]
265    pub fn insert_quality_issue(
266        &self,
267        scan_id: i64,
268        issue_id: &str,
269        file_path: Option<&str>,
270        category: &str,
271        severity: &str,
272        title: &str,
273        attribution: &str,
274        suggestion: Option<&str>,
275    ) -> DbResult<()> {
276        self.conn.execute(
277            "INSERT INTO quality_issues (scan_id, issue_id, file_path, category, severity, title, attribution, suggestion)
278             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
279            params![scan_id, issue_id, file_path, category, severity, title, attribution, suggestion],
280        )?;
281        Ok(())
282    }
283
284    /// Upsert a file record (insert or update by relative_path).
285    #[allow(clippy::too_many_arguments)]
286    pub fn upsert_file(
287        &self,
288        path: &str,
289        relative_path: &str,
290        content_hash: Option<&str>,
291        size: Option<i64>,
292        modified_at: Option<&str>,
293        extension: Option<&str>,
294        is_markdown: bool,
295    ) -> DbResult<()> {
296        self.conn.execute(
297            "INSERT INTO files (path, relative_path, content_hash, size, modified_at, extension, is_markdown)
298             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
299             ON CONFLICT(relative_path) DO UPDATE SET
300               path = excluded.path,
301               content_hash = excluded.content_hash,
302               size = excluded.size,
303               modified_at = excluded.modified_at,
304               extension = excluded.extension,
305               is_markdown = excluded.is_markdown,
306               updated_at = datetime('now')",
307            params![path, relative_path, content_hash, size, modified_at, extension, is_markdown],
308        )?;
309        Ok(())
310    }
311
312    /// Get scan history entries (most recent first, limited).
313    pub fn get_scan_history(&self, limit: usize) -> DbResult<Vec<ScanHistoryRecord>> {
314        let mut stmt = self.conn.prepare(
315            "SELECT id, total_score, traffic_light, files_scanned, duration_ms, component_scores, scanned_at,
316                    commit_sha, branch, delta_from_previous
317             FROM scan_history ORDER BY scanned_at DESC LIMIT ?1",
318        )?;
319        let records = stmt.query_map(params![limit as i64], |row| {
320            Ok(ScanHistoryRecord {
321                id: row.get(0)?,
322                total_score: row.get(1)?,
323                traffic_light: row.get(2)?,
324                files_scanned: row.get(3)?,
325                duration_ms: row.get(4)?,
326                component_scores: row.get(5)?,
327                scanned_at: row.get(6)?,
328                commit_sha: row.get(7)?,
329                branch: row.get(8)?,
330                delta_from_previous: row.get(9)?,
331            })
332        })?;
333        records.collect::<Result<Vec<_>, _>>().map_err(|e| e.into())
334    }
335
336    /// Populate FTS index for a file.
337    pub fn index_file_content(&self, relative_path: &str, content: &str) -> DbResult<()> {
338        // Delete old entry first
339        self.conn.execute(
340            "DELETE FROM files_fts WHERE path = ?1",
341            params![relative_path],
342        )?;
343        // Insert new content
344        self.conn.execute(
345            "INSERT INTO files_fts (path, content) VALUES (?1, ?2)",
346            params![relative_path, content],
347        )?;
348        Ok(())
349    }
350
351    /// Search files using FTS5 full-text search.
352    pub fn search_files(&self, query: &str, limit: usize) -> DbResult<Vec<SearchResult>> {
353        if query.trim().is_empty() {
354            return Ok(vec![]);
355        }
356
357        let fts_query = query
358            .split_whitespace()
359            .map(|w| format!("\"{}\"*", w.replace('"', "")))
360            .collect::<Vec<_>>()
361            .join(" ");
362
363        let mut stmt = self.conn.prepare(
364            "SELECT path, snippet(files_fts, 1, '<mark>', '</mark>', '...', 32) as snippet,
365                    rank
366             FROM files_fts
367             WHERE files_fts MATCH ?1
368             ORDER BY rank
369             LIMIT ?2",
370        )?;
371
372        let results = stmt.query_map(params![fts_query, limit as i64], |row| {
373            Ok(SearchResult {
374                path: row.get(0)?,
375                snippet: row.get(1)?,
376                rank: row.get(2)?,
377            })
378        })?;
379
380        results
381            .collect::<RusqliteResult<Vec<_>>>()
382            .map_err(|e| e.into())
383    }
384
385    /// Clear the entire FTS index.
386    pub fn clear_fts_index(&self) -> DbResult<()> {
387        self.conn.execute("DELETE FROM files_fts", [])?;
388        Ok(())
389    }
390
391    /// Get file ID by relative path.
392    pub fn get_file_id_by_relative_path(&self, relative_path: &str) -> DbResult<Option<i64>> {
393        let result = self
394            .conn
395            .query_row(
396                "SELECT id FROM files WHERE relative_path = ?1",
397                params![relative_path],
398                |row| row.get(0),
399            )
400            .optional()?;
401        Ok(result)
402    }
403
404    /// Insert a classification record for a file.
405    pub fn insert_classification(
406        &self,
407        file_id: i64,
408        doc_type: &str,
409        subcategory: Option<&str>,
410        confidence: f64,
411        source: &str,
412    ) -> DbResult<i64> {
413        self.conn.execute(
414            "INSERT INTO classifications (file_id, doc_type, subcategory, confidence, source)
415             VALUES (?1, ?2, ?3, ?4, ?5)",
416            params![file_id, doc_type, subcategory, confidence, source],
417        )?;
418        Ok(self.conn.last_insert_rowid())
419    }
420
421    /// Clear all classifications (before re-scan).
422    pub fn clear_classifications(&self) -> DbResult<()> {
423        self.conn.execute("DELETE FROM classifications", [])?;
424        Ok(())
425    }
426
427    /// Remove files from DB that are no longer on disk.
428    /// Accepts a set of relative paths that currently exist.
429    /// Deletes all files (and their classifications) not in the set.
430    pub fn purge_stale_files(&self, current_paths: &std::collections::HashSet<String>) -> DbResult<usize> {
431        // Get all relative paths in DB
432        let mut stmt = self.conn.prepare("SELECT id, relative_path FROM files")?;
433        let stale_ids: Vec<i64> = stmt
434            .query_map([], |row| {
435                let id: i64 = row.get(0)?;
436                let rp: String = row.get(1)?;
437                Ok((id, rp))
438            })?
439            .filter_map(|r| r.ok())
440            .filter(|(_, rp)| !current_paths.contains(rp))
441            .map(|(id, _)| id)
442            .collect();
443
444        let count = stale_ids.len();
445        for id in &stale_ids {
446            self.conn.execute("DELETE FROM classifications WHERE file_id = ?1", params![id])?;
447            self.conn.execute("DELETE FROM files WHERE id = ?1", params![id])?;
448        }
449        Ok(count)
450    }
451
452    /// Insert a user correction and update classification to user source with confidence 1.0.
453    pub fn insert_user_correction(
454        &self,
455        file_id: i64,
456        doc_type: &str,
457        subcategory: Option<&str>,
458    ) -> DbResult<()> {
459        // Get current classification doc_type for original_type
460        let original_type: Option<String> = self
461            .conn
462            .query_row(
463                "SELECT doc_type FROM classifications WHERE file_id = ?1 ORDER BY created_at DESC LIMIT 1",
464                params![file_id],
465                |row| row.get(0),
466            )
467            .optional()?;
468
469        let original = original_type.unwrap_or_else(|| "unknown".to_string());
470
471        // Insert user correction record
472        self.conn.execute(
473            "INSERT INTO user_corrections (file_id, original_type, corrected_type) VALUES (?1, ?2, ?3)",
474            params![file_id, original, doc_type],
475        )?;
476
477        // Replace classification with user override
478        self.conn.execute(
479            "DELETE FROM classifications WHERE file_id = ?1",
480            params![file_id],
481        )?;
482        self.conn.execute(
483            "INSERT INTO classifications (file_id, doc_type, subcategory, confidence, source) VALUES (?1, ?2, ?3, 1.0, 'user')",
484            params![file_id, doc_type, subcategory],
485        )?;
486
487        Ok(())
488    }
489
490    /// Delete classification for a specific file.
491    pub fn delete_classification_for_file(&self, file_id: i64) -> DbResult<()> {
492        self.conn.execute(
493            "DELETE FROM classifications WHERE file_id = ?1",
494            params![file_id],
495        )?;
496        Ok(())
497    }
498
499    /// Get all classifications with file paths (most recent per file).
500    pub fn get_classifications(&self) -> DbResult<Vec<ClassificationRecord>> {
501        let mut stmt = self.conn.prepare(
502            "SELECT c.id, c.file_id, f.relative_path, c.doc_type, c.subcategory, c.confidence, c.source, c.created_at
503             FROM classifications c
504             JOIN files f ON f.id = c.file_id
505             ORDER BY f.relative_path",
506        )?;
507        let rows = stmt.query_map([], |row| {
508            Ok(ClassificationRecord {
509                id: row.get(0)?,
510                file_id: row.get(1)?,
511                relative_path: row.get(2)?,
512                doc_type: row.get(3)?,
513                subcategory: row.get(4)?,
514                confidence: row.get(5)?,
515                source: row.get(6)?,
516                created_at: row.get(7)?,
517            })
518        })?;
519        rows.collect::<Result<Vec<_>, _>>().map_err(|e| e.into())
520    }
521
522    /// Create a named baseline from the current score.
523    pub fn set_baseline(
524        &self,
525        name: &str,
526        total_score: f64,
527        commit_sha: Option<&str>,
528        component_scores: Option<&str>,
529    ) -> DbResult<i64> {
530        // Deactivate all existing baselines
531        self.conn.execute("UPDATE baselines SET is_active = 0", [])?;
532        // Insert new active baseline
533        self.conn.execute(
534            "INSERT INTO baselines (name, total_score, commit_sha, component_scores, is_active)
535             VALUES (?1, ?2, ?3, ?4, 1)",
536            params![name, total_score, commit_sha, component_scores],
537        )?;
538        Ok(self.conn.last_insert_rowid())
539    }
540
541    /// Get the currently active baseline.
542    pub fn get_active_baseline(&self) -> DbResult<Option<BaselineRecord>> {
543        let result = self.conn.query_row(
544            "SELECT id, name, total_score, commit_sha, component_scores, is_active, created_at
545             FROM baselines WHERE is_active = 1 LIMIT 1",
546            [],
547            |row| {
548                Ok(BaselineRecord {
549                    id: row.get(0)?,
550                    name: row.get(1)?,
551                    total_score: row.get(2)?,
552                    commit_sha: row.get(3)?,
553                    component_scores: row.get(4)?,
554                    is_active: row.get::<_, i32>(5)? != 0,
555                    created_at: row.get(6)?,
556                })
557            },
558        ).optional()?;
559        Ok(result)
560    }
561
562    /// Return the last N total scores (0.0–1.0) ordered oldest to newest.
563    /// Used for sparkline visualization in the CLI header.
564    pub fn get_last_n_scores(&self, n: usize) -> DbResult<Vec<f64>> {
565        let mut stmt = self.conn.prepare(
566            "SELECT total_score FROM scan_history ORDER BY scanned_at DESC LIMIT ?1",
567        )?;
568        let rows = stmt.query_map(params![n as i64], |row| row.get::<_, f64>(0))?;
569        let mut scores: Vec<f64> = rows.collect::<rusqlite::Result<Vec<_>>>()?;
570        // Reverse so result is oldest-first (left→right = past→present)
571        scores.reverse();
572        Ok(scores)
573    }
574
575    /// Bump (replace) the active baseline with the latest scan data.
576    pub fn bump_baseline(&self) -> DbResult<bool> {
577        let latest = self.conn.query_row(
578            "SELECT total_score, commit_sha, component_scores FROM scan_history ORDER BY scanned_at DESC LIMIT 1",
579            [],
580            |row| Ok((row.get::<_, f64>(0)?, row.get::<_, Option<String>>(1)?, row.get::<_, Option<String>>(2)?)),
581        ).optional()?;
582
583        if let Some((score, sha, components)) = latest {
584            self.set_baseline("auto-bump", score, sha.as_deref(), components.as_deref())?;
585            Ok(true)
586        } else {
587            Ok(false)
588        }
589    }
590}