// codelens_engine/db/mod.rs

use anyhow::{Context, Result};
use rusqlite::{Connection, OptionalExtension};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

mod ops;

#[cfg(test)]
mod tests;
12
// Invariant: this MUST equal the highest `version` listed in
// `IndexDb::MIGRATIONS`. `migrate()` returns early as soon as the stored
// version is `>= SCHEMA_VERSION`, so a stale value here means databases
// already sitting at that value never receive the migrations added after
// it. Currently 7, covering the P1-4 `end_line` ALTER in migration 7; the
// regression test
// `opening_a_db_at_the_previous_schema_version_runs_every_subsequent_migration`
// locks the invariant in.
const SCHEMA_VERSION: i64 = 7;
21
22/// SQLite-backed symbol and import index for a single project.
23pub struct IndexDb {
24    pub(super) conn: Connection,
25}
26
/// Row shape for an indexed file.
///
/// Field names mirror the columns declared for the `files` table in
/// migration 1, except `indexed_at`, which has no counterpart here.
#[derive(Debug, Clone)]
pub struct FileRow {
    pub id: i64,
    pub relative_path: String,
    pub mtime_ms: i64,
    pub content_hash: String,
    pub size_bytes: i64,
    /// Optional, matching the nullable `language` column.
    pub language: Option<String>,
}
36
/// Row shape for an indexed symbol.
///
/// Field names mirror the columns of the `symbols` table (migration 1) plus
/// the `end_line` column added by migration 7.
#[derive(Debug, Clone)]
pub struct SymbolRow {
    pub id: i64,
    pub file_id: i64,
    pub name: String,
    pub kind: String,
    pub line: i64,
    pub column_num: i64,
    pub start_byte: i64,
    pub end_byte: i64,
    pub signature: String,
    pub name_path: String,
    /// Optional, matching the nullable self-referencing `parent_id` column.
    pub parent_id: Option<i64>,
    /// Inclusive end line (1-indexed). Added in migration 7. Rows
    /// written before the migration read back as 0 → callers treat
    /// it as "unknown" and fall back to `line`.
    pub end_line: i64,
}
55
/// Symbol with resolved file path — for embedding pipeline batch processing.
///
/// Carries `file_path` as a string instead of a `file_id`, and omits the row
/// id, column, parent and end-line fields that `SymbolRow` has.
#[derive(Debug, Clone)]
pub struct SymbolWithFile {
    pub name: String,
    pub kind: String,
    pub file_path: String,
    pub line: i64,
    pub signature: String,
    pub name_path: String,
    pub start_byte: i64,
    pub end_byte: i64,
}
68
/// Row shape for an import edge.
///
/// Field names mirror the columns of the `imports` table created in
/// migration 1.
#[derive(Debug, Clone)]
pub struct ImportRow {
    pub source_file_id: i64,
    pub target_path: String,
    pub raw_import: String,
}
75
76#[derive(Debug, Clone, Default, serde::Serialize)]
77pub struct IndexFailureSummary {
78    pub total_failures: usize,
79    pub recent_failures: usize,
80    pub stale_failures: usize,
81    pub persistent_failures: usize,
82}
83
84/// Per-directory aggregate: file count, symbol count, import count.
85#[derive(Debug, Clone, serde::Serialize)]
86pub struct DirStats {
87    pub dir: String,
88    pub files: usize,
89    pub symbols: usize,
90    pub imports_from_others: usize,
91}
92
/// Symbol data for insertion (no id yet).
///
/// Uses borrowed references to avoid `String` clones during bulk insert; the
/// lifetime `'a` ties every string field to data owned by the caller.
#[derive(Debug, Clone)]
pub struct NewSymbol<'a> {
    pub name: &'a str,
    pub kind: &'a str,
    pub line: i64,
    pub column_num: i64,
    pub start_byte: i64,
    pub end_byte: i64,
    pub signature: &'a str,
    pub name_path: &'a str,
    pub parent_id: Option<i64>,
    /// Inclusive end line (1-indexed). 0 = unknown.
    pub end_line: i64,
}
109
/// Import data for insertion.
///
/// Holds the two string columns of an `imports` row; the source file id is
/// not part of this struct.
#[derive(Debug, Clone)]
pub struct NewImport {
    pub target_path: String,
    pub raw_import: String,
}
116
/// Call edge data for insertion.
///
/// Holds the caller/callee names and line of a `calls` row; the caller's
/// file id is not part of this struct.
#[derive(Debug, Clone)]
pub struct NewCall {
    pub caller_name: String,
    pub callee_name: String,
    pub line: i64,
}
124
125// Re-export free functions for crate-internal use (e.g. symbols::writer uses db::upsert_file)
126pub(crate) use ops::{
127    all_file_paths, delete_file, get_fresh_file, insert_calls, insert_imports, insert_symbols,
128    upsert_file,
129};
130
131impl IndexDb {
132    /// Open or create the index database at the given path.
133    pub fn open(db_path: &Path) -> Result<Self> {
134        open_derived_sqlite_with_recovery(db_path, "symbol index", || {
135            let conn = Connection::open(db_path)
136                .with_context(|| format!("failed to open db at {}", db_path.display()))?;
137            conn.execute_batch(
138                "PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA foreign_keys = ON; PRAGMA busy_timeout = 5000; PRAGMA cache_size = -8000; PRAGMA auto_vacuum = INCREMENTAL;",
139            )?;
140            let mut db = Self { conn };
141            db.migrate()?;
142            Ok(db)
143        })
144    }
145
146    /// Open existing database in read-only mode (no migration, no WAL creation).
147    /// Returns None if the DB file does not exist.
148    pub fn open_readonly(db_path: &Path) -> Result<Option<Self>> {
149        if !db_path.is_file() {
150            return Ok(None);
151        }
152        let conn = Connection::open_with_flags(
153            db_path,
154            rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
155        )
156        .with_context(|| format!("failed to open db readonly at {}", db_path.display()))?;
157        conn.execute_batch("PRAGMA busy_timeout = 5000;")?;
158        Ok(Some(Self { conn }))
159    }
160
161    /// Open an in-memory database (for testing).
162    pub fn open_memory() -> Result<Self> {
163        let conn = Connection::open_in_memory()?;
164        conn.execute_batch("PRAGMA foreign_keys = ON;")?;
165        let mut db = Self { conn };
166        db.migrate()?;
167        Ok(db)
168    }
169
170    /// Sequential migrations. Each entry is (version, SQL).
171    /// Applied in order; only migrations newer than the current version run.
172    const MIGRATIONS: &'static [(i64, &'static str)] = &[
173        (
174            1,
175            "CREATE TABLE IF NOT EXISTS files (
176                id INTEGER PRIMARY KEY,
177                relative_path TEXT UNIQUE NOT NULL,
178                mtime_ms INTEGER NOT NULL,
179                content_hash TEXT NOT NULL,
180                size_bytes INTEGER NOT NULL,
181                language TEXT,
182                indexed_at INTEGER NOT NULL
183            );
184            CREATE TABLE IF NOT EXISTS symbols (
185                id INTEGER PRIMARY KEY,
186                file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
187                name TEXT NOT NULL,
188                kind TEXT NOT NULL,
189                line INTEGER NOT NULL,
190                column_num INTEGER NOT NULL,
191                start_byte INTEGER NOT NULL,
192                end_byte INTEGER NOT NULL,
193                signature TEXT NOT NULL,
194                name_path TEXT NOT NULL,
195                parent_id INTEGER REFERENCES symbols(id)
196            );
197            CREATE TABLE IF NOT EXISTS imports (
198                source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
199                target_path TEXT NOT NULL,
200                raw_import TEXT NOT NULL,
201                PRIMARY KEY (source_file_id, target_path)
202            );
203            CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
204            CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
205            CREATE INDEX IF NOT EXISTS idx_symbols_name_path ON symbols(name_path);
206            CREATE INDEX IF NOT EXISTS idx_imports_target ON imports(target_path);",
207        ),
208        (
209            2,
210            "CREATE TABLE IF NOT EXISTS calls (
211                id INTEGER PRIMARY KEY,
212                caller_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
213                caller_name TEXT NOT NULL,
214                callee_name TEXT NOT NULL,
215                line INTEGER NOT NULL
216            );
217            CREATE INDEX IF NOT EXISTS idx_calls_callee ON calls(callee_name);
218            CREATE INDEX IF NOT EXISTS idx_calls_caller ON calls(caller_name);
219            CREATE INDEX IF NOT EXISTS idx_calls_file ON calls(caller_file_id);",
220        ),
221        (
222            3,
223            "CREATE TABLE IF NOT EXISTS index_failures (
224                id INTEGER PRIMARY KEY,
225                file_path TEXT NOT NULL,
226                error_type TEXT NOT NULL,
227                error_message TEXT NOT NULL,
228                failed_at INTEGER NOT NULL,
229                retry_count INTEGER NOT NULL DEFAULT 0,
230                UNIQUE(file_path)
231            );
232            CREATE INDEX IF NOT EXISTS idx_failures_path ON index_failures(file_path);",
233        ),
234        (
235            4,
236            "CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
237                name, name_path, signature,
238                content=symbols, content_rowid=id,
239                tokenize='unicode61 remove_diacritics 2 separators _'
240            );",
241        ),
242        (
243            5,
244            // Composite index: eliminates TEMP B-TREE sort for ranked_context / all_symbols_with_bytes
245            // Kind index: accelerates files_with_symbol_kinds (type_hierarchy, etc.)
246            "CREATE INDEX IF NOT EXISTS idx_symbols_file_byte ON symbols(file_id, start_byte);
247             CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);",
248        ),
249        (
250            6,
251            // Rebuild FTS with underscore separator so snake_case names are tokenized:
252            // "parse_symbols" → ["parse", "symbols"] enabling FTS match on individual words.
253            "DROP TABLE IF EXISTS symbols_fts;
254             CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
255                name, name_path, signature,
256                content=symbols, content_rowid=id,
257                tokenize='unicode61 remove_diacritics 2 separators _'
258             );",
259        ),
260        (
261            7,
262            // P1-4 per-symbol LSP boost needs the symbol's end line to
263            // run containment-based proximity scoring. Default to 0 so
264            // pre-migration rows read back as "unknown"; callers fall
265            // back to `line` when they see 0.
266            "ALTER TABLE symbols ADD COLUMN end_line INTEGER NOT NULL DEFAULT 0;",
267        ),
268    ];
269
270    fn migrate(&mut self) -> Result<()> {
271        self.conn.execute_batch(
272            "CREATE TABLE IF NOT EXISTS meta (
273                key TEXT PRIMARY KEY,
274                value TEXT NOT NULL
275            );",
276        )?;
277
278        let version: Option<i64> = self
279            .conn
280            .query_row(
281                "SELECT CAST(value AS INTEGER) FROM meta WHERE key = 'schema_version'",
282                [],
283                |row| row.get(0),
284            )
285            .optional()?;
286        let current = version.unwrap_or(0);
287
288        if current >= SCHEMA_VERSION {
289            return Ok(());
290        }
291
292        let tx = self.conn.transaction()?;
293        for &(ver, sql) in Self::MIGRATIONS {
294            if current < ver {
295                tx.execute_batch(sql)?;
296                tx.execute(
297                    "INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?1)",
298                    rusqlite::params![ver.to_string()],
299                )?;
300            }
301        }
302        tx.commit()?;
303        Ok(())
304    }
305
306    // ---- Transaction support ----
307
308    /// Execute a closure within an RAII transaction.
309    /// Automatically rolls back on error or panic; commits only on success.
310    pub fn with_transaction<F, T>(&mut self, mut f: F) -> Result<T>
311    where
312        F: FnMut(&Connection) -> Result<T>,
313    {
314        const MAX_ATTEMPTS: usize = 4;
315        const BACKOFF_MS: [u64; MAX_ATTEMPTS - 1] = [25, 75, 150];
316
317        let mut attempt = 0usize;
318        loop {
319            let tx = match self.conn.transaction() {
320                Ok(tx) => tx,
321                Err(error) if is_lock_contention(&error) && attempt + 1 < MAX_ATTEMPTS => {
322                    std::thread::sleep(Duration::from_millis(BACKOFF_MS[attempt]));
323                    attempt += 1;
324                    continue;
325                }
326                Err(error) => return Err(error.into()),
327            };
328
329            match f(&tx) {
330                Ok(result) => match tx.commit() {
331                    Ok(()) => return Ok(result),
332                    Err(error) if is_lock_contention(&error) && attempt + 1 < MAX_ATTEMPTS => {
333                        std::thread::sleep(Duration::from_millis(BACKOFF_MS[attempt]));
334                        attempt += 1;
335                    }
336                    Err(error) => return Err(error.into()),
337                },
338                Err(error) if is_lock_contention_anyhow(&error) && attempt + 1 < MAX_ATTEMPTS => {
339                    drop(tx);
340                    std::thread::sleep(Duration::from_millis(BACKOFF_MS[attempt]));
341                    attempt += 1;
342                }
343                Err(error) => return Err(error),
344            }
345        }
346    }
347}
348
349pub(crate) fn open_derived_sqlite_with_recovery<T, F>(
350    db_path: &Path,
351    kind: &str,
352    mut init: F,
353) -> Result<T>
354where
355    F: FnMut() -> Result<T>,
356{
357    ensure_db_parent_dir(db_path)?;
358
359    match init() {
360        Ok(value) => Ok(value),
361        Err(error) if is_recoverable_sqlite_anyhow(&error) => {
362            let backups = quarantine_corrupt_sqlite_files(db_path)?;
363            tracing::warn!(
364                path = %db_path.display(),
365                kind,
366                backups = ?backups,
367                error = %error,
368                "recovering derived sqlite index from corruption"
369            );
370            init().with_context(|| {
371                format!(
372                    "failed to recreate recovered {} at {}",
373                    kind,
374                    db_path.display()
375                )
376            })
377        }
378        Err(error) => Err(error),
379    }
380}
381
382fn is_lock_contention(error: &rusqlite::Error) -> bool {
383    matches!(
384        error,
385        rusqlite::Error::SqliteFailure(code, _)
386            if matches!(
387                code.code,
388                rusqlite::ErrorCode::DatabaseBusy | rusqlite::ErrorCode::DatabaseLocked
389            )
390    )
391}
392
393fn is_lock_contention_anyhow(error: &anyhow::Error) -> bool {
394    error.chain().any(|cause| {
395        cause
396            .downcast_ref::<rusqlite::Error>()
397            .is_some_and(is_lock_contention)
398    })
399}
400
401fn ensure_db_parent_dir(db_path: &Path) -> Result<()> {
402    if let Some(parent) = db_path.parent() {
403        fs::create_dir_all(parent)
404            .with_context(|| format!("failed to create {}", parent.display()))?;
405    }
406    Ok(())
407}
408
409fn is_recoverable_sqlite_error(error: &rusqlite::Error) -> bool {
410    matches!(
411        error,
412        rusqlite::Error::SqliteFailure(code, maybe_msg)
413            if matches!(
414                code.code,
415                rusqlite::ErrorCode::SystemIoFailure
416                    | rusqlite::ErrorCode::DatabaseCorrupt
417                    | rusqlite::ErrorCode::NotADatabase
418            ) || maybe_msg
419                .as_deref()
420                .is_some_and(sqlite_message_suggests_recovery)
421    )
422}
423
424fn is_recoverable_sqlite_anyhow(error: &anyhow::Error) -> bool {
425    error.chain().any(|cause| {
426        cause
427            .downcast_ref::<rusqlite::Error>()
428            .is_some_and(is_recoverable_sqlite_error)
429            || sqlite_message_suggests_recovery(&cause.to_string())
430    })
431}
432
/// Report whether `message` contains one of the SQLite error texts treated as
/// a sign of an unusable database file.
///
/// The comparison is ASCII case-insensitive and substring-based. The
/// recognised texts are "disk i/o error", "database disk image is malformed",
/// and "file is not a database".
fn sqlite_message_suggests_recovery(message: &str) -> bool {
    let message = message.to_ascii_lowercase();
    message.contains("disk i/o error")
        || message.contains("database disk image is malformed")
        || message.contains("file is not a database")
}
439
440fn quarantine_corrupt_sqlite_files(db_path: &Path) -> Result<Vec<PathBuf>> {
441    let suffix = format!(
442        "corrupt-{}-{}",
443        SystemTime::now()
444            .duration_since(UNIX_EPOCH)
445            .unwrap_or_default()
446            .as_millis(),
447        std::process::id()
448    );
449    let mut backups = Vec::new();
450
451    for path in sqlite_related_paths(db_path) {
452        if !path.exists() {
453            continue;
454        }
455
456        let file_name = path
457            .file_name()
458            .map(|name| name.to_string_lossy().into_owned())
459            .unwrap_or_else(|| "sqlite-index".to_owned());
460        let backup_path = path.with_file_name(format!("{file_name}.{suffix}"));
461
462        match fs::rename(&path, &backup_path) {
463            Ok(()) => backups.push(backup_path),
464            Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
465            Err(error) => {
466                return Err(error).with_context(|| {
467                    format!(
468                        "failed to quarantine corrupt sqlite file {}",
469                        path.display()
470                    )
471                });
472            }
473        }
474    }
475
476    Ok(backups)
477}
478
/// Return the database path together with its `-wal` and `-shm` sibling
/// paths, in that order.
///
/// The siblings are built by appending the suffix to the file name of
/// `db_path` and keeping the same parent directory. If `db_path` has no file
/// name, an empty name is used as the base.
fn sqlite_related_paths(db_path: &Path) -> [PathBuf; 3] {
    let file_name = db_path.file_name().unwrap_or_default();

    let mut wal_name = file_name.to_os_string();
    wal_name.push("-wal");

    let mut shm_name = file_name.to_os_string();
    shm_name.push("-shm");

    [
        db_path.to_path_buf(),
        db_path.with_file_name(wal_name),
        db_path.with_file_name(shm_name),
    ]
}
494
495/// Compute SHA-256 hex digest of content.
496pub fn content_hash(content: &[u8]) -> String {
497    let mut hasher = Sha256::new();
498    hasher.update(content);
499    format!("{:x}", hasher.finalize())
500}
501
/// Standard path for the index database within a project.
///
/// Joins `.codelens/index/symbols.db` onto `project_root`; nothing is
/// created on disk.
pub fn index_db_path(project_root: &Path) -> PathBuf {
    project_root.join(".codelens/index/symbols.db")
}