1use anyhow::{Context, Result};
2use rusqlite::{Connection, OptionalExtension};
3use sha2::{Digest, Sha256};
4use std::fs;
5use std::path::{Path, PathBuf};
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8mod ops;
9
10#[cfg(test)]
11mod tests;
12
/// Newest schema version this build knows how to produce.
///
/// `IndexDb::migrate` returns early once the version stored in the `meta`
/// table is greater than or equal to this value.
const SCHEMA_VERSION: i64 = 7;
21
22pub struct IndexDb {
24 pub(super) conn: Connection,
25}
26
/// One indexed file.
///
/// Field names mirror the columns of the `files` table created by migration 1
/// (the table's `indexed_at` column has no counterpart here).
#[derive(Clone, Debug)]
pub struct FileRow {
    /// Row id (`files.id`).
    pub id: i64,
    /// `files.relative_path`; declared `UNIQUE NOT NULL` in the schema.
    pub relative_path: String,
    /// `files.mtime_ms`. NOTE(review): presumably the file's modification
    /// time in milliseconds — confirm against the writer in `ops`.
    pub mtime_ms: i64,
    /// `files.content_hash`.
    pub content_hash: String,
    /// `files.size_bytes`.
    pub size_bytes: i64,
    /// `files.language`; the column is nullable, hence the `Option`.
    pub language: Option<String>,
}
36
/// One symbol record.
///
/// Field names mirror the columns of the `symbols` table (migration 1) plus
/// the `end_line` column added by migration 7.
#[derive(Clone, Debug)]
pub struct SymbolRow {
    /// Row id (`symbols.id`).
    pub id: i64,
    /// Owning file (`symbols.file_id`, which references `files.id`).
    pub file_id: i64,
    /// `symbols.name`.
    pub name: String,
    /// `symbols.kind`.
    pub kind: String,
    /// `symbols.line`.
    pub line: i64,
    /// `symbols.column_num`.
    pub column_num: i64,
    /// `symbols.start_byte`.
    pub start_byte: i64,
    /// `symbols.end_byte`.
    pub end_byte: i64,
    /// `symbols.signature`.
    pub signature: String,
    /// `symbols.name_path`.
    pub name_path: String,
    /// `symbols.parent_id`; nullable self-reference to another symbol row.
    pub parent_id: Option<i64>,
    /// `symbols.end_line`; the column defaults to 0 for rows that predate
    /// migration 7.
    pub end_line: i64,
}
55
/// A symbol together with the path of the file it lives in.
///
/// NOTE(review): presumably the result of joining `symbols` with `files`; the
/// query that fills this struct is not part of this file.
#[derive(Clone, Debug)]
pub struct SymbolWithFile {
    /// Symbol name.
    pub name: String,
    /// Symbol kind.
    pub kind: String,
    /// Path of the containing file.
    pub file_path: String,
    /// Line of the symbol.
    pub line: i64,
    /// Symbol signature text.
    pub signature: String,
    /// Symbol name path.
    pub name_path: String,
    /// Start byte offset of the symbol.
    pub start_byte: i64,
    /// End byte offset of the symbol.
    pub end_byte: i64,
}
68
/// One import edge; field names mirror the columns of the `imports` table
/// created by migration 1.
#[derive(Clone, Debug)]
pub struct ImportRow {
    /// Importing file (`imports.source_file_id`, which references `files.id`).
    pub source_file_id: i64,
    /// `imports.target_path`.
    pub target_path: String,
    /// `imports.raw_import`.
    pub raw_import: String,
}
75
76#[derive(Debug, Clone, Default, serde::Serialize)]
77pub struct IndexFailureSummary {
78 pub total_failures: usize,
79 pub recent_failures: usize,
80 pub stale_failures: usize,
81 pub persistent_failures: usize,
82}
83
84#[derive(Debug, Clone, serde::Serialize)]
86pub struct DirStats {
87 pub dir: String,
88 pub files: usize,
89 pub symbols: usize,
90 pub imports_from_others: usize,
91}
92
/// Borrowed description of a symbol.
///
/// Carries the same fields as [`SymbolRow`] except `id` and `file_id`, with
/// the text fields borrowed for `'a` instead of owned.
#[derive(Clone, Debug)]
pub struct NewSymbol<'a> {
    /// Symbol name.
    pub name: &'a str,
    /// Symbol kind.
    pub kind: &'a str,
    /// Line of the symbol.
    pub line: i64,
    /// Column of the symbol.
    pub column_num: i64,
    /// Start byte offset.
    pub start_byte: i64,
    /// End byte offset.
    pub end_byte: i64,
    /// Signature text.
    pub signature: &'a str,
    /// Name path.
    pub name_path: &'a str,
    /// Optional parent symbol id.
    pub parent_id: Option<i64>,
    /// Last line of the symbol.
    pub end_line: i64,
}
109
/// Owned description of an import; the same fields as [`ImportRow`] minus
/// `source_file_id`.
#[derive(Clone, Debug)]
pub struct NewImport {
    /// Path the import points at.
    pub target_path: String,
    /// Import text as stored in `raw_import`.
    pub raw_import: String,
}
116
/// Owned description of a call edge.
///
/// Field names match the `caller_name`, `callee_name` and `line` columns of
/// the `calls` table created by migration 2.
#[derive(Clone, Debug)]
pub struct NewCall {
    /// Name of the calling symbol.
    pub caller_name: String,
    /// Name of the called symbol.
    pub callee_name: String,
    /// Line of the call.
    pub line: i64,
}
124
125pub(crate) use ops::{
127 all_file_paths, delete_file, get_fresh_file, insert_calls, insert_imports, insert_symbols,
128 upsert_file,
129};
130
131impl IndexDb {
132 pub fn open(db_path: &Path) -> Result<Self> {
134 open_derived_sqlite_with_recovery(db_path, "symbol index", || {
135 let conn = Connection::open(db_path)
136 .with_context(|| format!("failed to open db at {}", db_path.display()))?;
137 conn.execute_batch(
138 "PRAGMA journal_mode = WAL; PRAGMA synchronous = NORMAL; PRAGMA foreign_keys = ON; PRAGMA busy_timeout = 5000; PRAGMA cache_size = -8000; PRAGMA auto_vacuum = INCREMENTAL;",
139 )?;
140 let mut db = Self { conn };
141 db.migrate()?;
142 Ok(db)
143 })
144 }
145
146 pub fn open_readonly(db_path: &Path) -> Result<Option<Self>> {
149 if !db_path.is_file() {
150 return Ok(None);
151 }
152 let conn = Connection::open_with_flags(
153 db_path,
154 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
155 )
156 .with_context(|| format!("failed to open db readonly at {}", db_path.display()))?;
157 conn.execute_batch("PRAGMA busy_timeout = 5000;")?;
158 Ok(Some(Self { conn }))
159 }
160
161 pub fn open_memory() -> Result<Self> {
163 let conn = Connection::open_in_memory()?;
164 conn.execute_batch("PRAGMA foreign_keys = ON;")?;
165 let mut db = Self { conn };
166 db.migrate()?;
167 Ok(db)
168 }
169
170 const MIGRATIONS: &'static [(i64, &'static str)] = &[
173 (
174 1,
175 "CREATE TABLE IF NOT EXISTS files (
176 id INTEGER PRIMARY KEY,
177 relative_path TEXT UNIQUE NOT NULL,
178 mtime_ms INTEGER NOT NULL,
179 content_hash TEXT NOT NULL,
180 size_bytes INTEGER NOT NULL,
181 language TEXT,
182 indexed_at INTEGER NOT NULL
183 );
184 CREATE TABLE IF NOT EXISTS symbols (
185 id INTEGER PRIMARY KEY,
186 file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
187 name TEXT NOT NULL,
188 kind TEXT NOT NULL,
189 line INTEGER NOT NULL,
190 column_num INTEGER NOT NULL,
191 start_byte INTEGER NOT NULL,
192 end_byte INTEGER NOT NULL,
193 signature TEXT NOT NULL,
194 name_path TEXT NOT NULL,
195 parent_id INTEGER REFERENCES symbols(id)
196 );
197 CREATE TABLE IF NOT EXISTS imports (
198 source_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
199 target_path TEXT NOT NULL,
200 raw_import TEXT NOT NULL,
201 PRIMARY KEY (source_file_id, target_path)
202 );
203 CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
204 CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id);
205 CREATE INDEX IF NOT EXISTS idx_symbols_name_path ON symbols(name_path);
206 CREATE INDEX IF NOT EXISTS idx_imports_target ON imports(target_path);",
207 ),
208 (
209 2,
210 "CREATE TABLE IF NOT EXISTS calls (
211 id INTEGER PRIMARY KEY,
212 caller_file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
213 caller_name TEXT NOT NULL,
214 callee_name TEXT NOT NULL,
215 line INTEGER NOT NULL
216 );
217 CREATE INDEX IF NOT EXISTS idx_calls_callee ON calls(callee_name);
218 CREATE INDEX IF NOT EXISTS idx_calls_caller ON calls(caller_name);
219 CREATE INDEX IF NOT EXISTS idx_calls_file ON calls(caller_file_id);",
220 ),
221 (
222 3,
223 "CREATE TABLE IF NOT EXISTS index_failures (
224 id INTEGER PRIMARY KEY,
225 file_path TEXT NOT NULL,
226 error_type TEXT NOT NULL,
227 error_message TEXT NOT NULL,
228 failed_at INTEGER NOT NULL,
229 retry_count INTEGER NOT NULL DEFAULT 0,
230 UNIQUE(file_path)
231 );
232 CREATE INDEX IF NOT EXISTS idx_failures_path ON index_failures(file_path);",
233 ),
234 (
235 4,
236 "CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
237 name, name_path, signature,
238 content=symbols, content_rowid=id,
239 tokenize='unicode61 remove_diacritics 2 separators _'
240 );",
241 ),
242 (
243 5,
244 "CREATE INDEX IF NOT EXISTS idx_symbols_file_byte ON symbols(file_id, start_byte);
247 CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);",
248 ),
249 (
250 6,
251 "DROP TABLE IF EXISTS symbols_fts;
254 CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
255 name, name_path, signature,
256 content=symbols, content_rowid=id,
257 tokenize='unicode61 remove_diacritics 2 separators _'
258 );",
259 ),
260 (
261 7,
262 "ALTER TABLE symbols ADD COLUMN end_line INTEGER NOT NULL DEFAULT 0;",
267 ),
268 ];
269
270 fn migrate(&mut self) -> Result<()> {
271 self.conn.execute_batch(
272 "CREATE TABLE IF NOT EXISTS meta (
273 key TEXT PRIMARY KEY,
274 value TEXT NOT NULL
275 );",
276 )?;
277
278 let version: Option<i64> = self
279 .conn
280 .query_row(
281 "SELECT CAST(value AS INTEGER) FROM meta WHERE key = 'schema_version'",
282 [],
283 |row| row.get(0),
284 )
285 .optional()?;
286 let current = version.unwrap_or(0);
287
288 if current >= SCHEMA_VERSION {
289 return Ok(());
290 }
291
292 let tx = self.conn.transaction()?;
293 for &(ver, sql) in Self::MIGRATIONS {
294 if current < ver {
295 tx.execute_batch(sql)?;
296 tx.execute(
297 "INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?1)",
298 rusqlite::params![ver.to_string()],
299 )?;
300 }
301 }
302 tx.commit()?;
303 Ok(())
304 }
305
306 pub fn with_transaction<F, T>(&mut self, mut f: F) -> Result<T>
311 where
312 F: FnMut(&Connection) -> Result<T>,
313 {
314 const MAX_ATTEMPTS: usize = 4;
315 const BACKOFF_MS: [u64; MAX_ATTEMPTS - 1] = [25, 75, 150];
316
317 let mut attempt = 0usize;
318 loop {
319 let tx = match self.conn.transaction() {
320 Ok(tx) => tx,
321 Err(error) if is_lock_contention(&error) && attempt + 1 < MAX_ATTEMPTS => {
322 std::thread::sleep(Duration::from_millis(BACKOFF_MS[attempt]));
323 attempt += 1;
324 continue;
325 }
326 Err(error) => return Err(error.into()),
327 };
328
329 match f(&tx) {
330 Ok(result) => match tx.commit() {
331 Ok(()) => return Ok(result),
332 Err(error) if is_lock_contention(&error) && attempt + 1 < MAX_ATTEMPTS => {
333 std::thread::sleep(Duration::from_millis(BACKOFF_MS[attempt]));
334 attempt += 1;
335 }
336 Err(error) => return Err(error.into()),
337 },
338 Err(error) if is_lock_contention_anyhow(&error) && attempt + 1 < MAX_ATTEMPTS => {
339 drop(tx);
340 std::thread::sleep(Duration::from_millis(BACKOFF_MS[attempt]));
341 attempt += 1;
342 }
343 Err(error) => return Err(error),
344 }
345 }
346 }
347}
348
349pub(crate) fn open_derived_sqlite_with_recovery<T, F>(
350 db_path: &Path,
351 kind: &str,
352 mut init: F,
353) -> Result<T>
354where
355 F: FnMut() -> Result<T>,
356{
357 ensure_db_parent_dir(db_path)?;
358
359 match init() {
360 Ok(value) => Ok(value),
361 Err(error) if is_recoverable_sqlite_anyhow(&error) => {
362 let backups = quarantine_corrupt_sqlite_files(db_path)?;
363 tracing::warn!(
364 path = %db_path.display(),
365 kind,
366 backups = ?backups,
367 error = %error,
368 "recovering derived sqlite index from corruption"
369 );
370 init().with_context(|| {
371 format!(
372 "failed to recreate recovered {} at {}",
373 kind,
374 db_path.display()
375 )
376 })
377 }
378 Err(error) => Err(error),
379 }
380}
381
382fn is_lock_contention(error: &rusqlite::Error) -> bool {
383 matches!(
384 error,
385 rusqlite::Error::SqliteFailure(code, _)
386 if matches!(
387 code.code,
388 rusqlite::ErrorCode::DatabaseBusy | rusqlite::ErrorCode::DatabaseLocked
389 )
390 )
391}
392
393fn is_lock_contention_anyhow(error: &anyhow::Error) -> bool {
394 error.chain().any(|cause| {
395 cause
396 .downcast_ref::<rusqlite::Error>()
397 .is_some_and(is_lock_contention)
398 })
399}
400
401fn ensure_db_parent_dir(db_path: &Path) -> Result<()> {
402 if let Some(parent) = db_path.parent() {
403 fs::create_dir_all(parent)
404 .with_context(|| format!("failed to create {}", parent.display()))?;
405 }
406 Ok(())
407}
408
409fn is_recoverable_sqlite_error(error: &rusqlite::Error) -> bool {
410 matches!(
411 error,
412 rusqlite::Error::SqliteFailure(code, maybe_msg)
413 if matches!(
414 code.code,
415 rusqlite::ErrorCode::SystemIoFailure
416 | rusqlite::ErrorCode::DatabaseCorrupt
417 | rusqlite::ErrorCode::NotADatabase
418 ) || maybe_msg
419 .as_deref()
420 .is_some_and(sqlite_message_suggests_recovery)
421 )
422}
423
424fn is_recoverable_sqlite_anyhow(error: &anyhow::Error) -> bool {
425 error.chain().any(|cause| {
426 cause
427 .downcast_ref::<rusqlite::Error>()
428 .is_some_and(is_recoverable_sqlite_error)
429 || sqlite_message_suggests_recovery(&cause.to_string())
430 })
431}
432
/// Returns `true` when `message` contains, ignoring ASCII case, one of the
/// SQLite error texts that this module treats as a sign of a damaged file.
fn sqlite_message_suggests_recovery(message: &str) -> bool {
    const MARKERS: [&str; 3] = [
        "disk i/o error",
        "database disk image is malformed",
        "file is not a database",
    ];

    let lowered = message.to_ascii_lowercase();
    MARKERS.iter().any(|marker| lowered.contains(marker))
}
439
440fn quarantine_corrupt_sqlite_files(db_path: &Path) -> Result<Vec<PathBuf>> {
441 let suffix = format!(
442 "corrupt-{}-{}",
443 SystemTime::now()
444 .duration_since(UNIX_EPOCH)
445 .unwrap_or_default()
446 .as_millis(),
447 std::process::id()
448 );
449 let mut backups = Vec::new();
450
451 for path in sqlite_related_paths(db_path) {
452 if !path.exists() {
453 continue;
454 }
455
456 let file_name = path
457 .file_name()
458 .map(|name| name.to_string_lossy().into_owned())
459 .unwrap_or_else(|| "sqlite-index".to_owned());
460 let backup_path = path.with_file_name(format!("{file_name}.{suffix}"));
461
462 match fs::rename(&path, &backup_path) {
463 Ok(()) => backups.push(backup_path),
464 Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
465 Err(error) => {
466 return Err(error).with_context(|| {
467 format!(
468 "failed to quarantine corrupt sqlite file {}",
469 path.display()
470 )
471 });
472 }
473 }
474 }
475
476 Ok(backups)
477}
478
/// Returns the database path followed by its `-wal` and `-shm` siblings.
///
/// The siblings are built by appending the suffix to the final path
/// component, so they live in the same directory as `db_path`.
fn sqlite_related_paths(db_path: &Path) -> [PathBuf; 3] {
    let base_name = db_path.file_name().unwrap_or_default();

    // Builds `<file name><suffix>` next to the database file.
    let sibling = |suffix: &str| {
        let mut name = base_name.to_os_string();
        name.push(suffix);
        db_path.with_file_name(name)
    };

    [db_path.to_path_buf(), sibling("-wal"), sibling("-shm")]
}
494
495pub fn content_hash(content: &[u8]) -> String {
497 let mut hasher = Sha256::new();
498 hasher.update(content);
499 format!("{:x}", hasher.finalize())
500}
501
/// Returns the location of the symbol database below `project_root`:
/// `<project_root>/.codelens/index/symbols.db`.
pub fn index_db_path(project_root: &Path) -> PathBuf {
    const RELATIVE_DB_PATH: &str = ".codelens/index/symbols.db";

    let mut path = project_root.to_path_buf();
    path.push(RELATIVE_DB_PATH);
    path
}