gabb_cli/store.rs

use anyhow::{Context, Result};
use log::info;
use rusqlite::types::Value;
use rusqlite::{params, params_from_iter, Connection, OptionalExtension};
use serde::Serialize;
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};

// Schema version constants
pub const SCHEMA_MAJOR: u32 = 1;
pub const SCHEMA_MINOR: u32 = 0;

/// Schema version for database compatibility checking.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct SchemaVersion {
    pub major: u32,
    pub minor: u32,
}

impl SchemaVersion {
    pub fn current() -> Self {
        Self {
            major: SCHEMA_MAJOR,
            minor: SCHEMA_MINOR,
        }
    }

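    /// Parse a `"MAJOR.MINOR"` version string, returning `None` for any other shape.
    ///
    /// Illustrative sketch of the expected behavior:
    /// ```ignore
    /// assert_eq!(SchemaVersion::parse("1.0"), Some(SchemaVersion { major: 1, minor: 0 }));
    /// assert_eq!(SchemaVersion::parse("not-a-version"), None);
    /// ```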
    pub fn parse(s: &str) -> Option<Self> {
        let parts: Vec<&str> = s.split('.').collect();
        if parts.len() == 2 {
            Some(Self {
                major: parts[0].parse().ok()?,
                minor: parts[1].parse().ok()?,
            })
        } else {
            None
        }
    }

    pub fn requires_regeneration(&self, current: &Self) -> bool {
        self.major != current.major
    }

    pub fn requires_migration(&self, current: &Self) -> bool {
        self.major == current.major && self.minor < current.minor
    }
}

impl std::fmt::Display for SchemaVersion {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}.{}", self.major, self.minor)
    }
}

/// Result of attempting to open a database.
pub enum DbOpenResult {
    /// Database is ready to use
    Ready(IndexStore),
    /// Database needs regeneration before use
    NeedsRegeneration {
        reason: RegenerationReason,
        path: PathBuf,
    },
}

/// Reason why database regeneration is needed.
#[derive(Debug)]
pub enum RegenerationReason {
    /// Schema major version is incompatible
    MajorVersionMismatch {
        db_version: String,
        app_version: String,
    },
    /// Database predates version tracking
    LegacyDatabase,
    /// Database file is corrupted
    CorruptDatabase(String),
    /// User explicitly requested rebuild
    UserRequested,
}

impl RegenerationReason {
    /// Get a user-friendly message explaining the regeneration reason.
    pub fn message(&self) -> String {
        match self {
            RegenerationReason::MajorVersionMismatch {
                db_version,
                app_version,
            } => {
                format!(
                    "Index schema version {} is incompatible with gabb schema {}",
                    db_version, app_version
                )
            }
            RegenerationReason::LegacyDatabase => {
                "Found legacy index without version tracking".to_string()
            }
            RegenerationReason::CorruptDatabase(err) => {
                format!("Index database appears corrupted: {}", err)
            }
            RegenerationReason::UserRequested => "Rebuild requested by user".to_string(),
        }
    }
}

/// A database migration from one schema version to another.
struct Migration {
    from_version: SchemaVersion,
    to_version: SchemaVersion,
    description: &'static str,
    migrate: fn(&Connection) -> Result<()>,
}

#[derive(Debug, Clone)]
pub struct FileRecord {
    pub path: String,
    pub hash: String,
    pub mtime: i64,
    pub indexed_at: i64,
}

#[derive(Debug, Clone, Serialize)]
pub struct SymbolRecord {
    pub id: String,
    pub file: String,
    pub kind: String,
    pub name: String,
    pub start: i64,
    pub end: i64,
    pub qualifier: Option<String>,
    pub visibility: Option<String>,
    pub container: Option<String>,
    /// Blake3 hash of normalized symbol body for duplicate detection
    pub content_hash: Option<String>,
}

#[derive(Debug, Clone, Serialize)]
pub struct EdgeRecord {
    pub src: String,
    pub dst: String,
    pub kind: String,
}

#[derive(Debug, Clone, Serialize)]
pub struct ReferenceRecord {
    pub file: String,
    pub start: i64,
    pub end: i64,
    pub symbol_id: String,
}

/// Pre-computed file statistics for O(1) aggregate queries.
/// Used by CLI stats commands and daemon status reporting.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FileStats {
    pub file: String,
    pub symbol_count: i64,
    pub function_count: i64,
    pub class_count: i64,
    pub interface_count: i64,
}

/// File dependency record for tracking imports/includes.
/// Used by incremental indexing and dependency graph queries.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FileDependency {
    /// The file that contains the import/use statement
    pub from_file: String,
    /// The file being imported
    pub to_file: String,
    /// Type of dependency (e.g., "import", "use", "include")
    pub kind: String,
}

/// Represents a group of duplicate symbols sharing the same content hash.
#[derive(Debug, Clone)]
pub struct DuplicateGroup {
    pub content_hash: String,
    pub symbols: Vec<SymbolRecord>,
}

/// In-memory dependency cache for O(1) lookups.
/// Caches both forward (file -> dependencies) and reverse (file -> dependents) mappings.
/// Used by daemon for fast invalidation during file watching.
#[allow(dead_code)]
#[derive(Debug, Default)]
pub struct DependencyCache {
    /// Forward dependencies: file -> files it depends on
    forward: HashMap<String, Vec<String>>,
    /// Reverse dependencies: file -> files that depend on it
    reverse: HashMap<String, Vec<String>>,
    /// Whether the cache is populated
    populated: bool,
}

#[allow(dead_code)]
impl DependencyCache {
    /// Create a new empty cache.
    pub fn new() -> Self {
        Self::default()
    }

    /// Check if cache is populated.
    pub fn is_populated(&self) -> bool {
        self.populated
    }

    /// Get files that a file depends on (O(1) lookup).
    pub fn get_dependencies(&self, file: &str) -> Option<&Vec<String>> {
        self.forward.get(file)
    }

    /// Get files that depend on a file (O(1) lookup).
    pub fn get_dependents(&self, file: &str) -> Option<&Vec<String>> {
        self.reverse.get(file)
    }

    /// Clear the cache.
    pub fn clear(&mut self) {
        self.forward.clear();
        self.reverse.clear();
        self.populated = false;
    }

    /// Populate cache from a list of dependencies.
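    ///
    /// Illustrative sketch (file names hypothetical):
    /// ```ignore
    /// let mut cache = DependencyCache::new();
    /// cache.populate(&[FileDependency {
    ///     from_file: "a.ts".into(),
    ///     to_file: "b.ts".into(),
    ///     kind: "import".into(),
    /// }]);
    /// assert_eq!(cache.get_dependents("b.ts"), Some(&vec!["a.ts".to_string()]));
    /// ```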
    pub fn populate(&mut self, dependencies: &[FileDependency]) {
        self.clear();

        for dep in dependencies {
            // Forward mapping
            self.forward
                .entry(dep.from_file.clone())
                .or_default()
                .push(dep.to_file.clone());

            // Reverse mapping
            self.reverse
                .entry(dep.to_file.clone())
                .or_default()
                .push(dep.from_file.clone());
        }

        self.populated = true;
    }

    /// Invalidate cache entries for a specific file (when it changes).
    pub fn invalidate_file(&mut self, file: &str) {
        // Remove forward dependencies
        if let Some(deps) = self.forward.remove(file) {
            // Also remove from reverse mappings
            for dep in deps {
                if let Some(rev) = self.reverse.get_mut(&dep) {
                    rev.retain(|f| f != file);
                }
            }
        }

        // Remove reverse dependencies
        if let Some(dependents) = self.reverse.remove(file) {
            // Also remove from forward mappings
            for dependent in dependents {
                if let Some(fwd) = self.forward.get_mut(&dependent) {
                    fwd.retain(|f| f != file);
                }
            }
        }
    }
}

#[derive(Debug)]
pub struct IndexStore {
    conn: RefCell<Connection>,
    db_path: PathBuf,
}

impl IndexStore {
    /// Open a database, creating it if it doesn't exist.
    /// This method always succeeds if the file can be created/opened.
    /// Use `try_open()` for version-aware opening with migration support.
    pub fn open(path: &Path) -> Result<Self> {
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent)?;
        }
        let conn = Connection::open(path)?;
        let store = Self {
            conn: RefCell::new(conn),
            db_path: path.to_path_buf(),
        };
        store.init_schema()?;
        Ok(store)
    }

    /// Try to open a database with version checking.
    /// Returns `DbOpenResult::Ready` if the database is compatible.
    /// Returns `DbOpenResult::NeedsRegeneration` if the database needs to be rebuilt.
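    ///
    /// Illustrative usage (path hypothetical):
    /// ```ignore
    /// match IndexStore::try_open(Path::new(".gabb/index.db"))? {
    ///     DbOpenResult::Ready(store) => { /* use the store */ }
    ///     DbOpenResult::NeedsRegeneration { reason, path } => {
    ///         eprintln!("rebuilding {}: {}", path.display(), reason.message());
    ///     }
    /// }
    /// ```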
    pub fn try_open(path: &Path) -> Result<DbOpenResult> {
        // If file doesn't exist, create new database
        if !path.exists() {
            return Ok(DbOpenResult::Ready(Self::open(path)?));
        }

        // Open existing database for inspection
        let conn = Connection::open(path).context("failed to open index database")?;

        // Quick integrity check: quick_check returns a single row containing
        // "ok" on success, or rows describing any corruption found.
        let check = conn
            .query_row("PRAGMA quick_check", [], |row| row.get::<_, String>(0))
            .unwrap_or_else(|e| e.to_string());
        if check != "ok" {
            return Ok(DbOpenResult::NeedsRegeneration {
                reason: RegenerationReason::CorruptDatabase(check),
                path: path.to_path_buf(),
            });
        }

        // Check for schema_meta table (indicates versioned database)
        if !Self::has_schema_meta(&conn) {
            return Ok(DbOpenResult::NeedsRegeneration {
                reason: RegenerationReason::LegacyDatabase,
                path: path.to_path_buf(),
            });
        }

        // Read version from database
        let db_version_str: Option<String> = conn
            .query_row(
                "SELECT value FROM schema_meta WHERE key = 'schema_version'",
                [],
                |row| row.get(0),
            )
            .optional()?;

        let db_version = match db_version_str.and_then(|s| SchemaVersion::parse(&s)) {
            Some(v) => v,
            None => {
                return Ok(DbOpenResult::NeedsRegeneration {
                    reason: RegenerationReason::LegacyDatabase,
                    path: path.to_path_buf(),
                });
            }
        };

        let current = SchemaVersion::current();

        // Check for major version mismatch (requires regeneration)
        if db_version.requires_regeneration(&current) {
            return Ok(DbOpenResult::NeedsRegeneration {
                reason: RegenerationReason::MajorVersionMismatch {
                    db_version: db_version.to_string(),
                    app_version: current.to_string(),
                },
                path: path.to_path_buf(),
            });
        }

        // Close the inspection connection and properly open with schema init
        drop(conn);
        let store = Self::open(path)?;

        // Apply migrations if needed (minor version upgrade)
        if db_version.requires_migration(&current) {
            info!(
                "Migrating index from schema {} to {}...",
                db_version, current
            );
            store.apply_migrations(&db_version, &current)?;
            info!("Migration complete");
        }

        Ok(DbOpenResult::Ready(store))
    }

    /// Apply migrations from one version to another.
    fn apply_migrations(&self, from: &SchemaVersion, to: &SchemaVersion) -> Result<()> {
        let migrations = Self::get_migrations();
        let mut current = from.clone();

        for migration in migrations {
            if migration.from_version == current && migration.to_version <= *to {
                info!("Applying migration: {}", migration.description);
                (migration.migrate)(&self.conn.borrow())?;

                // Update stored version
                self.conn.borrow().execute(
                    "UPDATE schema_meta SET value = ?1 WHERE key = 'schema_version'",
                    params![migration.to_version.to_string()],
                )?;
                self.conn.borrow().execute(
                    "UPDATE schema_meta SET value = ?1 WHERE key = 'last_migration'",
                    params![now_unix().to_string()],
                )?;

                current = migration.to_version.clone();
            }
        }

        Ok(())
    }

    /// Get the list of available migrations.
    /// Migrations are applied in order from older to newer versions.
    fn get_migrations() -> Vec<Migration> {
        vec![
            // Future migrations will be added here, e.g.:
            // Migration {
            //     from_version: SchemaVersion { major: 1, minor: 0 },
            //     to_version: SchemaVersion { major: 1, minor: 1 },
            //     description: "Add symbol signature column",
            //     migrate: |conn| {
            //         conn.execute("ALTER TABLE symbols ADD COLUMN signature TEXT", [])?;
            //         Ok(())
            //     },
            // },
        ]
    }

    fn init_schema(&self) -> Result<()> {
        self.conn.borrow().execute_batch(
            r#"
            PRAGMA journal_mode = WAL;
            PRAGMA synchronous = NORMAL;
            PRAGMA cache_size = -64000;
            PRAGMA mmap_size = 268435456;
            PRAGMA page_size = 4096;
            PRAGMA temp_store = MEMORY;
            CREATE TABLE IF NOT EXISTS files (
                path TEXT PRIMARY KEY,
                hash TEXT NOT NULL,
                mtime INTEGER NOT NULL,
                indexed_at INTEGER NOT NULL
            );
            CREATE TABLE IF NOT EXISTS symbols (
                id TEXT PRIMARY KEY,
                file TEXT NOT NULL,
                kind TEXT NOT NULL,
                name TEXT NOT NULL,
                start INTEGER NOT NULL,
                end INTEGER NOT NULL,
                qualifier TEXT,
                visibility TEXT,
                container TEXT,
                content_hash TEXT
            );
            -- B-tree indices for O(log n) lookups
            CREATE INDEX IF NOT EXISTS symbols_file_idx ON symbols(file);
            CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
            CREATE INDEX IF NOT EXISTS idx_symbols_position ON symbols(file, start, end);
            CREATE INDEX IF NOT EXISTS idx_symbols_kind_name ON symbols(kind, name);
            CREATE INDEX IF NOT EXISTS idx_symbols_content_hash ON symbols(content_hash);
            -- Compound index for multi-filter queries (file + kind + name)
            CREATE INDEX IF NOT EXISTS idx_symbols_file_kind_name ON symbols(file, kind, name);
            -- Tertiary index for kind + visibility filtered searches
            CREATE INDEX IF NOT EXISTS idx_symbols_kind_visibility ON symbols(kind, visibility);

            CREATE TABLE IF NOT EXISTS edges (
                src TEXT NOT NULL,
                dst TEXT NOT NULL,
                kind TEXT NOT NULL
            );
            -- Covering indices for edges table (include all columns for index-only scans)
            CREATE INDEX IF NOT EXISTS idx_edges_src_covering ON edges(src, dst, kind);
            CREATE INDEX IF NOT EXISTS idx_edges_dst_covering ON edges(dst, src, kind);

            CREATE TABLE IF NOT EXISTS references_tbl (
                file TEXT NOT NULL,
                start INTEGER NOT NULL,
                end INTEGER NOT NULL,
                symbol_id TEXT NOT NULL
            );
            -- Covering index for reference lookups by symbol_id (includes all columns)
            CREATE INDEX IF NOT EXISTS idx_refs_symbol_covering ON references_tbl(symbol_id, file, start, end);
            CREATE INDEX IF NOT EXISTS idx_refs_file_position ON references_tbl(file, start, end, symbol_id);

            -- FTS5 virtual table for full-text symbol search with trigram tokenization
            CREATE VIRTUAL TABLE IF NOT EXISTS symbols_fts USING fts5(
                name,
                qualifier,
                content='symbols',
                content_rowid='rowid',
                tokenize='trigram'
            );

            -- Pre-computed aggregates for instant file statistics
            CREATE TABLE IF NOT EXISTS file_stats (
                file TEXT PRIMARY KEY,
                symbol_count INTEGER NOT NULL DEFAULT 0,
                function_count INTEGER NOT NULL DEFAULT 0,
                class_count INTEGER NOT NULL DEFAULT 0,
                interface_count INTEGER NOT NULL DEFAULT 0
            );

            -- File dependency graph for incremental rebuild ordering
            CREATE TABLE IF NOT EXISTS file_dependencies (
                from_file TEXT NOT NULL,
                to_file TEXT NOT NULL,
                kind TEXT NOT NULL,
                PRIMARY KEY (from_file, to_file)
            );
            -- Index for reverse dependency lookups (find all files that depend on X)
            CREATE INDEX IF NOT EXISTS idx_deps_to_file ON file_dependencies(to_file, from_file);

            -- Schema metadata for version tracking and migrations
            CREATE TABLE IF NOT EXISTS schema_meta (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL
            );

            -- Triggers to keep FTS5 index in sync with symbols table
            CREATE TRIGGER IF NOT EXISTS symbols_ai AFTER INSERT ON symbols BEGIN
                INSERT INTO symbols_fts(rowid, name, qualifier)
                VALUES (NEW.rowid, NEW.name, NEW.qualifier);
            END;
            CREATE TRIGGER IF NOT EXISTS symbols_ad AFTER DELETE ON symbols BEGIN
                INSERT INTO symbols_fts(symbols_fts, rowid, name, qualifier)
                VALUES ('delete', OLD.rowid, OLD.name, OLD.qualifier);
            END;
            CREATE TRIGGER IF NOT EXISTS symbols_au AFTER UPDATE ON symbols BEGIN
                INSERT INTO symbols_fts(symbols_fts, rowid, name, qualifier)
                VALUES ('delete', OLD.rowid, OLD.name, OLD.qualifier);
                INSERT INTO symbols_fts(rowid, name, qualifier)
                VALUES (NEW.rowid, NEW.name, NEW.qualifier);
            END;
            "#,
        )?;
        self.ensure_column("symbols", "qualifier", "TEXT")?;
        self.ensure_column("symbols", "visibility", "TEXT")?;
        self.ensure_column("symbols", "content_hash", "TEXT")?;
        self.ensure_index(
            "idx_symbols_content_hash",
            "CREATE INDEX IF NOT EXISTS idx_symbols_content_hash ON symbols(content_hash)",
        )?;
        // Initialize schema version if not present (new database)
        self.ensure_schema_version()?;
        Ok(())
    }

    /// Ensure schema_meta has version info. Only inserts if not already present.
    fn ensure_schema_version(&self) -> Result<()> {
        let conn = self.conn.borrow();
        let version = SchemaVersion::current();
        let now = now_unix();

        // Insert version if not exists
        conn.execute(
            "INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
            params![version.to_string()],
        )?;
        conn.execute(
            "INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('gabb_version', ?1)",
            params![env!("CARGO_PKG_VERSION")],
        )?;
        conn.execute(
            "INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('created_at', ?1)",
            params![now.to_string()],
        )?;
        conn.execute(
            "INSERT OR IGNORE INTO schema_meta (key, value) VALUES ('last_migration', ?1)",
            params![now.to_string()],
        )?;
        Ok(())
    }

    /// Check if schema_meta table exists (indicates versioned database).
    fn has_schema_meta(conn: &Connection) -> bool {
        conn.query_row(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='schema_meta'",
            [],
            |_| Ok(true),
        )
        .unwrap_or(false)
    }

    fn ensure_column(&self, table: &str, column: &str, ty: &str) -> Result<()> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
        let mut rows = stmt.query([])?;
        while let Some(row) = rows.next()? {
            let name: String = row.get(1)?;
            if name == column {
                return Ok(());
            }
        }
        drop(rows);
        conn.execute(&format!("ALTER TABLE {table} ADD COLUMN {column} {ty}"), [])?;
        Ok(())
    }

    fn ensure_index(&self, index_name: &str, create_sql: &str) -> Result<()> {
        let conn = self.conn.borrow();
        let exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='index' AND name=?1",
                params![index_name],
                |_| Ok(true),
            )
            .unwrap_or(false);
        if !exists {
            conn.execute(create_sql, [])?;
        }
        Ok(())
    }

    pub fn remove_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
        let path_str = normalize_path(path.as_ref());
        // Delete all rows for the file in one transaction so a failure
        // partway through cannot leave the index inconsistent.
        let conn = &mut *self.conn.borrow_mut();
        let tx = conn.transaction()?;
        tx.execute("DELETE FROM files WHERE path = ?1", params![path_str])?;
        tx.execute(
            "DELETE FROM references_tbl WHERE file = ?1",
            params![path_str],
        )?;
        tx.execute(
            "DELETE FROM edges WHERE src IN (SELECT id FROM symbols WHERE file = ?1)",
            params![path_str],
        )?;
        tx.execute("DELETE FROM symbols WHERE file = ?1", params![path_str])?;
        tx.execute("DELETE FROM file_stats WHERE file = ?1", params![path_str])?;
        tx.execute(
            "DELETE FROM file_dependencies WHERE from_file = ?1 OR to_file = ?1",
            params![path_str],
        )?;
        tx.commit()?;
        Ok(())
    }

    pub fn list_paths(&self) -> Result<HashSet<String>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare("SELECT path FROM files")?;
        let rows = stmt
            .query_map([], |row| row.get::<_, String>(0))?
            .collect::<rusqlite::Result<HashSet<String>>>()?;
        Ok(rows)
    }

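    /// Atomically replace all indexed data for `file_record.path`: symbols,
    /// edges, references, the file row, and the pre-computed per-file stats
    /// are updated together in one transaction.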
    pub fn save_file_index(
        &self,
        file_record: &FileRecord,
        symbols: &[SymbolRecord],
        edges: &[EdgeRecord],
        references: &[ReferenceRecord],
    ) -> Result<()> {
        let conn = &mut *self.conn.borrow_mut();
        let tx = conn.transaction()?;
        tx.execute(
            "DELETE FROM references_tbl WHERE file = ?1",
            params![file_record.path],
        )?;
        tx.execute(
            "DELETE FROM edges WHERE src IN (SELECT id FROM symbols WHERE file = ?1)",
            params![file_record.path],
        )?;
        tx.execute(
            "DELETE FROM symbols WHERE file = ?1",
            params![file_record.path],
        )?;

        for sym in symbols {
            tx.execute(
                "INSERT INTO symbols(id, file, kind, name, start, end, qualifier, visibility, container, content_hash) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
                params![
                    sym.id,
                    sym.file,
                    sym.kind,
                    sym.name,
                    sym.start,
                    sym.end,
                    sym.qualifier,
                    sym.visibility,
                    sym.container,
                    sym.content_hash
                ],
            )?;
        }

        for edge in edges {
            tx.execute(
                "INSERT INTO edges(src, dst, kind) VALUES (?1, ?2, ?3)",
                params![edge.src, edge.dst, edge.kind],
            )?;
        }

        for r in references {
            tx.execute(
                "INSERT INTO references_tbl(file, start, end, symbol_id) VALUES (?1, ?2, ?3, ?4)",
                params![r.file, r.start, r.end, r.symbol_id],
            )?;
        }

        tx.execute(
            r#"
            INSERT INTO files(path, hash, mtime, indexed_at)
            VALUES (?1, ?2, ?3, ?4)
            ON CONFLICT(path) DO UPDATE SET
                hash=excluded.hash,
                mtime=excluded.mtime,
                indexed_at=excluded.indexed_at
            "#,
            params![
                file_record.path,
                file_record.hash,
                file_record.mtime,
                file_record.indexed_at
            ],
        )?;

        // Update pre-computed aggregates for file statistics
        let symbol_count = symbols.len() as i64;
        let function_count = symbols.iter().filter(|s| s.kind == "function").count() as i64;
        let class_count = symbols.iter().filter(|s| s.kind == "class").count() as i64;
        let interface_count = symbols.iter().filter(|s| s.kind == "interface").count() as i64;

        tx.execute(
            r#"
            INSERT INTO file_stats(file, symbol_count, function_count, class_count, interface_count)
            VALUES (?1, ?2, ?3, ?4, ?5)
            ON CONFLICT(file) DO UPDATE SET
                symbol_count = excluded.symbol_count,
                function_count = excluded.function_count,
                class_count = excluded.class_count,
                interface_count = excluded.interface_count
            "#,
            params![
                file_record.path,
                symbol_count,
                function_count,
                class_count,
                interface_count
            ],
        )?;

        tx.commit()?;
        Ok(())
    }

    /// Save file index without references (used in two-phase indexing first pass)
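    ///
    /// Illustrative two-phase flow (records hypothetical):
    /// ```ignore
    /// // Pass 1: persist symbols and edges for every file first, so that
    /// // cross-file reference targets exist before resolution.
    /// store.save_file_index_without_refs(&rec, &symbols, &edges)?;
    /// // Pass 2: resolve references against the full symbol table, then store them.
    /// store.save_references(&rec.path, &resolved_refs)?;
    /// ```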
    pub fn save_file_index_without_refs(
        &self,
        file_record: &FileRecord,
        symbols: &[SymbolRecord],
        edges: &[EdgeRecord],
    ) -> Result<()> {
        let conn = &mut *self.conn.borrow_mut();
        let tx = conn.transaction()?;

        // Clear existing data for this file
        tx.execute(
            "DELETE FROM references_tbl WHERE file = ?1",
            params![file_record.path],
        )?;
        tx.execute(
            "DELETE FROM edges WHERE src IN (SELECT id FROM symbols WHERE file = ?1)",
            params![file_record.path],
        )?;
        tx.execute(
            "DELETE FROM symbols WHERE file = ?1",
            params![file_record.path],
        )?;

        for sym in symbols {
            tx.execute(
                "INSERT INTO symbols(id, file, kind, name, start, end, qualifier, visibility, container, content_hash) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
                params![
                    sym.id,
                    sym.file,
                    sym.kind,
                    sym.name,
                    sym.start,
                    sym.end,
                    sym.qualifier,
                    sym.visibility,
                    sym.container,
                    sym.content_hash
                ],
            )?;
        }

        for edge in edges {
            tx.execute(
                "INSERT INTO edges(src, dst, kind) VALUES (?1, ?2, ?3)",
                params![edge.src, edge.dst, edge.kind],
            )?;
        }

        tx.execute(
            r#"
            INSERT INTO files(path, hash, mtime, indexed_at)
            VALUES (?1, ?2, ?3, ?4)
            ON CONFLICT(path) DO UPDATE SET
                hash=excluded.hash,
                mtime=excluded.mtime,
                indexed_at=excluded.indexed_at
            "#,
            params![
                file_record.path,
                file_record.hash,
                file_record.mtime,
                file_record.indexed_at
            ],
        )?;

        // Update pre-computed aggregates for file statistics
        let symbol_count = symbols.len() as i64;
        let function_count = symbols.iter().filter(|s| s.kind == "function").count() as i64;
        let class_count = symbols.iter().filter(|s| s.kind == "class").count() as i64;
        let interface_count = symbols.iter().filter(|s| s.kind == "interface").count() as i64;

        tx.execute(
            r#"
            INSERT INTO file_stats(file, symbol_count, function_count, class_count, interface_count)
            VALUES (?1, ?2, ?3, ?4, ?5)
            ON CONFLICT(file) DO UPDATE SET
                symbol_count = excluded.symbol_count,
                function_count = excluded.function_count,
                class_count = excluded.class_count,
                interface_count = excluded.interface_count
            "#,
            params![
                file_record.path,
                symbol_count,
                function_count,
                class_count,
                interface_count
            ],
        )?;

        tx.commit()?;
        Ok(())
    }

    /// Save resolved references for a file (used in two-phase indexing second pass)
    pub fn save_references(&self, file_path: &str, references: &[ReferenceRecord]) -> Result<()> {
        let conn = &mut *self.conn.borrow_mut();
        let tx = conn.transaction()?;

        // Clear existing references for this file (in case of re-indexing)
        tx.execute(
            "DELETE FROM references_tbl WHERE file = ?1",
            params![file_path],
        )?;

        for r in references {
            tx.execute(
                "INSERT INTO references_tbl(file, start, end, symbol_id) VALUES (?1, ?2, ?3, ?4)",
                params![r.file, r.start, r.end, r.symbol_id],
            )?;
        }

        tx.commit()?;
        Ok(())
    }

    pub fn db_path(&self) -> &Path {
        &self.db_path
    }

    /// Update query optimizer statistics for better index usage.
    /// Should be called after bulk indexing operations.
    pub fn analyze(&self) -> Result<()> {
        self.conn.borrow().execute_batch("ANALYZE")?;
        Ok(())
    }

    /// Get pre-computed statistics for a file (O(1) lookup).
    #[allow(dead_code)]
    pub fn get_file_stats(&self, file: &str) -> Result<Option<FileStats>> {
        let file_norm = normalize_path(Path::new(file));
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare(
            "SELECT file, symbol_count, function_count, class_count, interface_count FROM file_stats WHERE file = ?1",
        )?;
        let mut rows = stmt.query(params![file_norm])?;
        if let Some(row) = rows.next()? {
            Ok(Some(FileStats {
                file: row.get(0)?,
                symbol_count: row.get(1)?,
                function_count: row.get(2)?,
                class_count: row.get(3)?,
                interface_count: row.get(4)?,
            }))
        } else {
            Ok(None)
        }
    }

    /// Get total symbol counts across all indexed files (O(1) aggregate).
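    ///
    /// Illustrative call (the `file` field is empty in the returned totals):
    /// ```ignore
    /// let totals = store.get_total_stats()?;
    /// println!("{} symbols, {} functions", totals.symbol_count, totals.function_count);
    /// ```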
    #[allow(dead_code)]
    pub fn get_total_stats(&self) -> Result<FileStats> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare(
            "SELECT COALESCE(SUM(symbol_count), 0), COALESCE(SUM(function_count), 0), COALESCE(SUM(class_count), 0), COALESCE(SUM(interface_count), 0) FROM file_stats",
        )?;
        let mut rows = stmt.query([])?;
        let row = rows.next()?.expect("aggregate query always returns a row");
        Ok(FileStats {
            file: "".into(),
            symbol_count: row.get(0)?,
            function_count: row.get(1)?,
            class_count: row.get(2)?,
            interface_count: row.get(3)?,
        })
    }

    /// Save file dependencies for a source file, replacing any existing dependencies.
    #[allow(dead_code)]
    pub fn save_file_dependencies(
        &self,
        from_file: &str,
        dependencies: &[FileDependency],
    ) -> Result<()> {
        let from_norm = normalize_path(Path::new(from_file));
        let conn = &mut *self.conn.borrow_mut();
        let tx = conn.transaction()?;

        // Remove existing dependencies for this file
        tx.execute(
            "DELETE FROM file_dependencies WHERE from_file = ?1",
            params![from_norm],
        )?;

        // Insert new dependencies
        for dep in dependencies {
            tx.execute(
                "INSERT OR REPLACE INTO file_dependencies(from_file, to_file, kind) VALUES (?1, ?2, ?3)",
                params![from_norm, normalize_path(Path::new(&dep.to_file)), dep.kind],
            )?;
        }

        tx.commit()?;
        Ok(())
    }

    /// Get files that a given file depends on (imports/uses).
    #[allow(dead_code)]
    pub fn get_file_dependencies(&self, file: &str) -> Result<Vec<FileDependency>> {
        let file_norm = normalize_path(Path::new(file));
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached(
            "SELECT from_file, to_file, kind FROM file_dependencies WHERE from_file = ?1",
        )?;
        let rows = stmt
            .query_map(params![file_norm], |row| {
                Ok(FileDependency {
                    from_file: row.get(0)?,
                    to_file: row.get(1)?,
                    kind: row.get(2)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Get files that depend on a given file (reverse dependencies for invalidation).
    #[allow(dead_code)]
    pub fn get_dependents(&self, file: &str) -> Result<Vec<String>> {
        let file_norm = normalize_path(Path::new(file));
        let conn = self.conn.borrow();
        let mut stmt =
            conn.prepare_cached("SELECT from_file FROM file_dependencies WHERE to_file = ?1")?;
        let rows = stmt
            .query_map(params![file_norm], |row| row.get::<_, String>(0))?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Get all file dependencies in the workspace.
    #[allow(dead_code)]
    pub fn get_all_dependencies(&self) -> Result<Vec<FileDependency>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare("SELECT from_file, to_file, kind FROM file_dependencies")?;
        let rows = stmt
            .query_map([], |row| {
                Ok(FileDependency {
                    from_file: row.get(0)?,
                    to_file: row.get(1)?,
                    kind: row.get(2)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Topologically sort files for rebuild ordering.
    /// Returns files in an order where dependencies come before dependents.
    /// Uses Kahn's algorithm with O(V + E) complexity.
    /// Files with cycles are appended at the end in arbitrary order.
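    ///
    /// Illustrative sketch (file names hypothetical): if `a.ts` imports `b.ts`,
    /// then `b.ts` sorts before `a.ts`:
    /// ```ignore
    /// store.save_file_dependencies("a.ts", &[FileDependency {
    ///     from_file: "a.ts".into(),
    ///     to_file: "b.ts".into(),
    ///     kind: "import".into(),
    /// }])?;
    /// let order = store.topological_sort(&["a.ts".into(), "b.ts".into()])?;
    /// assert_eq!(order, vec!["b.ts".to_string(), "a.ts".to_string()]);
    /// ```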
    #[allow(dead_code)]
    pub fn topological_sort(&self, files: &[String]) -> Result<Vec<String>> {
        use std::collections::{HashMap, VecDeque};

        if files.is_empty() {
            return Ok(Vec::new());
        }

        // Build adjacency list and in-degree count for the subgraph
        let file_set: HashSet<String> = files.iter().cloned().collect();
        let mut in_degree: HashMap<String, usize> = HashMap::new();
        let mut adjacency: HashMap<String, Vec<String>> = HashMap::new();

        // Initialize all files with 0 in-degree
        for file in files {
            in_degree.entry(file.clone()).or_insert(0);
            adjacency.entry(file.clone()).or_default();
        }

        // Build graph from dependencies (only within the file set)
        for file in files {
            let deps = self.get_file_dependencies(file)?;
            for dep in deps {
                // Only count edges where both files are in our set
                if file_set.contains(&dep.to_file) {
                    // from_file depends on to_file, so to_file -> from_file edge
                    adjacency
                        .entry(dep.to_file.clone())
                        .or_default()
                        .push(file.clone());
                    *in_degree.entry(file.clone()).or_insert(0) += 1;
                }
            }
        }

        // Kahn's algorithm
        let mut queue: VecDeque<String> = VecDeque::new();
        let mut result = Vec::new();

        // Start with nodes that have no dependencies (in-degree 0)
        for (file, &degree) in &in_degree {
            if degree == 0 {
                queue.push_back(file.clone());
            }
        }

        while let Some(file) = queue.pop_front() {
            result.push(file.clone());

            if let Some(dependents) = adjacency.get(&file) {
                for dependent in dependents {
                    if let Some(degree) = in_degree.get_mut(dependent) {
                        *degree -= 1;
                        if *degree == 0 {
                            queue.push_back(dependent.clone());
                        }
                    }
                }
            }
        }

        // Handle any remaining files (cycles) by appending them
        for file in files {
            if !result.contains(file) {
                result.push(file.clone());
            }
        }

        Ok(result)
    }

    /// Get all files that need to be invalidated when a file changes.
    /// Returns the transitive closure of reverse dependencies.
    /// Useful for incremental rebuilds when a source file is modified.
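    ///
    /// Illustrative sketch: if `a.ts` imports `b.ts` and `b.ts` changes, both
    /// files are returned, with `b.ts` ordered first for rebuild:
    /// ```ignore
    /// let to_rebuild = store.get_invalidation_set("b.ts")?; // ["b.ts", "a.ts"]
    /// ```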
    #[allow(dead_code)]
    pub fn get_invalidation_set(&self, changed_file: &str) -> Result<Vec<String>> {
        let file_norm = normalize_path(Path::new(changed_file));
        let mut visited = HashSet::new();
        let mut to_visit = vec![file_norm.clone()];
        let mut result = Vec::new();

        while let Some(file) = to_visit.pop() {
            if visited.contains(&file) {
                continue;
            }
            visited.insert(file.clone());
            result.push(file.clone());

            // Get all files that depend on this file
            let dependents = self.get_dependents(&file)?;
            for dependent in dependents {
                if !visited.contains(&dependent) {
                    to_visit.push(dependent);
                }
            }
        }

        // Sort topologically for proper rebuild order
        self.topological_sort(&result)
    }

    /// Get files that need invalidation for multiple changed files.
    /// Returns the union of invalidation sets, topologically sorted.
    #[allow(dead_code)]
    pub fn get_batch_invalidation_set(&self, changed_files: &[String]) -> Result<Vec<String>> {
        let mut all_files = HashSet::new();

        for file in changed_files {
            let invalidated = self.get_invalidation_set(file)?;
            all_files.extend(invalidated);
        }

        let files: Vec<String> = all_files.into_iter().collect();
        self.topological_sort(&files)
    }

    /// Load all dependencies into a DependencyCache for O(1) lookups.
    /// Call this once at startup for long-running processes.
    #[allow(dead_code)]
    pub fn load_dependency_cache(&self) -> Result<DependencyCache> {
        let deps = self.get_all_dependencies()?;
        let mut cache = DependencyCache::new();
        cache.populate(&deps);
        Ok(cache)
    }

    pub fn list_symbols(
        &self,
        file: Option<&str>,
        kind: Option<&str>,
        name: Option<&str>,
        limit: Option<usize>,
    ) -> Result<Vec<SymbolRecord>> {
        let file_norm = file.map(|f| normalize_path(Path::new(f)));
        let mut sql = String::from(
            "SELECT id, file, kind, name, start, end, qualifier, visibility, container, content_hash FROM symbols",
        );
        let mut values: Vec<Value> = Vec::new();
        let mut clauses: Vec<&str> = Vec::new();

        if let Some(f) = file_norm {
            clauses.push("file = ?");
            values.push(Value::from(f));
        }

        if let Some(k) = kind {
            clauses.push("kind = ?");
            values.push(Value::from(k.to_string()));
        }

        if let Some(n) = name {
            clauses.push("name = ?");
            values.push(Value::from(n.to_string()));
        }

        if !clauses.is_empty() {
            sql.push_str(" WHERE ");
            sql.push_str(&clauses.join(" AND "));
        }

        if let Some(lim) = limit {
            sql.push_str(" LIMIT ?");
            values.push(Value::from(lim as i64));
        }

        let conn = self.conn.borrow();
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt
            .query_map(params_from_iter(values.iter()), |row| {
                Ok(SymbolRecord {
                    id: row.get(0)?,
                    file: row.get(1)?,
                    kind: row.get(2)?,
                    name: row.get(3)?,
                    start: row.get(4)?,
                    end: row.get(5)?,
                    qualifier: row.get(6)?,
                    visibility: row.get(7)?,
                    container: row.get(8)?,
                    content_hash: row.get(9)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Query edges by destination with cached prepared statement.
    pub fn edges_to(&self, dst: &str) -> Result<Vec<EdgeRecord>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached("SELECT src, dst, kind FROM edges WHERE dst = ?1")?;
        let edges = stmt
            .query_map(params![dst], |row| {
                Ok(EdgeRecord {
                    src: row.get(0)?,
                    dst: row.get(1)?,
                    kind: row.get(2)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(edges)
    }

    /// Query edges by source with cached prepared statement.
    pub fn edges_from(&self, src: &str) -> Result<Vec<EdgeRecord>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached("SELECT src, dst, kind FROM edges WHERE src = ?1")?;
        let edges = stmt
            .query_map(params![src], |row| {
                Ok(EdgeRecord {
                    src: row.get(0)?,
                    dst: row.get(1)?,
                    kind: row.get(2)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(edges)
    }

    pub fn symbols_by_ids(&self, ids: &[String]) -> Result<Vec<SymbolRecord>> {
        if ids.is_empty() {
            return Ok(Vec::new());
        }
        let placeholders = std::iter::repeat_n("?", ids.len())
            .collect::<Vec<_>>()
            .join(", ");
        let sql = format!(
            "SELECT id, file, kind, name, start, end, qualifier, visibility, container, content_hash FROM symbols WHERE id IN ({})",
            placeholders
        );
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare(&sql)?;
        let rows = stmt
            .query_map(params_from_iter(ids.iter()), |row| {
                Ok(SymbolRecord {
                    id: row.get(0)?,
                    file: row.get(1)?,
                    kind: row.get(2)?,
                    name: row.get(3)?,
                    start: row.get(4)?,
                    end: row.get(5)?,
                    qualifier: row.get(6)?,
                    visibility: row.get(7)?,
                    container: row.get(8)?,
                    content_hash: row.get(9)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Query references by symbol ID with cached prepared statement.
    pub fn references_for_symbol(&self, symbol_id: &str) -> Result<Vec<ReferenceRecord>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached(
            "SELECT file, start, end, symbol_id FROM references_tbl WHERE symbol_id = ?1",
        )?;
        let rows = stmt
            .query_map(params![symbol_id], |row| {
                Ok(ReferenceRecord {
                    file: row.get(0)?,
                    start: row.get(1)?,
                    end: row.get(2)?,
                    symbol_id: row.get(3)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Find a reference at a specific file and byte offset.
    /// Returns the reference record if the offset falls within a recorded reference span.
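    ///
    /// Illustrative lookup at byte offset 120 of a file (values hypothetical):
    /// ```ignore
    /// if let Some(r) = store.reference_at_position("src/app.ts", 120)? {
    ///     println!("offset 120 references symbol {}", r.symbol_id);
    /// }
    /// ```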
    pub fn reference_at_position(
        &self,
        file: &str,
        offset: i64,
    ) -> Result<Option<ReferenceRecord>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached(
            "SELECT file, start, end, symbol_id FROM references_tbl
             WHERE file = ?1 AND start <= ?2 AND end > ?2
             ORDER BY (end - start) ASC
             LIMIT 1",
        )?;
        let result = stmt
            .query_row(params![file, offset], |row| {
                Ok(ReferenceRecord {
                    file: row.get(0)?,
                    start: row.get(1)?,
                    end: row.get(2)?,
                    symbol_id: row.get(3)?,
                })
            })
            .optional()?;
        Ok(result)
    }

    /// Find all groups of duplicate symbols (symbols with the same content_hash).
    /// Returns groups sorted by count (most duplicates first).
    /// Only includes groups of at least `min_count` symbols with a non-null content_hash.
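    ///
    /// Illustrative call: groups of 2+ functions sharing a body hash:
    /// ```ignore
    /// let groups = store.find_duplicate_groups(2, Some("function"), None)?;
    /// for g in &groups {
    ///     println!("{} copies of hash {}", g.symbols.len(), g.content_hash);
    /// }
    /// ```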
    pub fn find_duplicate_groups(
        &self,
        min_count: usize,
        kind_filter: Option<&str>,
        file_filter: Option<&[String]>,
    ) -> Result<Vec<DuplicateGroup>> {
        let conn = self.conn.borrow();

        // First, find all content_hashes with duplicates
        let mut sql = String::from(
            "SELECT content_hash, COUNT(*) as cnt FROM symbols
             WHERE content_hash IS NOT NULL",
        );
        let mut values: Vec<Value> = Vec::new();

        if let Some(kind) = kind_filter {
            sql.push_str(" AND kind = ?");
            values.push(Value::from(kind.to_string()));
        }

        if let Some(files) = file_filter {
            if !files.is_empty() {
                let placeholders = std::iter::repeat_n("?", files.len())
                    .collect::<Vec<_>>()
                    .join(", ");
                sql.push_str(&format!(" AND file IN ({})", placeholders));
                for f in files {
                    values.push(Value::from(f.clone()));
                }
            }
        }

        sql.push_str(" GROUP BY content_hash HAVING COUNT(*) >= ?");
        values.push(Value::from(min_count as i64));
        sql.push_str(" ORDER BY cnt DESC");

        let mut stmt = conn.prepare(&sql)?;
        let hashes: Vec<String> = stmt
            .query_map(params_from_iter(values.iter()), |row| row.get(0))?
            .collect::<rusqlite::Result<Vec<_>>>()?;

        // Now fetch symbols for each hash
        let mut groups = Vec::new();
        for hash in hashes {
            let symbols = self.symbols_by_content_hash(&hash)?;
            if symbols.len() >= min_count {
                groups.push(DuplicateGroup {
                    content_hash: hash,
                    symbols,
                });
            }
        }

        Ok(groups)
    }

    /// Find all symbols with a specific content hash.
    pub fn symbols_by_content_hash(&self, hash: &str) -> Result<Vec<SymbolRecord>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached(
            "SELECT id, file, kind, name, start, end, qualifier, visibility, container, content_hash
             FROM symbols WHERE content_hash = ?1"
        )?;
        let rows = stmt
            .query_map(params![hash], |row| {
                Ok(SymbolRecord {
                    id: row.get(0)?,
                    file: row.get(1)?,
                    kind: row.get(2)?,
                    name: row.get(3)?,
                    start: row.get(4)?,
                    end: row.get(5)?,
                    qualifier: row.get(6)?,
                    visibility: row.get(7)?,
                    container: row.get(8)?,
                    content_hash: row.get(9)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Get content hashes for symbols in specific files.
    /// Used for --uncommitted flag to find duplicates involving changed files.
    #[allow(dead_code)]
    pub fn content_hashes_in_files(&self, files: &[String]) -> Result<HashSet<String>> {
        if files.is_empty() {
            return Ok(HashSet::new());
        }
        let conn = self.conn.borrow();
        let placeholders = std::iter::repeat_n("?", files.len())
            .collect::<Vec<_>>()
            .join(", ");
        let sql = format!(
            "SELECT DISTINCT content_hash FROM symbols WHERE file IN ({}) AND content_hash IS NOT NULL",
            placeholders
        );
        let mut stmt = conn.prepare(&sql)?;
        let hashes: HashSet<String> = stmt
            .query_map(params_from_iter(files.iter()), |row| row.get(0))?
            .collect::<rusqlite::Result<HashSet<_>>>()?;
        Ok(hashes)
    }

    /// Search symbols using FTS5 full-text search.
    /// Supports prefix queries (e.g., "getUser*") and substring matching via trigram tokenization.
    /// Uses cached prepared statement for repeated searches.
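    ///
    /// Illustrative queries (FTS5 query syntax; names hypothetical):
    /// ```ignore
    /// let by_prefix = store.search_symbols_fts("getUser*")?;
    /// let by_substring = store.search_symbols_fts("User")?; // trigram substring match
    /// ```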
    #[allow(dead_code)]
    pub fn search_symbols_fts(&self, query: &str) -> Result<Vec<SymbolRecord>> {
        let conn = self.conn.borrow();
        let mut stmt = conn.prepare_cached(
            r#"
            SELECT s.id, s.file, s.kind, s.name, s.start, s.end, s.qualifier, s.visibility, s.container, s.content_hash
            FROM symbols s
            JOIN symbols_fts fts ON s.rowid = fts.rowid
            WHERE symbols_fts MATCH ?1
            ORDER BY rank
            "#,
        )?;
        let rows = stmt
            .query_map(params![query], |row| {
                Ok(SymbolRecord {
                    id: row.get(0)?,
                    file: row.get(1)?,
                    kind: row.get(2)?,
                    name: row.get(3)?,
                    start: row.get(4)?,
                    end: row.get(5)?,
                    qualifier: row.get(6)?,
                    visibility: row.get(7)?,
                    container: row.get(8)?,
                    content_hash: row.get(9)?,
                })
            })?
            .collect::<rusqlite::Result<Vec<_>>>()?;
        Ok(rows)
    }

    /// Query symbols with cursor-based pagination for streaming large result sets.
    /// Returns (results, next_cursor) where next_cursor can be used to fetch the next page.
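    ///
    /// Illustrative paging loop (filters hypothetical):
    /// ```ignore
    /// let mut cursor: Option<String> = None;
    /// loop {
    ///     let (page, next) =
    ///         store.list_symbols_paginated(None, Some("function"), None, cursor.as_deref(), 100)?;
    ///     for sym in &page { /* process */ }
    ///     match next {
    ///         Some(c) => cursor = Some(c),
    ///         None => break,
    ///     }
    /// }
    /// ```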
1434    #[allow(dead_code)]
1435    pub fn list_symbols_paginated(
1436        &self,
1437        file: Option<&str>,
1438        kind: Option<&str>,
1439        name: Option<&str>,
1440        cursor: Option<&str>,
1441        page_size: usize,
1442    ) -> Result<(Vec<SymbolRecord>, Option<String>)> {
1443        let file_norm = file.map(|f| normalize_path(Path::new(f)));
1444        let mut sql = String::from(
1445            "SELECT id, file, kind, name, start, end, qualifier, visibility, container, content_hash FROM symbols",
1446        );
1447        let mut values: Vec<Value> = Vec::new();
1448        let mut clauses: Vec<&str> = Vec::new();
1449
1450        if let Some(f) = &file_norm {
1451            clauses.push("file = ?");
1452            values.push(Value::from(f.clone()));
1453        }
1454
1455        if let Some(k) = kind {
1456            clauses.push("kind = ?");
1457            values.push(Value::from(k.to_string()));
1458        }
1459
1460        if let Some(n) = name {
1461            clauses.push("name = ?");
1462            values.push(Value::from(n.to_string()));
1463        }
1464
1465        // Cursor-based pagination using id as cursor (keyset pagination)
1466        if let Some(c) = cursor {
1467            clauses.push("id > ?");
1468            values.push(Value::from(c.to_string()));
1469        }
1470
1471        if !clauses.is_empty() {
1472            sql.push_str(" WHERE ");
1473            sql.push_str(&clauses.join(" AND "));
1474        }
1475
1476        // Order by id for consistent pagination
1477        sql.push_str(" ORDER BY id");
1478
1479        // Fetch one extra to determine if there's a next page
1480        sql.push_str(" LIMIT ?");
1481        values.push(Value::from((page_size + 1) as i64));
1482
1483        let conn = self.conn.borrow();
1484        let mut stmt = conn.prepare(&sql)?;
1485        let mut rows: Vec<SymbolRecord> = stmt
1486            .query_map(params_from_iter(values.iter()), |row| {
1487                Ok(SymbolRecord {
1488                    id: row.get(0)?,
1489                    file: row.get(1)?,
1490                    kind: row.get(2)?,
1491                    name: row.get(3)?,
1492                    start: row.get(4)?,
1493                    end: row.get(5)?,
1494                    qualifier: row.get(6)?,
1495                    visibility: row.get(7)?,
1496                    container: row.get(8)?,
1497                    content_hash: row.get(9)?,
1498                })
1499            })?
1500            .collect::<rusqlite::Result<Vec<_>>>()?;
1501
1502        // Determine next cursor
1503        let next_cursor = if rows.len() > page_size {
1504            rows.pop(); // Remove the extra row
1505            rows.last().map(|r| r.id.clone())
1506        } else {
1507            None
1508        };
1509
1510        Ok((rows, next_cursor))
1511    }
1512}
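
// A minimal usage sketch for the pagination API above, assuming a caller that
// wants to stream every symbol without materializing the full result set; the
// helper name and the page size of 500 are illustrative, not part of the
// existing API.
#[allow(dead_code)]
fn for_each_symbol(store: &IndexStore, mut f: impl FnMut(&SymbolRecord)) -> Result<()> {
    let mut cursor: Option<String> = None;
    loop {
        // Fetch one page, resuming from the cursor returned by the previous call.
        let (page, next) = store.list_symbols_paginated(None, None, None, cursor.as_deref(), 500)?;
        for sym in &page {
            f(sym);
        }
        match next {
            Some(c) => cursor = Some(c),
            None => return Ok(()),
        }
    }
}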

pub fn normalize_path(path: &Path) -> String {
    path.to_string_lossy().replace('\\', "/")
}

pub fn now_unix() -> i64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs() as i64
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    fn mk_file_record(path: &Path) -> FileRecord {
        FileRecord {
            path: normalize_path(path),
            hash: "abc".into(),
            mtime: 0,
            indexed_at: now_unix(),
        }
    }

    fn mk_symbol(path: &Path, name: &str) -> SymbolRecord {
        SymbolRecord {
            id: format!("{}#0-1", normalize_path(path)),
            file: normalize_path(path),
            kind: "function".into(),
            name: name.into(),
            start: 0,
            end: 1,
            qualifier: None,
            visibility: None,
            container: None,
            content_hash: None,
        }
    }
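
    /// Return the `detail` column of the first `EXPLAIN QUERY PLAN` row for
    /// `sql`. A small shared helper added to condense the plan-inspection
    /// pattern repeated by the index tests below.
    fn query_plan<P: rusqlite::Params>(conn: &Connection, sql: &str, params: P) -> String {
        let mut stmt = conn
            .prepare(&format!("EXPLAIN QUERY PLAN {}", sql))
            .unwrap();
        stmt.query_map(params, |row| row.get::<_, String>(3))
            .unwrap()
            .next()
            .expect("EXPLAIN QUERY PLAN returned no rows")
            .unwrap()
    }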

    #[test]
    fn store_roundtrip_save_list_and_remove() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let file_path = dir.path().join("foo.ts");
        let file_rec = mk_file_record(&file_path);
        let sym = mk_symbol(&file_path, "hello");
        let edges = vec![EdgeRecord {
            src: sym.id.clone(),
            dst: "target".into(),
            kind: "implements".into(),
        }];
        let refs = vec![ReferenceRecord {
            file: sym.file.clone(),
            start: 0,
            end: 1,
            symbol_id: sym.id.clone(),
        }];

        store
            .save_file_index(&file_rec, std::slice::from_ref(&sym), &edges, &refs)
            .unwrap();

        let paths = store.list_paths().unwrap();
        assert!(paths.contains(&file_rec.path));

        let symbols = store
            .list_symbols(Some(&file_rec.path), None, Some("hello"), None)
            .unwrap();
        assert_eq!(symbols.len(), 1);
        assert_eq!(symbols[0].name, "hello");

        let edges_back = store.edges_to("target").unwrap();
        assert_eq!(edges_back.len(), 1);
        assert_eq!(edges_back[0].src, sym.id);

        let edges_out = store.edges_from(&sym.id).unwrap();
        assert_eq!(edges_out.len(), 1);
        assert_eq!(edges_out[0].dst, "target");

        store.remove_file(&file_path).unwrap();
        let paths_after = store.list_paths().unwrap();
        assert!(!paths_after.contains(&file_rec.path));
    }
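
    /// A small additional check, added as a sketch: the path normalization
    /// helper should turn backslashes into forward slashes so index keys
    /// compare equally across platforms.
    #[test]
    fn normalize_path_uses_forward_slashes() {
        assert_eq!(normalize_path(Path::new("src\\lib\\mod.rs")), "src/lib/mod.rs");
        assert_eq!(normalize_path(Path::new("src/lib/mod.rs")), "src/lib/mod.rs");
    }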

    /// Test that B-tree indices exist for O(log n) lookups
    #[test]
    fn btree_indices_exist() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();
        let mut stmt = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='index' ORDER BY name")
            .unwrap();
        let indices: Vec<String> = stmt
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<rusqlite::Result<Vec<_>>>()
            .unwrap();

        // Critical indices for O(log n) lookups, covering indices for
        // index-only scans, and compound indices for multi-filter queries.
        let expected = [
            ("idx_symbols_name", "symbol name lookups"),
            ("idx_symbols_position", "position queries"),
            ("idx_symbols_kind_name", "kind+name compound queries"),
            ("idx_refs_file_position", "reference lookups"),
            ("idx_edges_src_covering", "index-only edge scans by src"),
            ("idx_edges_dst_covering", "index-only edge scans by dst"),
            ("idx_refs_symbol_covering", "index-only reference scans"),
            ("idx_symbols_file_kind_name", "file+kind+name compound queries"),
            ("idx_symbols_kind_visibility", "kind+visibility queries"),
        ];
        for (name, purpose) in expected {
            assert!(
                indices.iter().any(|n| n == name),
                "Missing {} index for {}. Found: {:?}",
                name,
                purpose,
                indices
            );
        }
    }

    /// Test that symbol name lookup uses the index (O(log n))
    #[test]
    fn symbol_name_lookup_uses_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();
        let plan = query_plan(&conn, "SELECT * FROM symbols WHERE name = ?", ["test"]);

        assert!(
            plan.contains("idx_symbols_name") || plan.contains("USING INDEX"),
            "Symbol name lookup not using index. Query plan: {}",
            plan
        );
    }

    /// Test that position-based symbol lookup uses covering index
    #[test]
    fn position_lookup_uses_covering_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE file = ? AND start <= ? AND end >= ?",
            ["test.ts", "100", "100"],
        );

        assert!(
            plan.contains("idx_symbols_position") || plan.contains("USING INDEX"),
            "Position lookup not using index. Query plan: {}",
            plan
        );
    }

    /// Test that ANALYZE updates optimizer statistics
    #[test]
    fn analyze_updates_statistics() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Insert some test data
        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = (0..100)
            .map(|i| SymbolRecord {
                id: format!("sym_{}", i),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: format!("func_{}", i),
                start: i * 10,
                end: i * 10 + 5,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            })
            .collect();

        store
            .save_file_index(&file_rec, &symbols, &[], &[])
            .unwrap();

        // Run ANALYZE
        store.analyze().unwrap();

        // Verify sqlite_stat1 table exists and has data
        let conn = store.conn.borrow();
        let count: i64 = conn
            .query_row("SELECT COUNT(*) FROM sqlite_stat1", [], |row| row.get(0))
            .unwrap();
        assert!(count > 0, "ANALYZE should populate sqlite_stat1 table");
    }

    /// Test O(log n) performance characteristics by verifying index usage on filtered queries
    #[test]
    fn filtered_queries_use_compound_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Test kind+name compound index
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE kind = ? AND name = ?",
            ["function", "test"],
        );

        assert!(
            plan.contains("idx_symbols_kind_name") || plan.contains("USING INDEX"),
            "Kind+name query not using compound index. Query plan: {}",
            plan
        );
    }

    /// Test that reference lookups use covering index for symbol_id queries
    #[test]
    fn reference_symbol_lookup_uses_covering_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query for references by symbol_id - this should use the covering index
        let plan = query_plan(
            &conn,
            "SELECT file, start, end, symbol_id FROM references_tbl WHERE symbol_id = ?",
            ["test_sym"],
        );

        // Must show "COVERING INDEX" to avoid table lookups
        assert!(
            plan.contains("COVERING INDEX"),
            "Reference symbol lookup must use COVERING INDEX to avoid table lookups. Query plan: {}",
            plan
        );
    }

    /// Test that edges lookup by dst achieves an index-only scan
    #[test]
    fn edges_dst_lookup_uses_covering_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query edges by dst - should use covering index
        let plan = query_plan(
            &conn,
            "SELECT src, dst, kind FROM edges WHERE dst = ?",
            ["target"],
        );

        // Must show "COVERING INDEX" to avoid table lookups
        assert!(
            plan.contains("COVERING INDEX"),
            "Edges dst lookup must use COVERING INDEX to avoid table lookups. Query plan: {}",
            plan
        );
    }

    /// Test that edges lookup by src achieves an index-only scan
    #[test]
    fn edges_src_lookup_uses_covering_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query edges by src - should use covering index
        let plan = query_plan(
            &conn,
            "SELECT src, dst, kind FROM edges WHERE src = ?",
            ["source"],
        );

        // Must show "COVERING INDEX" to avoid table lookups
        assert!(
            plan.contains("COVERING INDEX"),
            "Edges src lookup must use COVERING INDEX to avoid table lookups. Query plan: {}",
            plan
        );
    }

    /// Test compound index for file+name queries (common pattern in resolve_symbol_at)
    #[test]
    fn file_and_name_query_uses_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query by file and name - should use an index
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE file = ? AND name = ?",
            ["test.ts", "foo"],
        );

        // Should use an index, not a full table scan
        assert!(
            plan.contains("USING INDEX") || plan.contains("SEARCH"),
            "File+name query should use index. Query plan: {}",
            plan
        );
    }

    /// Test compound index for file+kind queries
    #[test]
    fn file_and_kind_query_uses_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query by file and kind - should use an index
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE file = ? AND kind = ?",
            ["test.ts", "function"],
        );

        // Should use an index, not a full table scan
        assert!(
            plan.contains("USING INDEX") || plan.contains("SEARCH"),
            "File+kind query should use index. Query plan: {}",
            plan
        );
    }

    /// Test compound index for the three-way filter (file + kind + name)
    #[test]
    fn file_kind_name_query_uses_compound_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query by file, kind, and name - should use the compound index
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE file = ? AND kind = ? AND name = ?",
            ["test.ts", "function", "foo"],
        );

        // Should use the compound index for best performance
        assert!(
            plan.contains("idx_symbols_file_kind_name"),
            "File+kind+name query should use compound index idx_symbols_file_kind_name. Query plan: {}",
            plan
        );
    }

    /// Test that FTS5 table exists for full-text symbol search
    #[test]
    fn fts5_symbols_table_exists() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();
        let table_exists: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='symbols_fts'",
                [],
                |row| row.get(0),
            )
            .unwrap();

        assert_eq!(table_exists, 1, "symbols_fts FTS5 table should exist");
    }

    /// Test FTS5 prefix search on symbol names
    #[test]
    fn fts5_prefix_search_works() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Insert test symbols
        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = vec![
            SymbolRecord {
                id: "sym_1".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "getUserProfile".into(),
                start: 0,
                end: 10,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_2".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "getUserSettings".into(),
                start: 20,
                end: 30,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_3".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "setUserProfile".into(),
                start: 40,
                end: 50,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
        ];

        store
            .save_file_index(&file_rec, &symbols, &[], &[])
            .unwrap();

        // Search with prefix "getUser*"
        let results = store.search_symbols_fts("getUser*").unwrap();
        assert_eq!(
            results.len(),
            2,
            "Should find 2 symbols starting with 'getUser'"
        );
        assert!(results.iter().any(|s| s.name == "getUserProfile"));
        assert!(results.iter().any(|s| s.name == "getUserSettings"));
    }

    /// Test that kind+visibility queries use the tertiary index
    #[test]
    fn kind_visibility_query_uses_tertiary_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query by kind and visibility - should use the tertiary index
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE kind = ? AND visibility = ?",
            ["function", "public"],
        );

        // Should use idx_symbols_kind_visibility index
        assert!(
            plan.contains("idx_symbols_kind_visibility") || plan.contains("USING INDEX"),
            "Kind+visibility query should use idx_symbols_kind_visibility index. Query plan: {}",
            plan
        );
    }

    /// Test that position queries use the idx_symbols_position index
    #[test]
    fn position_query_uses_secondary_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // Query symbols at a specific position (file + byte offset range)
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols WHERE file = ? AND start <= ? AND ? < end",
            ["test.ts", "100", "100"],
        );

        // Should use idx_symbols_position index
        assert!(
            plan.contains("idx_symbols_position") || plan.contains("USING INDEX"),
            "Position query should use idx_symbols_position index. Query plan: {}",
            plan
        );
    }

    /// Test FTS5 substring/trigram search
    #[test]
    fn fts5_substring_search_works() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Insert test symbols
        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = vec![
            SymbolRecord {
                id: "sym_1".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "getUserProfile".into(),
                start: 0,
                end: 10,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_2".into(),
                file: normalize_path(&file_path),
                kind: "class".into(),
                name: "UserProfileService".into(),
                start: 20,
                end: 30,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
        ];

        store
            .save_file_index(&file_rec, &symbols, &[], &[])
            .unwrap();

        // Search for "Profile" substring
        let results = store.search_symbols_fts("Profile").unwrap();
        assert_eq!(
            results.len(),
            2,
            "Should find 2 symbols containing 'Profile'"
        );
    }

    /// Test that FTS5 efficiently handles prefix queries for autocomplete
    #[test]
    fn fts5_handles_prefix_autocomplete() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

        // FTS5 prefix queries use the trigram index efficiently
        let plan = query_plan(
            &conn,
            "SELECT * FROM symbols_fts WHERE symbols_fts MATCH 'get*'",
            [],
        );

        // FTS5 uses its internal index structure for matching
        assert!(
            plan.contains("symbols_fts") || plan.contains("VIRTUAL TABLE"),
            "FTS5 prefix query should use virtual table index. Query plan: {}",
            plan
        );
    }

    /// Test prefix search functionality using FTS5
    #[test]
    fn prefix_search_returns_matching_symbols() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Insert test symbols
        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = vec![
            SymbolRecord {
                id: "sym_1".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "getUser".into(),
                start: 0,
                end: 10,
                qualifier: None,
                visibility: Some("public".into()),
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_2".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "getProfile".into(),
                start: 20,
                end: 30,
                qualifier: None,
                visibility: Some("public".into()),
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_3".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "setUser".into(),
                start: 40,
                end: 50,
                qualifier: None,
                visibility: Some("private".into()),
                container: None,
                content_hash: None,
            },
        ];

        store
            .save_file_index(&file_rec, &symbols, &[], &[])
            .unwrap();

        // Use FTS5 for prefix search
        let results = store.search_symbols_fts("get*").unwrap();
        assert_eq!(
            results.len(),
            2,
            "Should find 2 symbols starting with 'get'"
        );
        assert!(results.iter().all(|s| s.name.starts_with("get")));
    }

    /// Test cursor-based pagination for streaming large result sets
    #[test]
    fn pagination_streams_results_in_pages() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Insert 10 test symbols
        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = (0..10)
            .map(|i| SymbolRecord {
                id: format!("sym_{:02}", i), // Zero-padded for consistent ordering
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: format!("func_{}", i),
                start: i * 10,
                end: i * 10 + 5,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            })
            .collect();

        store
            .save_file_index(&file_rec, &symbols, &[], &[])
            .unwrap();

        // First page (3 items)
        let (page1, cursor1) = store
            .list_symbols_paginated(None, None, None, None, 3)
            .unwrap();
        assert_eq!(page1.len(), 3, "First page should have 3 items");
        assert!(cursor1.is_some(), "Should have cursor for next page");

        // Second page using cursor
        let (page2, cursor2) = store
            .list_symbols_paginated(None, None, None, cursor1.as_deref(), 3)
            .unwrap();
        assert_eq!(page2.len(), 3, "Second page should have 3 items");
        assert!(cursor2.is_some(), "Should have cursor for next page");

        // Verify no overlap between pages
        let page1_ids: Vec<_> = page1.iter().map(|s| &s.id).collect();
        let page2_ids: Vec<_> = page2.iter().map(|s| &s.id).collect();
        assert!(
            page1_ids.iter().all(|id| !page2_ids.contains(id)),
            "Pages should not overlap"
        );

        // Continue until exhausted
        let (page3, cursor3) = store
            .list_symbols_paginated(None, None, None, cursor2.as_deref(), 3)
            .unwrap();
        assert_eq!(page3.len(), 3, "Third page should have 3 items");

        let (page4, cursor4) = store
            .list_symbols_paginated(None, None, None, cursor3.as_deref(), 3)
            .unwrap();
        assert_eq!(page4.len(), 1, "Fourth page should have 1 item");
        assert!(cursor4.is_none(), "No more pages");
    }

    /// Test cold-start query performance (<50ms requirement)
    #[test]
    fn cold_start_query_completes_under_50ms() {
        use std::time::Instant;

        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");

        // Create and populate index with substantial data
        {
            let store = IndexStore::open(&db_path).unwrap();
            let file_path = dir.path().join("test.ts");
            let file_rec = mk_file_record(&file_path);

            // Insert 1000 symbols to simulate a real codebase
            let symbols: Vec<SymbolRecord> = (0..1000)
                .map(|i| SymbolRecord {
                    id: format!("sym_{:04}", i),
                    file: normalize_path(&file_path),
                    kind: if i % 3 == 0 {
                        "function"
                    } else if i % 3 == 1 {
                        "class"
                    } else {
                        "interface"
                    }
                    .into(),
                    name: format!("symbol_{}", i),
                    start: i * 100,
                    end: i * 100 + 50,
                    qualifier: Some(format!("module{}", i % 10)),
                    visibility: Some(if i % 2 == 0 { "public" } else { "private" }.into()),
                    container: None,
                    content_hash: None,
                })
                .collect();

            store
                .save_file_index(&file_rec, &symbols, &[], &[])
                .unwrap();
            store.analyze().unwrap();
        } // Close the store to simulate cold start

        // Cold start: open fresh connection and query
        let start = Instant::now();
        let store = IndexStore::open(&db_path).unwrap();

        // Perform typical queries
        let _symbols = store.list_symbols(None, Some("function"), None, Some(10));
        let _search = store.search_symbols_fts("symbol*");
        let _paginated = store.list_symbols_paginated(None, None, None, None, 10);

        let elapsed = start.elapsed();
        assert!(
            elapsed.as_millis() < 50,
            "Cold-start queries should complete in <50ms, took {}ms",
            elapsed.as_millis()
        );
    }

    /// Test pre-computed file statistics are maintained correctly
    #[test]
    fn file_stats_aggregates_computed_on_index() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = vec![
            SymbolRecord {
                id: "sym_1".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "func1".into(),
                start: 0,
                end: 10,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_2".into(),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: "func2".into(),
                start: 20,
                end: 30,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_3".into(),
                file: normalize_path(&file_path),
                kind: "class".into(),
                name: "MyClass".into(),
                start: 40,
                end: 50,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
            SymbolRecord {
                id: "sym_4".into(),
                file: normalize_path(&file_path),
                kind: "interface".into(),
                name: "MyInterface".into(),
                start: 60,
                end: 70,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            },
        ];

        store
            .save_file_index(&file_rec, &symbols, &[], &[])
            .unwrap();

        // Verify file stats
        let stats = store
            .get_file_stats(&normalize_path(&file_path))
            .unwrap()
            .expect("file stats should exist");
        assert_eq!(stats.symbol_count, 4);
        assert_eq!(stats.function_count, 2);
        assert_eq!(stats.class_count, 1);
        assert_eq!(stats.interface_count, 1);
    }

    /// Test total stats aggregate across multiple files
    #[test]
    fn total_stats_aggregates_all_files() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Index first file
        let file1 = dir.path().join("file1.ts");
        let rec1 = mk_file_record(&file1);
        let syms1: Vec<SymbolRecord> = (0..5)
            .map(|i| SymbolRecord {
                id: format!("f1_sym_{}", i),
                file: normalize_path(&file1),
                kind: "function".into(),
                name: format!("func_{}", i),
                start: i * 10,
                end: i * 10 + 5,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            })
            .collect();
        store.save_file_index(&rec1, &syms1, &[], &[]).unwrap();

        // Index second file
        let file2 = dir.path().join("file2.ts");
        let rec2 = mk_file_record(&file2);
        let syms2: Vec<SymbolRecord> = (0..3)
            .map(|i| SymbolRecord {
                id: format!("f2_sym_{}", i),
                file: normalize_path(&file2),
                kind: "class".into(),
                name: format!("Class_{}", i),
                start: i * 10,
                end: i * 10 + 5,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            })
            .collect();
        store.save_file_index(&rec2, &syms2, &[], &[]).unwrap();

        // Verify total stats
        let total = store.get_total_stats().unwrap();
        assert_eq!(total.symbol_count, 8);
        assert_eq!(total.function_count, 5);
        assert_eq!(total.class_count, 3);
    }

    /// Test file dependency graph basic operations
    #[test]
    fn file_dependency_graph_save_and_query() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Save dependencies for main.ts -> [utils.ts, types.ts]
        let main_deps = vec![
            FileDependency {
                from_file: "src/main.ts".into(),
                to_file: "src/utils.ts".into(),
                kind: "import".into(),
            },
            FileDependency {
                from_file: "src/main.ts".into(),
                to_file: "src/types.ts".into(),
                kind: "import".into(),
            },
        ];
        store
            .save_file_dependencies("src/main.ts", &main_deps)
            .unwrap();

        // Save dependencies for utils.ts -> [types.ts]
        let utils_deps = vec![FileDependency {
            from_file: "src/utils.ts".into(),
            to_file: "src/types.ts".into(),
            kind: "import".into(),
        }];
        store
            .save_file_dependencies("src/utils.ts", &utils_deps)
            .unwrap();

        // Query dependencies of main.ts
        let main_imports = store.get_file_dependencies("src/main.ts").unwrap();
        assert_eq!(main_imports.len(), 2);

        // Query reverse dependencies (what files depend on types.ts)
        let types_dependents = store.get_dependents("src/types.ts").unwrap();
        assert_eq!(types_dependents.len(), 2);
        assert!(types_dependents.contains(&"src/main.ts".to_string()));
        assert!(types_dependents.contains(&"src/utils.ts".to_string()));
    }

    /// Test dependency graph replacement on re-index
    #[test]
    fn file_dependency_replaces_on_reindex() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Initial dependencies
        let deps1 = vec![FileDependency {
            from_file: "src/main.ts".into(),
            to_file: "src/old.ts".into(),
            kind: "import".into(),
        }];
        store.save_file_dependencies("src/main.ts", &deps1).unwrap();

        // Re-index with new dependencies
        let deps2 = vec![FileDependency {
            from_file: "src/main.ts".into(),
            to_file: "src/new.ts".into(),
            kind: "import".into(),
        }];
        store.save_file_dependencies("src/main.ts", &deps2).unwrap();

        // Verify old dependencies are replaced
        let deps = store.get_file_dependencies("src/main.ts").unwrap();
        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].to_file, "src/new.ts");
    }

    /// Test topological sort orders dependencies before dependents
    #[test]
    fn topological_sort_orders_dependencies_first() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Create dependency chain: main -> utils -> types
        let deps = vec![FileDependency {
            from_file: "main.ts".into(),
            to_file: "utils.ts".into(),
            kind: "import".into(),
        }];
        store.save_file_dependencies("main.ts", &deps).unwrap();

        let deps = vec![FileDependency {
            from_file: "utils.ts".into(),
            to_file: "types.ts".into(),
            kind: "import".into(),
        }];
        store.save_file_dependencies("utils.ts", &deps).unwrap();

        store.save_file_dependencies("types.ts", &[]).unwrap();

        // Sort all three files
        let files = vec!["main.ts".into(), "utils.ts".into(), "types.ts".into()];
        let sorted = store.topological_sort(&files).unwrap();

        // types.ts must come before utils.ts, which must come before main.ts
        let types_pos = sorted.iter().position(|f| f == "types.ts").unwrap();
        let utils_pos = sorted.iter().position(|f| f == "utils.ts").unwrap();
        let main_pos = sorted.iter().position(|f| f == "main.ts").unwrap();

        assert!(
            types_pos < utils_pos,
            "types.ts should come before utils.ts"
        );
        assert!(utils_pos < main_pos, "utils.ts should come before main.ts");
    }

    /// Test topological sort handles independent files
    #[test]
    fn topological_sort_handles_independent_files() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // No dependencies between files
        store.save_file_dependencies("a.ts", &[]).unwrap();
        store.save_file_dependencies("b.ts", &[]).unwrap();
        store.save_file_dependencies("c.ts", &[]).unwrap();

        let files = vec!["a.ts".into(), "b.ts".into(), "c.ts".into()];
        let sorted = store.topological_sort(&files).unwrap();

        // All files should be present
        assert_eq!(sorted.len(), 3);
        assert!(sorted.contains(&"a.ts".into()));
        assert!(sorted.contains(&"b.ts".into()));
        assert!(sorted.contains(&"c.ts".into()));
    }

    /// Test invalidation propagation finds all affected files
    #[test]
    fn invalidation_propagates_through_dependency_chain() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Create chain: main -> utils -> types
        store
            .save_file_dependencies(
                "main.ts",
                &[FileDependency {
                    from_file: "main.ts".into(),
                    to_file: "utils.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store
            .save_file_dependencies(
                "utils.ts",
                &[FileDependency {
                    from_file: "utils.ts".into(),
                    to_file: "types.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store.save_file_dependencies("types.ts", &[]).unwrap();

        // When types.ts changes, all three files need reindexing
        let invalidated = store.get_invalidation_set("types.ts").unwrap();
        assert_eq!(invalidated.len(), 3);
        assert!(invalidated.contains(&"types.ts".to_string()));
        assert!(invalidated.contains(&"utils.ts".to_string()));
        assert!(invalidated.contains(&"main.ts".to_string()));

        // When main.ts changes, only main.ts needs reindexing
        let invalidated = store.get_invalidation_set("main.ts").unwrap();
        assert_eq!(invalidated.len(), 1);
        assert_eq!(invalidated[0], "main.ts");
    }

    /// Test batch invalidation handles multiple changed files
    #[test]
    fn batch_invalidation_unions_affected_files() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Create two independent chains
        // chain1: a -> b
        // chain2: c -> d
        store
            .save_file_dependencies(
                "a.ts",
                &[FileDependency {
                    from_file: "a.ts".into(),
                    to_file: "b.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store.save_file_dependencies("b.ts", &[]).unwrap();

        store
            .save_file_dependencies(
                "c.ts",
                &[FileDependency {
                    from_file: "c.ts".into(),
                    to_file: "d.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store.save_file_dependencies("d.ts", &[]).unwrap();

        // When both b.ts and d.ts change
        let changed = vec!["b.ts".into(), "d.ts".into()];
        let invalidated = store.get_batch_invalidation_set(&changed).unwrap();

        // All four files should be invalidated
        assert_eq!(invalidated.len(), 4);
    }

    /// Test DependencyCache provides O(1) lookups
    #[test]
    fn dependency_cache_provides_o1_lookup() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Create dependencies
        store
            .save_file_dependencies(
                "main.ts",
                &[
                    FileDependency {
                        from_file: "main.ts".into(),
                        to_file: "utils.ts".into(),
                        kind: "import".into(),
                    },
                    FileDependency {
                        from_file: "main.ts".into(),
                        to_file: "types.ts".into(),
                        kind: "import".into(),
                    },
                ],
            )
            .unwrap();
        store
            .save_file_dependencies(
                "utils.ts",
                &[FileDependency {
                    from_file: "utils.ts".into(),
                    to_file: "types.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();

        // Load cache
        let cache = store.load_dependency_cache().unwrap();
        assert!(cache.is_populated());

        // O(1) forward lookup
        let main_deps = cache.get_dependencies("main.ts").unwrap();
        assert_eq!(main_deps.len(), 2);
        assert!(main_deps.contains(&"utils.ts".to_string()));
        assert!(main_deps.contains(&"types.ts".to_string()));

        // O(1) reverse lookup
        let types_dependents = cache.get_dependents("types.ts").unwrap();
        assert_eq!(types_dependents.len(), 2);
        assert!(types_dependents.contains(&"main.ts".to_string()));
        assert!(types_dependents.contains(&"utils.ts".to_string()));
    }

    /// Test DependencyCache invalidation
    #[test]
    fn dependency_cache_invalidates_correctly() {
        let mut cache = DependencyCache::new();
        let deps = vec![
            FileDependency {
                from_file: "a.ts".into(),
                to_file: "b.ts".into(),
                kind: "import".into(),
            },
            FileDependency {
                from_file: "b.ts".into(),
                to_file: "c.ts".into(),
                kind: "import".into(),
            },
        ];
        cache.populate(&deps);

        // Verify initial state
        assert!(cache.get_dependencies("a.ts").is_some());
        assert!(cache.get_dependents("b.ts").is_some());

        // Invalidate b.ts
        cache.invalidate_file("b.ts");

        // b.ts should have no entries
        assert!(cache.get_dependencies("b.ts").is_none());
        assert!(cache.get_dependents("b.ts").is_none());

        // a.ts forward deps should no longer include b.ts
        let a_deps = cache.get_dependencies("a.ts");
        assert!(a_deps.is_none() || a_deps.unwrap().is_empty());

        // c.ts reverse deps should no longer include b.ts
        let c_dependents = cache.get_dependents("c.ts");
        assert!(c_dependents.is_none() || c_dependents.unwrap().is_empty());
    }

    /// Test DependencyCache clear
    #[test]
    fn dependency_cache_clears_all_entries() {
        let mut cache = DependencyCache::new();
        let deps = vec![FileDependency {
            from_file: "a.ts".into(),
            to_file: "b.ts".into(),
            kind: "import".into(),
        }];
        cache.populate(&deps);
        assert!(cache.is_populated());

        cache.clear();
        assert!(!cache.is_populated());
        assert!(cache.get_dependencies("a.ts").is_none());
    }

    /// Test invalidation handles diamond dependency pattern
    #[test]
    fn invalidation_handles_diamond_dependencies() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Diamond: main -> [utils, helpers] -> shared
        store
            .save_file_dependencies(
                "main.ts",
                &[
                    FileDependency {
                        from_file: "main.ts".into(),
                        to_file: "utils.ts".into(),
                        kind: "import".into(),
                    },
                    FileDependency {
                        from_file: "main.ts".into(),
                        to_file: "helpers.ts".into(),
                        kind: "import".into(),
                    },
                ],
            )
            .unwrap();
        store
            .save_file_dependencies(
                "utils.ts",
                &[FileDependency {
                    from_file: "utils.ts".into(),
                    to_file: "shared.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store
            .save_file_dependencies(
                "helpers.ts",
                &[FileDependency {
                    from_file: "helpers.ts".into(),
                    to_file: "shared.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store.save_file_dependencies("shared.ts", &[]).unwrap();

        // When shared.ts changes, all four files need reindexing
        let invalidated = store.get_invalidation_set("shared.ts").unwrap();
        assert_eq!(invalidated.len(), 4);

        // Verify topological order: shared before utils/helpers before main
        let shared_pos = invalidated.iter().position(|f| f == "shared.ts").unwrap();
        let main_pos = invalidated.iter().position(|f| f == "main.ts").unwrap();
        assert!(shared_pos < main_pos, "shared.ts must come before main.ts");
    }

    /// Test topological sort handles cycles gracefully
    #[test]
    fn topological_sort_handles_cycles() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Create a cycle: a -> b -> c -> a
        store
            .save_file_dependencies(
                "a.ts",
                &[FileDependency {
                    from_file: "a.ts".into(),
                    to_file: "b.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store
            .save_file_dependencies(
                "b.ts",
                &[FileDependency {
                    from_file: "b.ts".into(),
                    to_file: "c.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();
        store
            .save_file_dependencies(
                "c.ts",
                &[FileDependency {
                    from_file: "c.ts".into(),
                    to_file: "a.ts".into(),
                    kind: "import".into(),
                }],
            )
            .unwrap();

        let files = vec!["a.ts".into(), "b.ts".into(), "c.ts".into()];
        let sorted = store.topological_sort(&files).unwrap();

        // All files should still be present (cycles handled gracefully)
        assert_eq!(sorted.len(), 3);
        assert!(sorted.contains(&"a.ts".into()));
        assert!(sorted.contains(&"b.ts".into()));
        assert!(sorted.contains(&"c.ts".into()));
    }
2929
2930    /// Test dependencies are cleaned up when file is removed
2931    #[test]
2932    fn file_dependencies_removed_with_file() {
2933        let dir = tempdir().unwrap();
2934        let db_path = dir.path().join("index.db");
2935        let store = IndexStore::open(&db_path).unwrap();
2936
2937        // Create a file to remove
2938        let file_path = dir.path().join("removeme.ts");
2939        let file_rec = mk_file_record(&file_path);
2940        let sym = mk_symbol(&file_path, "test");
2941        store.save_file_index(&file_rec, &[sym], &[], &[]).unwrap();
2942
2943        // Add dependencies both directions
        let deps = vec![FileDependency {
            from_file: normalize_path(&file_path),
            to_file: "other.ts".into(),
            kind: "import".into(),
        }];
        store
            .save_file_dependencies(&normalize_path(&file_path), &deps)
            .unwrap();

        // Also make another file depend on removeme.ts
        let other_deps = vec![FileDependency {
            from_file: "depends_on_removeme.ts".into(),
            to_file: normalize_path(&file_path),
            kind: "import".into(),
        }];
        store
            .save_file_dependencies("depends_on_removeme.ts", &other_deps)
            .unwrap();

        // Verify dependencies exist
        assert_eq!(
            store
                .get_file_dependencies(&normalize_path(&file_path))
                .unwrap()
                .len(),
            1
        );
        assert_eq!(
            store
                .get_dependents(&normalize_path(&file_path))
                .unwrap()
                .len(),
            1
        );

        // Remove file
        store.remove_file(&file_path).unwrap();

        // Verify both directions of dependencies are cleaned up
        assert!(store
            .get_file_dependencies(&normalize_path(&file_path))
            .unwrap()
            .is_empty());
        assert!(store
            .get_dependents(&normalize_path(&file_path))
            .unwrap()
            .is_empty());
    }

    /// Test that repeated queries use statement caching for better performance
    #[test]
    fn query_plan_caching_improves_repeated_query_performance() {
        use std::time::Instant;

        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        // Insert test data
        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let symbols: Vec<SymbolRecord> = (0..100)
            .map(|i| SymbolRecord {
                id: format!("sym_{:03}", i),
                file: normalize_path(&file_path),
                kind: "function".into(),
                name: format!("func_{}", i),
                start: i * 10,
                end: i * 10 + 5,
                qualifier: None,
                visibility: None,
                container: None,
                content_hash: None,
            })
            .collect();

        let edges: Vec<EdgeRecord> = (0..50)
            .map(|i| EdgeRecord {
                src: format!("sym_{:03}", i),
                dst: format!("sym_{:03}", i + 50),
                kind: "implements".into(),
            })
            .collect();

        store
            .save_file_index(&file_rec, &symbols, &edges, &[])
            .unwrap();

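        // The caching under test is presumably rusqlite's prepared-statement
        // cache (`Connection::prepare_cached`), which keeps compiled
        // statements in an LRU cache keyed by SQL text, e.g. (illustrative
        // sketch; the table and column names are assumptions, not necessarily
        // the store's actual schema):
        //
        //     let mut stmt = conn.prepare_cached("SELECT src FROM edges WHERE dst = ?1")?;
        //     let srcs = stmt.query_map([dst_id], |row| row.get::<_, String>(0))?;
        //
        // Repeat calls with the same SQL then skip compilation entirely.
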
        // Warm up (first query compiles statement)
        let _ = store.edges_to("sym_050");

        // Time repeated queries, which should now hit the statement cache
        let start = Instant::now();
        for i in 50..100 {
            let _ = store.edges_to(&format!("sym_{:03}", i));
        }
        let cached_duration = start.elapsed();

        // 50 cached queries should finish well inside the deliberately
        // generous 100ms bound (loose enough to stay stable on slow CI)
        assert!(
            cached_duration.as_millis() < 100,
            "50 cached queries should complete quickly, took {}ms",
            cached_duration.as_millis()
        );
    }

    /// Test file stats are cleaned up on file removal
    #[test]
    fn file_stats_removed_with_file() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let file_path = dir.path().join("test.ts");
        let file_rec = mk_file_record(&file_path);
        let sym = mk_symbol(&file_path, "test");

        store.save_file_index(&file_rec, &[sym], &[], &[]).unwrap();

        // Verify stats exist
        assert!(store
            .get_file_stats(&normalize_path(&file_path))
            .unwrap()
            .is_some());

        // Remove file
        store.remove_file(&file_path).unwrap();

        // Verify stats removed
        assert!(store
            .get_file_stats(&normalize_path(&file_path))
            .unwrap()
            .is_none());
    }

    /// Test that SQLite pragmas are configured for performance
    #[test]
    fn sqlite_performance_pragmas_configured() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

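        // These pragmas are presumably applied when the store opens its
        // connection; with rusqlite that would typically look like the
        // following (illustrative sketch, not necessarily the store's
        // actual values):
        //
        //     conn.pragma_update(None, "journal_mode", "WAL")?;
        //     conn.pragma_update(None, "mmap_size", 268_435_456)?; // 256 MiB
        //     conn.pragma_update(None, "cache_size", -64_000)?;    // negative = KiB
        //
        // The assertions below check observable effects rather than exact
        // values.
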
        // Check WAL mode
        let journal_mode: String = conn
            .query_row("PRAGMA journal_mode", [], |row| row.get(0))
            .unwrap();
        assert_eq!(journal_mode.to_lowercase(), "wal", "Should use WAL mode");

        // Check mmap is enabled (non-zero)
        let mmap_size: i64 = conn
            .query_row("PRAGMA mmap_size", [], |row| row.get(0))
            .unwrap();
        assert!(
            mmap_size > 0,
            "mmap should be enabled for memory-mapped I/O"
        );

        // Check cache size is configured. SQLite reports cache_size as a
        // page count when positive and as -KiB when negative, so a tuned
        // cache shows up as either a large page count or a negative value.
        let cache_size: i64 = conn
            .query_row("PRAGMA cache_size", [], |row| row.get(0))
            .unwrap();
        assert!(
            !(0..=1000).contains(&cache_size),
            "Cache should be configured (got {})",
            cache_size
        );
    }

    // ==================== Schema Migration Tests ====================

    /// Test that new database creation includes version tracking
    #[test]
    fn new_database_has_schema_version() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");
        let store = IndexStore::open(&db_path).unwrap();

        let conn = store.conn.borrow();

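        // schema_meta is a simple key/value table; the keys exercised here:
        //   schema_version -> "MAJOR.MINOR" (e.g. "1.0")
        //   gabb_version   -> the crate version that wrote the index
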
        // Verify schema_meta table exists
        let table_exists: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_meta'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(table_exists, 1, "schema_meta table should exist");

        // Verify schema_version is set
        let version: String = conn
            .query_row(
                "SELECT value FROM schema_meta WHERE key = 'schema_version'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            version,
            format!("{}.{}", SCHEMA_MAJOR, SCHEMA_MINOR),
            "Schema version should be set to current version"
        );

        // Verify gabb_version is set
        let gabb_version: String = conn
            .query_row(
                "SELECT value FROM schema_meta WHERE key = 'gabb_version'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            gabb_version,
            env!("CARGO_PKG_VERSION"),
            "Gabb version should be set"
        );
    }

    /// Test that try_open returns Ready for current version database
    #[test]
    fn try_open_returns_ready_for_current_version() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");

        // Create a fresh database
        let _store = IndexStore::open(&db_path).unwrap();
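        // Dropping the store closes the connection before we reopen below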
        drop(_store);

        // try_open should return Ready
        match IndexStore::try_open(&db_path).unwrap() {
            DbOpenResult::Ready(_) => {}
            DbOpenResult::NeedsRegeneration { reason, .. } => {
                panic!(
                    "Expected Ready, got NeedsRegeneration: {}",
                    reason.message()
                );
            }
        }
    }

    /// Test that legacy database (no schema_meta) triggers regeneration
    #[test]
    fn legacy_database_triggers_regeneration() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");

        // Create a legacy database without schema_meta table
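        // (scoped so the raw Connection is closed before try_open runs)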
        {
            let conn = Connection::open(&db_path).unwrap();
            conn.execute(
                "CREATE TABLE files (path TEXT PRIMARY KEY, hash TEXT, mtime INTEGER)",
                [],
            )
            .unwrap();
            conn.execute(
                "CREATE TABLE symbols (id TEXT PRIMARY KEY, file TEXT, kind TEXT, name TEXT)",
                [],
            )
            .unwrap();
            // Note: No schema_meta table
        }

        // try_open should detect legacy database
        match IndexStore::try_open(&db_path).unwrap() {
            DbOpenResult::Ready(_) => {
                panic!("Expected NeedsRegeneration for legacy database");
            }
            DbOpenResult::NeedsRegeneration { reason, .. } => {
                assert!(
                    matches!(reason, RegenerationReason::LegacyDatabase),
                    "Expected LegacyDatabase reason"
                );
            }
        }
    }

    /// Test that major version mismatch triggers regeneration
    #[test]
    fn major_version_mismatch_triggers_regeneration() {
        let dir = tempdir().unwrap();
        let db_path = dir.path().join("index.db");

        // Create a database with a different major version
        {
            let conn = Connection::open(&db_path).unwrap();
            conn.execute(
                "CREATE TABLE schema_meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)",
                [],
            )
            .unwrap();
            // Insert a future major version
            conn.execute(
                "INSERT INTO schema_meta (key, value) VALUES ('schema_version', '99.0')",
                [],
            )
            .unwrap();
        }

        // try_open should detect major version mismatch
        match IndexStore::try_open(&db_path).unwrap() {
            DbOpenResult::Ready(_) => {
                panic!("Expected NeedsRegeneration for major version mismatch");
            }
            DbOpenResult::NeedsRegeneration { reason, .. } => match reason {
                RegenerationReason::MajorVersionMismatch {
                    db_version,
                    app_version,
                } => {
                    assert_eq!(db_version, "99.0");
                    assert_eq!(app_version, format!("{}.{}", SCHEMA_MAJOR, SCHEMA_MINOR));
                }
                _ => panic!("Expected MajorVersionMismatch reason"),
            },
        }
    }

    /// Test SchemaVersion comparison and parsing
    #[test]
    fn schema_version_parsing_and_comparison() {
        // Test parsing
        assert_eq!(
            SchemaVersion::parse("1.0"),
            Some(SchemaVersion { major: 1, minor: 0 })
        );
        assert_eq!(
            SchemaVersion::parse("2.15"),
            Some(SchemaVersion {
                major: 2,
                minor: 15
            })
        );
        assert_eq!(SchemaVersion::parse("invalid"), None);
        assert_eq!(SchemaVersion::parse("1"), None);
        assert_eq!(SchemaVersion::parse(""), None);

        // Test requires_regeneration (major version difference)
        let v1_0 = SchemaVersion { major: 1, minor: 0 };
        let v1_5 = SchemaVersion { major: 1, minor: 5 };
        let v2_0 = SchemaVersion { major: 2, minor: 0 };

        assert!(!v1_0.requires_regeneration(&v1_5)); // Same major, no regen
        assert!(v1_0.requires_regeneration(&v2_0)); // Different major, regen
        assert!(v2_0.requires_regeneration(&v1_0)); // Different major, regen

        // Test requires_migration (same major, lower minor)
        assert!(v1_0.requires_migration(&v1_5)); // 1.0 needs migration to 1.5
        assert!(!v1_5.requires_migration(&v1_0)); // 1.5 doesn't need migration to 1.0
        assert!(!v1_5.requires_migration(&v1_5)); // Same version, no migration
        assert!(!v1_0.requires_migration(&v2_0)); // Different major, use regen not migration
    }

    /// Test RegenerationReason message formatting
    #[test]
    fn regeneration_reason_messages() {
        let legacy = RegenerationReason::LegacyDatabase;
        assert!(legacy.message().contains("legacy"));

        let mismatch = RegenerationReason::MajorVersionMismatch {
            db_version: "1.0".into(),
            app_version: "2.0".into(),
        };
        assert!(mismatch.message().contains("1.0"));
        assert!(mismatch.message().contains("2.0"));

        let corrupt = RegenerationReason::CorruptDatabase("test error".into());
        assert!(corrupt.message().contains("test error"));

        let user = RegenerationReason::UserRequested;
        assert!(user.message().contains("requested"));
    }
}