reflex/
cache.rs

//! Cache management and memory-mapped I/O
//!
//! The cache module handles the `.reflex/` directory structure:
//! - `meta.db`: Metadata, file hashes, and configuration (SQLite)
//! - `content.bin`: Memory-mapped file contents (binary)
//! - `trigrams.bin`: Trigram inverted index (bincode binary)
//! - `config.toml`: Index settings (TOML text)
//!
//! Legacy artifacts: `tokens.bin` was removed (never used) and `hashes.json`
//! is deprecated — file hashes now live in `meta.db` (`file_branches` table).
9
10use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
18/// Default cache directory name
19pub const CACHE_DIR: &str = ".reflex";
20
21/// File names within the cache directory
22pub const META_DB: &str = "meta.db";
23pub const TOKENS_BIN: &str = "tokens.bin";
24pub const HASHES_JSON: &str = "hashes.json";
25pub const CONFIG_TOML: &str = "config.toml";
26
/// Manages the Reflex cache directory
///
/// Cheap to clone: holds only the resolved path to the `.reflex/` directory.
/// Each operation opens its own SQLite connection, so no connection state is shared.
#[derive(Clone)]
pub struct CacheManager {
    // Path to the `.reflex/` cache directory (workspace root + CACHE_DIR).
    cache_path: PathBuf,
}
32
33impl CacheManager {
34    /// Create a new cache manager for the given root directory
35    pub fn new(root: impl AsRef<Path>) -> Self {
36        let cache_path = root.as_ref().join(CACHE_DIR);
37        Self { cache_path }
38    }
39
40    /// Initialize the cache directory structure if it doesn't exist
41    pub fn init(&self) -> Result<()> {
42        log::info!("Initializing cache at {:?}", self.cache_path);
43
44        if !self.cache_path.exists() {
45            std::fs::create_dir_all(&self.cache_path)?;
46        }
47
48        // Create meta.db with schema
49        self.init_meta_db()?;
50
51        // Create default config.toml
52        self.init_config_toml()?;
53
54        // Note: tokens.bin removed - was never used
55        // Note: hashes.json is deprecated - hashes are now stored in meta.db
56
57        log::info!("Cache initialized successfully");
58        Ok(())
59    }
60
61    /// Initialize meta.db with SQLite schema
62    fn init_meta_db(&self) -> Result<()> {
63        let db_path = self.cache_path.join(META_DB);
64
65        // Skip if already exists
66        if db_path.exists() {
67            return Ok(());
68        }
69
70        let conn = Connection::open(&db_path)
71            .context("Failed to create meta.db")?;
72
73        // Create files table
74        conn.execute(
75            "CREATE TABLE IF NOT EXISTS files (
76                id INTEGER PRIMARY KEY AUTOINCREMENT,
77                path TEXT NOT NULL UNIQUE,
78                last_indexed INTEGER NOT NULL,
79                language TEXT NOT NULL,
80                token_count INTEGER DEFAULT 0,
81                line_count INTEGER DEFAULT 0
82            )",
83            [],
84        )?;
85
86        conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
87
88        // Create statistics table
89        conn.execute(
90            "CREATE TABLE IF NOT EXISTS statistics (
91                key TEXT PRIMARY KEY,
92                value TEXT NOT NULL,
93                updated_at INTEGER NOT NULL
94            )",
95            [],
96        )?;
97
98        // Initialize default statistics
99        let now = chrono::Utc::now().timestamp();
100        conn.execute(
101            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
102            ["total_files", "0", &now.to_string()],
103        )?;
104        conn.execute(
105            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
106            ["cache_version", "1", &now.to_string()],
107        )?;
108
109        // Store cache schema hash for automatic invalidation detection
110        // This hash is computed at build time from cache-critical source files
111        let schema_hash = env!("CACHE_SCHEMA_HASH");
112        conn.execute(
113            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
114            ["schema_hash", schema_hash, &now.to_string()],
115        )?;
116
117        // Initialize last_compaction timestamp (0 = never compacted)
118        conn.execute(
119            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
120            ["last_compaction", "0", &now.to_string()],
121        )?;
122
123        // Create config table
124        conn.execute(
125            "CREATE TABLE IF NOT EXISTS config (
126                key TEXT PRIMARY KEY,
127                value TEXT NOT NULL
128            )",
129            [],
130        )?;
131
132        // Create branch tracking tables for git-aware indexing
133        conn.execute(
134            "CREATE TABLE IF NOT EXISTS file_branches (
135                file_id INTEGER NOT NULL,
136                branch_id INTEGER NOT NULL,
137                hash TEXT NOT NULL,
138                last_indexed INTEGER NOT NULL,
139                PRIMARY KEY (file_id, branch_id),
140                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
141                FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
142            )",
143            [],
144        )?;
145
146        conn.execute(
147            "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
148            [],
149        )?;
150
151        conn.execute(
152            "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
153            [],
154        )?;
155
156        // Create branches metadata table
157        conn.execute(
158            "CREATE TABLE IF NOT EXISTS branches (
159                id INTEGER PRIMARY KEY AUTOINCREMENT,
160                name TEXT NOT NULL UNIQUE,
161                commit_sha TEXT NOT NULL,
162                last_indexed INTEGER NOT NULL,
163                file_count INTEGER DEFAULT 0,
164                is_dirty INTEGER DEFAULT 0
165            )",
166            [],
167        )?;
168
169        // Create file dependencies table for tracking imports/includes
170        conn.execute(
171            "CREATE TABLE IF NOT EXISTS file_dependencies (
172                id INTEGER PRIMARY KEY AUTOINCREMENT,
173                file_id INTEGER NOT NULL,
174                imported_path TEXT NOT NULL,
175                resolved_file_id INTEGER,
176                import_type TEXT NOT NULL,
177                line_number INTEGER NOT NULL,
178                imported_symbols TEXT,
179                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
180                FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
181            )",
182            [],
183        )?;
184
185        conn.execute(
186            "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
187            [],
188        )?;
189
190        conn.execute(
191            "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
192            [],
193        )?;
194
195        conn.execute(
196            "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
197            [],
198        )?;
199
200        // Create file exports table for tracking barrel re-exports
201        conn.execute(
202            "CREATE TABLE IF NOT EXISTS file_exports (
203                id INTEGER PRIMARY KEY AUTOINCREMENT,
204                file_id INTEGER NOT NULL,
205                exported_symbol TEXT,
206                source_path TEXT NOT NULL,
207                resolved_source_id INTEGER,
208                line_number INTEGER NOT NULL,
209                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
210                FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
211            )",
212            [],
213        )?;
214
215        conn.execute(
216            "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
217            [],
218        )?;
219
220        conn.execute(
221            "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
222            [],
223        )?;
224
225        conn.execute(
226            "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
227            [],
228        )?;
229
230        log::debug!("Created meta.db with schema");
231        Ok(())
232    }
233
234    /// Initialize config.toml with defaults
235    fn init_config_toml(&self) -> Result<()> {
236        let config_path = self.cache_path.join(CONFIG_TOML);
237
238        if config_path.exists() {
239            return Ok(());
240        }
241
242        let default_config = r#"[index]
243languages = []  # Empty = all supported languages
244max_file_size = 10485760  # 10 MB
245follow_symlinks = false
246
247[index.include]
248patterns = []
249
250[index.exclude]
251patterns = []
252
253[search]
254default_limit = 100
255fuzzy_threshold = 0.8
256
257[performance]
258parallel_threads = 0  # 0 = auto (80% of available cores), or set a specific number
259compression_level = 3  # zstd level
260
261[semantic]
262# Semantic query generation using LLMs
263# Translate natural language questions into rfx query commands
264provider = "groq"  # Options: openai, anthropic, groq
265# model = "llama-3.3-70b-versatile"  # Optional: override provider default model
266# auto_execute = false  # Optional: auto-execute queries without confirmation
267"#;
268
269        std::fs::write(&config_path, default_config)?;
270
271        log::debug!("Created default config.toml");
272        Ok(())
273    }
274
275    /// Check if cache exists and is valid
276    pub fn exists(&self) -> bool {
277        self.cache_path.exists()
278            && self.cache_path.join(META_DB).exists()
279    }
280
281    /// Validate cache integrity and detect corruption
282    ///
283    /// Performs basic integrity checks on the cache:
284    /// - Verifies all required files exist
285    /// - Checks SQLite database can be opened
286    /// - Validates binary file headers (trigrams.bin, content.bin)
287    ///
288    /// Returns Ok(()) if cache is valid, Err with details if corrupted.
289    pub fn validate(&self) -> Result<()> {
290        let start = std::time::Instant::now();
291
292        // Check if cache directory exists
293        if !self.cache_path.exists() {
294            anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
295        }
296
297        // Check meta.db exists and can be opened
298        let db_path = self.cache_path.join(META_DB);
299        if !db_path.exists() {
300            anyhow::bail!("Database file missing: {}", db_path.display());
301        }
302
303        // Try to open database
304        let conn = Connection::open(&db_path)
305            .context("Failed to open meta.db - database may be corrupted")?;
306
307        // Verify schema exists
308        let tables: Result<Vec<String>, _> = conn
309            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
310            .and_then(|mut stmt| {
311                stmt.query_map([], |row| row.get(0))
312                    .map(|rows| rows.collect())
313            })
314            .and_then(|result| result);
315
316        match tables {
317            Ok(table_list) => {
318                // Check for required tables
319                let required_tables = vec!["files", "statistics", "config", "file_branches", "branches", "file_dependencies", "file_exports"];
320                for table in &required_tables {
321                    if !table_list.iter().any(|t| t == table) {
322                        anyhow::bail!("Required table '{}' missing from database schema", table);
323                    }
324                }
325            }
326            Err(e) => {
327                anyhow::bail!("Failed to read database schema: {}", e);
328            }
329        }
330
331        // Run SQLite integrity check (fast quick_check)
332        // Use quick_check instead of integrity_check for speed (<10ms vs 100ms+)
333        let integrity_result: String = conn
334            .query_row("PRAGMA quick_check", [], |row| row.get(0))?;
335
336        if integrity_result != "ok" {
337            log::warn!("Database integrity check failed: {}", integrity_result);
338            anyhow::bail!(
339                "Database integrity check failed: {}. Cache may be corrupted. \
340                 Run 'rfx index' to rebuild cache.",
341                integrity_result
342            );
343        }
344
345        // Check trigrams.bin if it exists
346        let trigrams_path = self.cache_path.join("trigrams.bin");
347        if trigrams_path.exists() {
348            use std::io::Read;
349
350            match File::open(&trigrams_path) {
351                Ok(mut file) => {
352                    let mut header = [0u8; 4];
353                    match file.read_exact(&mut header) {
354                        Ok(_) => {
355                            // Check magic bytes
356                            if &header != b"RFTG" {
357                                log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
358                                anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
359                            }
360                        }
361                        Err(_) => {
362                            anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
363                        }
364                    }
365                }
366                Err(e) => {
367                    anyhow::bail!("Failed to open trigrams.bin: {}", e);
368                }
369            }
370        }
371
372        // Check content.bin if it exists
373        let content_path = self.cache_path.join("content.bin");
374        if content_path.exists() {
375            use std::io::Read;
376
377            match File::open(&content_path) {
378                Ok(mut file) => {
379                    let mut header = [0u8; 4];
380                    match file.read_exact(&mut header) {
381                        Ok(_) => {
382                            // Check magic bytes
383                            if &header != b"RFCT" {
384                                log::warn!("content.bin has invalid magic bytes - may be corrupted");
385                                anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
386                            }
387                        }
388                        Err(_) => {
389                            anyhow::bail!("content.bin is too small - appears to be corrupted");
390                        }
391                    }
392                }
393                Err(e) => {
394                    anyhow::bail!("Failed to open content.bin: {}", e);
395                }
396            }
397        }
398
399        // Check schema hash for automatic invalidation
400        let current_schema_hash = env!("CACHE_SCHEMA_HASH");
401
402        let stored_schema_hash: Option<String> = conn
403            .query_row(
404                "SELECT value FROM statistics WHERE key = 'schema_hash'",
405                [],
406                |row| row.get(0),
407            )
408            .optional()?;
409
410        if let Some(stored_hash) = stored_schema_hash {
411            if stored_hash != current_schema_hash {
412                log::warn!(
413                    "Cache schema hash mismatch! Stored: {}, Current: {}",
414                    stored_hash,
415                    current_schema_hash
416                );
417                anyhow::bail!(
418                    "Cache schema version mismatch.\n\
419                     \n\
420                     - Cache was built with version {}\n\
421                     - Current binary expects version {}\n\
422                     \n\
423                     The cache format may be incompatible with this version of Reflex.\n\
424                     Please rebuild the index by running:\n\
425                     \n\
426                       rfx index\n\
427                     \n\
428                     This usually happens after upgrading Reflex or making code changes.",
429                    stored_hash,
430                    current_schema_hash
431                );
432            }
433        } else {
434            log::debug!("No schema_hash found in cache - this cache was created before automatic invalidation was implemented");
435            // Don't fail for backward compatibility with old caches
436            // They will get the hash on next rebuild
437        }
438
439        let elapsed = start.elapsed();
440        log::debug!("Cache validation passed (schema hash: {}, took {:?})", current_schema_hash, elapsed);
441        Ok(())
442    }
443
444    /// Get the path to the cache directory
445    pub fn path(&self) -> &Path {
446        &self.cache_path
447    }
448
449    /// Get the workspace root directory (parent of .reflex/)
450    pub fn workspace_root(&self) -> PathBuf {
451        self.cache_path
452            .parent()
453            .expect(".reflex directory should have a parent")
454            .to_path_buf()
455    }
456
457    /// Clear the entire cache
458    pub fn clear(&self) -> Result<()> {
459        log::warn!("Clearing cache at {:?}", self.cache_path);
460
461        if self.cache_path.exists() {
462            std::fs::remove_dir_all(&self.cache_path)?;
463        }
464
465        Ok(())
466    }
467
468    /// Force SQLite WAL (Write-Ahead Log) checkpoint
469    ///
470    /// Ensures all data written in transactions is flushed to the main database file.
471    /// This is critical when spawning background processes that open new connections,
472    /// as they need to see the committed data immediately.
473    ///
474    /// Uses TRUNCATE mode to completely flush and reset the WAL file.
475    pub fn checkpoint_wal(&self) -> Result<()> {
476        let db_path = self.cache_path.join(META_DB);
477
478        if !db_path.exists() {
479            // No database to checkpoint
480            return Ok(());
481        }
482
483        let conn = Connection::open(&db_path)
484            .context("Failed to open meta.db for WAL checkpoint")?;
485
486        // PRAGMA wal_checkpoint(TRUNCATE) forces a full checkpoint and truncates the WAL
487        // This ensures background processes see all committed data
488        // Note: Returns (busy, log_pages, checkpointed_pages) - use query instead of execute
489        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
490            let busy: i64 = row.get(0)?;
491            let log_pages: i64 = row.get(1)?;
492            let checkpointed: i64 = row.get(2)?;
493            log::debug!(
494                "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
495                busy, log_pages, checkpointed
496            );
497            Ok(())
498        }).context("Failed to execute WAL checkpoint")?;
499
500        log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
501        Ok(())
502    }
503
504    /// Load all file hashes across all branches from SQLite
505    ///
506    /// Used by background indexer to get hashes for all indexed files.
507    /// Returns the most recent hash for each file across all branches.
508    pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
509        let db_path = self.cache_path.join(META_DB);
510
511        if !db_path.exists() {
512            return Ok(HashMap::new());
513        }
514
515        let conn = Connection::open(&db_path)
516            .context("Failed to open meta.db")?;
517
518        // Get all hashes from file_branches, joined with files to get paths
519        // If a file appears in multiple branches, we'll get multiple entries
520        // (HashMap will keep the last one, which is fine for background indexer)
521        let mut stmt = conn.prepare(
522            "SELECT f.path, fb.hash
523             FROM file_branches fb
524             JOIN files f ON fb.file_id = f.id"
525        )?;
526        let hashes: HashMap<String, String> = stmt.query_map([], |row| {
527            Ok((row.get(0)?, row.get(1)?))
528        })?
529        .collect::<Result<HashMap<_, _>, _>>()?;
530
531        log::debug!("Loaded {} file hashes across all branches from SQLite", hashes.len());
532        Ok(hashes)
533    }
534
535    /// Load file hashes for a specific branch from SQLite
536    ///
537    /// Used by indexer and query engine to get hashes for the current branch.
538    /// This ensures branch-specific incremental indexing and symbol cache lookups.
539    pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
540        let db_path = self.cache_path.join(META_DB);
541
542        if !db_path.exists() {
543            return Ok(HashMap::new());
544        }
545
546        let conn = Connection::open(&db_path)
547            .context("Failed to open meta.db")?;
548
549        // Get hashes for specific branch only
550        let mut stmt = conn.prepare(
551            "SELECT f.path, fb.hash
552             FROM file_branches fb
553             JOIN files f ON fb.file_id = f.id
554             JOIN branches b ON fb.branch_id = b.id
555             WHERE b.name = ?"
556        )?;
557        let hashes: HashMap<String, String> = stmt.query_map([branch], |row| {
558            Ok((row.get(0)?, row.get(1)?))
559        })?
560        .collect::<Result<HashMap<_, _>, _>>()?;
561
562        log::debug!("Loaded {} file hashes for branch '{}' from SQLite", hashes.len(), branch);
563        Ok(hashes)
564    }
565
    /// Save file hashes for incremental indexing
    ///
    /// DEPRECATED: Hashes are now saved via record_branch_file() or batch_record_branch_files().
    /// This method is kept for backward compatibility but does nothing.
    ///
    /// Always returns Ok(()) and never touches disk; callers can safely
    /// remove this call once migrated to the file_branches-based methods.
    #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
        // No-op: hashes are now persisted to SQLite in record_branch_file()
        Ok(())
    }
575
576    /// Update file metadata in the files table
577    ///
578    /// Note: File content hashes are stored separately in the file_branches table
579    /// via record_branch_file() or batch_record_branch_files().
580    pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
581        let db_path = self.cache_path.join(META_DB);
582        let conn = Connection::open(&db_path)
583            .context("Failed to open meta.db for file update")?;
584
585        let now = chrono::Utc::now().timestamp();
586
587        conn.execute(
588            "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
589             VALUES (?, ?, ?, ?)",
590            [path, &now.to_string(), language, &line_count.to_string()],
591        )?;
592
593        Ok(())
594    }
595
596    /// Batch update multiple files in a single transaction for performance
597    ///
598    /// Note: File content hashes are stored separately in the file_branches table
599    /// via batch_update_files_and_branch().
600    pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
601        let db_path = self.cache_path.join(META_DB);
602        let mut conn = Connection::open(&db_path)
603            .context("Failed to open meta.db for batch update")?;
604
605        let now = chrono::Utc::now().timestamp();
606        let now_str = now.to_string();
607
608        // Use a transaction for batch inserts
609        let tx = conn.transaction()?;
610
611        for (path, language, line_count) in files {
612            tx.execute(
613                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
614                 VALUES (?, ?, ?, ?)",
615                [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
616            )?;
617        }
618
619        tx.commit()?;
620        Ok(())
621    }
622
623    /// Batch update files AND record their hashes for a branch in a SINGLE transaction
624    ///
625    /// This is the recommended method for indexing as it ensures atomicity:
626    /// if files are inserted, their branch hashes are guaranteed to be inserted too.
627    pub fn batch_update_files_and_branch(
628        &self,
629        files: &[(String, String, usize)],      // (path, language, line_count)
630        branch_files: &[(String, String)],       // (path, hash)
631        branch: &str,
632        commit_sha: Option<&str>,
633    ) -> Result<()> {
634        log::info!("batch_update_files_and_branch: Processing {} files for branch '{}'", files.len(), branch);
635
636        let db_path = self.cache_path.join(META_DB);
637        let mut conn = Connection::open(&db_path)
638            .context("Failed to open meta.db for batch update and branch recording")?;
639
640        let now = chrono::Utc::now().timestamp();
641        let now_str = now.to_string();
642
643        // Use a SINGLE transaction for both operations
644        let tx = conn.transaction()?;
645
646        // Step 1: Insert/update files table
647        for (path, language, line_count) in files {
648            tx.execute(
649                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
650                 VALUES (?, ?, ?, ?)",
651                [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
652            )?;
653        }
654        log::info!("Inserted {} files into files table", files.len());
655
656        // Step 2: Get or create branch_id (within same transaction)
657        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
658        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
659
660        // Step 3: Insert file_branches entries (within same transaction)
661        let mut inserted = 0;
662        for (path, hash) in branch_files {
663            // Lookup file_id from path (will find it because we just inserted above)
664            let file_id: i64 = tx.query_row(
665                "SELECT id FROM files WHERE path = ?",
666                [path.as_str()],
667                |row| row.get(0)
668            ).context(format!("File not found in index after insert: {}", path))?;
669
670            // Insert into file_branches using INTEGER values (not strings!)
671            tx.execute(
672                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
673                 VALUES (?, ?, ?, ?)",
674                rusqlite::params![file_id, branch_id, hash.as_str(), now],
675            )?;
676            inserted += 1;
677        }
678        log::info!("Inserted {} file_branches entries", inserted);
679
680        // Commit the entire transaction atomically
681        tx.commit()?;
682        log::info!("Transaction committed successfully (files + file_branches)");
683
684        // DIAGNOSTIC: Verify data was actually persisted after commit
685        // This helps diagnose WAL synchronization issues where commits succeed but data isn't visible
686        let verify_conn = Connection::open(&db_path)
687            .context("Failed to open meta.db for verification")?;
688
689        // Count actual files in database
690        let actual_file_count: i64 = verify_conn.query_row(
691            "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
692            [files.len()],
693            |row| row.get(0)
694        ).unwrap_or(0);
695
696        // Count actual file_branches entries for this branch
697        let actual_fb_count: i64 = verify_conn.query_row(
698            "SELECT COUNT(*) FROM file_branches fb
699             JOIN branches b ON fb.branch_id = b.id
700             WHERE b.name = ?",
701            [branch],
702            |row| row.get(0)
703        ).unwrap_or(0);
704
705        log::info!(
706            "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
707            actual_file_count,
708            files.len(),
709            actual_fb_count,
710            branch,
711            inserted
712        );
713
714        // DEFENSIVE: Warn if counts don't match expectations
715        if actual_file_count < files.len() as i64 {
716            log::warn!(
717                "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
718                files.len(),
719                actual_file_count
720            );
721        }
722        if actual_fb_count < inserted as i64 {
723            log::warn!(
724                "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
725                inserted,
726                branch,
727                actual_fb_count
728            );
729        }
730
731        Ok(())
732    }
733
734    /// Update statistics after indexing by calculating totals from database for a specific branch
735    ///
736    /// Counts only files indexed for the given branch, not all files across all branches.
737    pub fn update_stats(&self, branch: &str) -> Result<()> {
738        let db_path = self.cache_path.join(META_DB);
739        let conn = Connection::open(&db_path)
740            .context("Failed to open meta.db for stats update")?;
741
742        // Count files for specific branch only (branch-aware statistics)
743        let total_files: usize = conn.query_row(
744            "SELECT COUNT(DISTINCT fb.file_id)
745             FROM file_branches fb
746             JOIN branches b ON fb.branch_id = b.id
747             WHERE b.name = ?",
748            [branch],
749            |row| row.get(0),
750        ).unwrap_or(0);
751
752        let now = chrono::Utc::now().timestamp();
753
754        conn.execute(
755            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
756            ["total_files", &total_files.to_string(), &now.to_string()],
757        )?;
758
759        log::debug!("Updated statistics for branch '{}': {} files", branch, total_files);
760        Ok(())
761    }
762
763    /// Update cache schema hash in statistics table
764    ///
765    /// This should be called after every index operation to ensure the cache
766    /// is marked as compatible with the current binary version.
767    pub fn update_schema_hash(&self) -> Result<()> {
768        let db_path = self.cache_path.join(META_DB);
769        let conn = Connection::open(&db_path)
770            .context("Failed to open meta.db for schema hash update")?;
771
772        let schema_hash = env!("CACHE_SCHEMA_HASH");
773        let now = chrono::Utc::now().timestamp();
774
775        conn.execute(
776            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
777            ["schema_hash", schema_hash, &now.to_string()],
778        )?;
779
780        log::debug!("Updated schema hash to: {}", schema_hash);
781        Ok(())
782    }
783
784    /// Get list of all indexed files
785    pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
786        let db_path = self.cache_path.join(META_DB);
787
788        if !db_path.exists() {
789            return Ok(Vec::new());
790        }
791
792        let conn = Connection::open(&db_path)
793            .context("Failed to open meta.db")?;
794
795        let mut stmt = conn.prepare(
796            "SELECT path, language, last_indexed FROM files ORDER BY path"
797        )?;
798
799        let files = stmt.query_map([], |row| {
800            let path: String = row.get(0)?;
801            let language: String = row.get(1)?;
802            let last_indexed: i64 = row.get(2)?;
803
804            Ok(IndexedFile {
805                path,
806                language,
807                last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
808                    .unwrap_or_else(chrono::Utc::now)
809                    .to_rfc3339(),
810            })
811        })?
812        .collect::<Result<Vec<_>, _>>()?;
813
814        Ok(files)
815    }
816
817    /// Get statistics about the current cache
818    ///
819    /// Returns statistics for the current git branch if in a git repo,
820    /// or global statistics if not in a git repo.
821    pub fn stats(&self) -> Result<crate::models::IndexStats> {
822        let db_path = self.cache_path.join(META_DB);
823
824        if !db_path.exists() {
825            // Cache not initialized
826            return Ok(crate::models::IndexStats {
827                total_files: 0,
828                index_size_bytes: 0,
829                last_updated: chrono::Utc::now().to_rfc3339(),
830                files_by_language: std::collections::HashMap::new(),
831                lines_by_language: std::collections::HashMap::new(),
832            });
833        }
834
835        let conn = Connection::open(&db_path)
836            .context("Failed to open meta.db")?;
837
838        // Determine current branch for branch-aware statistics
839        let workspace_root = self.workspace_root();
840        let current_branch = if crate::git::is_git_repo(&workspace_root) {
841            crate::git::get_git_state(&workspace_root)
842                .ok()
843                .map(|state| state.branch)
844        } else {
845            Some("_default".to_string())
846        };
847
848        log::debug!("stats(): current_branch = {:?}", current_branch);
849
850        // Read total files (branch-aware)
851        let total_files: usize = if let Some(ref branch) = current_branch {
852            log::debug!("stats(): Counting files for branch '{}'", branch);
853
854            // Debug: Check all branches
855            let branches: Vec<(i64, String, i64)> = conn.prepare(
856                "SELECT id, name, file_count FROM branches"
857            )
858            .and_then(|mut stmt| {
859                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
860                    .map(|rows| rows.collect())
861            })
862            .and_then(|result| result)
863            .unwrap_or_default();
864
865            for (id, name, count) in &branches {
866                log::debug!("stats(): Branch ID={}, Name='{}', FileCount={}", id, name, count);
867            }
868
869            // Debug: Count file_branches per branch
870            let fb_counts: Vec<(String, i64)> = conn.prepare(
871                "SELECT b.name, COUNT(*) FROM file_branches fb
872                 JOIN branches b ON fb.branch_id = b.id
873                 GROUP BY b.name"
874            )
875            .and_then(|mut stmt| {
876                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
877                    .map(|rows| rows.collect())
878            })
879            .and_then(|result| result)
880            .unwrap_or_default();
881
882            for (name, count) in &fb_counts {
883                log::debug!("stats(): file_branches count for branch '{}': {}", name, count);
884            }
885
886            // Count files for current branch only
887            let count: usize = conn.query_row(
888                "SELECT COUNT(DISTINCT fb.file_id)
889                 FROM file_branches fb
890                 JOIN branches b ON fb.branch_id = b.id
891                 WHERE b.name = ?",
892                [branch],
893                |row| row.get(0),
894            ).unwrap_or(0);
895
896            log::debug!("stats(): Query returned total_files = {}", count);
897            count
898        } else {
899            // No branch info - should not happen, but return 0
900            log::warn!("stats(): No current_branch detected!");
901            0
902        };
903
904        // Read last updated timestamp
905        let last_updated: String = conn.query_row(
906            "SELECT updated_at FROM statistics WHERE key = 'total_files'",
907            [],
908            |row| {
909                let timestamp: i64 = row.get(0)?;
910                Ok(chrono::DateTime::from_timestamp(timestamp, 0)
911                    .unwrap_or_else(chrono::Utc::now)
912                    .to_rfc3339())
913            },
914        ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
915
916        // Calculate total cache size (all binary files)
917        let mut index_size_bytes: u64 = 0;
918
919        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
920            let file_path = self.cache_path.join(file_name);
921            if let Ok(metadata) = std::fs::metadata(&file_path) {
922                index_size_bytes += metadata.len();
923            }
924        }
925
926        // Get file count breakdown by language (branch-aware if possible)
927        let mut files_by_language = std::collections::HashMap::new();
928        if let Some(ref branch) = current_branch {
929            // Query files for current branch only
930            let mut stmt = conn.prepare(
931                "SELECT f.language, COUNT(DISTINCT f.id)
932                 FROM files f
933                 JOIN file_branches fb ON f.id = fb.file_id
934                 JOIN branches b ON fb.branch_id = b.id
935                 WHERE b.name = ?
936                 GROUP BY f.language"
937            )?;
938            let lang_counts = stmt.query_map([branch], |row| {
939                let language: String = row.get(0)?;
940                let count: i64 = row.get(1)?;
941                Ok((language, count as usize))
942            })?;
943
944            for result in lang_counts {
945                let (language, count) = result?;
946                files_by_language.insert(language, count);
947            }
948        } else {
949            // Fallback: query all files
950            let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
951            let lang_counts = stmt.query_map([], |row| {
952                let language: String = row.get(0)?;
953                let count: i64 = row.get(1)?;
954                Ok((language, count as usize))
955            })?;
956
957            for result in lang_counts {
958                let (language, count) = result?;
959                files_by_language.insert(language, count);
960            }
961        }
962
963        // Get line count breakdown by language (branch-aware if possible)
964        let mut lines_by_language = std::collections::HashMap::new();
965        if let Some(ref branch) = current_branch {
966            // Query lines for current branch only
967            let mut stmt = conn.prepare(
968                "SELECT f.language, SUM(f.line_count)
969                 FROM files f
970                 JOIN file_branches fb ON f.id = fb.file_id
971                 JOIN branches b ON fb.branch_id = b.id
972                 WHERE b.name = ?
973                 GROUP BY f.language"
974            )?;
975            let line_counts = stmt.query_map([branch], |row| {
976                let language: String = row.get(0)?;
977                let count: i64 = row.get(1)?;
978                Ok((language, count as usize))
979            })?;
980
981            for result in line_counts {
982                let (language, count) = result?;
983                lines_by_language.insert(language, count);
984            }
985        } else {
986            // Fallback: query all files
987            let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
988            let line_counts = stmt.query_map([], |row| {
989                let language: String = row.get(0)?;
990                let count: i64 = row.get(1)?;
991                Ok((language, count as usize))
992            })?;
993
994            for result in line_counts {
995                let (language, count) = result?;
996                lines_by_language.insert(language, count);
997            }
998        }
999
1000        Ok(crate::models::IndexStats {
1001            total_files,
1002            index_size_bytes,
1003            last_updated,
1004            files_by_language,
1005            lines_by_language,
1006        })
1007    }
1008
1009    // ===== Branch-aware indexing methods =====
1010
1011    /// Get or create a branch ID by name
1012    ///
1013    /// Returns the numeric branch ID, creating a new entry if needed.
1014    fn get_or_create_branch_id(&self, conn: &Connection, branch_name: &str, commit_sha: Option<&str>) -> Result<i64> {
1015        // Try to get existing branch
1016        let existing_id: Option<i64> = conn
1017            .query_row(
1018                "SELECT id FROM branches WHERE name = ?",
1019                [branch_name],
1020                |row| row.get(0),
1021            )
1022            .optional()?;
1023
1024        if let Some(id) = existing_id {
1025            return Ok(id);
1026        }
1027
1028        // Create new branch entry
1029        let now = chrono::Utc::now().timestamp();
1030        conn.execute(
1031            "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1032             VALUES (?, ?, ?, 0, 0)",
1033            [branch_name, commit_sha.unwrap_or("unknown"), &now.to_string()],
1034        )?;
1035
1036        // Get the ID we just created
1037        let id: i64 = conn.last_insert_rowid();
1038        Ok(id)
1039    }
1040
1041    /// Record a file's hash for a specific branch
1042    pub fn record_branch_file(
1043        &self,
1044        path: &str,
1045        branch: &str,
1046        hash: &str,
1047        commit_sha: Option<&str>,
1048    ) -> Result<()> {
1049        let db_path = self.cache_path.join(META_DB);
1050        let conn = Connection::open(&db_path)
1051            .context("Failed to open meta.db for branch file recording")?;
1052
1053        // Lookup file_id from path
1054        let file_id: i64 = conn.query_row(
1055            "SELECT id FROM files WHERE path = ?",
1056            [path],
1057            |row| row.get(0)
1058        ).context(format!("File not found in index: {}", path))?;
1059
1060        // Get or create branch_id
1061        let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1062
1063        let now = chrono::Utc::now().timestamp();
1064
1065        // Insert using proper INTEGER types (not strings!)
1066        conn.execute(
1067            "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1068             VALUES (?, ?, ?, ?)",
1069            rusqlite::params![file_id, branch_id, hash, now],
1070        )?;
1071
1072        Ok(())
1073    }
1074
1075    /// Batch record multiple files for a specific branch in a single transaction
1076    ///
1077    /// IMPORTANT: Files must already exist in the `files` table before calling this method.
1078    /// For atomic insertion of both files and branch hashes, use `batch_update_files_and_branch()` instead.
1079    pub fn batch_record_branch_files(
1080        &self,
1081        files: &[(String, String)],  // (path, hash)
1082        branch: &str,
1083        commit_sha: Option<&str>,
1084    ) -> Result<()> {
1085        log::info!("batch_record_branch_files: Processing {} files for branch '{}'", files.len(), branch);
1086
1087        let db_path = self.cache_path.join(META_DB);
1088        let mut conn = Connection::open(&db_path)
1089            .context("Failed to open meta.db for batch branch recording")?;
1090
1091        let now = chrono::Utc::now().timestamp();
1092
1093        // Use a transaction for batch inserts
1094        let tx = conn.transaction()?;
1095
1096        // Get or create branch_id (use transaction connection)
1097        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1098        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1099
1100        let mut inserted = 0;
1101        for (path, hash) in files {
1102            // Lookup file_id from path
1103            log::trace!("Looking up file_id for path: {}", path);
1104            let file_id: i64 = tx.query_row(
1105                "SELECT id FROM files WHERE path = ?",
1106                [path.as_str()],
1107                |row| row.get(0)
1108            ).context(format!("File not found in index: {}", path))?;
1109            log::trace!("Found file_id={} for path: {}", file_id, path);
1110
1111            // Insert using proper INTEGER types (not strings!)
1112            tx.execute(
1113                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1114                 VALUES (?, ?, ?, ?)",
1115                rusqlite::params![file_id, branch_id, hash.as_str(), now],
1116            )?;
1117            inserted += 1;
1118        }
1119
1120        log::info!("Inserted {} file_branches entries", inserted);
1121        tx.commit()?;
1122        log::info!("Transaction committed successfully");
1123        Ok(())
1124    }
1125
1126    /// Get all files indexed for a specific branch
1127    ///
1128    /// Returns a HashMap of path → hash for all files in the branch.
1129    pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1130        let db_path = self.cache_path.join(META_DB);
1131
1132        if !db_path.exists() {
1133            return Ok(HashMap::new());
1134        }
1135
1136        let conn = Connection::open(&db_path)
1137            .context("Failed to open meta.db")?;
1138
1139        let mut stmt = conn.prepare(
1140            "SELECT f.path, fb.hash
1141             FROM file_branches fb
1142             JOIN files f ON fb.file_id = f.id
1143             JOIN branches b ON fb.branch_id = b.id
1144             WHERE b.name = ?"
1145        )?;
1146        let files: HashMap<String, String> = stmt
1147            .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1148            .collect::<Result<HashMap<_, _>, _>>()?;
1149
1150        log::debug!(
1151            "Loaded {} files for branch '{}' from file_branches table",
1152            files.len(),
1153            branch
1154        );
1155        Ok(files)
1156    }
1157
1158    /// Check if a branch has any indexed files
1159    ///
1160    /// Fast existence check using LIMIT 1 for O(1) performance.
1161    pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1162        let db_path = self.cache_path.join(META_DB);
1163
1164        if !db_path.exists() {
1165            return Ok(false);
1166        }
1167
1168        let conn = Connection::open(&db_path)
1169            .context("Failed to open meta.db")?;
1170
1171        let count: i64 = conn
1172            .query_row(
1173                "SELECT COUNT(*)
1174                 FROM file_branches fb
1175                 JOIN branches b ON fb.branch_id = b.id
1176                 WHERE b.name = ?
1177                 LIMIT 1",
1178                [branch],
1179                |row| row.get(0),
1180            )
1181            .unwrap_or(0);
1182
1183        Ok(count > 0)
1184    }
1185
1186    /// Get branch metadata (commit, last_indexed, file_count, dirty status)
1187    pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1188        let db_path = self.cache_path.join(META_DB);
1189
1190        if !db_path.exists() {
1191            anyhow::bail!("Database not initialized");
1192        }
1193
1194        let conn = Connection::open(&db_path)
1195            .context("Failed to open meta.db")?;
1196
1197        let info = conn.query_row(
1198            "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1199            [branch],
1200            |row| {
1201                Ok(BranchInfo {
1202                    branch: branch.to_string(),
1203                    commit_sha: row.get(0)?,
1204                    last_indexed: row.get(1)?,
1205                    file_count: row.get(2)?,
1206                    is_dirty: row.get::<_, i64>(3)? != 0,
1207                })
1208            },
1209        )?;
1210
1211        Ok(info)
1212    }
1213
1214    /// Update branch metadata after indexing
1215    ///
1216    /// Uses UPDATE instead of INSERT OR REPLACE to preserve branch_id and prevent
1217    /// CASCADE DELETE on file_branches table.
1218    pub fn update_branch_metadata(
1219        &self,
1220        branch: &str,
1221        commit_sha: Option<&str>,
1222        file_count: usize,
1223        is_dirty: bool,
1224    ) -> Result<()> {
1225        let db_path = self.cache_path.join(META_DB);
1226        let conn = Connection::open(&db_path)
1227            .context("Failed to open meta.db for branch metadata update")?;
1228
1229        let now = chrono::Utc::now().timestamp();
1230        let is_dirty_int = if is_dirty { 1 } else { 0 };
1231
1232        // Try UPDATE first to preserve branch_id (prevents CASCADE DELETE)
1233        let rows_updated = conn.execute(
1234            "UPDATE branches
1235             SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1236             WHERE name = ?",
1237            rusqlite::params![
1238                commit_sha.unwrap_or("unknown"),
1239                now,
1240                file_count,
1241                is_dirty_int,
1242                branch
1243            ],
1244        )?;
1245
1246        // If no rows updated (branch doesn't exist yet), INSERT new one
1247        if rows_updated == 0 {
1248            conn.execute(
1249                "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1250                 VALUES (?, ?, ?, ?, ?)",
1251                rusqlite::params![
1252                    branch,
1253                    commit_sha.unwrap_or("unknown"),
1254                    now,
1255                    file_count,
1256                    is_dirty_int
1257                ],
1258            )?;
1259        }
1260
1261        log::debug!(
1262            "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1263            branch,
1264            commit_sha.unwrap_or("unknown"),
1265            file_count,
1266            is_dirty
1267        );
1268        Ok(())
1269    }
1270
1271    /// Find a file with a specific hash (for symbol reuse optimization)
1272    ///
1273    /// Returns the path and branch where this hash was first seen,
1274    /// enabling reuse of parsed symbols across branches.
1275    pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1276        let db_path = self.cache_path.join(META_DB);
1277
1278        if !db_path.exists() {
1279            return Ok(None);
1280        }
1281
1282        let conn = Connection::open(&db_path)
1283            .context("Failed to open meta.db")?;
1284
1285        let result = conn
1286            .query_row(
1287                "SELECT f.path, b.name
1288                 FROM file_branches fb
1289                 JOIN files f ON fb.file_id = f.id
1290                 JOIN branches b ON fb.branch_id = b.id
1291                 WHERE fb.hash = ?
1292                 LIMIT 1",
1293                [hash],
1294                |row| Ok((row.get(0)?, row.get(1)?)),
1295            )
1296            .optional()?;
1297
1298        Ok(result)
1299    }
1300
1301    /// Get file ID by path
1302    ///
1303    /// Returns the integer ID for a file path, or None if not found.
1304    pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1305        let db_path = self.cache_path.join(META_DB);
1306
1307        if !db_path.exists() {
1308            return Ok(None);
1309        }
1310
1311        let conn = Connection::open(&db_path)
1312            .context("Failed to open meta.db")?;
1313
1314        let result = conn
1315            .query_row(
1316                "SELECT id FROM files WHERE path = ?",
1317                [path],
1318                |row| row.get(0),
1319            )
1320            .optional()?;
1321
1322        Ok(result)
1323    }
1324
1325    /// Batch get file IDs for multiple paths
1326    ///
1327    /// Returns a HashMap of path → file_id for all found paths.
1328    /// Paths not in the database are omitted from the result.
1329    ///
1330    /// Automatically chunks large batches to avoid SQLite parameter limits (999 max).
1331    pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1332        let db_path = self.cache_path.join(META_DB);
1333
1334        if !db_path.exists() {
1335            return Ok(HashMap::new());
1336        }
1337
1338        let conn = Connection::open(&db_path)
1339            .context("Failed to open meta.db")?;
1340
1341        // SQLite has a limit of 999 parameters by default
1342        // Chunk requests to stay well under that limit
1343        const BATCH_SIZE: usize = 900;
1344
1345        let mut results = HashMap::new();
1346
1347        for chunk in paths.chunks(BATCH_SIZE) {
1348            // Build IN clause for this chunk
1349            let placeholders = chunk.iter()
1350                .map(|_| "?")
1351                .collect::<Vec<_>>()
1352                .join(", ");
1353
1354            let query = format!("SELECT path, id FROM files WHERE path IN ({})", placeholders);
1355
1356            let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1357            let mut stmt = conn.prepare(&query)?;
1358
1359            let chunk_results = stmt.query_map(rusqlite::params_from_iter(params), |row| {
1360                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1361            })?
1362            .collect::<Result<HashMap<_, _>, _>>()?;
1363
1364            results.extend(chunk_results);
1365        }
1366
1367        log::debug!("Batch loaded {} file IDs (out of {} requested, {} chunks)",
1368                   results.len(), paths.len(), paths.len().div_ceil(BATCH_SIZE));
1369        Ok(results)
1370    }
1371
1372    // ===== Cache compaction methods =====
1373
1374    /// Check if cache compaction should run
1375    ///
1376    /// Returns true if 24+ hours have passed since last compaction (or never compacted).
1377    /// Compaction threshold: 86400 seconds (24 hours)
1378    pub fn should_compact(&self) -> Result<bool> {
1379        let db_path = self.cache_path.join(META_DB);
1380
1381        if !db_path.exists() {
1382            // No database means no compaction needed
1383            return Ok(false);
1384        }
1385
1386        let conn = Connection::open(&db_path)
1387            .context("Failed to open meta.db for compaction check")?;
1388
1389        // Get last_compaction timestamp (defaults to "0" if not found)
1390        let last_compaction: i64 = conn
1391            .query_row(
1392                "SELECT value FROM statistics WHERE key = 'last_compaction'",
1393                [],
1394                |row| {
1395                    let value: String = row.get(0)?;
1396                    Ok(value.parse::<i64>().unwrap_or(0))
1397                },
1398            )
1399            .unwrap_or(0);
1400
1401        // Get current timestamp
1402        let now = chrono::Utc::now().timestamp();
1403
1404        // Compaction threshold: 24 hours (86400 seconds)
1405        const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1406
1407        let elapsed_secs = now - last_compaction;
1408        let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1409
1410        log::debug!(
1411            "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1412            last_compaction,
1413            now,
1414            elapsed_secs,
1415            should_run
1416        );
1417
1418        Ok(should_run)
1419    }
1420
1421    /// Update last_compaction timestamp in statistics table
1422    ///
1423    /// Called after successful compaction to record when it ran.
1424    pub fn update_compaction_timestamp(&self) -> Result<()> {
1425        let db_path = self.cache_path.join(META_DB);
1426        let conn = Connection::open(&db_path)
1427            .context("Failed to open meta.db for compaction timestamp update")?;
1428
1429        let now = chrono::Utc::now().timestamp();
1430
1431        conn.execute(
1432            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1433            ["last_compaction", &now.to_string(), &now.to_string()],
1434        )?;
1435
1436        log::debug!("Updated last_compaction timestamp to: {}", now);
1437        Ok(())
1438    }
1439
1440    /// Compact the cache by removing deleted files and reclaiming disk space
1441    ///
1442    /// This operation:
1443    /// 1. Identifies files in the database that no longer exist on disk
1444    /// 2. Deletes those files from all database tables (CASCADE handles related data)
1445    /// 3. Runs VACUUM to reclaim disk space from deleted rows
1446    /// 4. Updates the last_compaction timestamp
1447    ///
1448    /// Returns a CompactionReport with statistics about the operation.
1449    /// Safe to run concurrently with queries (uses SQLite transactions).
1450    pub fn compact(&self) -> Result<crate::models::CompactionReport> {
1451        let start_time = std::time::Instant::now();
1452        log::info!("Starting cache compaction...");
1453
1454        // Get initial cache size
1455        let size_before = self.calculate_cache_size()?;
1456
1457        // Step 1: Identify deleted files (in DB but not on filesystem)
1458        let deleted_files = self.identify_deleted_files()?;
1459        log::info!("Found {} deleted files to remove from cache", deleted_files.len());
1460
1461        if deleted_files.is_empty() {
1462            log::info!("No deleted files to compact - cache is clean");
1463            // Update timestamp anyway to prevent running compaction too frequently
1464            self.update_compaction_timestamp()?;
1465
1466            return Ok(crate::models::CompactionReport {
1467                files_removed: 0,
1468                space_saved_bytes: 0,
1469                duration_ms: start_time.elapsed().as_millis() as u64,
1470            });
1471        }
1472
1473        // Step 2: Delete from database (CASCADE handles file_branches, file_dependencies, file_exports)
1474        self.delete_files_from_db(&deleted_files)?;
1475        log::info!("Deleted {} files from database", deleted_files.len());
1476
1477        // Step 3: Run VACUUM to reclaim disk space
1478        self.vacuum_database()?;
1479        log::info!("Completed VACUUM operation");
1480
1481        // Get final cache size
1482        let size_after = self.calculate_cache_size()?;
1483        let space_saved = size_before.saturating_sub(size_after);
1484
1485        // Step 4: Update last_compaction timestamp
1486        self.update_compaction_timestamp()?;
1487
1488        let duration_ms = start_time.elapsed().as_millis() as u64;
1489
1490        log::info!(
1491            "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
1492            deleted_files.len(),
1493            space_saved,
1494            space_saved as f64 / 1_048_576.0,
1495            duration_ms
1496        );
1497
1498        Ok(crate::models::CompactionReport {
1499            files_removed: deleted_files.len(),
1500            space_saved_bytes: space_saved,
1501            duration_ms,
1502        })
1503    }
1504
1505    /// Identify files in database that no longer exist on filesystem
1506    ///
1507    /// Returns a Vec of file IDs for files that should be removed from the cache.
1508    fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1509        let db_path = self.cache_path.join(META_DB);
1510        let conn = Connection::open(&db_path)
1511            .context("Failed to open meta.db for deleted file identification")?;
1512
1513        let workspace_root = self.workspace_root();
1514
1515        // Query all files from database (id, path)
1516        let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1517        let files = stmt.query_map([], |row| {
1518            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1519        })?
1520        .collect::<Result<Vec<_>, _>>()?;
1521
1522        log::debug!("Checking {} files for deletion status", files.len());
1523
1524        // Check which files no longer exist on disk
1525        let mut deleted_file_ids = Vec::new();
1526        for (file_id, file_path) in files {
1527            let full_path = workspace_root.join(&file_path);
1528            if !full_path.exists() {
1529                log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1530                deleted_file_ids.push(file_id);
1531            }
1532        }
1533
1534        Ok(deleted_file_ids)
1535    }
1536
1537    /// Delete files from database by file ID
1538    ///
1539    /// Uses a transaction for atomicity. CASCADE delete handles:
1540    /// - file_branches entries
1541    /// - file_dependencies entries
1542    /// - file_exports entries
1543    fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1544        if file_ids.is_empty() {
1545            return Ok(());
1546        }
1547
1548        let db_path = self.cache_path.join(META_DB);
1549        let mut conn = Connection::open(&db_path)
1550            .context("Failed to open meta.db for file deletion")?;
1551
1552        let tx = conn.transaction()?;
1553
1554        // Delete files in batches to avoid SQLite parameter limit (999 max)
1555        const BATCH_SIZE: usize = 900;
1556
1557        for chunk in file_ids.chunks(BATCH_SIZE) {
1558            let placeholders = chunk.iter()
1559                .map(|_| "?")
1560                .collect::<Vec<_>>()
1561                .join(", ");
1562
1563            let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1564
1565            let params: Vec<i64> = chunk.to_vec();
1566            tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1567        }
1568
1569        tx.commit()?;
1570        log::debug!("Deleted {} files from database (CASCADE handled related tables)", file_ids.len());
1571        Ok(())
1572    }
1573
1574    /// Run VACUUM on SQLite database to reclaim disk space
1575    ///
1576    /// VACUUM rebuilds the database file, removing free pages and compacting the file.
1577    /// This can take several seconds on large databases but significantly reduces disk usage.
1578    fn vacuum_database(&self) -> Result<()> {
1579        let db_path = self.cache_path.join(META_DB);
1580        let conn = Connection::open(&db_path)
1581            .context("Failed to open meta.db for VACUUM")?;
1582
1583        // VACUUM cannot run inside a transaction
1584        // It rebuilds the entire database file
1585        conn.execute("VACUUM", [])?;
1586
1587        log::debug!("VACUUM completed successfully");
1588        Ok(())
1589    }
1590
1591    /// Calculate total cache size in bytes
1592    ///
1593    /// Sums up the size of all cache files:
1594    /// - meta.db (SQLite database)
1595    /// - trigrams.bin (inverted index)
1596    /// - content.bin (file contents)
1597    /// - config.toml (configuration)
1598    fn calculate_cache_size(&self) -> Result<u64> {
1599        let mut total_size: u64 = 0;
1600
1601        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
1602            let file_path = self.cache_path.join(file_name);
1603            if let Ok(metadata) = std::fs::metadata(&file_path) {
1604                total_size += metadata.len();
1605            }
1606        }
1607
1608        Ok(total_size)
1609    }
1610}
1611
/// Branch metadata information
///
/// Mirrors one row of the `branches` table as read by `get_branch_info`.
#[derive(Debug, Clone)]
pub struct BranchInfo {
    /// Branch name (the synthetic "_default" branch is used outside a git repo).
    pub branch: String,
    /// Commit SHA recorded at index time; "unknown" when it was unavailable.
    pub commit_sha: String,
    /// Unix timestamp (seconds) of the last index run for this branch.
    pub last_indexed: i64,
    /// Number of files recorded for this branch.
    pub file_count: usize,
    /// True when the row's is_dirty integer flag is non-zero
    /// (set when the working tree had uncommitted changes at index time).
    pub is_dirty: bool,
}
1621
// TODO: Implement memory-mapped readers for:
// - SymbolReader (reads from symbols.bin — not yet part of the documented cache layout)
// - TokenReader (reads from tokens.bin — note: init() no longer creates tokens.bin
//   because it was never used; revisit whether this reader is still needed)
// - MetaReader (reads from meta.db)
1626
// Unit tests exercise CacheManager end-to-end against a real SQLite database
// inside a TempDir (no mocking). Each test builds its own isolated cache.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // ===== Initialization & Lifecycle Tests =====

    #[test]
    fn test_cache_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // CacheManager::new only records the path; init() creates the directory.
        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());

        // Verify all expected files were created
        assert!(cache.path().join(META_DB).exists());
        assert!(cache.path().join(CONFIG_TOML).exists());
    }

    #[test]
    fn test_cache_init_idempotent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Initialize twice - should not error
        cache.init().unwrap();
        cache.init().unwrap();

        assert!(cache.exists());
    }

    #[test]
    fn test_cache_clear() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        assert!(cache.exists());

        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Clearing non-existent cache should not error
        assert!(!cache.exists());
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    // ===== Hash Loading Tests =====

    #[test]
    fn test_load_all_hashes_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Loading hashes before init should return empty map
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    // ===== File Metadata Update Tests =====

    #[test]
    fn test_update_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();

        // Verify file was stored (check via list_files)
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.update_file("README.md", "markdown", 50).unwrap();

        // Verify files were stored
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/main.rs", "rust", 150).unwrap();

        // Second update should replace the first
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];

        cache.batch_update_files(&files).unwrap();

        // Verify files were stored
        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    // ===== Statistics Tests =====

    #[test]
    fn test_update_stats() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        // Record files for a test branch
        cache.record_branch_file("src/main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("src/lib.rs", "_default", "hash2", None).unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Stats before init should return zeros
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("main.rs", "Rust", 100).unwrap();
        cache.update_file("lib.rs", "Rust", 200).unwrap();
        cache.update_file("script.py", "Python", 50).unwrap();
        cache.update_file("test.py", "Python", 80).unwrap();

        // Record files for a test branch
        cache.record_branch_file("main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("lib.rs", "_default", "hash2", None).unwrap();
        cache.record_branch_file("script.py", "_default", "hash3", None).unwrap();
        cache.record_branch_file("test.py", "_default", "hash4", None).unwrap();
        cache.update_stats("_default").unwrap();

        // Stats aggregate both file counts and line counts per language.
        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300)); // 100 + 200
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130)); // 50 + 80
    }

    // ===== File Listing Tests =====

    #[test]
    fn test_list_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_list_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // Files should be sorted by path
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Listing files before init should return empty vec
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    // ===== Branch Tracking Tests =====

    #[test]
    fn test_branch_exists() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        assert!(!cache.branch_exists("main").unwrap());

        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Add files to index first (required for batch_record_branch_files)
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];

        cache.batch_record_branch_files(&files, "main", Some("commit123")).unwrap();

        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
    }

    #[test]
    fn test_update_branch_metadata() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();

        // Round-trip: everything written should be readable via get_branch_info.
        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert_eq!(info.is_dirty, false);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();

        let info = cache.get_branch_info("feature").unwrap();
        assert_eq!(info.is_dirty, true);
    }

    #[test]
    fn test_find_file_with_hash() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();

        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    // ===== Config & Schema Tests =====

    #[test]
    fn test_config_toml_created() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let config_path = cache.path().join(CONFIG_TOML);
        let config_content = std::fs::read_to_string(&config_path).unwrap();

        // Verify config contains expected sections
        assert!(config_content.contains("[index]"));
        assert!(config_content.contains("[search]"));
        assert!(config_content.contains("[performance]"));
        assert!(config_content.contains("max_file_size"));
    }

    #[test]
    fn test_meta_db_schema() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Open the database directly to inspect the schema created by init().
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();

        // Verify tables exist
        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'").unwrap()
            .query_map([], |row| row.get(0)).unwrap()
            .collect::<Result<Vec<_>, _>>().unwrap();

        assert!(tables.contains(&"files".to_string()));
        assert!(tables.contains(&"statistics".to_string()));
        assert!(tables.contains(&"config".to_string()));
        assert!(tables.contains(&"file_branches".to_string()));
        assert!(tables.contains(&"branches".to_string()));
        assert!(tables.contains(&"file_dependencies".to_string()));
        assert!(tables.contains(&"file_exports".to_string()));
    }

    // ===== Concurrency Tests =====

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().to_path_buf();

        let cache = CacheManager::new(&cache_path);
        cache.init().unwrap();

        // Spawn multiple threads updating different files
        // (each thread opens its own CacheManager against the shared path).
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = cache_path.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(
                            &format!("file_{}.rs", i),
                            "rust",
                            i * 10,
                        )
                        .unwrap();
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }

        let cache = CacheManager::new(&cache_path);
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    // ===== Corruption Detection Tests =====

    #[test]
    fn test_validate_corrupted_database() {
        use std::io::Write;

        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Corrupt the database by overwriting it with invalid data
        // (File::create truncates the existing meta.db).
        let db_path = cache.path().join(META_DB);
        let mut file = File::create(&db_path).unwrap();
        file.write_all(b"CORRUPTED DATA").unwrap();

        // Validation should fail due to database corruption
        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        eprintln!("Error message: {}", err_msg);
        assert!(err_msg.contains("corrupted") || err_msg.contains("not a database"));
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        use std::io::Write;

        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Create trigrams.bin with invalid magic bytes
        let trigrams_path = cache.path().join("trigrams.bin");
        let mut file = File::create(&trigrams_path).unwrap();
        file.write_all(b"BADM").unwrap(); // Wrong magic bytes (should be "RFTG")

        // Validation should fail due to invalid magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        use std::io::Write;

        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Create content.bin with invalid magic bytes
        let content_path = cache.path().join("content.bin");
        let mut file = File::create(&content_path).unwrap();
        file.write_all(b"BADM").unwrap(); // Wrong magic bytes (should be "RFCT")

        // Validation should fail due to invalid magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Drop a required table to simulate schema corruption
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        // Validation should fail due to missing required table
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}