Skip to main content

reflex/
cache.rs

//! Cache management and memory-mapped I/O
//!
//! The cache module handles the `.reflex/` directory structure:
//! - `meta.db`: Metadata, file hashes, and configuration (SQLite)
//! - `tokens.bin`: Compressed lexical tokens (binary; legacy — no longer created by `CacheManager::init`)
//! - `content.bin`: Memory-mapped file contents (binary)
//! - `trigrams.bin`: Trigram inverted index (custom varint+zstd binary, V3 format)
//! - `config.toml`: Index settings (TOML text)
9
10use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
/// Default cache directory name (joined onto the workspace root by `CacheManager::new`)
pub const CACHE_DIR: &str = ".reflex";

// File names within the cache directory
/// SQLite database holding file metadata, statistics and branch tracking tables
pub const META_DB: &str = "meta.db";
/// Legacy token file name; `CacheManager::init` no longer creates this file
pub const TOKENS_BIN: &str = "tokens.bin";
/// Legacy hash file name; `CacheManager::init` notes that hashes now live in `meta.db`
pub const HASHES_JSON: &str = "hashes.json";
/// Index settings file written with defaults by `CacheManager::init`
pub const CONFIG_TOML: &str = "config.toml";
26
/// Manages the Reflex cache directory
///
/// The manager stores only the path of the cache directory; every operation
/// derives the individual file paths from it on demand.
#[derive(Clone)]
pub struct CacheManager {
    /// Path of the cache directory itself (workspace root joined with `CACHE_DIR`)
    cache_path: PathBuf,
}
32
33impl CacheManager {
34    /// Create a new cache manager for the given root directory
35    pub fn new(root: impl AsRef<Path>) -> Self {
36        let cache_path = root.as_ref().join(CACHE_DIR);
37        Self { cache_path }
38    }
39
40    /// Initialize the cache directory structure if it doesn't exist
41    pub fn init(&self) -> Result<()> {
42        log::info!("Initializing cache at {:?}", self.cache_path);
43
44        if !self.cache_path.exists() {
45            std::fs::create_dir_all(&self.cache_path)?;
46        }
47
48        // Create meta.db with schema
49        self.init_meta_db()?;
50
51        // Create default config.toml
52        self.init_config_toml()?;
53
54        // Note: tokens.bin removed - was never used
55        // Note: hashes.json is deprecated - hashes are now stored in meta.db
56
57        log::info!("Cache initialized successfully");
58        Ok(())
59    }
60
61    /// Initialize meta.db with SQLite schema
62    fn init_meta_db(&self) -> Result<()> {
63        let db_path = self.cache_path.join(META_DB);
64
65        // Skip if already exists
66        if db_path.exists() {
67            return Ok(());
68        }
69
70        let conn = Connection::open(&db_path)
71            .context("Failed to create meta.db")?;
72
73        // Create files table
74        conn.execute(
75            "CREATE TABLE IF NOT EXISTS files (
76                id INTEGER PRIMARY KEY AUTOINCREMENT,
77                path TEXT NOT NULL UNIQUE,
78                last_indexed INTEGER NOT NULL,
79                language TEXT NOT NULL,
80                token_count INTEGER DEFAULT 0,
81                line_count INTEGER DEFAULT 0
82            )",
83            [],
84        )?;
85
86        conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
87
88        // Create statistics table
89        conn.execute(
90            "CREATE TABLE IF NOT EXISTS statistics (
91                key TEXT PRIMARY KEY,
92                value TEXT NOT NULL,
93                updated_at INTEGER NOT NULL
94            )",
95            [],
96        )?;
97
98        // Initialize default statistics
99        let now = chrono::Utc::now().timestamp();
100        conn.execute(
101            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
102            ["total_files", "0", &now.to_string()],
103        )?;
104        conn.execute(
105            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
106            ["cache_version", "1", &now.to_string()],
107        )?;
108
109        // Store cache schema hash for automatic invalidation detection
110        // This hash is computed at build time from cache-critical source files
111        let schema_hash = env!("CACHE_SCHEMA_HASH");
112        conn.execute(
113            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
114            ["schema_hash", schema_hash, &now.to_string()],
115        )?;
116
117        // Initialize last_compaction timestamp (0 = never compacted)
118        conn.execute(
119            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
120            ["last_compaction", "0", &now.to_string()],
121        )?;
122
123        // Create config table
124        conn.execute(
125            "CREATE TABLE IF NOT EXISTS config (
126                key TEXT PRIMARY KEY,
127                value TEXT NOT NULL
128            )",
129            [],
130        )?;
131
132        // Create branch tracking tables for git-aware indexing
133        conn.execute(
134            "CREATE TABLE IF NOT EXISTS file_branches (
135                file_id INTEGER NOT NULL,
136                branch_id INTEGER NOT NULL,
137                hash TEXT NOT NULL,
138                last_indexed INTEGER NOT NULL,
139                PRIMARY KEY (file_id, branch_id),
140                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
141                FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
142            )",
143            [],
144        )?;
145
146        conn.execute(
147            "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
148            [],
149        )?;
150
151        conn.execute(
152            "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
153            [],
154        )?;
155
156        // Create branches metadata table
157        conn.execute(
158            "CREATE TABLE IF NOT EXISTS branches (
159                id INTEGER PRIMARY KEY AUTOINCREMENT,
160                name TEXT NOT NULL UNIQUE,
161                commit_sha TEXT NOT NULL,
162                last_indexed INTEGER NOT NULL,
163                file_count INTEGER DEFAULT 0,
164                is_dirty INTEGER DEFAULT 0
165            )",
166            [],
167        )?;
168
169        // Create file dependencies table for tracking imports/includes
170        conn.execute(
171            "CREATE TABLE IF NOT EXISTS file_dependencies (
172                id INTEGER PRIMARY KEY AUTOINCREMENT,
173                file_id INTEGER NOT NULL,
174                imported_path TEXT NOT NULL,
175                resolved_file_id INTEGER,
176                import_type TEXT NOT NULL,
177                line_number INTEGER NOT NULL,
178                imported_symbols TEXT,
179                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
180                FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
181            )",
182            [],
183        )?;
184
185        conn.execute(
186            "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
187            [],
188        )?;
189
190        conn.execute(
191            "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
192            [],
193        )?;
194
195        conn.execute(
196            "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
197            [],
198        )?;
199
200        // Create file exports table for tracking barrel re-exports
201        conn.execute(
202            "CREATE TABLE IF NOT EXISTS file_exports (
203                id INTEGER PRIMARY KEY AUTOINCREMENT,
204                file_id INTEGER NOT NULL,
205                exported_symbol TEXT,
206                source_path TEXT NOT NULL,
207                resolved_source_id INTEGER,
208                line_number INTEGER NOT NULL,
209                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
210                FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
211            )",
212            [],
213        )?;
214
215        conn.execute(
216            "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
217            [],
218        )?;
219
220        conn.execute(
221            "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
222            [],
223        )?;
224
225        conn.execute(
226            "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
227            [],
228        )?;
229
230        log::debug!("Created meta.db with schema");
231        Ok(())
232    }
233
234    /// Initialize config.toml with defaults
235    fn init_config_toml(&self) -> Result<()> {
236        let config_path = self.cache_path.join(CONFIG_TOML);
237
238        if config_path.exists() {
239            return Ok(());
240        }
241
242        let default_config = r#"[index]
243languages = []  # Empty = all supported languages
244max_file_size = 10485760  # 10 MB
245follow_symlinks = false
246
247[index.include]
248patterns = []
249
250[index.exclude]
251patterns = []
252
253[search]
254default_limit = 100
255fuzzy_threshold = 0.8
256
257[performance]
258parallel_threads = 0  # 0 = auto (80% of available cores), or set a specific number
259compression_level = 3  # zstd level
260
261[semantic]
262# Semantic query generation using LLMs
263# Translate natural language questions into rfx query commands
264provider = "openrouter"  # Options: openai, anthropic, openrouter
265# model = "openai/gpt-4o-mini"  # Optional: override provider default model
266# auto_execute = false  # Optional: auto-execute queries without confirmation
267"#;
268
269        std::fs::write(&config_path, default_config)?;
270
271        log::debug!("Created default config.toml");
272        Ok(())
273    }
274
275    /// Check if cache exists and is valid
276    pub fn exists(&self) -> bool {
277        self.cache_path.exists()
278            && self.cache_path.join(META_DB).exists()
279    }
280
281    /// Validate cache integrity and detect corruption
282    ///
283    /// Performs basic integrity checks on the cache:
284    /// - Verifies all required files exist
285    /// - Checks SQLite database can be opened
286    /// - Validates binary file headers (trigrams.bin, content.bin)
287    ///
288    /// Returns Ok(()) if cache is valid, Err with details if corrupted.
289    pub fn validate(&self) -> Result<()> {
290        let start = std::time::Instant::now();
291
292        // Check if cache directory exists
293        if !self.cache_path.exists() {
294            anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
295        }
296
297        // Check meta.db exists and can be opened
298        let db_path = self.cache_path.join(META_DB);
299        if !db_path.exists() {
300            anyhow::bail!("Database file missing: {}", db_path.display());
301        }
302
303        // Try to open database
304        let conn = Connection::open(&db_path)
305            .context("Failed to open meta.db - database may be corrupted")?;
306
307        // Verify schema exists
308        let tables: Result<Vec<String>, _> = conn
309            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
310            .and_then(|mut stmt| {
311                stmt.query_map([], |row| row.get(0))
312                    .map(|rows| rows.collect())
313            })
314            .and_then(|result| result);
315
316        match tables {
317            Ok(table_list) => {
318                // Check for required tables
319                let required_tables = vec!["files", "statistics", "config", "file_branches", "branches", "file_dependencies", "file_exports"];
320                for table in &required_tables {
321                    if !table_list.iter().any(|t| t == table) {
322                        anyhow::bail!("Required table '{}' missing from database schema", table);
323                    }
324                }
325            }
326            Err(e) => {
327                anyhow::bail!("Failed to read database schema: {}", e);
328            }
329        }
330
331        // Run SQLite integrity check (fast quick_check)
332        // Use quick_check instead of integrity_check for speed (<10ms vs 100ms+)
333        let integrity_result: String = conn
334            .query_row("PRAGMA quick_check", [], |row| row.get(0))?;
335
336        if integrity_result != "ok" {
337            log::warn!("Database integrity check failed: {}", integrity_result);
338            anyhow::bail!(
339                "Database integrity check failed: {}. Cache may be corrupted. \
340                 Run 'rfx index' to rebuild cache.",
341                integrity_result
342            );
343        }
344
345        // Check trigrams.bin if it exists
346        let trigrams_path = self.cache_path.join("trigrams.bin");
347        if trigrams_path.exists() {
348            use std::io::Read;
349
350            match File::open(&trigrams_path) {
351                Ok(mut file) => {
352                    let mut header = [0u8; 4];
353                    match file.read_exact(&mut header) {
354                        Ok(_) => {
355                            // Check magic bytes
356                            if &header != b"RFTG" {
357                                log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
358                                anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
359                            }
360                        }
361                        Err(_) => {
362                            anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
363                        }
364                    }
365                }
366                Err(e) => {
367                    anyhow::bail!("Failed to open trigrams.bin: {}", e);
368                }
369            }
370        }
371
372        // Check content.bin if it exists
373        let content_path = self.cache_path.join("content.bin");
374        if content_path.exists() {
375            use std::io::Read;
376
377            match File::open(&content_path) {
378                Ok(mut file) => {
379                    let mut header = [0u8; 4];
380                    match file.read_exact(&mut header) {
381                        Ok(_) => {
382                            // Check magic bytes
383                            if &header != b"RFCT" {
384                                log::warn!("content.bin has invalid magic bytes - may be corrupted");
385                                anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
386                            }
387                        }
388                        Err(_) => {
389                            anyhow::bail!("content.bin is too small - appears to be corrupted");
390                        }
391                    }
392                }
393                Err(e) => {
394                    anyhow::bail!("Failed to open content.bin: {}", e);
395                }
396            }
397        }
398
399        // Check schema hash for automatic invalidation
400        let current_schema_hash = env!("CACHE_SCHEMA_HASH");
401
402        let stored_schema_hash: Option<String> = conn
403            .query_row(
404                "SELECT value FROM statistics WHERE key = 'schema_hash'",
405                [],
406                |row| row.get(0),
407            )
408            .optional()?;
409
410        if let Some(stored_hash) = stored_schema_hash {
411            if stored_hash != current_schema_hash {
412                log::warn!(
413                    "Cache schema hash mismatch! Stored: {}, Current: {}",
414                    stored_hash,
415                    current_schema_hash
416                );
417                anyhow::bail!(
418                    "Cache schema version mismatch.\n\
419                     \n\
420                     - Cache was built with version {}\n\
421                     - Current binary expects version {}\n\
422                     \n\
423                     The cache format may be incompatible with this version of Reflex.\n\
424                     Please rebuild the index by running:\n\
425                     \n\
426                       rfx index\n\
427                     \n\
428                     This usually happens after upgrading Reflex or making code changes.",
429                    stored_hash,
430                    current_schema_hash
431                );
432            }
433        } else {
434            log::debug!("No schema_hash found in cache - this cache was created before automatic invalidation was implemented");
435            // Don't fail for backward compatibility with old caches
436            // They will get the hash on next rebuild
437        }
438
439        let elapsed = start.elapsed();
440        log::debug!("Cache validation passed (schema hash: {}, took {:?})", current_schema_hash, elapsed);
441        Ok(())
442    }
443
444    /// Get the path to the cache directory
445    pub fn path(&self) -> &Path {
446        &self.cache_path
447    }
448
449    /// Get the workspace root directory (parent of .reflex/)
450    pub fn workspace_root(&self) -> PathBuf {
451        self.cache_path
452            .parent()
453            .expect(".reflex directory should have a parent")
454            .to_path_buf()
455    }
456
457    /// Load IndexConfig from `.reflex/config.toml` if it exists.
458    ///
459    /// Returns `IndexConfig::default()` when the file is absent or a section
460    /// is missing.  Parse errors are surfaced so the user gets a clear message
461    /// rather than silently falling back to defaults.
462    pub fn load_index_config(&self) -> Result<crate::models::IndexConfig> {
463        use crate::models::{IndexConfig, Language};
464
465        let config_path = self.cache_path.join(CONFIG_TOML);
466        if !config_path.exists() {
467            return Ok(IndexConfig::default());
468        }
469
470        let raw = std::fs::read_to_string(&config_path)
471            .with_context(|| format!("Failed to read {}", config_path.display()))?;
472
473        let toml_val: toml::Value = toml::from_str(&raw)
474            .with_context(|| format!("Failed to parse {}", config_path.display()))?;
475
476        let mut cfg = IndexConfig::default();
477
478        if let Some(index_tbl) = toml_val.get("index") {
479            if let Some(langs) = index_tbl.get("languages").and_then(|v| v.as_array()) {
480                let parsed: Vec<Language> = langs
481                    .iter()
482                    .filter_map(|v| v.as_str())
483                    .filter_map(|s| Language::from_name(s).or_else(|| {
484                        log::warn!("Unknown language '{}' in config.toml [index] section — ignoring", s);
485                        None
486                    }))
487                    .collect();
488                if !parsed.is_empty() {
489                    cfg.languages = parsed;
490                }
491            }
492            if let Some(max_size) = index_tbl.get("max_file_size").and_then(|v| v.as_integer()) {
493                cfg.max_file_size = max_size as usize;
494            }
495            if let Some(follow) = index_tbl.get("follow_symlinks").and_then(|v| v.as_bool()) {
496                cfg.follow_symlinks = follow;
497            }
498            if let Some(include) = index_tbl.get("include").and_then(|v| v.get("patterns")).and_then(|v| v.as_array()) {
499                cfg.include_patterns = include.iter().filter_map(|v| v.as_str().map(String::from)).collect();
500            }
501            if let Some(exclude) = index_tbl.get("exclude").and_then(|v| v.get("patterns")).and_then(|v| v.as_array()) {
502                cfg.exclude_patterns = exclude.iter().filter_map(|v| v.as_str().map(String::from)).collect();
503            }
504        }
505
506        if let Some(perf) = toml_val.get("performance") {
507            if let Some(threads) = perf.get("parallel_threads").and_then(|v| v.as_integer()) {
508                cfg.parallel_threads = threads as usize;
509            }
510        }
511
512        log::debug!("Loaded IndexConfig from config.toml: {:?}", cfg);
513        Ok(cfg)
514    }
515
516    /// Clear the entire cache
517    pub fn clear(&self) -> Result<()> {
518        log::info!("Clearing cache at {:?}", self.cache_path);
519
520        if self.cache_path.exists() {
521            std::fs::remove_dir_all(&self.cache_path)?;
522        }
523
524        Ok(())
525    }
526
527    /// Force SQLite WAL (Write-Ahead Log) checkpoint
528    ///
529    /// Ensures all data written in transactions is flushed to the main database file.
530    /// This is critical when spawning background processes that open new connections,
531    /// as they need to see the committed data immediately.
532    ///
533    /// Uses TRUNCATE mode to completely flush and reset the WAL file.
534    pub fn checkpoint_wal(&self) -> Result<()> {
535        let db_path = self.cache_path.join(META_DB);
536
537        if !db_path.exists() {
538            // No database to checkpoint
539            return Ok(());
540        }
541
542        let conn = Connection::open(&db_path)
543            .context("Failed to open meta.db for WAL checkpoint")?;
544
545        // PRAGMA wal_checkpoint(TRUNCATE) forces a full checkpoint and truncates the WAL
546        // This ensures background processes see all committed data
547        // Note: Returns (busy, log_pages, checkpointed_pages) - use query instead of execute
548        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
549            let busy: i64 = row.get(0)?;
550            let log_pages: i64 = row.get(1)?;
551            let checkpointed: i64 = row.get(2)?;
552            log::debug!(
553                "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
554                busy, log_pages, checkpointed
555            );
556            Ok(())
557        }).context("Failed to execute WAL checkpoint")?;
558
559        log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
560        Ok(())
561    }
562
563    /// Load all file hashes across all branches from SQLite
564    ///
565    /// Used by background indexer to get hashes for all indexed files.
566    /// Returns the most recent hash for each file across all branches.
567    pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
568        let db_path = self.cache_path.join(META_DB);
569
570        if !db_path.exists() {
571            return Ok(HashMap::new());
572        }
573
574        let conn = Connection::open(&db_path)
575            .context("Failed to open meta.db")?;
576
577        // Get all hashes from file_branches, joined with files to get paths
578        // If a file appears in multiple branches, we'll get multiple entries
579        // (HashMap will keep the last one, which is fine for background indexer)
580        let mut stmt = conn.prepare(
581            "SELECT f.path, fb.hash
582             FROM file_branches fb
583             JOIN files f ON fb.file_id = f.id"
584        )?;
585        let hashes: HashMap<String, String> = stmt.query_map([], |row| {
586            Ok((row.get(0)?, row.get(1)?))
587        })?
588        .collect::<Result<HashMap<_, _>, _>>()?;
589
590        log::debug!("Loaded {} file hashes across all branches from SQLite", hashes.len());
591        Ok(hashes)
592    }
593
594    /// Load file hashes for a specific branch from SQLite
595    ///
596    /// Used by indexer and query engine to get hashes for the current branch.
597    /// This ensures branch-specific incremental indexing and symbol cache lookups.
598    pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
599        let db_path = self.cache_path.join(META_DB);
600
601        if !db_path.exists() {
602            return Ok(HashMap::new());
603        }
604
605        let conn = Connection::open(&db_path)
606            .context("Failed to open meta.db")?;
607
608        // Get hashes for specific branch only
609        let mut stmt = conn.prepare(
610            "SELECT f.path, fb.hash
611             FROM file_branches fb
612             JOIN files f ON fb.file_id = f.id
613             JOIN branches b ON fb.branch_id = b.id
614             WHERE b.name = ?"
615        )?;
616        let hashes: HashMap<String, String> = stmt.query_map([branch], |row| {
617            Ok((row.get(0)?, row.get(1)?))
618        })?
619        .collect::<Result<HashMap<_, _>, _>>()?;
620
621        log::debug!("Loaded {} file hashes for branch '{}' from SQLite", hashes.len(), branch);
622        Ok(hashes)
623    }
624
    /// Save file hashes for incremental indexing
    ///
    /// DEPRECATED: Hashes are now saved via record_branch_file() or batch_record_branch_files().
    /// This method is kept for backward compatibility but does nothing:
    /// the `_hashes` argument is ignored and `Ok(())` is always returned.
    #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
        // No-op: hashes are now persisted to SQLite in record_branch_file()
        Ok(())
    }
634
635    /// Update file metadata in the files table
636    ///
637    /// Note: File content hashes are stored separately in the file_branches table
638    /// via record_branch_file() or batch_record_branch_files().
639    pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
640        let db_path = self.cache_path.join(META_DB);
641        let conn = Connection::open(&db_path)
642            .context("Failed to open meta.db for file update")?;
643
644        let now = chrono::Utc::now().timestamp();
645
646        conn.execute(
647            "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
648             VALUES (?, ?, ?, ?)",
649            [path, &now.to_string(), language, &line_count.to_string()],
650        )?;
651
652        Ok(())
653    }
654
655    /// Batch update multiple files in a single transaction for performance
656    ///
657    /// Note: File content hashes are stored separately in the file_branches table
658    /// via batch_update_files_and_branch().
659    pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
660        let db_path = self.cache_path.join(META_DB);
661        let mut conn = Connection::open(&db_path)
662            .context("Failed to open meta.db for batch update")?;
663
664        let now = chrono::Utc::now().timestamp();
665        let now_str = now.to_string();
666
667        // Use a transaction for batch inserts
668        let tx = conn.transaction()?;
669
670        for (path, language, line_count) in files {
671            tx.execute(
672                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
673                 VALUES (?, ?, ?, ?)",
674                [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
675            )?;
676        }
677
678        tx.commit()?;
679        Ok(())
680    }
681
682    /// Batch update files AND record their hashes for a branch in a SINGLE transaction
683    ///
684    /// This is the recommended method for indexing as it ensures atomicity:
685    /// if files are inserted, their branch hashes are guaranteed to be inserted too.
686    pub fn batch_update_files_and_branch(
687        &self,
688        files: &[(String, String, usize)],      // (path, language, line_count)
689        branch_files: &[(String, String)],       // (path, hash)
690        branch: &str,
691        commit_sha: Option<&str>,
692    ) -> Result<()> {
693        log::info!("batch_update_files_and_branch: Processing {} files for branch '{}'", files.len(), branch);
694
695        let db_path = self.cache_path.join(META_DB);
696        let mut conn = Connection::open(&db_path)
697            .context("Failed to open meta.db for batch update and branch recording")?;
698
699        let now = chrono::Utc::now().timestamp();
700        let now_str = now.to_string();
701
702        // Use a SINGLE transaction for both operations
703        let tx = conn.transaction()?;
704
705        // Step 1: Insert/update files table
706        for (path, language, line_count) in files {
707            tx.execute(
708                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
709                 VALUES (?, ?, ?, ?)",
710                [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
711            )?;
712        }
713        log::info!("Inserted {} files into files table", files.len());
714
715        // Step 2: Get or create branch_id (within same transaction)
716        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
717        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
718
719        // Step 3: Insert file_branches entries (within same transaction)
720        let mut inserted = 0;
721        for (path, hash) in branch_files {
722            // Lookup file_id from path (will find it because we just inserted above)
723            let file_id: i64 = tx.query_row(
724                "SELECT id FROM files WHERE path = ?",
725                [path.as_str()],
726                |row| row.get(0)
727            ).context(format!("File not found in index after insert: {}", path))?;
728
729            // Insert into file_branches using INTEGER values (not strings!)
730            tx.execute(
731                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
732                 VALUES (?, ?, ?, ?)",
733                rusqlite::params![file_id, branch_id, hash.as_str(), now],
734            )?;
735            inserted += 1;
736        }
737        log::info!("Inserted {} file_branches entries", inserted);
738
739        // Commit the entire transaction atomically
740        tx.commit()?;
741        log::info!("Transaction committed successfully (files + file_branches)");
742
743        // DIAGNOSTIC: Verify data was actually persisted after commit
744        // This helps diagnose WAL synchronization issues where commits succeed but data isn't visible
745        let verify_conn = Connection::open(&db_path)
746            .context("Failed to open meta.db for verification")?;
747
748        // Count actual files in database
749        let actual_file_count: i64 = verify_conn.query_row(
750            "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
751            [files.len()],
752            |row| row.get(0)
753        ).unwrap_or(0);
754
755        // Count actual file_branches entries for this branch
756        let actual_fb_count: i64 = verify_conn.query_row(
757            "SELECT COUNT(*) FROM file_branches fb
758             JOIN branches b ON fb.branch_id = b.id
759             WHERE b.name = ?",
760            [branch],
761            |row| row.get(0)
762        ).unwrap_or(0);
763
764        log::info!(
765            "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
766            actual_file_count,
767            files.len(),
768            actual_fb_count,
769            branch,
770            inserted
771        );
772
773        // DEFENSIVE: Warn if counts don't match expectations
774        if actual_file_count < files.len() as i64 {
775            log::warn!(
776                "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
777                files.len(),
778                actual_file_count
779            );
780        }
781        if actual_fb_count < inserted as i64 {
782            log::warn!(
783                "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
784                inserted,
785                branch,
786                actual_fb_count
787            );
788        }
789
790        Ok(())
791    }
792
793    /// Update statistics after indexing by calculating totals from database for a specific branch
794    ///
795    /// Counts only files indexed for the given branch, not all files across all branches.
796    pub fn update_stats(&self, branch: &str) -> Result<()> {
797        let db_path = self.cache_path.join(META_DB);
798        let conn = Connection::open(&db_path)
799            .context("Failed to open meta.db for stats update")?;
800
801        // Count files for specific branch only (branch-aware statistics)
802        let total_files: usize = conn.query_row(
803            "SELECT COUNT(DISTINCT fb.file_id)
804             FROM file_branches fb
805             JOIN branches b ON fb.branch_id = b.id
806             WHERE b.name = ?",
807            [branch],
808            |row| row.get(0),
809        ).unwrap_or(0);
810
811        let now = chrono::Utc::now().timestamp();
812
813        conn.execute(
814            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
815            ["total_files", &total_files.to_string(), &now.to_string()],
816        )?;
817
818        log::debug!("Updated statistics for branch '{}': {} files", branch, total_files);
819        Ok(())
820    }
821
822    /// Check if the stored schema hash matches the current binary's hash.
823    /// Returns Ok(true) if they match, Ok(false) if they don't, Err on DB errors.
824    pub fn check_schema_hash(&self) -> Result<bool> {
825        let db_path = self.cache_path.join(META_DB);
826        if !db_path.exists() {
827            return Ok(false);
828        }
829        let conn = Connection::open(&db_path)?;
830        let current = env!("CACHE_SCHEMA_HASH");
831        let stored: Option<String> = conn
832            .query_row(
833                "SELECT value FROM statistics WHERE key = 'schema_hash'",
834                [],
835                |row| row.get(0),
836            )
837            .optional()?;
838        Ok(stored.as_deref() == Some(current))
839    }
840
841    /// Update cache schema hash in statistics table
842    ///
843    /// This should be called after every index operation to ensure the cache
844    /// is marked as compatible with the current binary version.
845    pub fn update_schema_hash(&self) -> Result<()> {
846        let db_path = self.cache_path.join(META_DB);
847        let conn = Connection::open(&db_path)
848            .context("Failed to open meta.db for schema hash update")?;
849
850        let schema_hash = env!("CACHE_SCHEMA_HASH");
851        let now = chrono::Utc::now().timestamp();
852
853        conn.execute(
854            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
855            ["schema_hash", schema_hash, &now.to_string()],
856        )?;
857
858        log::debug!("Updated schema hash to: {}", schema_hash);
859        Ok(())
860    }
861
862    /// Get list of all indexed files
863    pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
864        let db_path = self.cache_path.join(META_DB);
865
866        if !db_path.exists() {
867            return Ok(Vec::new());
868        }
869
870        let conn = Connection::open(&db_path)
871            .context("Failed to open meta.db")?;
872
873        let mut stmt = conn.prepare(
874            "SELECT path, language, last_indexed FROM files ORDER BY path"
875        )?;
876
877        let files = stmt.query_map([], |row| {
878            let path: String = row.get(0)?;
879            let language: String = row.get(1)?;
880            let last_indexed: i64 = row.get(2)?;
881
882            Ok(IndexedFile {
883                path,
884                language,
885                last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
886                    .unwrap_or_else(chrono::Utc::now)
887                    .to_rfc3339(),
888            })
889        })?
890        .collect::<Result<Vec<_>, _>>()?;
891
892        Ok(files)
893    }
894
895    /// Get statistics about the current cache
896    ///
897    /// Returns statistics for the current git branch if in a git repo,
898    /// or global statistics if not in a git repo.
899    pub fn stats(&self) -> Result<crate::models::IndexStats> {
900        let db_path = self.cache_path.join(META_DB);
901
902        if !db_path.exists() {
903            // Cache not initialized
904            return Ok(crate::models::IndexStats {
905                total_files: 0,
906                index_size_bytes: 0,
907                last_updated: chrono::Utc::now().to_rfc3339(),
908                files_by_language: std::collections::HashMap::new(),
909                lines_by_language: std::collections::HashMap::new(),
910                ..Default::default()
911            });
912        }
913
914        let conn = Connection::open(&db_path)
915            .context("Failed to open meta.db")?;
916
917        // Determine current branch for branch-aware statistics
918        let workspace_root = self.workspace_root();
919        let current_branch = if crate::git::is_git_repo(&workspace_root) {
920            crate::git::get_git_state(&workspace_root)
921                .ok()
922                .map(|state| state.branch)
923        } else {
924            Some("_default".to_string())
925        };
926
927        log::debug!("stats(): current_branch = {:?}", current_branch);
928
929        // Read total files (branch-aware)
930        let total_files: usize = if let Some(ref branch) = current_branch {
931            log::debug!("stats(): Counting files for branch '{}'", branch);
932
933            // Debug: Check all branches
934            let branches: Vec<(i64, String, i64)> = conn.prepare(
935                "SELECT id, name, file_count FROM branches"
936            )
937            .and_then(|mut stmt| {
938                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
939                    .map(|rows| rows.collect())
940            })
941            .and_then(|result| result)
942            .unwrap_or_default();
943
944            for (id, name, count) in &branches {
945                log::debug!("stats(): Branch ID={}, Name='{}', FileCount={}", id, name, count);
946            }
947
948            // Debug: Count file_branches per branch
949            let fb_counts: Vec<(String, i64)> = conn.prepare(
950                "SELECT b.name, COUNT(*) FROM file_branches fb
951                 JOIN branches b ON fb.branch_id = b.id
952                 GROUP BY b.name"
953            )
954            .and_then(|mut stmt| {
955                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
956                    .map(|rows| rows.collect())
957            })
958            .and_then(|result| result)
959            .unwrap_or_default();
960
961            for (name, count) in &fb_counts {
962                log::debug!("stats(): file_branches count for branch '{}': {}", name, count);
963            }
964
965            // Count files for current branch only
966            let count: usize = conn.query_row(
967                "SELECT COUNT(DISTINCT fb.file_id)
968                 FROM file_branches fb
969                 JOIN branches b ON fb.branch_id = b.id
970                 WHERE b.name = ?",
971                [branch],
972                |row| row.get(0),
973            ).unwrap_or(0);
974
975            log::debug!("stats(): Query returned total_files = {}", count);
976            count
977        } else {
978            // No branch info - should not happen, but return 0
979            log::warn!("stats(): No current_branch detected!");
980            0
981        };
982
983        // Read last updated timestamp
984        let last_updated: String = conn.query_row(
985            "SELECT updated_at FROM statistics WHERE key = 'total_files'",
986            [],
987            |row| {
988                let timestamp: i64 = row.get(0)?;
989                Ok(chrono::DateTime::from_timestamp(timestamp, 0)
990                    .unwrap_or_else(chrono::Utc::now)
991                    .to_rfc3339())
992            },
993        ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
994
995        // Calculate total cache size (all binary files)
996        let mut index_size_bytes: u64 = 0;
997
998        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
999            let file_path = self.cache_path.join(file_name);
1000            if let Ok(metadata) = std::fs::metadata(&file_path) {
1001                index_size_bytes += metadata.len();
1002            }
1003        }
1004
1005        // Get file count breakdown by language (branch-aware if possible)
1006        let mut files_by_language = std::collections::HashMap::new();
1007        if let Some(ref branch) = current_branch {
1008            // Query files for current branch only
1009            let mut stmt = conn.prepare(
1010                "SELECT f.language, COUNT(DISTINCT f.id)
1011                 FROM files f
1012                 JOIN file_branches fb ON f.id = fb.file_id
1013                 JOIN branches b ON fb.branch_id = b.id
1014                 WHERE b.name = ?
1015                 GROUP BY f.language"
1016            )?;
1017            let lang_counts = stmt.query_map([branch], |row| {
1018                let language: String = row.get(0)?;
1019                let count: i64 = row.get(1)?;
1020                Ok((language, count as usize))
1021            })?;
1022
1023            for result in lang_counts {
1024                let (language, count) = result?;
1025                files_by_language.insert(language, count);
1026            }
1027        } else {
1028            // Fallback: query all files
1029            let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
1030            let lang_counts = stmt.query_map([], |row| {
1031                let language: String = row.get(0)?;
1032                let count: i64 = row.get(1)?;
1033                Ok((language, count as usize))
1034            })?;
1035
1036            for result in lang_counts {
1037                let (language, count) = result?;
1038                files_by_language.insert(language, count);
1039            }
1040        }
1041
1042        // Get line count breakdown by language (branch-aware if possible)
1043        let mut lines_by_language = std::collections::HashMap::new();
1044        if let Some(ref branch) = current_branch {
1045            // Query lines for current branch only
1046            let mut stmt = conn.prepare(
1047                "SELECT f.language, SUM(f.line_count)
1048                 FROM files f
1049                 JOIN file_branches fb ON f.id = fb.file_id
1050                 JOIN branches b ON fb.branch_id = b.id
1051                 WHERE b.name = ?
1052                 GROUP BY f.language"
1053            )?;
1054            let line_counts = stmt.query_map([branch], |row| {
1055                let language: String = row.get(0)?;
1056                let count: i64 = row.get(1)?;
1057                Ok((language, count as usize))
1058            })?;
1059
1060            for result in line_counts {
1061                let (language, count) = result?;
1062                lines_by_language.insert(language, count);
1063            }
1064        } else {
1065            // Fallback: query all files
1066            let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
1067            let line_counts = stmt.query_map([], |row| {
1068                let language: String = row.get(0)?;
1069                let count: i64 = row.get(1)?;
1070                Ok((language, count as usize))
1071            })?;
1072
1073            for result in line_counts {
1074                let (language, count) = result?;
1075                lines_by_language.insert(language, count);
1076            }
1077        }
1078
1079        Ok(crate::models::IndexStats {
1080            total_files,
1081            index_size_bytes,
1082            last_updated,
1083            files_by_language,
1084            lines_by_language,
1085            ..Default::default()
1086        })
1087    }
1088
1089    // ===== Branch-aware indexing methods =====
1090
1091    /// Get or create a branch ID by name
1092    ///
1093    /// Returns the numeric branch ID, creating a new entry if needed.
1094    fn get_or_create_branch_id(&self, conn: &Connection, branch_name: &str, commit_sha: Option<&str>) -> Result<i64> {
1095        // Try to get existing branch
1096        let existing_id: Option<i64> = conn
1097            .query_row(
1098                "SELECT id FROM branches WHERE name = ?",
1099                [branch_name],
1100                |row| row.get(0),
1101            )
1102            .optional()?;
1103
1104        if let Some(id) = existing_id {
1105            return Ok(id);
1106        }
1107
1108        // Create new branch entry
1109        let now = chrono::Utc::now().timestamp();
1110        conn.execute(
1111            "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1112             VALUES (?, ?, ?, 0, 0)",
1113            [branch_name, commit_sha.unwrap_or("unknown"), &now.to_string()],
1114        )?;
1115
1116        // Get the ID we just created
1117        let id: i64 = conn.last_insert_rowid();
1118        Ok(id)
1119    }
1120
1121    /// Record a file's hash for a specific branch
1122    pub fn record_branch_file(
1123        &self,
1124        path: &str,
1125        branch: &str,
1126        hash: &str,
1127        commit_sha: Option<&str>,
1128    ) -> Result<()> {
1129        let db_path = self.cache_path.join(META_DB);
1130        let conn = Connection::open(&db_path)
1131            .context("Failed to open meta.db for branch file recording")?;
1132
1133        // Lookup file_id from path
1134        let file_id: i64 = conn.query_row(
1135            "SELECT id FROM files WHERE path = ?",
1136            [path],
1137            |row| row.get(0)
1138        ).context(format!("File not found in index: {}", path))?;
1139
1140        // Get or create branch_id
1141        let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1142
1143        let now = chrono::Utc::now().timestamp();
1144
1145        // Insert using proper INTEGER types (not strings!)
1146        conn.execute(
1147            "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1148             VALUES (?, ?, ?, ?)",
1149            rusqlite::params![file_id, branch_id, hash, now],
1150        )?;
1151
1152        Ok(())
1153    }
1154
1155    /// Batch record multiple files for a specific branch in a single transaction
1156    ///
1157    /// IMPORTANT: Files must already exist in the `files` table before calling this method.
1158    /// For atomic insertion of both files and branch hashes, use `batch_update_files_and_branch()` instead.
1159    pub fn batch_record_branch_files(
1160        &self,
1161        files: &[(String, String)],  // (path, hash)
1162        branch: &str,
1163        commit_sha: Option<&str>,
1164    ) -> Result<()> {
1165        log::info!("batch_record_branch_files: Processing {} files for branch '{}'", files.len(), branch);
1166
1167        let db_path = self.cache_path.join(META_DB);
1168        let mut conn = Connection::open(&db_path)
1169            .context("Failed to open meta.db for batch branch recording")?;
1170
1171        let now = chrono::Utc::now().timestamp();
1172
1173        // Use a transaction for batch inserts
1174        let tx = conn.transaction()?;
1175
1176        // Get or create branch_id (use transaction connection)
1177        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1178        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1179
1180        let mut inserted = 0;
1181        for (path, hash) in files {
1182            // Lookup file_id from path
1183            log::trace!("Looking up file_id for path: {}", path);
1184            let file_id: i64 = tx.query_row(
1185                "SELECT id FROM files WHERE path = ?",
1186                [path.as_str()],
1187                |row| row.get(0)
1188            ).context(format!("File not found in index: {}", path))?;
1189            log::trace!("Found file_id={} for path: {}", file_id, path);
1190
1191            // Insert using proper INTEGER types (not strings!)
1192            tx.execute(
1193                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1194                 VALUES (?, ?, ?, ?)",
1195                rusqlite::params![file_id, branch_id, hash.as_str(), now],
1196            )?;
1197            inserted += 1;
1198        }
1199
1200        log::info!("Inserted {} file_branches entries", inserted);
1201        tx.commit()?;
1202        log::info!("Transaction committed successfully");
1203        Ok(())
1204    }
1205
1206    /// Get all files indexed for a specific branch
1207    ///
1208    /// Returns a HashMap of path → hash for all files in the branch.
1209    pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1210        let db_path = self.cache_path.join(META_DB);
1211
1212        if !db_path.exists() {
1213            return Ok(HashMap::new());
1214        }
1215
1216        let conn = Connection::open(&db_path)
1217            .context("Failed to open meta.db")?;
1218
1219        let mut stmt = conn.prepare(
1220            "SELECT f.path, fb.hash
1221             FROM file_branches fb
1222             JOIN files f ON fb.file_id = f.id
1223             JOIN branches b ON fb.branch_id = b.id
1224             WHERE b.name = ?"
1225        )?;
1226        let files: HashMap<String, String> = stmt
1227            .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1228            .collect::<Result<HashMap<_, _>, _>>()?;
1229
1230        log::debug!(
1231            "Loaded {} files for branch '{}' from file_branches table",
1232            files.len(),
1233            branch
1234        );
1235        Ok(files)
1236    }
1237
1238    /// Check if a branch has any indexed files
1239    ///
1240    /// Fast existence check using LIMIT 1 for O(1) performance.
1241    pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1242        let db_path = self.cache_path.join(META_DB);
1243
1244        if !db_path.exists() {
1245            return Ok(false);
1246        }
1247
1248        let conn = Connection::open(&db_path)
1249            .context("Failed to open meta.db")?;
1250
1251        let count: i64 = conn
1252            .query_row(
1253                "SELECT COUNT(*)
1254                 FROM file_branches fb
1255                 JOIN branches b ON fb.branch_id = b.id
1256                 WHERE b.name = ?
1257                 LIMIT 1",
1258                [branch],
1259                |row| row.get(0),
1260            )
1261            .unwrap_or(0);
1262
1263        Ok(count > 0)
1264    }
1265
1266    /// Get branch metadata (commit, last_indexed, file_count, dirty status)
1267    pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1268        let db_path = self.cache_path.join(META_DB);
1269
1270        if !db_path.exists() {
1271            anyhow::bail!("Database not initialized");
1272        }
1273
1274        let conn = Connection::open(&db_path)
1275            .context("Failed to open meta.db")?;
1276
1277        let info = conn.query_row(
1278            "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1279            [branch],
1280            |row| {
1281                Ok(BranchInfo {
1282                    branch: branch.to_string(),
1283                    commit_sha: row.get(0)?,
1284                    last_indexed: row.get(1)?,
1285                    file_count: row.get(2)?,
1286                    is_dirty: row.get::<_, i64>(3)? != 0,
1287                })
1288            },
1289        )?;
1290
1291        Ok(info)
1292    }
1293
1294    /// Update branch metadata after indexing
1295    ///
1296    /// Uses UPDATE instead of INSERT OR REPLACE to preserve branch_id and prevent
1297    /// CASCADE DELETE on file_branches table.
1298    pub fn update_branch_metadata(
1299        &self,
1300        branch: &str,
1301        commit_sha: Option<&str>,
1302        file_count: usize,
1303        is_dirty: bool,
1304    ) -> Result<()> {
1305        let db_path = self.cache_path.join(META_DB);
1306        let conn = Connection::open(&db_path)
1307            .context("Failed to open meta.db for branch metadata update")?;
1308
1309        let now = chrono::Utc::now().timestamp();
1310        let is_dirty_int = if is_dirty { 1 } else { 0 };
1311
1312        // Try UPDATE first to preserve branch_id (prevents CASCADE DELETE)
1313        let rows_updated = conn.execute(
1314            "UPDATE branches
1315             SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1316             WHERE name = ?",
1317            rusqlite::params![
1318                commit_sha.unwrap_or("unknown"),
1319                now,
1320                file_count,
1321                is_dirty_int,
1322                branch
1323            ],
1324        )?;
1325
1326        // If no rows updated (branch doesn't exist yet), INSERT new one
1327        if rows_updated == 0 {
1328            conn.execute(
1329                "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1330                 VALUES (?, ?, ?, ?, ?)",
1331                rusqlite::params![
1332                    branch,
1333                    commit_sha.unwrap_or("unknown"),
1334                    now,
1335                    file_count,
1336                    is_dirty_int
1337                ],
1338            )?;
1339        }
1340
1341        log::debug!(
1342            "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1343            branch,
1344            commit_sha.unwrap_or("unknown"),
1345            file_count,
1346            is_dirty
1347        );
1348        Ok(())
1349    }
1350
1351    /// Find a file with a specific hash (for symbol reuse optimization)
1352    ///
1353    /// Returns the path and branch where this hash was first seen,
1354    /// enabling reuse of parsed symbols across branches.
1355    pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1356        let db_path = self.cache_path.join(META_DB);
1357
1358        if !db_path.exists() {
1359            return Ok(None);
1360        }
1361
1362        let conn = Connection::open(&db_path)
1363            .context("Failed to open meta.db")?;
1364
1365        let result = conn
1366            .query_row(
1367                "SELECT f.path, b.name
1368                 FROM file_branches fb
1369                 JOIN files f ON fb.file_id = f.id
1370                 JOIN branches b ON fb.branch_id = b.id
1371                 WHERE fb.hash = ?
1372                 LIMIT 1",
1373                [hash],
1374                |row| Ok((row.get(0)?, row.get(1)?)),
1375            )
1376            .optional()?;
1377
1378        Ok(result)
1379    }
1380
1381    /// Get file ID by path
1382    ///
1383    /// Returns the integer ID for a file path, or None if not found.
1384    pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1385        let db_path = self.cache_path.join(META_DB);
1386
1387        if !db_path.exists() {
1388            return Ok(None);
1389        }
1390
1391        let conn = Connection::open(&db_path)
1392            .context("Failed to open meta.db")?;
1393
1394        let result = conn
1395            .query_row(
1396                "SELECT id FROM files WHERE path = ?",
1397                [path],
1398                |row| row.get(0),
1399            )
1400            .optional()?;
1401
1402        Ok(result)
1403    }
1404
1405    /// Batch get file IDs for multiple paths
1406    ///
1407    /// Returns a HashMap of path → file_id for all found paths.
1408    /// Paths not in the database are omitted from the result.
1409    ///
1410    /// Automatically chunks large batches to avoid SQLite parameter limits (999 max).
1411    pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1412        let db_path = self.cache_path.join(META_DB);
1413
1414        if !db_path.exists() {
1415            return Ok(HashMap::new());
1416        }
1417
1418        let conn = Connection::open(&db_path)
1419            .context("Failed to open meta.db")?;
1420
1421        // SQLite has a limit of 999 parameters by default
1422        // Chunk requests to stay well under that limit
1423        const BATCH_SIZE: usize = 900;
1424
1425        let mut results = HashMap::new();
1426
1427        for chunk in paths.chunks(BATCH_SIZE) {
1428            // Build IN clause for this chunk
1429            let placeholders = chunk.iter()
1430                .map(|_| "?")
1431                .collect::<Vec<_>>()
1432                .join(", ");
1433
1434            let query = format!("SELECT path, id FROM files WHERE path IN ({})", placeholders);
1435
1436            let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1437            let mut stmt = conn.prepare(&query)?;
1438
1439            let chunk_results = stmt.query_map(rusqlite::params_from_iter(params), |row| {
1440                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1441            })?
1442            .collect::<Result<HashMap<_, _>, _>>()?;
1443
1444            results.extend(chunk_results);
1445        }
1446
1447        log::debug!("Batch loaded {} file IDs (out of {} requested, {} chunks)",
1448                   results.len(), paths.len(), paths.len().div_ceil(BATCH_SIZE));
1449        Ok(results)
1450    }
1451
1452    // ===== Cache compaction methods =====
1453
1454    /// Check if cache compaction should run
1455    ///
1456    /// Returns true if 24+ hours have passed since last compaction (or never compacted).
1457    /// Compaction threshold: 86400 seconds (24 hours)
1458    pub fn should_compact(&self) -> Result<bool> {
1459        let db_path = self.cache_path.join(META_DB);
1460
1461        if !db_path.exists() {
1462            // No database means no compaction needed
1463            return Ok(false);
1464        }
1465
1466        let conn = Connection::open(&db_path)
1467            .context("Failed to open meta.db for compaction check")?;
1468
1469        // Get last_compaction timestamp (defaults to "0" if not found)
1470        let last_compaction: i64 = conn
1471            .query_row(
1472                "SELECT value FROM statistics WHERE key = 'last_compaction'",
1473                [],
1474                |row| {
1475                    let value: String = row.get(0)?;
1476                    Ok(value.parse::<i64>().unwrap_or(0))
1477                },
1478            )
1479            .unwrap_or(0);
1480
1481        // Get current timestamp
1482        let now = chrono::Utc::now().timestamp();
1483
1484        // Compaction threshold: 24 hours (86400 seconds)
1485        const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1486
1487        let elapsed_secs = now - last_compaction;
1488        let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1489
1490        log::debug!(
1491            "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1492            last_compaction,
1493            now,
1494            elapsed_secs,
1495            should_run
1496        );
1497
1498        Ok(should_run)
1499    }
1500
1501    /// Update last_compaction timestamp in statistics table
1502    ///
1503    /// Called after successful compaction to record when it ran.
1504    pub fn update_compaction_timestamp(&self) -> Result<()> {
1505        let db_path = self.cache_path.join(META_DB);
1506        let conn = Connection::open(&db_path)
1507            .context("Failed to open meta.db for compaction timestamp update")?;
1508
1509        let now = chrono::Utc::now().timestamp();
1510
1511        conn.execute(
1512            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1513            ["last_compaction", &now.to_string(), &now.to_string()],
1514        )?;
1515
1516        log::debug!("Updated last_compaction timestamp to: {}", now);
1517        Ok(())
1518    }
1519
1520    /// Compact the cache by removing deleted files and reclaiming disk space
1521    ///
1522    /// This operation:
1523    /// 1. Identifies files in the database that no longer exist on disk
1524    /// 2. Deletes those files from all database tables (CASCADE handles related data)
1525    /// 3. Runs VACUUM to reclaim disk space from deleted rows
1526    /// 4. Updates the last_compaction timestamp
1527    ///
1528    /// Returns a CompactionReport with statistics about the operation.
1529    /// Safe to run concurrently with queries (uses SQLite transactions).
1530    pub fn compact(&self) -> Result<crate::models::CompactionReport> {
1531        let start_time = std::time::Instant::now();
1532        log::info!("Starting cache compaction...");
1533
1534        // Get initial cache size
1535        let size_before = self.calculate_cache_size()?;
1536
1537        // Step 1: Identify deleted files (in DB but not on filesystem)
1538        let deleted_files = self.identify_deleted_files()?;
1539        log::info!("Found {} deleted files to remove from cache", deleted_files.len());
1540
1541        if deleted_files.is_empty() {
1542            log::info!("No deleted files to compact - cache is clean");
1543            // Update timestamp anyway to prevent running compaction too frequently
1544            self.update_compaction_timestamp()?;
1545
1546            return Ok(crate::models::CompactionReport {
1547                files_removed: 0,
1548                space_saved_bytes: 0,
1549                duration_ms: start_time.elapsed().as_millis() as u64,
1550            });
1551        }
1552
1553        // Step 2: Delete from database (CASCADE handles file_branches, file_dependencies, file_exports)
1554        self.delete_files_from_db(&deleted_files)?;
1555        log::info!("Deleted {} files from database", deleted_files.len());
1556
1557        // Step 3: Run VACUUM to reclaim disk space
1558        self.vacuum_database()?;
1559        log::info!("Completed VACUUM operation");
1560
1561        // Get final cache size
1562        let size_after = self.calculate_cache_size()?;
1563        let space_saved = size_before.saturating_sub(size_after);
1564
1565        // Step 4: Update last_compaction timestamp
1566        self.update_compaction_timestamp()?;
1567
1568        let duration_ms = start_time.elapsed().as_millis() as u64;
1569
1570        log::info!(
1571            "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
1572            deleted_files.len(),
1573            space_saved,
1574            space_saved as f64 / 1_048_576.0,
1575            duration_ms
1576        );
1577
1578        Ok(crate::models::CompactionReport {
1579            files_removed: deleted_files.len(),
1580            space_saved_bytes: space_saved,
1581            duration_ms,
1582        })
1583    }
1584
1585    /// Identify files in database that no longer exist on filesystem
1586    ///
1587    /// Returns a Vec of file IDs for files that should be removed from the cache.
1588    fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1589        let db_path = self.cache_path.join(META_DB);
1590        let conn = Connection::open(&db_path)
1591            .context("Failed to open meta.db for deleted file identification")?;
1592
1593        let workspace_root = self.workspace_root();
1594
1595        // Query all files from database (id, path)
1596        let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1597        let files = stmt.query_map([], |row| {
1598            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1599        })?
1600        .collect::<Result<Vec<_>, _>>()?;
1601
1602        log::debug!("Checking {} files for deletion status", files.len());
1603
1604        // Check which files no longer exist on disk
1605        let mut deleted_file_ids = Vec::new();
1606        for (file_id, file_path) in files {
1607            let full_path = workspace_root.join(&file_path);
1608            if !full_path.exists() {
1609                log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1610                deleted_file_ids.push(file_id);
1611            }
1612        }
1613
1614        Ok(deleted_file_ids)
1615    }
1616
1617    /// Delete files from database by file ID
1618    ///
1619    /// Uses a transaction for atomicity. CASCADE delete handles:
1620    /// - file_branches entries
1621    /// - file_dependencies entries
1622    /// - file_exports entries
1623    fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1624        if file_ids.is_empty() {
1625            return Ok(());
1626        }
1627
1628        let db_path = self.cache_path.join(META_DB);
1629        let mut conn = Connection::open(&db_path)
1630            .context("Failed to open meta.db for file deletion")?;
1631
1632        let tx = conn.transaction()?;
1633
1634        // Delete files in batches to avoid SQLite parameter limit (999 max)
1635        const BATCH_SIZE: usize = 900;
1636
1637        for chunk in file_ids.chunks(BATCH_SIZE) {
1638            let placeholders = chunk.iter()
1639                .map(|_| "?")
1640                .collect::<Vec<_>>()
1641                .join(", ");
1642
1643            let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1644
1645            let params: Vec<i64> = chunk.to_vec();
1646            tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1647        }
1648
1649        tx.commit()?;
1650        log::debug!("Deleted {} files from database (CASCADE handled related tables)", file_ids.len());
1651        Ok(())
1652    }
1653
1654    /// Run VACUUM on SQLite database to reclaim disk space
1655    ///
1656    /// VACUUM rebuilds the database file, removing free pages and compacting the file.
1657    /// This can take several seconds on large databases but significantly reduces disk usage.
1658    fn vacuum_database(&self) -> Result<()> {
1659        let db_path = self.cache_path.join(META_DB);
1660        let conn = Connection::open(&db_path)
1661            .context("Failed to open meta.db for VACUUM")?;
1662
1663        // VACUUM cannot run inside a transaction
1664        // It rebuilds the entire database file
1665        conn.execute("VACUUM", [])?;
1666
1667        log::debug!("VACUUM completed successfully");
1668        Ok(())
1669    }
1670
1671    /// Calculate total cache size in bytes
1672    ///
1673    /// Sums up the size of all cache files:
1674    /// - meta.db (SQLite database)
1675    /// - trigrams.bin (inverted index)
1676    /// - content.bin (file contents)
1677    /// - config.toml (configuration)
1678    fn calculate_cache_size(&self) -> Result<u64> {
1679        let mut total_size: u64 = 0;
1680
1681        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
1682            let file_path = self.cache_path.join(file_name);
1683            if let Ok(metadata) = std::fs::metadata(&file_path) {
1684                total_size += metadata.len();
1685            }
1686        }
1687
1688        Ok(total_size)
1689    }
1690}
1691
/// Branch metadata information
///
/// Derives `PartialEq`/`Eq` so whole values can be compared directly
/// (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchInfo {
    /// Name of the branch this record describes.
    pub branch: String,
    /// Commit SHA recorded for the branch.
    pub commit_sha: String,
    /// When the branch was last indexed.
    /// NOTE(review): presumably a Unix timestamp; confirm against the code that writes it.
    pub last_indexed: i64,
    /// Number of files recorded for the branch.
    pub file_count: usize,
    /// Dirty flag recorded with the branch metadata.
    /// NOTE(review): presumably "had uncommitted changes when indexed"; confirm against the caller.
    pub is_dirty: bool,
}
1701
1702// TODO: Implement memory-mapped readers for:
1703// - SymbolReader (reads from symbols.bin)
1704// - TokenReader (reads from tokens.bin)
1705// - MetaReader (reads from meta.db)
1706
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a `CacheManager` rooted in a fresh temporary directory, without calling `init`.
    ///
    /// The `TempDir` is handed back so the directory stays alive for as long as
    /// the caller keeps the binding.
    fn uninitialized_cache() -> (TempDir, CacheManager) {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        (temp, cache)
    }

    /// Same as `uninitialized_cache`, but with `init` already run successfully.
    fn initialized_cache() -> (TempDir, CacheManager) {
        let (temp, cache) = uninitialized_cache();
        cache.init().unwrap();
        (temp, cache)
    }

    #[test]
    fn test_cache_init() {
        let (_temp, cache) = uninitialized_cache();

        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());

        // Both files that init() is responsible for must now be on disk
        for expected in [META_DB, CONFIG_TOML] {
            assert!(cache.path().join(expected).exists());
        }
    }

    #[test]
    fn test_cache_init_idempotent() {
        let (_temp, cache) = uninitialized_cache();

        // A second init on an existing cache must succeed as well
        for _ in 0..2 {
            cache.init().unwrap();
        }

        assert!(cache.exists());
    }

    #[test]
    fn test_cache_clear() {
        let (_temp, cache) = initialized_cache();
        assert!(cache.exists());

        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let (_temp, cache) = uninitialized_cache();

        // Clearing a cache that was never created is not an error
        assert!(!cache.exists());
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_load_all_hashes_empty() {
        let (_temp, cache) = initialized_cache();

        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let (_temp, cache) = uninitialized_cache();

        // Without a cache on disk the result is an empty map, not an error
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let (_temp, cache) = initialized_cache();

        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_update_file() {
        let (_temp, cache) = initialized_cache();
        cache.update_file("src/main.rs", "rust", 100).unwrap();

        // The stored entry is observable through list_files
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);

        let stored = &files[0];
        assert_eq!(stored.path, "src/main.rs");
        assert_eq!(stored.language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let (_temp, cache) = initialized_cache();

        for (path, language, lines) in [
            ("src/main.rs", "rust", 100),
            ("src/lib.rs", "rust", 200),
            ("README.md", "markdown", 50),
        ] {
            cache.update_file(path, language, lines).unwrap();
        }

        // Every distinct path yields its own entry
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let (_temp, cache) = initialized_cache();

        for lines in [100, 150] {
            cache.update_file("src/main.rs", "rust", lines).unwrap();
        }

        // Updating the same path twice must leave a single entry
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let (_temp, cache) = initialized_cache();

        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];
        cache.batch_update_files(&files).unwrap();

        // All batched entries are present afterwards
        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    #[test]
    fn test_update_stats() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        // Attach both files to a test branch before refreshing statistics
        cache.record_branch_file("src/main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("src/lib.rs", "_default", "hash2", None).unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let (_temp, cache) = initialized_cache();

        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let (_temp, cache) = uninitialized_cache();

        // With no cache on disk, stats report zero files
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let (_temp, cache) = initialized_cache();

        for (path, language, lines) in [
            ("main.rs", "Rust", 100),
            ("lib.rs", "Rust", 200),
            ("script.py", "Python", 50),
            ("test.py", "Python", 80),
        ] {
            cache.update_file(path, language, lines).unwrap();
        }

        // Attach every file to a test branch before refreshing statistics
        for (path, hash) in [
            ("main.rs", "hash1"),
            ("lib.rs", "hash2"),
            ("script.py", "hash3"),
            ("test.py", "hash4"),
        ] {
            cache.record_branch_file(path, "_default", hash, None).unwrap();
        }
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300)); // 100 + 200
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130)); // 50 + 80
    }

    #[test]
    fn test_list_files_empty() {
        let (_temp, cache) = initialized_cache();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_list_files() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // Results come back ordered by path, not by insertion
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let (_temp, cache) = uninitialized_cache();

        // With no cache on disk, the listing is empty rather than an error
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_branch_exists() {
        let (_temp, cache) = initialized_cache();

        assert!(!cache.branch_exists("main").unwrap());

        // record_branch_file needs the file to be in the index already
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let (_temp, cache) = initialized_cache();

        // record_branch_file needs the file to be in the index already
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let (_temp, cache) = initialized_cache();

        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let (_temp, cache) = initialized_cache();

        // batch_record_branch_files needs the files to be in the index already
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];
        cache.batch_record_branch_files(&files, "main", Some("commit123")).unwrap();

        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        for (path, hash) in [
            ("src/main.rs", "hash1"),
            ("src/lib.rs", "hash2"),
            ("README.md", "hash3"),
        ] {
            assert_eq!(branch_files.get(path), Some(&hash.to_string()));
        }
    }

    #[test]
    fn test_update_branch_metadata() {
        let (_temp, cache) = initialized_cache();

        cache.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();

        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert!(!info.is_dirty);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let (_temp, cache) = initialized_cache();

        cache.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();

        let info = cache.get_branch_info("feature").unwrap();
        assert!(info.is_dirty);
    }

    #[test]
    fn test_find_file_with_hash() {
        let (_temp, cache) = initialized_cache();

        // record_branch_file needs the file to be in the index already
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();

        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let (_temp, cache) = initialized_cache();

        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_config_toml_created() {
        let (_temp, cache) = initialized_cache();

        let config_content = std::fs::read_to_string(cache.path().join(CONFIG_TOML)).unwrap();

        // The generated config must carry each of the expected sections and keys
        for needle in ["[index]", "[search]", "[performance]", "max_file_size"] {
            assert!(config_content.contains(needle));
        }
    }

    #[test]
    fn test_meta_db_schema() {
        let (_temp, cache) = initialized_cache();

        let conn = Connection::open(cache.path().join(META_DB)).unwrap();

        // Collect the names of all tables present in the database
        let mut stmt = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
            .unwrap();
        let tables: Vec<String> = stmt
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        for required in [
            "files",
            "statistics",
            "config",
            "file_branches",
            "branches",
            "file_dependencies",
            "file_exports",
        ] {
            assert!(tables.contains(&required.to_string()));
        }
    }

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let temp = TempDir::new().unwrap();
        let root = temp.path().to_path_buf();

        CacheManager::new(&root).init().unwrap();

        // Start every writer before joining any, so the updates overlap;
        // each thread touches a different file through its own manager.
        let workers: Vec<_> = (0..10)
            .map(|i| {
                let root = root.clone();
                thread::spawn(move || {
                    CacheManager::new(&root)
                        .update_file(&format!("file_{}.rs", i), "rust", i * 10)
                        .unwrap();
                })
            })
            .collect();

        for worker in workers {
            worker.join().unwrap();
        }

        let files = CacheManager::new(&root).list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    // ===== Corruption Detection Tests =====

    #[test]
    fn test_validate_corrupted_database() {
        let (_temp, cache) = initialized_cache();

        // Replace the database contents with bytes that are not a SQLite file
        std::fs::write(cache.path().join(META_DB), b"CORRUPTED DATA").unwrap();

        // validate() has to notice the damaged database
        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        eprintln!("Error message: {}", err_msg);
        assert!(err_msg.contains("corrupted") || err_msg.contains("not a database"));
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        let (_temp, cache) = initialized_cache();

        // Write trigrams.bin with the wrong magic bytes (should be "RFTG")
        std::fs::write(cache.path().join("trigrams.bin"), b"BADM").unwrap();

        // validate() has to reject the bad magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        let (_temp, cache) = initialized_cache();

        // Write content.bin with the wrong magic bytes (should be "RFCT")
        std::fs::write(cache.path().join("content.bin"), b"BADM").unwrap();

        // validate() has to reject the bad magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let (_temp, cache) = initialized_cache();

        // Simulate schema corruption by dropping a required table
        let conn = Connection::open(cache.path().join(META_DB)).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        // validate() has to report the missing table
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}