Skip to main content

reflex/
cache.rs

//! Cache management and memory-mapped I/O
//!
//! The cache module handles the `.reflex/` directory structure:
//! - `meta.db`: Metadata, file hashes, and configuration (SQLite)
//! - `tokens.bin`: Compressed lexical tokens (binary); legacy, no longer created by `init`
//! - `content.bin`: Memory-mapped file contents (binary)
//! - `trigrams.bin`: Trigram inverted index (custom varint+zstd binary, V3 format)
//! - `config.toml`: Index settings (TOML text)
9
10use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
/// Name of the cache directory that is joined onto the workspace root.
pub const CACHE_DIR: &str = ".reflex";

// File names within the cache directory.

/// SQLite database holding file metadata, statistics, config, and branch hashes.
pub const META_DB: &str = "meta.db";
/// Token file name. NOTE(review): `CacheManager::init` no longer creates this file.
pub const TOKENS_BIN: &str = "tokens.bin";
/// Legacy JSON hash store name; hashes are stored in `meta.db` instead.
pub const HASHES_JSON: &str = "hashes.json";
/// TOML file containing the index settings.
pub const CONFIG_TOML: &str = "config.toml";
26
/// Manages the Reflex cache directory
///
/// The manager stores only the path of the cache directory; cloning it
/// copies that path. `Debug` is derived so the manager can be logged or
/// embedded in other `Debug` types.
#[derive(Debug, Clone)]
pub struct CacheManager {
    /// Path of the cache directory (`<root>/.reflex`).
    cache_path: PathBuf,
}
32
33impl CacheManager {
34    /// Create a new cache manager for the given root directory
35    pub fn new(root: impl AsRef<Path>) -> Self {
36        let cache_path = root.as_ref().join(CACHE_DIR);
37        Self { cache_path }
38    }
39
40    /// Initialize the cache directory structure if it doesn't exist
41    pub fn init(&self) -> Result<()> {
42        log::info!("Initializing cache at {:?}", self.cache_path);
43
44        if !self.cache_path.exists() {
45            std::fs::create_dir_all(&self.cache_path)?;
46        }
47
48        // Create meta.db with schema
49        self.init_meta_db()?;
50
51        // Create default config.toml
52        self.init_config_toml()?;
53
54        // Note: tokens.bin removed - was never used
55        // Note: hashes.json is deprecated - hashes are now stored in meta.db
56
57        log::info!("Cache initialized successfully");
58        Ok(())
59    }
60
61    /// Initialize meta.db with SQLite schema
62    fn init_meta_db(&self) -> Result<()> {
63        let db_path = self.cache_path.join(META_DB);
64
65        // Skip if already exists
66        if db_path.exists() {
67            return Ok(());
68        }
69
70        let conn = Connection::open(&db_path).context("Failed to create meta.db")?;
71
72        // Create files table
73        conn.execute(
74            "CREATE TABLE IF NOT EXISTS files (
75                id INTEGER PRIMARY KEY AUTOINCREMENT,
76                path TEXT NOT NULL UNIQUE,
77                last_indexed INTEGER NOT NULL,
78                language TEXT NOT NULL,
79                token_count INTEGER DEFAULT 0,
80                line_count INTEGER DEFAULT 0
81            )",
82            [],
83        )?;
84
85        conn.execute(
86            "CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)",
87            [],
88        )?;
89
90        // Create statistics table
91        conn.execute(
92            "CREATE TABLE IF NOT EXISTS statistics (
93                key TEXT PRIMARY KEY,
94                value TEXT NOT NULL,
95                updated_at INTEGER NOT NULL
96            )",
97            [],
98        )?;
99
100        // Initialize default statistics
101        let now = chrono::Utc::now().timestamp();
102        conn.execute(
103            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
104            ["total_files", "0", &now.to_string()],
105        )?;
106        conn.execute(
107            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
108            ["cache_version", "1", &now.to_string()],
109        )?;
110
111        // Store cache schema hash for automatic invalidation detection
112        // This hash is computed at build time from cache-critical source files
113        let schema_hash = env!("CACHE_SCHEMA_HASH");
114        conn.execute(
115            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
116            ["schema_hash", schema_hash, &now.to_string()],
117        )?;
118
119        // Initialize last_compaction timestamp (0 = never compacted)
120        conn.execute(
121            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
122            ["last_compaction", "0", &now.to_string()],
123        )?;
124
125        // Create config table
126        conn.execute(
127            "CREATE TABLE IF NOT EXISTS config (
128                key TEXT PRIMARY KEY,
129                value TEXT NOT NULL
130            )",
131            [],
132        )?;
133
134        // Create branch tracking tables for git-aware indexing
135        conn.execute(
136            "CREATE TABLE IF NOT EXISTS file_branches (
137                file_id INTEGER NOT NULL,
138                branch_id INTEGER NOT NULL,
139                hash TEXT NOT NULL,
140                last_indexed INTEGER NOT NULL,
141                PRIMARY KEY (file_id, branch_id),
142                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
143                FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
144            )",
145            [],
146        )?;
147
148        conn.execute(
149            "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
150            [],
151        )?;
152
153        conn.execute(
154            "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
155            [],
156        )?;
157
158        // Create branches metadata table
159        conn.execute(
160            "CREATE TABLE IF NOT EXISTS branches (
161                id INTEGER PRIMARY KEY AUTOINCREMENT,
162                name TEXT NOT NULL UNIQUE,
163                commit_sha TEXT NOT NULL,
164                last_indexed INTEGER NOT NULL,
165                file_count INTEGER DEFAULT 0,
166                is_dirty INTEGER DEFAULT 0
167            )",
168            [],
169        )?;
170
171        // Create file dependencies table for tracking imports/includes
172        conn.execute(
173            "CREATE TABLE IF NOT EXISTS file_dependencies (
174                id INTEGER PRIMARY KEY AUTOINCREMENT,
175                file_id INTEGER NOT NULL,
176                imported_path TEXT NOT NULL,
177                resolved_file_id INTEGER,
178                import_type TEXT NOT NULL,
179                line_number INTEGER NOT NULL,
180                imported_symbols TEXT,
181                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
182                FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
183            )",
184            [],
185        )?;
186
187        conn.execute(
188            "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
189            [],
190        )?;
191
192        conn.execute(
193            "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
194            [],
195        )?;
196
197        conn.execute(
198            "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
199            [],
200        )?;
201
202        // Create file exports table for tracking barrel re-exports
203        conn.execute(
204            "CREATE TABLE IF NOT EXISTS file_exports (
205                id INTEGER PRIMARY KEY AUTOINCREMENT,
206                file_id INTEGER NOT NULL,
207                exported_symbol TEXT,
208                source_path TEXT NOT NULL,
209                resolved_source_id INTEGER,
210                line_number INTEGER NOT NULL,
211                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
212                FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
213            )",
214            [],
215        )?;
216
217        conn.execute(
218            "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
219            [],
220        )?;
221
222        conn.execute(
223            "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
224            [],
225        )?;
226
227        conn.execute(
228            "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
229            [],
230        )?;
231
232        log::debug!("Created meta.db with schema");
233        Ok(())
234    }
235
236    /// Initialize config.toml with defaults
237    fn init_config_toml(&self) -> Result<()> {
238        let config_path = self.cache_path.join(CONFIG_TOML);
239
240        if config_path.exists() {
241            return Ok(());
242        }
243
244        let default_config = r#"[index]
245languages = []  # Empty = all supported languages
246max_file_size = 10485760  # 10 MB
247follow_symlinks = false
248
249[index.include]
250patterns = []
251
252[index.exclude]
253patterns = []
254
255[search]
256default_limit = 100
257fuzzy_threshold = 0.8
258
259[performance]
260parallel_threads = 0  # 0 = auto (80% of available cores), or set a specific number
261compression_level = 3  # zstd level
262
263[semantic]
264# Semantic query generation using LLMs
265# Translate natural language questions into rfx query commands
266provider = "openrouter"  # Options: openai, anthropic, openrouter
267# model = "openai/gpt-4o-mini"  # Optional: override provider default model
268# auto_execute = false  # Optional: auto-execute queries without confirmation
269"#;
270
271        std::fs::write(&config_path, default_config)?;
272
273        log::debug!("Created default config.toml");
274        Ok(())
275    }
276
277    /// Check if cache exists and is valid
278    pub fn exists(&self) -> bool {
279        self.cache_path.exists() && self.cache_path.join(META_DB).exists()
280    }
281
282    /// Validate cache integrity and detect corruption
283    ///
284    /// Performs basic integrity checks on the cache:
285    /// - Verifies all required files exist
286    /// - Checks SQLite database can be opened
287    /// - Validates binary file headers (trigrams.bin, content.bin)
288    ///
289    /// Returns Ok(()) if cache is valid, Err with details if corrupted.
290    pub fn validate(&self) -> Result<()> {
291        let start = std::time::Instant::now();
292
293        // Check if cache directory exists
294        if !self.cache_path.exists() {
295            anyhow::bail!(
296                "Cache directory does not exist: {}",
297                self.cache_path.display()
298            );
299        }
300
301        // Check meta.db exists and can be opened
302        let db_path = self.cache_path.join(META_DB);
303        if !db_path.exists() {
304            anyhow::bail!("Database file missing: {}", db_path.display());
305        }
306
307        // Try to open database
308        let conn = Connection::open(&db_path)
309            .context("Failed to open meta.db - database may be corrupted")?;
310
311        // Verify schema exists
312        let tables: Result<Vec<String>, _> = conn
313            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
314            .and_then(|mut stmt| {
315                stmt.query_map([], |row| row.get(0))
316                    .map(|rows| rows.collect())
317            })
318            .and_then(|result| result);
319
320        match tables {
321            Ok(table_list) => {
322                // Check for required tables
323                let required_tables = vec![
324                    "files",
325                    "statistics",
326                    "config",
327                    "file_branches",
328                    "branches",
329                    "file_dependencies",
330                    "file_exports",
331                ];
332                for table in &required_tables {
333                    if !table_list.iter().any(|t| t == table) {
334                        anyhow::bail!("Required table '{}' missing from database schema", table);
335                    }
336                }
337            }
338            Err(e) => {
339                anyhow::bail!("Failed to read database schema: {}", e);
340            }
341        }
342
343        // Run SQLite integrity check (fast quick_check)
344        // Use quick_check instead of integrity_check for speed (<10ms vs 100ms+)
345        let integrity_result: String =
346            conn.query_row("PRAGMA quick_check", [], |row| row.get(0))?;
347
348        if integrity_result != "ok" {
349            log::warn!("Database integrity check failed: {}", integrity_result);
350            anyhow::bail!(
351                "Database integrity check failed: {}. Cache may be corrupted. \
352                 Run 'rfx index' to rebuild cache.",
353                integrity_result
354            );
355        }
356
357        // Check trigrams.bin if it exists
358        let trigrams_path = self.cache_path.join("trigrams.bin");
359        if trigrams_path.exists() {
360            use std::io::Read;
361
362            match File::open(&trigrams_path) {
363                Ok(mut file) => {
364                    let mut header = [0u8; 4];
365                    match file.read_exact(&mut header) {
366                        Ok(_) => {
367                            // Check magic bytes
368                            if &header != b"RFTG" {
369                                log::warn!(
370                                    "trigrams.bin has invalid magic bytes - may be corrupted"
371                                );
372                                anyhow::bail!(
373                                    "trigrams.bin appears to be corrupted (invalid magic bytes)"
374                                );
375                            }
376                        }
377                        Err(_) => {
378                            anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
379                        }
380                    }
381                }
382                Err(e) => {
383                    anyhow::bail!("Failed to open trigrams.bin: {}", e);
384                }
385            }
386        }
387
388        // Check content.bin if it exists
389        let content_path = self.cache_path.join("content.bin");
390        if content_path.exists() {
391            use std::io::Read;
392
393            match File::open(&content_path) {
394                Ok(mut file) => {
395                    let mut header = [0u8; 4];
396                    match file.read_exact(&mut header) {
397                        Ok(_) => {
398                            // Check magic bytes
399                            if &header != b"RFCT" {
400                                log::warn!(
401                                    "content.bin has invalid magic bytes - may be corrupted"
402                                );
403                                anyhow::bail!(
404                                    "content.bin appears to be corrupted (invalid magic bytes)"
405                                );
406                            }
407                        }
408                        Err(_) => {
409                            anyhow::bail!("content.bin is too small - appears to be corrupted");
410                        }
411                    }
412                }
413                Err(e) => {
414                    anyhow::bail!("Failed to open content.bin: {}", e);
415                }
416            }
417        }
418
419        // Check schema hash for automatic invalidation
420        let current_schema_hash = env!("CACHE_SCHEMA_HASH");
421
422        let stored_schema_hash: Option<String> = conn
423            .query_row(
424                "SELECT value FROM statistics WHERE key = 'schema_hash'",
425                [],
426                |row| row.get(0),
427            )
428            .optional()?;
429
430        if let Some(stored_hash) = stored_schema_hash {
431            if stored_hash != current_schema_hash {
432                log::warn!(
433                    "Cache schema hash mismatch! Stored: {}, Current: {}",
434                    stored_hash,
435                    current_schema_hash
436                );
437                anyhow::bail!(
438                    "Cache schema version mismatch.\n\
439                     \n\
440                     - Cache was built with version {}\n\
441                     - Current binary expects version {}\n\
442                     \n\
443                     The cache format may be incompatible with this version of Reflex.\n\
444                     Please rebuild the index by running:\n\
445                     \n\
446                       rfx index\n\
447                     \n\
448                     This usually happens after upgrading Reflex or making code changes.",
449                    stored_hash,
450                    current_schema_hash
451                );
452            }
453        } else {
454            log::debug!(
455                "No schema_hash found in cache - this cache was created before automatic invalidation was implemented"
456            );
457            // Don't fail for backward compatibility with old caches
458            // They will get the hash on next rebuild
459        }
460
461        let elapsed = start.elapsed();
462        log::debug!(
463            "Cache validation passed (schema hash: {}, took {:?})",
464            current_schema_hash,
465            elapsed
466        );
467        Ok(())
468    }
469
470    /// Get the path to the cache directory
471    pub fn path(&self) -> &Path {
472        &self.cache_path
473    }
474
475    /// Get the workspace root directory (parent of .reflex/)
476    pub fn workspace_root(&self) -> PathBuf {
477        self.cache_path
478            .parent()
479            .expect(".reflex directory should have a parent")
480            .to_path_buf()
481    }
482
483    /// Load IndexConfig from `.reflex/config.toml` if it exists.
484    ///
485    /// Returns `IndexConfig::default()` when the file is absent or a section
486    /// is missing.  Parse errors are surfaced so the user gets a clear message
487    /// rather than silently falling back to defaults.
488    pub fn load_index_config(&self) -> Result<crate::models::IndexConfig> {
489        use crate::models::{IndexConfig, Language};
490
491        let config_path = self.cache_path.join(CONFIG_TOML);
492        if !config_path.exists() {
493            return Ok(IndexConfig::default());
494        }
495
496        let raw = std::fs::read_to_string(&config_path)
497            .with_context(|| format!("Failed to read {}", config_path.display()))?;
498
499        let toml_val: toml::Value = toml::from_str(&raw)
500            .with_context(|| format!("Failed to parse {}", config_path.display()))?;
501
502        let mut cfg = IndexConfig::default();
503
504        if let Some(index_tbl) = toml_val.get("index") {
505            if let Some(langs) = index_tbl.get("languages").and_then(|v| v.as_array()) {
506                let parsed: Vec<Language> = langs
507                    .iter()
508                    .filter_map(|v| v.as_str())
509                    .filter_map(|s| {
510                        Language::from_name(s).or_else(|| {
511                            log::warn!(
512                                "Unknown language '{}' in config.toml [index] section — ignoring",
513                                s
514                            );
515                            None
516                        })
517                    })
518                    .collect();
519                if !parsed.is_empty() {
520                    cfg.languages = parsed;
521                }
522            }
523            if let Some(max_size) = index_tbl.get("max_file_size").and_then(|v| v.as_integer()) {
524                cfg.max_file_size = max_size as usize;
525            }
526            if let Some(follow) = index_tbl.get("follow_symlinks").and_then(|v| v.as_bool()) {
527                cfg.follow_symlinks = follow;
528            }
529            if let Some(include) = index_tbl
530                .get("include")
531                .and_then(|v| v.get("patterns"))
532                .and_then(|v| v.as_array())
533            {
534                cfg.include_patterns = include
535                    .iter()
536                    .filter_map(|v| v.as_str().map(String::from))
537                    .collect();
538            }
539            if let Some(exclude) = index_tbl
540                .get("exclude")
541                .and_then(|v| v.get("patterns"))
542                .and_then(|v| v.as_array())
543            {
544                cfg.exclude_patterns = exclude
545                    .iter()
546                    .filter_map(|v| v.as_str().map(String::from))
547                    .collect();
548            }
549        }
550
551        if let Some(perf) = toml_val.get("performance") {
552            if let Some(threads) = perf.get("parallel_threads").and_then(|v| v.as_integer()) {
553                cfg.parallel_threads = threads as usize;
554            }
555        }
556
557        log::debug!("Loaded IndexConfig from config.toml: {:?}", cfg);
558        Ok(cfg)
559    }
560
561    /// Clear the entire cache
562    pub fn clear(&self) -> Result<()> {
563        log::info!("Clearing cache at {:?}", self.cache_path);
564
565        if self.cache_path.exists() {
566            std::fs::remove_dir_all(&self.cache_path)?;
567        }
568
569        Ok(())
570    }
571
572    /// Force SQLite WAL (Write-Ahead Log) checkpoint
573    ///
574    /// Ensures all data written in transactions is flushed to the main database file.
575    /// This is critical when spawning background processes that open new connections,
576    /// as they need to see the committed data immediately.
577    ///
578    /// Uses TRUNCATE mode to completely flush and reset the WAL file.
579    pub fn checkpoint_wal(&self) -> Result<()> {
580        let db_path = self.cache_path.join(META_DB);
581
582        if !db_path.exists() {
583            // No database to checkpoint
584            return Ok(());
585        }
586
587        let conn =
588            Connection::open(&db_path).context("Failed to open meta.db for WAL checkpoint")?;
589
590        // PRAGMA wal_checkpoint(TRUNCATE) forces a full checkpoint and truncates the WAL
591        // This ensures background processes see all committed data
592        // Note: Returns (busy, log_pages, checkpointed_pages) - use query instead of execute
593        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
594            let busy: i64 = row.get(0)?;
595            let log_pages: i64 = row.get(1)?;
596            let checkpointed: i64 = row.get(2)?;
597            log::debug!(
598                "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
599                busy,
600                log_pages,
601                checkpointed
602            );
603            Ok(())
604        })
605        .context("Failed to execute WAL checkpoint")?;
606
607        log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
608        Ok(())
609    }
610
611    /// Load all file hashes across all branches from SQLite
612    ///
613    /// Used by background indexer to get hashes for all indexed files.
614    /// Returns the most recent hash for each file across all branches.
615    pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
616        let db_path = self.cache_path.join(META_DB);
617
618        if !db_path.exists() {
619            return Ok(HashMap::new());
620        }
621
622        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
623
624        // Get all hashes from file_branches, joined with files to get paths
625        // If a file appears in multiple branches, we'll get multiple entries
626        // (HashMap will keep the last one, which is fine for background indexer)
627        let mut stmt = conn.prepare(
628            "SELECT f.path, fb.hash
629             FROM file_branches fb
630             JOIN files f ON fb.file_id = f.id",
631        )?;
632        let hashes: HashMap<String, String> = stmt
633            .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
634            .collect::<Result<HashMap<_, _>, _>>()?;
635
636        log::debug!(
637            "Loaded {} file hashes across all branches from SQLite",
638            hashes.len()
639        );
640        Ok(hashes)
641    }
642
643    /// Load file hashes for a specific branch from SQLite
644    ///
645    /// Used by indexer and query engine to get hashes for the current branch.
646    /// This ensures branch-specific incremental indexing and symbol cache lookups.
647    pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
648        let db_path = self.cache_path.join(META_DB);
649
650        if !db_path.exists() {
651            return Ok(HashMap::new());
652        }
653
654        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
655
656        // Get hashes for specific branch only
657        let mut stmt = conn.prepare(
658            "SELECT f.path, fb.hash
659             FROM file_branches fb
660             JOIN files f ON fb.file_id = f.id
661             JOIN branches b ON fb.branch_id = b.id
662             WHERE b.name = ?",
663        )?;
664        let hashes: HashMap<String, String> = stmt
665            .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
666            .collect::<Result<HashMap<_, _>, _>>()?;
667
668        log::debug!(
669            "Loaded {} file hashes for branch '{}' from SQLite",
670            hashes.len(),
671            branch
672        );
673        Ok(hashes)
674    }
675
676    /// Save file hashes for incremental indexing
677    ///
678    /// DEPRECATED: Hashes are now saved via record_branch_file() or batch_record_branch_files().
679    /// This method is kept for backward compatibility but does nothing.
680    #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
681    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
682        // No-op: hashes are now persisted to SQLite in record_branch_file()
683        Ok(())
684    }
685
686    /// Update file metadata in the files table
687    ///
688    /// Note: File content hashes are stored separately in the file_branches table
689    /// via record_branch_file() or batch_record_branch_files().
690    pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
691        let db_path = self.cache_path.join(META_DB);
692        let conn = Connection::open(&db_path).context("Failed to open meta.db for file update")?;
693
694        let now = chrono::Utc::now().timestamp();
695
696        conn.execute(
697            "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
698             VALUES (?, ?, ?, ?)",
699            [path, &now.to_string(), language, &line_count.to_string()],
700        )?;
701
702        Ok(())
703    }
704
705    /// Batch update multiple files in a single transaction for performance
706    ///
707    /// Note: File content hashes are stored separately in the file_branches table
708    /// via batch_update_files_and_branch().
709    pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
710        let db_path = self.cache_path.join(META_DB);
711        let mut conn =
712            Connection::open(&db_path).context("Failed to open meta.db for batch update")?;
713
714        let now = chrono::Utc::now().timestamp();
715        let now_str = now.to_string();
716
717        // Use a transaction for batch inserts
718        let tx = conn.transaction()?;
719
720        for (path, language, line_count) in files {
721            tx.execute(
722                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
723                 VALUES (?, ?, ?, ?)",
724                [
725                    path.as_str(),
726                    &now_str,
727                    language.as_str(),
728                    &line_count.to_string(),
729                ],
730            )?;
731        }
732
733        tx.commit()?;
734        Ok(())
735    }
736
737    /// Batch update files AND record their hashes for a branch in a SINGLE transaction
738    ///
739    /// This is the recommended method for indexing as it ensures atomicity:
740    /// if files are inserted, their branch hashes are guaranteed to be inserted too.
741    pub fn batch_update_files_and_branch(
742        &self,
743        files: &[(String, String, usize)], // (path, language, line_count)
744        branch_files: &[(String, String)], // (path, hash)
745        branch: &str,
746        commit_sha: Option<&str>,
747    ) -> Result<()> {
748        log::info!(
749            "batch_update_files_and_branch: Processing {} files for branch '{}'",
750            files.len(),
751            branch
752        );
753
754        let db_path = self.cache_path.join(META_DB);
755        let mut conn = Connection::open(&db_path)
756            .context("Failed to open meta.db for batch update and branch recording")?;
757
758        let now = chrono::Utc::now().timestamp();
759        let now_str = now.to_string();
760
761        // Use a SINGLE transaction for both operations
762        let tx = conn.transaction()?;
763
764        // Step 1: Insert/update files table
765        for (path, language, line_count) in files {
766            tx.execute(
767                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
768                 VALUES (?, ?, ?, ?)",
769                [
770                    path.as_str(),
771                    &now_str,
772                    language.as_str(),
773                    &line_count.to_string(),
774                ],
775            )?;
776        }
777        log::info!("Inserted {} files into files table", files.len());
778
779        // Step 2: Get or create branch_id (within same transaction)
780        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
781        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
782
783        // Step 3: Insert file_branches entries (within same transaction)
784        let mut inserted = 0;
785        for (path, hash) in branch_files {
786            // Lookup file_id from path (will find it because we just inserted above)
787            let file_id: i64 = tx
788                .query_row(
789                    "SELECT id FROM files WHERE path = ?",
790                    [path.as_str()],
791                    |row| row.get(0),
792                )
793                .context(format!("File not found in index after insert: {}", path))?;
794
795            // Insert into file_branches using INTEGER values (not strings!)
796            tx.execute(
797                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
798                 VALUES (?, ?, ?, ?)",
799                rusqlite::params![file_id, branch_id, hash.as_str(), now],
800            )?;
801            inserted += 1;
802        }
803        log::info!("Inserted {} file_branches entries", inserted);
804
805        // Commit the entire transaction atomically
806        tx.commit()?;
807        log::info!("Transaction committed successfully (files + file_branches)");
808
809        // DIAGNOSTIC: Verify data was actually persisted after commit
810        // This helps diagnose WAL synchronization issues where commits succeed but data isn't visible
811        let verify_conn =
812            Connection::open(&db_path).context("Failed to open meta.db for verification")?;
813
814        // Count actual files in database
815        let actual_file_count: i64 = verify_conn.query_row(
816            "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
817            [files.len()],
818            |row| row.get(0)
819        ).unwrap_or(0);
820
821        // Count actual file_branches entries for this branch
822        let actual_fb_count: i64 = verify_conn
823            .query_row(
824                "SELECT COUNT(*) FROM file_branches fb
825             JOIN branches b ON fb.branch_id = b.id
826             WHERE b.name = ?",
827                [branch],
828                |row| row.get(0),
829            )
830            .unwrap_or(0);
831
832        log::info!(
833            "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
834            actual_file_count,
835            files.len(),
836            actual_fb_count,
837            branch,
838            inserted
839        );
840
841        // DEFENSIVE: Warn if counts don't match expectations
842        if actual_file_count < files.len() as i64 {
843            log::warn!(
844                "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
845                files.len(),
846                actual_file_count
847            );
848        }
849        if actual_fb_count < inserted as i64 {
850            log::warn!(
851                "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
852                inserted,
853                branch,
854                actual_fb_count
855            );
856        }
857
858        Ok(())
859    }
860
861    /// Update statistics after indexing by calculating totals from database for a specific branch
862    ///
863    /// Counts only files indexed for the given branch, not all files across all branches.
864    pub fn update_stats(&self, branch: &str) -> Result<()> {
865        let db_path = self.cache_path.join(META_DB);
866        let conn = Connection::open(&db_path).context("Failed to open meta.db for stats update")?;
867
868        // Count files for specific branch only (branch-aware statistics)
869        let total_files: usize = conn
870            .query_row(
871                "SELECT COUNT(DISTINCT fb.file_id)
872             FROM file_branches fb
873             JOIN branches b ON fb.branch_id = b.id
874             WHERE b.name = ?",
875                [branch],
876                |row| row.get(0),
877            )
878            .unwrap_or(0);
879
880        let now = chrono::Utc::now().timestamp();
881
882        conn.execute(
883            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
884            ["total_files", &total_files.to_string(), &now.to_string()],
885        )?;
886
887        log::debug!(
888            "Updated statistics for branch '{}': {} files",
889            branch,
890            total_files
891        );
892        Ok(())
893    }
894
895    /// Check if the stored schema hash matches the current binary's hash.
896    /// Returns Ok(true) if they match, Ok(false) if they don't, Err on DB errors.
897    pub fn check_schema_hash(&self) -> Result<bool> {
898        let db_path = self.cache_path.join(META_DB);
899        if !db_path.exists() {
900            return Ok(false);
901        }
902        let conn = Connection::open(&db_path)?;
903        let current = env!("CACHE_SCHEMA_HASH");
904        let stored: Option<String> = conn
905            .query_row(
906                "SELECT value FROM statistics WHERE key = 'schema_hash'",
907                [],
908                |row| row.get(0),
909            )
910            .optional()?;
911        Ok(stored.as_deref() == Some(current))
912    }
913
914    /// Update cache schema hash in statistics table
915    ///
916    /// This should be called after every index operation to ensure the cache
917    /// is marked as compatible with the current binary version.
918    pub fn update_schema_hash(&self) -> Result<()> {
919        let db_path = self.cache_path.join(META_DB);
920        let conn =
921            Connection::open(&db_path).context("Failed to open meta.db for schema hash update")?;
922
923        let schema_hash = env!("CACHE_SCHEMA_HASH");
924        let now = chrono::Utc::now().timestamp();
925
926        conn.execute(
927            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
928            ["schema_hash", schema_hash, &now.to_string()],
929        )?;
930
931        log::debug!("Updated schema hash to: {}", schema_hash);
932        Ok(())
933    }
934
935    /// Get list of all indexed files
936    pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
937        let db_path = self.cache_path.join(META_DB);
938
939        if !db_path.exists() {
940            return Ok(Vec::new());
941        }
942
943        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
944
945        let mut stmt =
946            conn.prepare("SELECT path, language, last_indexed FROM files ORDER BY path")?;
947
948        let files = stmt
949            .query_map([], |row| {
950                let path: String = row.get(0)?;
951                let language: String = row.get(1)?;
952                let last_indexed: i64 = row.get(2)?;
953
954                Ok(IndexedFile {
955                    path,
956                    language,
957                    last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
958                        .unwrap_or_else(chrono::Utc::now)
959                        .to_rfc3339(),
960                })
961            })?
962            .collect::<Result<Vec<_>, _>>()?;
963
964        Ok(files)
965    }
966
967    /// Get statistics about the current cache
968    ///
969    /// Returns statistics for the current git branch if in a git repo,
970    /// or global statistics if not in a git repo.
971    pub fn stats(&self) -> Result<crate::models::IndexStats> {
972        let db_path = self.cache_path.join(META_DB);
973
974        if !db_path.exists() {
975            // Cache not initialized
976            return Ok(crate::models::IndexStats {
977                total_files: 0,
978                index_size_bytes: 0,
979                last_updated: chrono::Utc::now().to_rfc3339(),
980                files_by_language: std::collections::HashMap::new(),
981                lines_by_language: std::collections::HashMap::new(),
982                ..Default::default()
983            });
984        }
985
986        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
987
988        // Determine current branch for branch-aware statistics
989        let workspace_root = self.workspace_root();
990        let current_branch = if crate::git::is_git_repo(&workspace_root) {
991            crate::git::get_git_state(&workspace_root)
992                .ok()
993                .map(|state| state.branch)
994        } else {
995            Some("_default".to_string())
996        };
997
998        log::debug!("stats(): current_branch = {:?}", current_branch);
999
1000        // Read total files (branch-aware)
1001        let total_files: usize = if let Some(ref branch) = current_branch {
1002            log::debug!("stats(): Counting files for branch '{}'", branch);
1003
1004            // Debug: Check all branches
1005            let branches: Vec<(i64, String, i64)> = conn
1006                .prepare("SELECT id, name, file_count FROM branches")
1007                .and_then(|mut stmt| {
1008                    stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
1009                        .map(|rows| rows.collect())
1010                })
1011                .and_then(|result| result)
1012                .unwrap_or_default();
1013
1014            for (id, name, count) in &branches {
1015                log::debug!(
1016                    "stats(): Branch ID={}, Name='{}', FileCount={}",
1017                    id,
1018                    name,
1019                    count
1020                );
1021            }
1022
1023            // Debug: Count file_branches per branch
1024            let fb_counts: Vec<(String, i64)> = conn
1025                .prepare(
1026                    "SELECT b.name, COUNT(*) FROM file_branches fb
1027                 JOIN branches b ON fb.branch_id = b.id
1028                 GROUP BY b.name",
1029                )
1030                .and_then(|mut stmt| {
1031                    stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
1032                        .map(|rows| rows.collect())
1033                })
1034                .and_then(|result| result)
1035                .unwrap_or_default();
1036
1037            for (name, count) in &fb_counts {
1038                log::debug!(
1039                    "stats(): file_branches count for branch '{}': {}",
1040                    name,
1041                    count
1042                );
1043            }
1044
1045            // Count files for current branch only
1046            let count: usize = conn
1047                .query_row(
1048                    "SELECT COUNT(DISTINCT fb.file_id)
1049                 FROM file_branches fb
1050                 JOIN branches b ON fb.branch_id = b.id
1051                 WHERE b.name = ?",
1052                    [branch],
1053                    |row| row.get(0),
1054                )
1055                .unwrap_or(0);
1056
1057            log::debug!("stats(): Query returned total_files = {}", count);
1058            count
1059        } else {
1060            // No branch info - should not happen, but return 0
1061            log::warn!("stats(): No current_branch detected!");
1062            0
1063        };
1064
1065        // Read last updated timestamp
1066        let last_updated: String = conn
1067            .query_row(
1068                "SELECT updated_at FROM statistics WHERE key = 'total_files'",
1069                [],
1070                |row| {
1071                    let timestamp: i64 = row.get(0)?;
1072                    Ok(chrono::DateTime::from_timestamp(timestamp, 0)
1073                        .unwrap_or_else(chrono::Utc::now)
1074                        .to_rfc3339())
1075                },
1076            )
1077            .unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
1078
1079        // Calculate total cache size (all binary files)
1080        let mut index_size_bytes: u64 = 0;
1081
1082        for file_name in [
1083            META_DB,
1084            TOKENS_BIN,
1085            CONFIG_TOML,
1086            "content.bin",
1087            "trigrams.bin",
1088        ] {
1089            let file_path = self.cache_path.join(file_name);
1090            if let Ok(metadata) = std::fs::metadata(&file_path) {
1091                index_size_bytes += metadata.len();
1092            }
1093        }
1094
1095        // Get file count breakdown by language (branch-aware if possible)
1096        let mut files_by_language = std::collections::HashMap::new();
1097        if let Some(ref branch) = current_branch {
1098            // Query files for current branch only
1099            let mut stmt = conn.prepare(
1100                "SELECT f.language, COUNT(DISTINCT f.id)
1101                 FROM files f
1102                 JOIN file_branches fb ON f.id = fb.file_id
1103                 JOIN branches b ON fb.branch_id = b.id
1104                 WHERE b.name = ?
1105                 GROUP BY f.language",
1106            )?;
1107            let lang_counts = stmt.query_map([branch], |row| {
1108                let language: String = row.get(0)?;
1109                let count: i64 = row.get(1)?;
1110                Ok((language, count as usize))
1111            })?;
1112
1113            for result in lang_counts {
1114                let (language, count) = result?;
1115                files_by_language.insert(language, count);
1116            }
1117        } else {
1118            // Fallback: query all files
1119            let mut stmt =
1120                conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
1121            let lang_counts = stmt.query_map([], |row| {
1122                let language: String = row.get(0)?;
1123                let count: i64 = row.get(1)?;
1124                Ok((language, count as usize))
1125            })?;
1126
1127            for result in lang_counts {
1128                let (language, count) = result?;
1129                files_by_language.insert(language, count);
1130            }
1131        }
1132
1133        // Get line count breakdown by language (branch-aware if possible)
1134        let mut lines_by_language = std::collections::HashMap::new();
1135        if let Some(ref branch) = current_branch {
1136            // Query lines for current branch only
1137            let mut stmt = conn.prepare(
1138                "SELECT f.language, SUM(f.line_count)
1139                 FROM files f
1140                 JOIN file_branches fb ON f.id = fb.file_id
1141                 JOIN branches b ON fb.branch_id = b.id
1142                 WHERE b.name = ?
1143                 GROUP BY f.language",
1144            )?;
1145            let line_counts = stmt.query_map([branch], |row| {
1146                let language: String = row.get(0)?;
1147                let count: i64 = row.get(1)?;
1148                Ok((language, count as usize))
1149            })?;
1150
1151            for result in line_counts {
1152                let (language, count) = result?;
1153                lines_by_language.insert(language, count);
1154            }
1155        } else {
1156            // Fallback: query all files
1157            let mut stmt =
1158                conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
1159            let line_counts = stmt.query_map([], |row| {
1160                let language: String = row.get(0)?;
1161                let count: i64 = row.get(1)?;
1162                Ok((language, count as usize))
1163            })?;
1164
1165            for result in line_counts {
1166                let (language, count) = result?;
1167                lines_by_language.insert(language, count);
1168            }
1169        }
1170
1171        Ok(crate::models::IndexStats {
1172            total_files,
1173            index_size_bytes,
1174            last_updated,
1175            files_by_language,
1176            lines_by_language,
1177            ..Default::default()
1178        })
1179    }
1180
1181    // ===== Branch-aware indexing methods =====
1182
1183    /// Get or create a branch ID by name
1184    ///
1185    /// Returns the numeric branch ID, creating a new entry if needed.
1186    fn get_or_create_branch_id(
1187        &self,
1188        conn: &Connection,
1189        branch_name: &str,
1190        commit_sha: Option<&str>,
1191    ) -> Result<i64> {
1192        // Try to get existing branch
1193        let existing_id: Option<i64> = conn
1194            .query_row(
1195                "SELECT id FROM branches WHERE name = ?",
1196                [branch_name],
1197                |row| row.get(0),
1198            )
1199            .optional()?;
1200
1201        if let Some(id) = existing_id {
1202            return Ok(id);
1203        }
1204
1205        // Create new branch entry
1206        let now = chrono::Utc::now().timestamp();
1207        conn.execute(
1208            "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1209             VALUES (?, ?, ?, 0, 0)",
1210            [
1211                branch_name,
1212                commit_sha.unwrap_or("unknown"),
1213                &now.to_string(),
1214            ],
1215        )?;
1216
1217        // Get the ID we just created
1218        let id: i64 = conn.last_insert_rowid();
1219        Ok(id)
1220    }
1221
1222    /// Record a file's hash for a specific branch
1223    pub fn record_branch_file(
1224        &self,
1225        path: &str,
1226        branch: &str,
1227        hash: &str,
1228        commit_sha: Option<&str>,
1229    ) -> Result<()> {
1230        let db_path = self.cache_path.join(META_DB);
1231        let conn = Connection::open(&db_path)
1232            .context("Failed to open meta.db for branch file recording")?;
1233
1234        // Lookup file_id from path
1235        let file_id: i64 = conn
1236            .query_row("SELECT id FROM files WHERE path = ?", [path], |row| {
1237                row.get(0)
1238            })
1239            .context(format!("File not found in index: {}", path))?;
1240
1241        // Get or create branch_id
1242        let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1243
1244        let now = chrono::Utc::now().timestamp();
1245
1246        // Insert using proper INTEGER types (not strings!)
1247        conn.execute(
1248            "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1249             VALUES (?, ?, ?, ?)",
1250            rusqlite::params![file_id, branch_id, hash, now],
1251        )?;
1252
1253        Ok(())
1254    }
1255
1256    /// Batch record multiple files for a specific branch in a single transaction
1257    ///
1258    /// IMPORTANT: Files must already exist in the `files` table before calling this method.
1259    /// For atomic insertion of both files and branch hashes, use `batch_update_files_and_branch()` instead.
1260    pub fn batch_record_branch_files(
1261        &self,
1262        files: &[(String, String)], // (path, hash)
1263        branch: &str,
1264        commit_sha: Option<&str>,
1265    ) -> Result<()> {
1266        log::info!(
1267            "batch_record_branch_files: Processing {} files for branch '{}'",
1268            files.len(),
1269            branch
1270        );
1271
1272        let db_path = self.cache_path.join(META_DB);
1273        let mut conn = Connection::open(&db_path)
1274            .context("Failed to open meta.db for batch branch recording")?;
1275
1276        let now = chrono::Utc::now().timestamp();
1277
1278        // Use a transaction for batch inserts
1279        let tx = conn.transaction()?;
1280
1281        // Get or create branch_id (use transaction connection)
1282        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1283        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1284
1285        let mut inserted = 0;
1286        for (path, hash) in files {
1287            // Lookup file_id from path
1288            log::trace!("Looking up file_id for path: {}", path);
1289            let file_id: i64 = tx
1290                .query_row(
1291                    "SELECT id FROM files WHERE path = ?",
1292                    [path.as_str()],
1293                    |row| row.get(0),
1294                )
1295                .context(format!("File not found in index: {}", path))?;
1296            log::trace!("Found file_id={} for path: {}", file_id, path);
1297
1298            // Insert using proper INTEGER types (not strings!)
1299            tx.execute(
1300                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1301                 VALUES (?, ?, ?, ?)",
1302                rusqlite::params![file_id, branch_id, hash.as_str(), now],
1303            )?;
1304            inserted += 1;
1305        }
1306
1307        log::info!("Inserted {} file_branches entries", inserted);
1308        tx.commit()?;
1309        log::info!("Transaction committed successfully");
1310        Ok(())
1311    }
1312
1313    /// Get all files indexed for a specific branch
1314    ///
1315    /// Returns a HashMap of path → hash for all files in the branch.
1316    pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1317        let db_path = self.cache_path.join(META_DB);
1318
1319        if !db_path.exists() {
1320            return Ok(HashMap::new());
1321        }
1322
1323        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1324
1325        let mut stmt = conn.prepare(
1326            "SELECT f.path, fb.hash
1327             FROM file_branches fb
1328             JOIN files f ON fb.file_id = f.id
1329             JOIN branches b ON fb.branch_id = b.id
1330             WHERE b.name = ?",
1331        )?;
1332        let files: HashMap<String, String> = stmt
1333            .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1334            .collect::<Result<HashMap<_, _>, _>>()?;
1335
1336        log::debug!(
1337            "Loaded {} files for branch '{}' from file_branches table",
1338            files.len(),
1339            branch
1340        );
1341        Ok(files)
1342    }
1343
1344    /// Check if a branch has any indexed files
1345    ///
1346    /// Fast existence check using LIMIT 1 for O(1) performance.
1347    pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1348        let db_path = self.cache_path.join(META_DB);
1349
1350        if !db_path.exists() {
1351            return Ok(false);
1352        }
1353
1354        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1355
1356        let count: i64 = conn
1357            .query_row(
1358                "SELECT COUNT(*)
1359                 FROM file_branches fb
1360                 JOIN branches b ON fb.branch_id = b.id
1361                 WHERE b.name = ?
1362                 LIMIT 1",
1363                [branch],
1364                |row| row.get(0),
1365            )
1366            .unwrap_or(0);
1367
1368        Ok(count > 0)
1369    }
1370
1371    /// Get branch metadata (commit, last_indexed, file_count, dirty status)
1372    pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1373        let db_path = self.cache_path.join(META_DB);
1374
1375        if !db_path.exists() {
1376            anyhow::bail!("Database not initialized");
1377        }
1378
1379        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1380
1381        let info = conn.query_row(
1382            "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1383            [branch],
1384            |row| {
1385                Ok(BranchInfo {
1386                    branch: branch.to_string(),
1387                    commit_sha: row.get(0)?,
1388                    last_indexed: row.get(1)?,
1389                    file_count: row.get(2)?,
1390                    is_dirty: row.get::<_, i64>(3)? != 0,
1391                })
1392            },
1393        )?;
1394
1395        Ok(info)
1396    }
1397
1398    /// Update branch metadata after indexing
1399    ///
1400    /// Uses UPDATE instead of INSERT OR REPLACE to preserve branch_id and prevent
1401    /// CASCADE DELETE on file_branches table.
1402    pub fn update_branch_metadata(
1403        &self,
1404        branch: &str,
1405        commit_sha: Option<&str>,
1406        file_count: usize,
1407        is_dirty: bool,
1408    ) -> Result<()> {
1409        let db_path = self.cache_path.join(META_DB);
1410        let conn = Connection::open(&db_path)
1411            .context("Failed to open meta.db for branch metadata update")?;
1412
1413        let now = chrono::Utc::now().timestamp();
1414        let is_dirty_int = if is_dirty { 1 } else { 0 };
1415
1416        // Try UPDATE first to preserve branch_id (prevents CASCADE DELETE)
1417        let rows_updated = conn.execute(
1418            "UPDATE branches
1419             SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1420             WHERE name = ?",
1421            rusqlite::params![
1422                commit_sha.unwrap_or("unknown"),
1423                now,
1424                file_count,
1425                is_dirty_int,
1426                branch
1427            ],
1428        )?;
1429
1430        // If no rows updated (branch doesn't exist yet), INSERT new one
1431        if rows_updated == 0 {
1432            conn.execute(
1433                "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1434                 VALUES (?, ?, ?, ?, ?)",
1435                rusqlite::params![
1436                    branch,
1437                    commit_sha.unwrap_or("unknown"),
1438                    now,
1439                    file_count,
1440                    is_dirty_int
1441                ],
1442            )?;
1443        }
1444
1445        log::debug!(
1446            "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1447            branch,
1448            commit_sha.unwrap_or("unknown"),
1449            file_count,
1450            is_dirty
1451        );
1452        Ok(())
1453    }
1454
1455    /// Find a file with a specific hash (for symbol reuse optimization)
1456    ///
1457    /// Returns the path and branch where this hash was first seen,
1458    /// enabling reuse of parsed symbols across branches.
1459    pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1460        let db_path = self.cache_path.join(META_DB);
1461
1462        if !db_path.exists() {
1463            return Ok(None);
1464        }
1465
1466        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1467
1468        let result = conn
1469            .query_row(
1470                "SELECT f.path, b.name
1471                 FROM file_branches fb
1472                 JOIN files f ON fb.file_id = f.id
1473                 JOIN branches b ON fb.branch_id = b.id
1474                 WHERE fb.hash = ?
1475                 LIMIT 1",
1476                [hash],
1477                |row| Ok((row.get(0)?, row.get(1)?)),
1478            )
1479            .optional()?;
1480
1481        Ok(result)
1482    }
1483
1484    /// Get file ID by path
1485    ///
1486    /// Returns the integer ID for a file path, or None if not found.
1487    pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1488        let db_path = self.cache_path.join(META_DB);
1489
1490        if !db_path.exists() {
1491            return Ok(None);
1492        }
1493
1494        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1495
1496        let result = conn
1497            .query_row("SELECT id FROM files WHERE path = ?", [path], |row| {
1498                row.get(0)
1499            })
1500            .optional()?;
1501
1502        Ok(result)
1503    }
1504
1505    /// Batch get file IDs for multiple paths
1506    ///
1507    /// Returns a HashMap of path → file_id for all found paths.
1508    /// Paths not in the database are omitted from the result.
1509    ///
1510    /// Automatically chunks large batches to avoid SQLite parameter limits (999 max).
1511    pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1512        let db_path = self.cache_path.join(META_DB);
1513
1514        if !db_path.exists() {
1515            return Ok(HashMap::new());
1516        }
1517
1518        let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1519
1520        // SQLite has a limit of 999 parameters by default
1521        // Chunk requests to stay well under that limit
1522        const BATCH_SIZE: usize = 900;
1523
1524        let mut results = HashMap::new();
1525
1526        for chunk in paths.chunks(BATCH_SIZE) {
1527            // Build IN clause for this chunk
1528            let placeholders = chunk.iter().map(|_| "?").collect::<Vec<_>>().join(", ");
1529
1530            let query = format!(
1531                "SELECT path, id FROM files WHERE path IN ({})",
1532                placeholders
1533            );
1534
1535            let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1536            let mut stmt = conn.prepare(&query)?;
1537
1538            let chunk_results = stmt
1539                .query_map(rusqlite::params_from_iter(params), |row| {
1540                    Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1541                })?
1542                .collect::<Result<HashMap<_, _>, _>>()?;
1543
1544            results.extend(chunk_results);
1545        }
1546
1547        log::debug!(
1548            "Batch loaded {} file IDs (out of {} requested, {} chunks)",
1549            results.len(),
1550            paths.len(),
1551            paths.len().div_ceil(BATCH_SIZE)
1552        );
1553        Ok(results)
1554    }
1555
1556    // ===== Cache compaction methods =====
1557
1558    /// Check if cache compaction should run
1559    ///
1560    /// Returns true if 24+ hours have passed since last compaction (or never compacted).
1561    /// Compaction threshold: 86400 seconds (24 hours)
1562    pub fn should_compact(&self) -> Result<bool> {
1563        let db_path = self.cache_path.join(META_DB);
1564
1565        if !db_path.exists() {
1566            // No database means no compaction needed
1567            return Ok(false);
1568        }
1569
1570        let conn =
1571            Connection::open(&db_path).context("Failed to open meta.db for compaction check")?;
1572
1573        // Get last_compaction timestamp (defaults to "0" if not found)
1574        let last_compaction: i64 = conn
1575            .query_row(
1576                "SELECT value FROM statistics WHERE key = 'last_compaction'",
1577                [],
1578                |row| {
1579                    let value: String = row.get(0)?;
1580                    Ok(value.parse::<i64>().unwrap_or(0))
1581                },
1582            )
1583            .unwrap_or(0);
1584
1585        // Get current timestamp
1586        let now = chrono::Utc::now().timestamp();
1587
1588        // Compaction threshold: 24 hours (86400 seconds)
1589        const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1590
1591        let elapsed_secs = now - last_compaction;
1592        let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1593
1594        log::debug!(
1595            "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1596            last_compaction,
1597            now,
1598            elapsed_secs,
1599            should_run
1600        );
1601
1602        Ok(should_run)
1603    }
1604
1605    /// Update last_compaction timestamp in statistics table
1606    ///
1607    /// Called after successful compaction to record when it ran.
1608    pub fn update_compaction_timestamp(&self) -> Result<()> {
1609        let db_path = self.cache_path.join(META_DB);
1610        let conn = Connection::open(&db_path)
1611            .context("Failed to open meta.db for compaction timestamp update")?;
1612
1613        let now = chrono::Utc::now().timestamp();
1614
1615        conn.execute(
1616            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1617            ["last_compaction", &now.to_string(), &now.to_string()],
1618        )?;
1619
1620        log::debug!("Updated last_compaction timestamp to: {}", now);
1621        Ok(())
1622    }
1623
1624    /// Compact the cache by removing deleted files and reclaiming disk space
1625    ///
1626    /// This operation:
1627    /// 1. Identifies files in the database that no longer exist on disk
1628    /// 2. Deletes those files from all database tables (CASCADE handles related data)
1629    /// 3. Runs VACUUM to reclaim disk space from deleted rows
1630    /// 4. Updates the last_compaction timestamp
1631    ///
1632    /// Returns a CompactionReport with statistics about the operation.
1633    /// Safe to run concurrently with queries (uses SQLite transactions).
1634    pub fn compact(&self) -> Result<crate::models::CompactionReport> {
1635        let start_time = std::time::Instant::now();
1636        log::info!("Starting cache compaction...");
1637
1638        // Get initial cache size
1639        let size_before = self.calculate_cache_size()?;
1640
1641        // Step 1: Identify deleted files (in DB but not on filesystem)
1642        let deleted_files = self.identify_deleted_files()?;
1643        log::info!(
1644            "Found {} deleted files to remove from cache",
1645            deleted_files.len()
1646        );
1647
1648        if deleted_files.is_empty() {
1649            log::info!("No deleted files to compact - cache is clean");
1650            // Update timestamp anyway to prevent running compaction too frequently
1651            self.update_compaction_timestamp()?;
1652
1653            return Ok(crate::models::CompactionReport {
1654                files_removed: 0,
1655                space_saved_bytes: 0,
1656                duration_ms: start_time.elapsed().as_millis() as u64,
1657            });
1658        }
1659
1660        // Step 2: Delete from database (CASCADE handles file_branches, file_dependencies, file_exports)
1661        self.delete_files_from_db(&deleted_files)?;
1662        log::info!("Deleted {} files from database", deleted_files.len());
1663
1664        // Step 3: Run VACUUM to reclaim disk space
1665        self.vacuum_database()?;
1666        log::info!("Completed VACUUM operation");
1667
1668        // Get final cache size
1669        let size_after = self.calculate_cache_size()?;
1670        let space_saved = size_before.saturating_sub(size_after);
1671
1672        // Step 4: Update last_compaction timestamp
1673        self.update_compaction_timestamp()?;
1674
1675        let duration_ms = start_time.elapsed().as_millis() as u64;
1676
1677        log::info!(
1678            "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
1679            deleted_files.len(),
1680            space_saved,
1681            space_saved as f64 / 1_048_576.0,
1682            duration_ms
1683        );
1684
1685        Ok(crate::models::CompactionReport {
1686            files_removed: deleted_files.len(),
1687            space_saved_bytes: space_saved,
1688            duration_ms,
1689        })
1690    }
1691
1692    /// Identify files in database that no longer exist on filesystem
1693    ///
1694    /// Returns a Vec of file IDs for files that should be removed from the cache.
1695    fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1696        let db_path = self.cache_path.join(META_DB);
1697        let conn = Connection::open(&db_path)
1698            .context("Failed to open meta.db for deleted file identification")?;
1699
1700        let workspace_root = self.workspace_root();
1701
1702        // Query all files from database (id, path)
1703        let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1704        let files = stmt
1705            .query_map([], |row| {
1706                Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1707            })?
1708            .collect::<Result<Vec<_>, _>>()?;
1709
1710        log::debug!("Checking {} files for deletion status", files.len());
1711
1712        // Check which files no longer exist on disk
1713        let mut deleted_file_ids = Vec::new();
1714        for (file_id, file_path) in files {
1715            let full_path = workspace_root.join(&file_path);
1716            if !full_path.exists() {
1717                log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1718                deleted_file_ids.push(file_id);
1719            }
1720        }
1721
1722        Ok(deleted_file_ids)
1723    }
1724
1725    /// Delete files from database by file ID
1726    ///
1727    /// Uses a transaction for atomicity. CASCADE delete handles:
1728    /// - file_branches entries
1729    /// - file_dependencies entries
1730    /// - file_exports entries
1731    fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1732        if file_ids.is_empty() {
1733            return Ok(());
1734        }
1735
1736        let db_path = self.cache_path.join(META_DB);
1737        let mut conn =
1738            Connection::open(&db_path).context("Failed to open meta.db for file deletion")?;
1739
1740        let tx = conn.transaction()?;
1741
1742        // Delete files in batches to avoid SQLite parameter limit (999 max)
1743        const BATCH_SIZE: usize = 900;
1744
1745        for chunk in file_ids.chunks(BATCH_SIZE) {
1746            let placeholders = chunk.iter().map(|_| "?").collect::<Vec<_>>().join(", ");
1747
1748            let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1749
1750            let params: Vec<i64> = chunk.to_vec();
1751            tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1752        }
1753
1754        tx.commit()?;
1755        log::debug!(
1756            "Deleted {} files from database (CASCADE handled related tables)",
1757            file_ids.len()
1758        );
1759        Ok(())
1760    }
1761
1762    /// Run VACUUM on SQLite database to reclaim disk space
1763    ///
1764    /// VACUUM rebuilds the database file, removing free pages and compacting the file.
1765    /// This can take several seconds on large databases but significantly reduces disk usage.
1766    fn vacuum_database(&self) -> Result<()> {
1767        let db_path = self.cache_path.join(META_DB);
1768        let conn = Connection::open(&db_path).context("Failed to open meta.db for VACUUM")?;
1769
1770        // VACUUM cannot run inside a transaction
1771        // It rebuilds the entire database file
1772        conn.execute("VACUUM", [])?;
1773
1774        log::debug!("VACUUM completed successfully");
1775        Ok(())
1776    }
1777
1778    /// Calculate total cache size in bytes
1779    ///
1780    /// Sums up the size of all cache files:
1781    /// - meta.db (SQLite database)
1782    /// - trigrams.bin (inverted index)
1783    /// - content.bin (file contents)
1784    /// - config.toml (configuration)
1785    fn calculate_cache_size(&self) -> Result<u64> {
1786        let mut total_size: u64 = 0;
1787
1788        for file_name in [
1789            META_DB,
1790            TOKENS_BIN,
1791            CONFIG_TOML,
1792            "content.bin",
1793            "trigrams.bin",
1794        ] {
1795            let file_path = self.cache_path.join(file_name);
1796            if let Ok(metadata) = std::fs::metadata(&file_path) {
1797                total_size += metadata.len();
1798            }
1799        }
1800
1801        Ok(total_size)
1802    }
1803}
1804
/// Branch metadata information
///
/// Plain-data record describing one indexed branch. Equality compares every
/// field, which lets callers and tests compare whole records directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchInfo {
    /// Name of the branch this record describes.
    pub branch: String,
    /// Commit SHA recorded for the branch.
    pub commit_sha: String,
    /// When the branch was last indexed.
    // NOTE(review): presumably a Unix timestamp in seconds - confirm against the writer.
    pub last_indexed: i64,
    /// Number of files recorded for the branch.
    pub file_count: usize,
    /// Dirty flag stored with the branch metadata.
    // NOTE(review): presumably "had uncommitted changes when indexed" - confirm.
    pub is_dirty: bool,
}
1814
1815// TODO: Implement memory-mapped readers for:
1816// - SymbolReader (reads from symbols.bin)
1817// - TokenReader (reads from tokens.bin)
1818// - MetaReader (reads from meta.db)
1819
#[cfg(test)]
mod tests {
    //! Unit tests for `CacheManager`; every test works in its own temporary directory.

    use super::*;
    use tempfile::TempDir;

    /// `init` creates the cache directory together with meta.db and config.toml.
    #[test]
    fn test_cache_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());

        // Verify all expected files were created
        assert!(cache.path().join(META_DB).exists());
        assert!(cache.path().join(CONFIG_TOML).exists());
    }

    /// Calling `init` a second time succeeds and leaves the cache in place.
    #[test]
    fn test_cache_init_idempotent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Initialize twice - should not error
        cache.init().unwrap();
        cache.init().unwrap();

        assert!(cache.exists());
    }

    /// `clear` removes an initialized cache.
    #[test]
    fn test_cache_clear() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        assert!(cache.exists());

        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    /// `clear` on a cache that was never created is not an error.
    #[test]
    fn test_cache_clear_nonexistent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Clearing non-existent cache should not error
        assert!(!cache.exists());
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    /// A freshly initialized cache has no hashes.
    #[test]
    fn test_load_all_hashes_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    /// Loading hashes without calling `init` yields an empty map instead of failing.
    #[test]
    fn test_load_all_hashes_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Loading hashes before init should return empty map
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    /// A branch with no recorded files has no hashes.
    #[test]
    fn test_load_hashes_for_branch_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    /// `update_file` stores a file that `list_files` then reports with its path and language.
    #[test]
    fn test_update_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();

        // Verify file was stored (check via list_files)
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    /// Several distinct paths each produce their own entry.
    #[test]
    fn test_update_file_multiple() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.update_file("README.md", "markdown", 50).unwrap();

        // Verify files were stored
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    /// Updating the same path twice keeps a single entry.
    #[test]
    fn test_update_file_replace() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/main.rs", "rust", 150).unwrap();

        // Second update should replace the first
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    /// `batch_update_files` stores every file it is given.
    #[test]
    fn test_batch_update_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];

        cache.batch_update_files(&files).unwrap();

        // Verify files were stored
        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    /// After recording branch files and calling `update_stats`, `stats` reports the file total.
    #[test]
    fn test_update_stats() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        // Record files for a test branch
        cache
            .record_branch_file("src/main.rs", "_default", "hash1", None)
            .unwrap();
        cache
            .record_branch_file("src/lib.rs", "_default", "hash2", None)
            .unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    /// An initialized but empty cache reports zero files and no languages.
    #[test]
    fn test_stats_empty_cache() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    /// `stats` without a prior `init` reports zero files instead of failing.
    #[test]
    fn test_stats_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Stats before init should return zeros
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    /// File and line counts are aggregated per language.
    #[test]
    fn test_stats_by_language() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("main.rs", "Rust", 100).unwrap();
        cache.update_file("lib.rs", "Rust", 200).unwrap();
        cache.update_file("script.py", "Python", 50).unwrap();
        cache.update_file("test.py", "Python", 80).unwrap();

        // Record files for a test branch
        cache
            .record_branch_file("main.rs", "_default", "hash1", None)
            .unwrap();
        cache
            .record_branch_file("lib.rs", "_default", "hash2", None)
            .unwrap();
        cache
            .record_branch_file("script.py", "_default", "hash3", None)
            .unwrap();
        cache
            .record_branch_file("test.py", "_default", "hash4", None)
            .unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300)); // 100 + 200
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130)); // 50 + 80
    }

    /// `list_files` on an empty cache returns nothing.
    #[test]
    fn test_list_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    /// `list_files` returns stored files ordered by path.
    #[test]
    fn test_list_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // Files should be sorted by path
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    /// `list_files` without a prior `init` yields an empty list instead of failing.
    #[test]
    fn test_list_files_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Listing files before init should return empty vec
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    /// `branch_exists` turns true only for a branch that has a recorded file.
    #[test]
    fn test_branch_exists() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        assert!(!cache.branch_exists("main").unwrap());

        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache
            .record_branch_file("src/main.rs", "main", "hash1", Some("commit123"))
            .unwrap();

        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    /// A recorded branch file is returned by `get_branch_files` with its hash.
    #[test]
    fn test_record_branch_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache
            .record_branch_file("src/main.rs", "main", "hash1", Some("commit123"))
            .unwrap();

        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    /// An unknown branch has no files.
    #[test]
    fn test_get_branch_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    /// `batch_record_branch_files` records the hash of every file for the branch.
    #[test]
    fn test_batch_record_branch_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Add files to index first (required for batch_record_branch_files)
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];

        cache
            .batch_record_branch_files(&files, "main", Some("commit123"))
            .unwrap();

        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
    }

    /// Branch metadata written by `update_branch_metadata` is read back by `get_branch_info`.
    #[test]
    fn test_update_branch_metadata() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache
            .update_branch_metadata("main", Some("commit123"), 10, false)
            .unwrap();

        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert!(!info.is_dirty);
    }

    /// The dirty flag passed to `update_branch_metadata` is preserved.
    #[test]
    fn test_update_branch_metadata_dirty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache
            .update_branch_metadata("feature", Some("commit456"), 5, true)
            .unwrap();

        let info = cache.get_branch_info("feature").unwrap();
        assert!(info.is_dirty);
    }

    /// `find_file_with_hash` returns the path and branch of a recorded hash.
    #[test]
    fn test_find_file_with_hash() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache
            .record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123"))
            .unwrap();

        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    /// `find_file_with_hash` returns `None` for a hash that was never recorded.
    #[test]
    fn test_find_file_with_hash_not_found() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    /// The generated config.toml contains the expected sections and keys.
    #[test]
    fn test_config_toml_created() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let config_path = cache.path().join(CONFIG_TOML);
        let config_content = std::fs::read_to_string(&config_path).unwrap();

        // Verify config contains expected sections
        assert!(config_content.contains("[index]"));
        assert!(config_content.contains("[search]"));
        assert!(config_content.contains("[performance]"));
        assert!(config_content.contains("max_file_size"));
    }

    /// `init` creates every table the cache relies on.
    #[test]
    fn test_meta_db_schema() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();

        // Verify tables exist
        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
            .unwrap()
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        assert!(tables.contains(&"files".to_string()));
        assert!(tables.contains(&"statistics".to_string()));
        assert!(tables.contains(&"config".to_string()));
        assert!(tables.contains(&"file_branches".to_string()));
        assert!(tables.contains(&"branches".to_string()));
        assert!(tables.contains(&"file_dependencies".to_string()));
        assert!(tables.contains(&"file_exports".to_string()));
    }

    /// Ten threads, each with its own `CacheManager`, can update different files concurrently.
    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let temp = TempDir::new().unwrap();
        // Despite the name, this is the root directory handed to `CacheManager::new`.
        let cache_path = temp.path().to_path_buf();

        let cache = CacheManager::new(&cache_path);
        cache.init().unwrap();

        // Spawn multiple threads updating different files
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = cache_path.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(&format!("file_{}.rs", i), "rust", i * 10)
                        .unwrap();
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }

        let cache = CacheManager::new(&cache_path);
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    // ===== Corruption Detection Tests =====

    /// `validate` fails when meta.db has been overwritten with non-database bytes.
    #[test]
    fn test_validate_corrupted_database() {
        use std::io::Write;

        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Corrupt the database by overwriting it with invalid data
        let db_path = cache.path().join(META_DB);
        let mut file = File::create(&db_path).unwrap();
        file.write_all(b"CORRUPTED DATA").unwrap();

        // Validation should fail due to database corruption
        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        eprintln!("Error message: {}", err_msg);
        assert!(err_msg.contains("corrupted") || err_msg.contains("not a database"));
    }

    /// `validate` fails when trigrams.bin starts with the wrong magic bytes.
    #[test]
    fn test_validate_corrupted_trigrams() {
        use std::io::Write;

        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Create trigrams.bin with invalid magic bytes
        let trigrams_path = cache.path().join("trigrams.bin");
        let mut file = File::create(&trigrams_path).unwrap();
        file.write_all(b"BADM").unwrap(); // Wrong magic bytes (should be "RFTG")

        // Validation should fail due to invalid magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    /// `validate` fails when content.bin starts with the wrong magic bytes.
    #[test]
    fn test_validate_corrupted_content() {
        use std::io::Write;

        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Create content.bin with invalid magic bytes
        let content_path = cache.path().join("content.bin");
        let mut file = File::create(&content_path).unwrap();
        file.write_all(b"BADM").unwrap(); // Wrong magic bytes (should be "RFCT")

        // Validation should fail due to invalid magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    /// `validate` fails and names the table when a required table has been dropped.
    #[test]
    fn test_validate_missing_schema_table() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Drop a required table to simulate schema corruption
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        // Validation should fail due to missing required table
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}