// reflex/cache.rs
//! Cache management and memory-mapped I/O
//!
//! The cache module handles the `.reflex/` directory structure:
//! - `meta.db`: Metadata, file hashes, and configuration (SQLite)
//! - `content.bin`: Memory-mapped file contents (binary)
//! - `trigrams.bin`: Trigram inverted index (bincode binary)
//! - `config.toml`: Index settings (TOML text)
//!
//! Legacy artifacts that `init()` no longer creates:
//! - `tokens.bin`: compressed lexical tokens (removed - was never used)
//! - `hashes.json`: superseded by the `file_branches` table in `meta.db`
10use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
/// Default cache directory name, created directly under the workspace root.
pub const CACHE_DIR: &str = ".reflex";

/// File names within the cache directory.
pub const META_DB: &str = "meta.db";
// NOTE(review): tokens.bin and hashes.json are legacy - init() no longer
// creates either (hashes moved into meta.db); names kept for compatibility.
pub const TOKENS_BIN: &str = "tokens.bin";
pub const HASHES_JSON: &str = "hashes.json";
pub const CONFIG_TOML: &str = "config.toml";
26
/// Manages the Reflex cache directory (`.reflex/` under a workspace root).
#[derive(Clone)]
pub struct CacheManager {
    // Path to the `.reflex` directory itself (workspace root + CACHE_DIR).
    cache_path: PathBuf,
}
32
33impl CacheManager {
34    /// Create a new cache manager for the given root directory
35    pub fn new(root: impl AsRef<Path>) -> Self {
36        let cache_path = root.as_ref().join(CACHE_DIR);
37        Self { cache_path }
38    }
39
40    /// Initialize the cache directory structure if it doesn't exist
41    pub fn init(&self) -> Result<()> {
42        log::info!("Initializing cache at {:?}", self.cache_path);
43
44        if !self.cache_path.exists() {
45            std::fs::create_dir_all(&self.cache_path)?;
46        }
47
48        // Create meta.db with schema
49        self.init_meta_db()?;
50
51        // Create default config.toml
52        self.init_config_toml()?;
53
54        // Note: tokens.bin removed - was never used
55        // Note: hashes.json is deprecated - hashes are now stored in meta.db
56
57        log::info!("Cache initialized successfully");
58        Ok(())
59    }
60
61    /// Initialize meta.db with SQLite schema
62    fn init_meta_db(&self) -> Result<()> {
63        let db_path = self.cache_path.join(META_DB);
64
65        // Skip if already exists
66        if db_path.exists() {
67            return Ok(());
68        }
69
70        let conn = Connection::open(&db_path)
71            .context("Failed to create meta.db")?;
72
73        // Create files table
74        conn.execute(
75            "CREATE TABLE IF NOT EXISTS files (
76                id INTEGER PRIMARY KEY AUTOINCREMENT,
77                path TEXT NOT NULL UNIQUE,
78                last_indexed INTEGER NOT NULL,
79                language TEXT NOT NULL,
80                token_count INTEGER DEFAULT 0,
81                line_count INTEGER DEFAULT 0
82            )",
83            [],
84        )?;
85
86        conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
87
88        // Create statistics table
89        conn.execute(
90            "CREATE TABLE IF NOT EXISTS statistics (
91                key TEXT PRIMARY KEY,
92                value TEXT NOT NULL,
93                updated_at INTEGER NOT NULL
94            )",
95            [],
96        )?;
97
98        // Initialize default statistics
99        let now = chrono::Utc::now().timestamp();
100        conn.execute(
101            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
102            ["total_files", "0", &now.to_string()],
103        )?;
104        conn.execute(
105            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
106            ["cache_version", "1", &now.to_string()],
107        )?;
108
109        // Store cache schema hash for automatic invalidation detection
110        // This hash is computed at build time from cache-critical source files
111        let schema_hash = env!("CACHE_SCHEMA_HASH");
112        conn.execute(
113            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
114            ["schema_hash", schema_hash, &now.to_string()],
115        )?;
116
117        // Initialize last_compaction timestamp (0 = never compacted)
118        conn.execute(
119            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
120            ["last_compaction", "0", &now.to_string()],
121        )?;
122
123        // Create config table
124        conn.execute(
125            "CREATE TABLE IF NOT EXISTS config (
126                key TEXT PRIMARY KEY,
127                value TEXT NOT NULL
128            )",
129            [],
130        )?;
131
132        // Create branch tracking tables for git-aware indexing
133        conn.execute(
134            "CREATE TABLE IF NOT EXISTS file_branches (
135                file_id INTEGER NOT NULL,
136                branch_id INTEGER NOT NULL,
137                hash TEXT NOT NULL,
138                last_indexed INTEGER NOT NULL,
139                PRIMARY KEY (file_id, branch_id),
140                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
141                FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
142            )",
143            [],
144        )?;
145
146        conn.execute(
147            "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
148            [],
149        )?;
150
151        conn.execute(
152            "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
153            [],
154        )?;
155
156        // Create branches metadata table
157        conn.execute(
158            "CREATE TABLE IF NOT EXISTS branches (
159                id INTEGER PRIMARY KEY AUTOINCREMENT,
160                name TEXT NOT NULL UNIQUE,
161                commit_sha TEXT NOT NULL,
162                last_indexed INTEGER NOT NULL,
163                file_count INTEGER DEFAULT 0,
164                is_dirty INTEGER DEFAULT 0
165            )",
166            [],
167        )?;
168
169        // Create file dependencies table for tracking imports/includes
170        conn.execute(
171            "CREATE TABLE IF NOT EXISTS file_dependencies (
172                id INTEGER PRIMARY KEY AUTOINCREMENT,
173                file_id INTEGER NOT NULL,
174                imported_path TEXT NOT NULL,
175                resolved_file_id INTEGER,
176                import_type TEXT NOT NULL,
177                line_number INTEGER NOT NULL,
178                imported_symbols TEXT,
179                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
180                FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
181            )",
182            [],
183        )?;
184
185        conn.execute(
186            "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
187            [],
188        )?;
189
190        conn.execute(
191            "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
192            [],
193        )?;
194
195        conn.execute(
196            "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
197            [],
198        )?;
199
200        // Create file exports table for tracking barrel re-exports
201        conn.execute(
202            "CREATE TABLE IF NOT EXISTS file_exports (
203                id INTEGER PRIMARY KEY AUTOINCREMENT,
204                file_id INTEGER NOT NULL,
205                exported_symbol TEXT,
206                source_path TEXT NOT NULL,
207                resolved_source_id INTEGER,
208                line_number INTEGER NOT NULL,
209                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
210                FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
211            )",
212            [],
213        )?;
214
215        conn.execute(
216            "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
217            [],
218        )?;
219
220        conn.execute(
221            "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
222            [],
223        )?;
224
225        conn.execute(
226            "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
227            [],
228        )?;
229
230        log::debug!("Created meta.db with schema");
231        Ok(())
232    }
233
234    /// Initialize config.toml with defaults
235    fn init_config_toml(&self) -> Result<()> {
236        let config_path = self.cache_path.join(CONFIG_TOML);
237
238        if config_path.exists() {
239            return Ok(());
240        }
241
242        let default_config = r#"[index]
243languages = []  # Empty = all supported languages
244max_file_size = 10485760  # 10 MB
245follow_symlinks = false
246
247[index.include]
248patterns = []
249
250[index.exclude]
251patterns = []
252
253[search]
254default_limit = 100
255fuzzy_threshold = 0.8
256
257[performance]
258parallel_threads = 0  # 0 = auto (80% of available cores), or set a specific number
259compression_level = 3  # zstd level
260
261[semantic]
262# Semantic query generation using LLMs
263# Translate natural language questions into rfx query commands
264provider = "openrouter"  # Options: openai, anthropic, openrouter
265# model = "openai/gpt-4o-mini"  # Optional: override provider default model
266# auto_execute = false  # Optional: auto-execute queries without confirmation
267"#;
268
269        std::fs::write(&config_path, default_config)?;
270
271        log::debug!("Created default config.toml");
272        Ok(())
273    }
274
275    /// Check if cache exists and is valid
276    pub fn exists(&self) -> bool {
277        self.cache_path.exists()
278            && self.cache_path.join(META_DB).exists()
279    }
280
281    /// Validate cache integrity and detect corruption
282    ///
283    /// Performs basic integrity checks on the cache:
284    /// - Verifies all required files exist
285    /// - Checks SQLite database can be opened
286    /// - Validates binary file headers (trigrams.bin, content.bin)
287    ///
288    /// Returns Ok(()) if cache is valid, Err with details if corrupted.
289    pub fn validate(&self) -> Result<()> {
290        let start = std::time::Instant::now();
291
292        // Check if cache directory exists
293        if !self.cache_path.exists() {
294            anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
295        }
296
297        // Check meta.db exists and can be opened
298        let db_path = self.cache_path.join(META_DB);
299        if !db_path.exists() {
300            anyhow::bail!("Database file missing: {}", db_path.display());
301        }
302
303        // Try to open database
304        let conn = Connection::open(&db_path)
305            .context("Failed to open meta.db - database may be corrupted")?;
306
307        // Verify schema exists
308        let tables: Result<Vec<String>, _> = conn
309            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
310            .and_then(|mut stmt| {
311                stmt.query_map([], |row| row.get(0))
312                    .map(|rows| rows.collect())
313            })
314            .and_then(|result| result);
315
316        match tables {
317            Ok(table_list) => {
318                // Check for required tables
319                let required_tables = vec!["files", "statistics", "config", "file_branches", "branches", "file_dependencies", "file_exports"];
320                for table in &required_tables {
321                    if !table_list.iter().any(|t| t == table) {
322                        anyhow::bail!("Required table '{}' missing from database schema", table);
323                    }
324                }
325            }
326            Err(e) => {
327                anyhow::bail!("Failed to read database schema: {}", e);
328            }
329        }
330
331        // Run SQLite integrity check (fast quick_check)
332        // Use quick_check instead of integrity_check for speed (<10ms vs 100ms+)
333        let integrity_result: String = conn
334            .query_row("PRAGMA quick_check", [], |row| row.get(0))?;
335
336        if integrity_result != "ok" {
337            log::warn!("Database integrity check failed: {}", integrity_result);
338            anyhow::bail!(
339                "Database integrity check failed: {}. Cache may be corrupted. \
340                 Run 'rfx index' to rebuild cache.",
341                integrity_result
342            );
343        }
344
345        // Check trigrams.bin if it exists
346        let trigrams_path = self.cache_path.join("trigrams.bin");
347        if trigrams_path.exists() {
348            use std::io::Read;
349
350            match File::open(&trigrams_path) {
351                Ok(mut file) => {
352                    let mut header = [0u8; 4];
353                    match file.read_exact(&mut header) {
354                        Ok(_) => {
355                            // Check magic bytes
356                            if &header != b"RFTG" {
357                                log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
358                                anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
359                            }
360                        }
361                        Err(_) => {
362                            anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
363                        }
364                    }
365                }
366                Err(e) => {
367                    anyhow::bail!("Failed to open trigrams.bin: {}", e);
368                }
369            }
370        }
371
372        // Check content.bin if it exists
373        let content_path = self.cache_path.join("content.bin");
374        if content_path.exists() {
375            use std::io::Read;
376
377            match File::open(&content_path) {
378                Ok(mut file) => {
379                    let mut header = [0u8; 4];
380                    match file.read_exact(&mut header) {
381                        Ok(_) => {
382                            // Check magic bytes
383                            if &header != b"RFCT" {
384                                log::warn!("content.bin has invalid magic bytes - may be corrupted");
385                                anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
386                            }
387                        }
388                        Err(_) => {
389                            anyhow::bail!("content.bin is too small - appears to be corrupted");
390                        }
391                    }
392                }
393                Err(e) => {
394                    anyhow::bail!("Failed to open content.bin: {}", e);
395                }
396            }
397        }
398
399        // Check schema hash for automatic invalidation
400        let current_schema_hash = env!("CACHE_SCHEMA_HASH");
401
402        let stored_schema_hash: Option<String> = conn
403            .query_row(
404                "SELECT value FROM statistics WHERE key = 'schema_hash'",
405                [],
406                |row| row.get(0),
407            )
408            .optional()?;
409
410        if let Some(stored_hash) = stored_schema_hash {
411            if stored_hash != current_schema_hash {
412                log::warn!(
413                    "Cache schema hash mismatch! Stored: {}, Current: {}",
414                    stored_hash,
415                    current_schema_hash
416                );
417                anyhow::bail!(
418                    "Cache schema version mismatch.\n\
419                     \n\
420                     - Cache was built with version {}\n\
421                     - Current binary expects version {}\n\
422                     \n\
423                     The cache format may be incompatible with this version of Reflex.\n\
424                     Please rebuild the index by running:\n\
425                     \n\
426                       rfx index\n\
427                     \n\
428                     This usually happens after upgrading Reflex or making code changes.",
429                    stored_hash,
430                    current_schema_hash
431                );
432            }
433        } else {
434            log::debug!("No schema_hash found in cache - this cache was created before automatic invalidation was implemented");
435            // Don't fail for backward compatibility with old caches
436            // They will get the hash on next rebuild
437        }
438
439        let elapsed = start.elapsed();
440        log::debug!("Cache validation passed (schema hash: {}, took {:?})", current_schema_hash, elapsed);
441        Ok(())
442    }
443
444    /// Get the path to the cache directory
445    pub fn path(&self) -> &Path {
446        &self.cache_path
447    }
448
449    /// Get the workspace root directory (parent of .reflex/)
450    pub fn workspace_root(&self) -> PathBuf {
451        self.cache_path
452            .parent()
453            .expect(".reflex directory should have a parent")
454            .to_path_buf()
455    }
456
    /// Clear the entire cache
    ///
    /// Recursively removes the whole `.reflex/` directory; a directory that
    /// does not exist is treated as already cleared (returns Ok).
    pub fn clear(&self) -> Result<()> {
        log::warn!("Clearing cache at {:?}", self.cache_path);

        if self.cache_path.exists() {
            std::fs::remove_dir_all(&self.cache_path)?;
        }

        Ok(())
    }
467
468    /// Force SQLite WAL (Write-Ahead Log) checkpoint
469    ///
470    /// Ensures all data written in transactions is flushed to the main database file.
471    /// This is critical when spawning background processes that open new connections,
472    /// as they need to see the committed data immediately.
473    ///
474    /// Uses TRUNCATE mode to completely flush and reset the WAL file.
475    pub fn checkpoint_wal(&self) -> Result<()> {
476        let db_path = self.cache_path.join(META_DB);
477
478        if !db_path.exists() {
479            // No database to checkpoint
480            return Ok(());
481        }
482
483        let conn = Connection::open(&db_path)
484            .context("Failed to open meta.db for WAL checkpoint")?;
485
486        // PRAGMA wal_checkpoint(TRUNCATE) forces a full checkpoint and truncates the WAL
487        // This ensures background processes see all committed data
488        // Note: Returns (busy, log_pages, checkpointed_pages) - use query instead of execute
489        conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
490            let busy: i64 = row.get(0)?;
491            let log_pages: i64 = row.get(1)?;
492            let checkpointed: i64 = row.get(2)?;
493            log::debug!(
494                "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
495                busy, log_pages, checkpointed
496            );
497            Ok(())
498        }).context("Failed to execute WAL checkpoint")?;
499
500        log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
501        Ok(())
502    }
503
504    /// Load all file hashes across all branches from SQLite
505    ///
506    /// Used by background indexer to get hashes for all indexed files.
507    /// Returns the most recent hash for each file across all branches.
508    pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
509        let db_path = self.cache_path.join(META_DB);
510
511        if !db_path.exists() {
512            return Ok(HashMap::new());
513        }
514
515        let conn = Connection::open(&db_path)
516            .context("Failed to open meta.db")?;
517
518        // Get all hashes from file_branches, joined with files to get paths
519        // If a file appears in multiple branches, we'll get multiple entries
520        // (HashMap will keep the last one, which is fine for background indexer)
521        let mut stmt = conn.prepare(
522            "SELECT f.path, fb.hash
523             FROM file_branches fb
524             JOIN files f ON fb.file_id = f.id"
525        )?;
526        let hashes: HashMap<String, String> = stmt.query_map([], |row| {
527            Ok((row.get(0)?, row.get(1)?))
528        })?
529        .collect::<Result<HashMap<_, _>, _>>()?;
530
531        log::debug!("Loaded {} file hashes across all branches from SQLite", hashes.len());
532        Ok(hashes)
533    }
534
535    /// Load file hashes for a specific branch from SQLite
536    ///
537    /// Used by indexer and query engine to get hashes for the current branch.
538    /// This ensures branch-specific incremental indexing and symbol cache lookups.
539    pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
540        let db_path = self.cache_path.join(META_DB);
541
542        if !db_path.exists() {
543            return Ok(HashMap::new());
544        }
545
546        let conn = Connection::open(&db_path)
547            .context("Failed to open meta.db")?;
548
549        // Get hashes for specific branch only
550        let mut stmt = conn.prepare(
551            "SELECT f.path, fb.hash
552             FROM file_branches fb
553             JOIN files f ON fb.file_id = f.id
554             JOIN branches b ON fb.branch_id = b.id
555             WHERE b.name = ?"
556        )?;
557        let hashes: HashMap<String, String> = stmt.query_map([branch], |row| {
558            Ok((row.get(0)?, row.get(1)?))
559        })?
560        .collect::<Result<HashMap<_, _>, _>>()?;
561
562        log::debug!("Loaded {} file hashes for branch '{}' from SQLite", hashes.len(), branch);
563        Ok(hashes)
564    }
565
    /// Save file hashes for incremental indexing
    ///
    /// DEPRECATED: Hashes are now saved via record_branch_file() or batch_record_branch_files().
    /// This method is kept for backward compatibility but does nothing.
    #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
        // No-op: hashes are now persisted to SQLite in record_branch_file().
        // Always succeeds so legacy callers keep working unchanged.
        Ok(())
    }
575
576    /// Update file metadata in the files table
577    ///
578    /// Note: File content hashes are stored separately in the file_branches table
579    /// via record_branch_file() or batch_record_branch_files().
580    pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
581        let db_path = self.cache_path.join(META_DB);
582        let conn = Connection::open(&db_path)
583            .context("Failed to open meta.db for file update")?;
584
585        let now = chrono::Utc::now().timestamp();
586
587        conn.execute(
588            "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
589             VALUES (?, ?, ?, ?)",
590            [path, &now.to_string(), language, &line_count.to_string()],
591        )?;
592
593        Ok(())
594    }
595
596    /// Batch update multiple files in a single transaction for performance
597    ///
598    /// Note: File content hashes are stored separately in the file_branches table
599    /// via batch_update_files_and_branch().
600    pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
601        let db_path = self.cache_path.join(META_DB);
602        let mut conn = Connection::open(&db_path)
603            .context("Failed to open meta.db for batch update")?;
604
605        let now = chrono::Utc::now().timestamp();
606        let now_str = now.to_string();
607
608        // Use a transaction for batch inserts
609        let tx = conn.transaction()?;
610
611        for (path, language, line_count) in files {
612            tx.execute(
613                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
614                 VALUES (?, ?, ?, ?)",
615                [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
616            )?;
617        }
618
619        tx.commit()?;
620        Ok(())
621    }
622
623    /// Batch update files AND record their hashes for a branch in a SINGLE transaction
624    ///
625    /// This is the recommended method for indexing as it ensures atomicity:
626    /// if files are inserted, their branch hashes are guaranteed to be inserted too.
627    pub fn batch_update_files_and_branch(
628        &self,
629        files: &[(String, String, usize)],      // (path, language, line_count)
630        branch_files: &[(String, String)],       // (path, hash)
631        branch: &str,
632        commit_sha: Option<&str>,
633    ) -> Result<()> {
634        log::info!("batch_update_files_and_branch: Processing {} files for branch '{}'", files.len(), branch);
635
636        let db_path = self.cache_path.join(META_DB);
637        let mut conn = Connection::open(&db_path)
638            .context("Failed to open meta.db for batch update and branch recording")?;
639
640        let now = chrono::Utc::now().timestamp();
641        let now_str = now.to_string();
642
643        // Use a SINGLE transaction for both operations
644        let tx = conn.transaction()?;
645
646        // Step 1: Insert/update files table
647        for (path, language, line_count) in files {
648            tx.execute(
649                "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
650                 VALUES (?, ?, ?, ?)",
651                [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
652            )?;
653        }
654        log::info!("Inserted {} files into files table", files.len());
655
656        // Step 2: Get or create branch_id (within same transaction)
657        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
658        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
659
660        // Step 3: Insert file_branches entries (within same transaction)
661        let mut inserted = 0;
662        for (path, hash) in branch_files {
663            // Lookup file_id from path (will find it because we just inserted above)
664            let file_id: i64 = tx.query_row(
665                "SELECT id FROM files WHERE path = ?",
666                [path.as_str()],
667                |row| row.get(0)
668            ).context(format!("File not found in index after insert: {}", path))?;
669
670            // Insert into file_branches using INTEGER values (not strings!)
671            tx.execute(
672                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
673                 VALUES (?, ?, ?, ?)",
674                rusqlite::params![file_id, branch_id, hash.as_str(), now],
675            )?;
676            inserted += 1;
677        }
678        log::info!("Inserted {} file_branches entries", inserted);
679
680        // Commit the entire transaction atomically
681        tx.commit()?;
682        log::info!("Transaction committed successfully (files + file_branches)");
683
684        // DIAGNOSTIC: Verify data was actually persisted after commit
685        // This helps diagnose WAL synchronization issues where commits succeed but data isn't visible
686        let verify_conn = Connection::open(&db_path)
687            .context("Failed to open meta.db for verification")?;
688
689        // Count actual files in database
690        let actual_file_count: i64 = verify_conn.query_row(
691            "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
692            [files.len()],
693            |row| row.get(0)
694        ).unwrap_or(0);
695
696        // Count actual file_branches entries for this branch
697        let actual_fb_count: i64 = verify_conn.query_row(
698            "SELECT COUNT(*) FROM file_branches fb
699             JOIN branches b ON fb.branch_id = b.id
700             WHERE b.name = ?",
701            [branch],
702            |row| row.get(0)
703        ).unwrap_or(0);
704
705        log::info!(
706            "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
707            actual_file_count,
708            files.len(),
709            actual_fb_count,
710            branch,
711            inserted
712        );
713
714        // DEFENSIVE: Warn if counts don't match expectations
715        if actual_file_count < files.len() as i64 {
716            log::warn!(
717                "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
718                files.len(),
719                actual_file_count
720            );
721        }
722        if actual_fb_count < inserted as i64 {
723            log::warn!(
724                "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
725                inserted,
726                branch,
727                actual_fb_count
728            );
729        }
730
731        Ok(())
732    }
733
734    /// Update statistics after indexing by calculating totals from database for a specific branch
735    ///
736    /// Counts only files indexed for the given branch, not all files across all branches.
737    pub fn update_stats(&self, branch: &str) -> Result<()> {
738        let db_path = self.cache_path.join(META_DB);
739        let conn = Connection::open(&db_path)
740            .context("Failed to open meta.db for stats update")?;
741
742        // Count files for specific branch only (branch-aware statistics)
743        let total_files: usize = conn.query_row(
744            "SELECT COUNT(DISTINCT fb.file_id)
745             FROM file_branches fb
746             JOIN branches b ON fb.branch_id = b.id
747             WHERE b.name = ?",
748            [branch],
749            |row| row.get(0),
750        ).unwrap_or(0);
751
752        let now = chrono::Utc::now().timestamp();
753
754        conn.execute(
755            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
756            ["total_files", &total_files.to_string(), &now.to_string()],
757        )?;
758
759        log::debug!("Updated statistics for branch '{}': {} files", branch, total_files);
760        Ok(())
761    }
762
763    /// Check if the stored schema hash matches the current binary's hash.
764    /// Returns Ok(true) if they match, Ok(false) if they don't, Err on DB errors.
765    pub fn check_schema_hash(&self) -> Result<bool> {
766        let db_path = self.cache_path.join(META_DB);
767        if !db_path.exists() {
768            return Ok(false);
769        }
770        let conn = Connection::open(&db_path)?;
771        let current = env!("CACHE_SCHEMA_HASH");
772        let stored: Option<String> = conn
773            .query_row(
774                "SELECT value FROM statistics WHERE key = 'schema_hash'",
775                [],
776                |row| row.get(0),
777            )
778            .optional()?;
779        Ok(stored.as_deref() == Some(current))
780    }
781
782    /// Update cache schema hash in statistics table
783    ///
784    /// This should be called after every index operation to ensure the cache
785    /// is marked as compatible with the current binary version.
786    pub fn update_schema_hash(&self) -> Result<()> {
787        let db_path = self.cache_path.join(META_DB);
788        let conn = Connection::open(&db_path)
789            .context("Failed to open meta.db for schema hash update")?;
790
791        let schema_hash = env!("CACHE_SCHEMA_HASH");
792        let now = chrono::Utc::now().timestamp();
793
794        conn.execute(
795            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
796            ["schema_hash", schema_hash, &now.to_string()],
797        )?;
798
799        log::debug!("Updated schema hash to: {}", schema_hash);
800        Ok(())
801    }
802
803    /// Get list of all indexed files
804    pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
805        let db_path = self.cache_path.join(META_DB);
806
807        if !db_path.exists() {
808            return Ok(Vec::new());
809        }
810
811        let conn = Connection::open(&db_path)
812            .context("Failed to open meta.db")?;
813
814        let mut stmt = conn.prepare(
815            "SELECT path, language, last_indexed FROM files ORDER BY path"
816        )?;
817
818        let files = stmt.query_map([], |row| {
819            let path: String = row.get(0)?;
820            let language: String = row.get(1)?;
821            let last_indexed: i64 = row.get(2)?;
822
823            Ok(IndexedFile {
824                path,
825                language,
826                last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
827                    .unwrap_or_else(chrono::Utc::now)
828                    .to_rfc3339(),
829            })
830        })?
831        .collect::<Result<Vec<_>, _>>()?;
832
833        Ok(files)
834    }
835
836    /// Get statistics about the current cache
837    ///
838    /// Returns statistics for the current git branch if in a git repo,
839    /// or global statistics if not in a git repo.
840    pub fn stats(&self) -> Result<crate::models::IndexStats> {
841        let db_path = self.cache_path.join(META_DB);
842
843        if !db_path.exists() {
844            // Cache not initialized
845            return Ok(crate::models::IndexStats {
846                total_files: 0,
847                index_size_bytes: 0,
848                last_updated: chrono::Utc::now().to_rfc3339(),
849                files_by_language: std::collections::HashMap::new(),
850                lines_by_language: std::collections::HashMap::new(),
851            });
852        }
853
854        let conn = Connection::open(&db_path)
855            .context("Failed to open meta.db")?;
856
857        // Determine current branch for branch-aware statistics
858        let workspace_root = self.workspace_root();
859        let current_branch = if crate::git::is_git_repo(&workspace_root) {
860            crate::git::get_git_state(&workspace_root)
861                .ok()
862                .map(|state| state.branch)
863        } else {
864            Some("_default".to_string())
865        };
866
867        log::debug!("stats(): current_branch = {:?}", current_branch);
868
869        // Read total files (branch-aware)
870        let total_files: usize = if let Some(ref branch) = current_branch {
871            log::debug!("stats(): Counting files for branch '{}'", branch);
872
873            // Debug: Check all branches
874            let branches: Vec<(i64, String, i64)> = conn.prepare(
875                "SELECT id, name, file_count FROM branches"
876            )
877            .and_then(|mut stmt| {
878                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
879                    .map(|rows| rows.collect())
880            })
881            .and_then(|result| result)
882            .unwrap_or_default();
883
884            for (id, name, count) in &branches {
885                log::debug!("stats(): Branch ID={}, Name='{}', FileCount={}", id, name, count);
886            }
887
888            // Debug: Count file_branches per branch
889            let fb_counts: Vec<(String, i64)> = conn.prepare(
890                "SELECT b.name, COUNT(*) FROM file_branches fb
891                 JOIN branches b ON fb.branch_id = b.id
892                 GROUP BY b.name"
893            )
894            .and_then(|mut stmt| {
895                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
896                    .map(|rows| rows.collect())
897            })
898            .and_then(|result| result)
899            .unwrap_or_default();
900
901            for (name, count) in &fb_counts {
902                log::debug!("stats(): file_branches count for branch '{}': {}", name, count);
903            }
904
905            // Count files for current branch only
906            let count: usize = conn.query_row(
907                "SELECT COUNT(DISTINCT fb.file_id)
908                 FROM file_branches fb
909                 JOIN branches b ON fb.branch_id = b.id
910                 WHERE b.name = ?",
911                [branch],
912                |row| row.get(0),
913            ).unwrap_or(0);
914
915            log::debug!("stats(): Query returned total_files = {}", count);
916            count
917        } else {
918            // No branch info - should not happen, but return 0
919            log::warn!("stats(): No current_branch detected!");
920            0
921        };
922
923        // Read last updated timestamp
924        let last_updated: String = conn.query_row(
925            "SELECT updated_at FROM statistics WHERE key = 'total_files'",
926            [],
927            |row| {
928                let timestamp: i64 = row.get(0)?;
929                Ok(chrono::DateTime::from_timestamp(timestamp, 0)
930                    .unwrap_or_else(chrono::Utc::now)
931                    .to_rfc3339())
932            },
933        ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
934
935        // Calculate total cache size (all binary files)
936        let mut index_size_bytes: u64 = 0;
937
938        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
939            let file_path = self.cache_path.join(file_name);
940            if let Ok(metadata) = std::fs::metadata(&file_path) {
941                index_size_bytes += metadata.len();
942            }
943        }
944
945        // Get file count breakdown by language (branch-aware if possible)
946        let mut files_by_language = std::collections::HashMap::new();
947        if let Some(ref branch) = current_branch {
948            // Query files for current branch only
949            let mut stmt = conn.prepare(
950                "SELECT f.language, COUNT(DISTINCT f.id)
951                 FROM files f
952                 JOIN file_branches fb ON f.id = fb.file_id
953                 JOIN branches b ON fb.branch_id = b.id
954                 WHERE b.name = ?
955                 GROUP BY f.language"
956            )?;
957            let lang_counts = stmt.query_map([branch], |row| {
958                let language: String = row.get(0)?;
959                let count: i64 = row.get(1)?;
960                Ok((language, count as usize))
961            })?;
962
963            for result in lang_counts {
964                let (language, count) = result?;
965                files_by_language.insert(language, count);
966            }
967        } else {
968            // Fallback: query all files
969            let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
970            let lang_counts = stmt.query_map([], |row| {
971                let language: String = row.get(0)?;
972                let count: i64 = row.get(1)?;
973                Ok((language, count as usize))
974            })?;
975
976            for result in lang_counts {
977                let (language, count) = result?;
978                files_by_language.insert(language, count);
979            }
980        }
981
982        // Get line count breakdown by language (branch-aware if possible)
983        let mut lines_by_language = std::collections::HashMap::new();
984        if let Some(ref branch) = current_branch {
985            // Query lines for current branch only
986            let mut stmt = conn.prepare(
987                "SELECT f.language, SUM(f.line_count)
988                 FROM files f
989                 JOIN file_branches fb ON f.id = fb.file_id
990                 JOIN branches b ON fb.branch_id = b.id
991                 WHERE b.name = ?
992                 GROUP BY f.language"
993            )?;
994            let line_counts = stmt.query_map([branch], |row| {
995                let language: String = row.get(0)?;
996                let count: i64 = row.get(1)?;
997                Ok((language, count as usize))
998            })?;
999
1000            for result in line_counts {
1001                let (language, count) = result?;
1002                lines_by_language.insert(language, count);
1003            }
1004        } else {
1005            // Fallback: query all files
1006            let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
1007            let line_counts = stmt.query_map([], |row| {
1008                let language: String = row.get(0)?;
1009                let count: i64 = row.get(1)?;
1010                Ok((language, count as usize))
1011            })?;
1012
1013            for result in line_counts {
1014                let (language, count) = result?;
1015                lines_by_language.insert(language, count);
1016            }
1017        }
1018
1019        Ok(crate::models::IndexStats {
1020            total_files,
1021            index_size_bytes,
1022            last_updated,
1023            files_by_language,
1024            lines_by_language,
1025        })
1026    }
1027
1028    // ===== Branch-aware indexing methods =====
1029
1030    /// Get or create a branch ID by name
1031    ///
1032    /// Returns the numeric branch ID, creating a new entry if needed.
1033    fn get_or_create_branch_id(&self, conn: &Connection, branch_name: &str, commit_sha: Option<&str>) -> Result<i64> {
1034        // Try to get existing branch
1035        let existing_id: Option<i64> = conn
1036            .query_row(
1037                "SELECT id FROM branches WHERE name = ?",
1038                [branch_name],
1039                |row| row.get(0),
1040            )
1041            .optional()?;
1042
1043        if let Some(id) = existing_id {
1044            return Ok(id);
1045        }
1046
1047        // Create new branch entry
1048        let now = chrono::Utc::now().timestamp();
1049        conn.execute(
1050            "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1051             VALUES (?, ?, ?, 0, 0)",
1052            [branch_name, commit_sha.unwrap_or("unknown"), &now.to_string()],
1053        )?;
1054
1055        // Get the ID we just created
1056        let id: i64 = conn.last_insert_rowid();
1057        Ok(id)
1058    }
1059
1060    /// Record a file's hash for a specific branch
1061    pub fn record_branch_file(
1062        &self,
1063        path: &str,
1064        branch: &str,
1065        hash: &str,
1066        commit_sha: Option<&str>,
1067    ) -> Result<()> {
1068        let db_path = self.cache_path.join(META_DB);
1069        let conn = Connection::open(&db_path)
1070            .context("Failed to open meta.db for branch file recording")?;
1071
1072        // Lookup file_id from path
1073        let file_id: i64 = conn.query_row(
1074            "SELECT id FROM files WHERE path = ?",
1075            [path],
1076            |row| row.get(0)
1077        ).context(format!("File not found in index: {}", path))?;
1078
1079        // Get or create branch_id
1080        let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1081
1082        let now = chrono::Utc::now().timestamp();
1083
1084        // Insert using proper INTEGER types (not strings!)
1085        conn.execute(
1086            "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1087             VALUES (?, ?, ?, ?)",
1088            rusqlite::params![file_id, branch_id, hash, now],
1089        )?;
1090
1091        Ok(())
1092    }
1093
1094    /// Batch record multiple files for a specific branch in a single transaction
1095    ///
1096    /// IMPORTANT: Files must already exist in the `files` table before calling this method.
1097    /// For atomic insertion of both files and branch hashes, use `batch_update_files_and_branch()` instead.
1098    pub fn batch_record_branch_files(
1099        &self,
1100        files: &[(String, String)],  // (path, hash)
1101        branch: &str,
1102        commit_sha: Option<&str>,
1103    ) -> Result<()> {
1104        log::info!("batch_record_branch_files: Processing {} files for branch '{}'", files.len(), branch);
1105
1106        let db_path = self.cache_path.join(META_DB);
1107        let mut conn = Connection::open(&db_path)
1108            .context("Failed to open meta.db for batch branch recording")?;
1109
1110        let now = chrono::Utc::now().timestamp();
1111
1112        // Use a transaction for batch inserts
1113        let tx = conn.transaction()?;
1114
1115        // Get or create branch_id (use transaction connection)
1116        let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1117        log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1118
1119        let mut inserted = 0;
1120        for (path, hash) in files {
1121            // Lookup file_id from path
1122            log::trace!("Looking up file_id for path: {}", path);
1123            let file_id: i64 = tx.query_row(
1124                "SELECT id FROM files WHERE path = ?",
1125                [path.as_str()],
1126                |row| row.get(0)
1127            ).context(format!("File not found in index: {}", path))?;
1128            log::trace!("Found file_id={} for path: {}", file_id, path);
1129
1130            // Insert using proper INTEGER types (not strings!)
1131            tx.execute(
1132                "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1133                 VALUES (?, ?, ?, ?)",
1134                rusqlite::params![file_id, branch_id, hash.as_str(), now],
1135            )?;
1136            inserted += 1;
1137        }
1138
1139        log::info!("Inserted {} file_branches entries", inserted);
1140        tx.commit()?;
1141        log::info!("Transaction committed successfully");
1142        Ok(())
1143    }
1144
1145    /// Get all files indexed for a specific branch
1146    ///
1147    /// Returns a HashMap of path → hash for all files in the branch.
1148    pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1149        let db_path = self.cache_path.join(META_DB);
1150
1151        if !db_path.exists() {
1152            return Ok(HashMap::new());
1153        }
1154
1155        let conn = Connection::open(&db_path)
1156            .context("Failed to open meta.db")?;
1157
1158        let mut stmt = conn.prepare(
1159            "SELECT f.path, fb.hash
1160             FROM file_branches fb
1161             JOIN files f ON fb.file_id = f.id
1162             JOIN branches b ON fb.branch_id = b.id
1163             WHERE b.name = ?"
1164        )?;
1165        let files: HashMap<String, String> = stmt
1166            .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1167            .collect::<Result<HashMap<_, _>, _>>()?;
1168
1169        log::debug!(
1170            "Loaded {} files for branch '{}' from file_branches table",
1171            files.len(),
1172            branch
1173        );
1174        Ok(files)
1175    }
1176
1177    /// Check if a branch has any indexed files
1178    ///
1179    /// Fast existence check using LIMIT 1 for O(1) performance.
1180    pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1181        let db_path = self.cache_path.join(META_DB);
1182
1183        if !db_path.exists() {
1184            return Ok(false);
1185        }
1186
1187        let conn = Connection::open(&db_path)
1188            .context("Failed to open meta.db")?;
1189
1190        let count: i64 = conn
1191            .query_row(
1192                "SELECT COUNT(*)
1193                 FROM file_branches fb
1194                 JOIN branches b ON fb.branch_id = b.id
1195                 WHERE b.name = ?
1196                 LIMIT 1",
1197                [branch],
1198                |row| row.get(0),
1199            )
1200            .unwrap_or(0);
1201
1202        Ok(count > 0)
1203    }
1204
1205    /// Get branch metadata (commit, last_indexed, file_count, dirty status)
1206    pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1207        let db_path = self.cache_path.join(META_DB);
1208
1209        if !db_path.exists() {
1210            anyhow::bail!("Database not initialized");
1211        }
1212
1213        let conn = Connection::open(&db_path)
1214            .context("Failed to open meta.db")?;
1215
1216        let info = conn.query_row(
1217            "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1218            [branch],
1219            |row| {
1220                Ok(BranchInfo {
1221                    branch: branch.to_string(),
1222                    commit_sha: row.get(0)?,
1223                    last_indexed: row.get(1)?,
1224                    file_count: row.get(2)?,
1225                    is_dirty: row.get::<_, i64>(3)? != 0,
1226                })
1227            },
1228        )?;
1229
1230        Ok(info)
1231    }
1232
1233    /// Update branch metadata after indexing
1234    ///
1235    /// Uses UPDATE instead of INSERT OR REPLACE to preserve branch_id and prevent
1236    /// CASCADE DELETE on file_branches table.
1237    pub fn update_branch_metadata(
1238        &self,
1239        branch: &str,
1240        commit_sha: Option<&str>,
1241        file_count: usize,
1242        is_dirty: bool,
1243    ) -> Result<()> {
1244        let db_path = self.cache_path.join(META_DB);
1245        let conn = Connection::open(&db_path)
1246            .context("Failed to open meta.db for branch metadata update")?;
1247
1248        let now = chrono::Utc::now().timestamp();
1249        let is_dirty_int = if is_dirty { 1 } else { 0 };
1250
1251        // Try UPDATE first to preserve branch_id (prevents CASCADE DELETE)
1252        let rows_updated = conn.execute(
1253            "UPDATE branches
1254             SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1255             WHERE name = ?",
1256            rusqlite::params![
1257                commit_sha.unwrap_or("unknown"),
1258                now,
1259                file_count,
1260                is_dirty_int,
1261                branch
1262            ],
1263        )?;
1264
1265        // If no rows updated (branch doesn't exist yet), INSERT new one
1266        if rows_updated == 0 {
1267            conn.execute(
1268                "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1269                 VALUES (?, ?, ?, ?, ?)",
1270                rusqlite::params![
1271                    branch,
1272                    commit_sha.unwrap_or("unknown"),
1273                    now,
1274                    file_count,
1275                    is_dirty_int
1276                ],
1277            )?;
1278        }
1279
1280        log::debug!(
1281            "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1282            branch,
1283            commit_sha.unwrap_or("unknown"),
1284            file_count,
1285            is_dirty
1286        );
1287        Ok(())
1288    }
1289
1290    /// Find a file with a specific hash (for symbol reuse optimization)
1291    ///
1292    /// Returns the path and branch where this hash was first seen,
1293    /// enabling reuse of parsed symbols across branches.
1294    pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1295        let db_path = self.cache_path.join(META_DB);
1296
1297        if !db_path.exists() {
1298            return Ok(None);
1299        }
1300
1301        let conn = Connection::open(&db_path)
1302            .context("Failed to open meta.db")?;
1303
1304        let result = conn
1305            .query_row(
1306                "SELECT f.path, b.name
1307                 FROM file_branches fb
1308                 JOIN files f ON fb.file_id = f.id
1309                 JOIN branches b ON fb.branch_id = b.id
1310                 WHERE fb.hash = ?
1311                 LIMIT 1",
1312                [hash],
1313                |row| Ok((row.get(0)?, row.get(1)?)),
1314            )
1315            .optional()?;
1316
1317        Ok(result)
1318    }
1319
1320    /// Get file ID by path
1321    ///
1322    /// Returns the integer ID for a file path, or None if not found.
1323    pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1324        let db_path = self.cache_path.join(META_DB);
1325
1326        if !db_path.exists() {
1327            return Ok(None);
1328        }
1329
1330        let conn = Connection::open(&db_path)
1331            .context("Failed to open meta.db")?;
1332
1333        let result = conn
1334            .query_row(
1335                "SELECT id FROM files WHERE path = ?",
1336                [path],
1337                |row| row.get(0),
1338            )
1339            .optional()?;
1340
1341        Ok(result)
1342    }
1343
1344    /// Batch get file IDs for multiple paths
1345    ///
1346    /// Returns a HashMap of path → file_id for all found paths.
1347    /// Paths not in the database are omitted from the result.
1348    ///
1349    /// Automatically chunks large batches to avoid SQLite parameter limits (999 max).
1350    pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1351        let db_path = self.cache_path.join(META_DB);
1352
1353        if !db_path.exists() {
1354            return Ok(HashMap::new());
1355        }
1356
1357        let conn = Connection::open(&db_path)
1358            .context("Failed to open meta.db")?;
1359
1360        // SQLite has a limit of 999 parameters by default
1361        // Chunk requests to stay well under that limit
1362        const BATCH_SIZE: usize = 900;
1363
1364        let mut results = HashMap::new();
1365
1366        for chunk in paths.chunks(BATCH_SIZE) {
1367            // Build IN clause for this chunk
1368            let placeholders = chunk.iter()
1369                .map(|_| "?")
1370                .collect::<Vec<_>>()
1371                .join(", ");
1372
1373            let query = format!("SELECT path, id FROM files WHERE path IN ({})", placeholders);
1374
1375            let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1376            let mut stmt = conn.prepare(&query)?;
1377
1378            let chunk_results = stmt.query_map(rusqlite::params_from_iter(params), |row| {
1379                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1380            })?
1381            .collect::<Result<HashMap<_, _>, _>>()?;
1382
1383            results.extend(chunk_results);
1384        }
1385
1386        log::debug!("Batch loaded {} file IDs (out of {} requested, {} chunks)",
1387                   results.len(), paths.len(), paths.len().div_ceil(BATCH_SIZE));
1388        Ok(results)
1389    }
1390
1391    // ===== Cache compaction methods =====
1392
1393    /// Check if cache compaction should run
1394    ///
1395    /// Returns true if 24+ hours have passed since last compaction (or never compacted).
1396    /// Compaction threshold: 86400 seconds (24 hours)
1397    pub fn should_compact(&self) -> Result<bool> {
1398        let db_path = self.cache_path.join(META_DB);
1399
1400        if !db_path.exists() {
1401            // No database means no compaction needed
1402            return Ok(false);
1403        }
1404
1405        let conn = Connection::open(&db_path)
1406            .context("Failed to open meta.db for compaction check")?;
1407
1408        // Get last_compaction timestamp (defaults to "0" if not found)
1409        let last_compaction: i64 = conn
1410            .query_row(
1411                "SELECT value FROM statistics WHERE key = 'last_compaction'",
1412                [],
1413                |row| {
1414                    let value: String = row.get(0)?;
1415                    Ok(value.parse::<i64>().unwrap_or(0))
1416                },
1417            )
1418            .unwrap_or(0);
1419
1420        // Get current timestamp
1421        let now = chrono::Utc::now().timestamp();
1422
1423        // Compaction threshold: 24 hours (86400 seconds)
1424        const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1425
1426        let elapsed_secs = now - last_compaction;
1427        let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1428
1429        log::debug!(
1430            "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1431            last_compaction,
1432            now,
1433            elapsed_secs,
1434            should_run
1435        );
1436
1437        Ok(should_run)
1438    }
1439
1440    /// Update last_compaction timestamp in statistics table
1441    ///
1442    /// Called after successful compaction to record when it ran.
1443    pub fn update_compaction_timestamp(&self) -> Result<()> {
1444        let db_path = self.cache_path.join(META_DB);
1445        let conn = Connection::open(&db_path)
1446            .context("Failed to open meta.db for compaction timestamp update")?;
1447
1448        let now = chrono::Utc::now().timestamp();
1449
1450        conn.execute(
1451            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1452            ["last_compaction", &now.to_string(), &now.to_string()],
1453        )?;
1454
1455        log::debug!("Updated last_compaction timestamp to: {}", now);
1456        Ok(())
1457    }
1458
    /// Compact the cache by removing deleted files and reclaiming disk space
    ///
    /// This operation:
    /// 1. Identifies files in the database that no longer exist on disk
    /// 2. Deletes those files from all database tables (CASCADE handles related data)
    /// 3. Runs VACUUM to reclaim disk space from deleted rows
    /// 4. Updates the last_compaction timestamp
    ///
    /// Returns a CompactionReport with statistics about the operation.
    /// Safe to run concurrently with queries (uses SQLite transactions).
    ///
    /// Step order matters: the size snapshot must precede deletion/VACUUM so
    /// `space_saved_bytes` reflects the space actually reclaimed.
    pub fn compact(&self) -> Result<crate::models::CompactionReport> {
        let start_time = std::time::Instant::now();
        log::info!("Starting cache compaction...");

        // Get initial cache size (before any rows are deleted or VACUUMed).
        let size_before = self.calculate_cache_size()?;

        // Step 1: Identify deleted files (in DB but not on filesystem)
        let deleted_files = self.identify_deleted_files()?;
        log::info!("Found {} deleted files to remove from cache", deleted_files.len());

        if deleted_files.is_empty() {
            log::info!("No deleted files to compact - cache is clean");
            // Update timestamp anyway to prevent running compaction too frequently
            self.update_compaction_timestamp()?;

            // Nothing removed, nothing saved — report only elapsed time.
            return Ok(crate::models::CompactionReport {
                files_removed: 0,
                space_saved_bytes: 0,
                duration_ms: start_time.elapsed().as_millis() as u64,
            });
        }

        // Step 2: Delete from database (CASCADE handles file_branches, file_dependencies, file_exports)
        self.delete_files_from_db(&deleted_files)?;
        log::info!("Deleted {} files from database", deleted_files.len());

        // Step 3: Run VACUUM to reclaim disk space
        self.vacuum_database()?;
        log::info!("Completed VACUUM operation");

        // Get final cache size; saturating_sub guards against the file
        // growing (e.g. concurrent writes) producing a negative delta.
        let size_after = self.calculate_cache_size()?;
        let space_saved = size_before.saturating_sub(size_after);

        // Step 4: Update last_compaction timestamp
        self.update_compaction_timestamp()?;

        let duration_ms = start_time.elapsed().as_millis() as u64;

        log::info!(
            "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
            deleted_files.len(),
            space_saved,
            space_saved as f64 / 1_048_576.0,
            duration_ms
        );

        Ok(crate::models::CompactionReport {
            files_removed: deleted_files.len(),
            space_saved_bytes: space_saved,
            duration_ms,
        })
    }
1523
1524    /// Identify files in database that no longer exist on filesystem
1525    ///
1526    /// Returns a Vec of file IDs for files that should be removed from the cache.
1527    fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1528        let db_path = self.cache_path.join(META_DB);
1529        let conn = Connection::open(&db_path)
1530            .context("Failed to open meta.db for deleted file identification")?;
1531
1532        let workspace_root = self.workspace_root();
1533
1534        // Query all files from database (id, path)
1535        let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1536        let files = stmt.query_map([], |row| {
1537            Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1538        })?
1539        .collect::<Result<Vec<_>, _>>()?;
1540
1541        log::debug!("Checking {} files for deletion status", files.len());
1542
1543        // Check which files no longer exist on disk
1544        let mut deleted_file_ids = Vec::new();
1545        for (file_id, file_path) in files {
1546            let full_path = workspace_root.join(&file_path);
1547            if !full_path.exists() {
1548                log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1549                deleted_file_ids.push(file_id);
1550            }
1551        }
1552
1553        Ok(deleted_file_ids)
1554    }
1555
1556    /// Delete files from database by file ID
1557    ///
1558    /// Uses a transaction for atomicity. CASCADE delete handles:
1559    /// - file_branches entries
1560    /// - file_dependencies entries
1561    /// - file_exports entries
1562    fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1563        if file_ids.is_empty() {
1564            return Ok(());
1565        }
1566
1567        let db_path = self.cache_path.join(META_DB);
1568        let mut conn = Connection::open(&db_path)
1569            .context("Failed to open meta.db for file deletion")?;
1570
1571        let tx = conn.transaction()?;
1572
1573        // Delete files in batches to avoid SQLite parameter limit (999 max)
1574        const BATCH_SIZE: usize = 900;
1575
1576        for chunk in file_ids.chunks(BATCH_SIZE) {
1577            let placeholders = chunk.iter()
1578                .map(|_| "?")
1579                .collect::<Vec<_>>()
1580                .join(", ");
1581
1582            let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1583
1584            let params: Vec<i64> = chunk.to_vec();
1585            tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1586        }
1587
1588        tx.commit()?;
1589        log::debug!("Deleted {} files from database (CASCADE handled related tables)", file_ids.len());
1590        Ok(())
1591    }
1592
1593    /// Run VACUUM on SQLite database to reclaim disk space
1594    ///
1595    /// VACUUM rebuilds the database file, removing free pages and compacting the file.
1596    /// This can take several seconds on large databases but significantly reduces disk usage.
1597    fn vacuum_database(&self) -> Result<()> {
1598        let db_path = self.cache_path.join(META_DB);
1599        let conn = Connection::open(&db_path)
1600            .context("Failed to open meta.db for VACUUM")?;
1601
1602        // VACUUM cannot run inside a transaction
1603        // It rebuilds the entire database file
1604        conn.execute("VACUUM", [])?;
1605
1606        log::debug!("VACUUM completed successfully");
1607        Ok(())
1608    }
1609
1610    /// Calculate total cache size in bytes
1611    ///
1612    /// Sums up the size of all cache files:
1613    /// - meta.db (SQLite database)
1614    /// - trigrams.bin (inverted index)
1615    /// - content.bin (file contents)
1616    /// - config.toml (configuration)
1617    fn calculate_cache_size(&self) -> Result<u64> {
1618        let mut total_size: u64 = 0;
1619
1620        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
1621            let file_path = self.cache_path.join(file_name);
1622            if let Ok(metadata) = std::fs::metadata(&file_path) {
1623                total_size += metadata.len();
1624            }
1625        }
1626
1627        Ok(total_size)
1628    }
1629}
1630
/// Branch metadata information
#[derive(Debug, Clone)]
pub struct BranchInfo {
    /// Branch name (tests use e.g. "main" and "_default" — presumably
    /// "_default" is the label for non-git workspaces; confirm with callers).
    pub branch: String,
    /// Commit SHA recorded when the branch was last indexed.
    pub commit_sha: String,
    /// Timestamp of the last index run — assumed to be a Unix epoch value;
    /// TODO(review): confirm units against the writer in update_branch_metadata.
    pub last_indexed: i64,
    /// Number of files recorded for this branch.
    pub file_count: usize,
    /// Whether the working tree had uncommitted changes at index time
    /// (see update_branch_metadata's `is_dirty` argument).
    pub is_dirty: bool,
}
1640
// TODO: Implement memory-mapped readers for:
// - SymbolReader (reads from symbols.bin)
// - MetaReader (reads from meta.db)
// NOTE: a TokenReader over tokens.bin was also planned, but tokens.bin was
// removed (see init()) — drop or revisit that item.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    // ===== Lifecycle: init / clear =====

    #[test]
    fn test_cache_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());

        // Verify all expected files were created
        assert!(cache.path().join(META_DB).exists());
        assert!(cache.path().join(CONFIG_TOML).exists());
    }

    #[test]
    fn test_cache_init_idempotent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Initialize twice - should not error
        cache.init().unwrap();
        cache.init().unwrap();

        assert!(cache.exists());
    }

    #[test]
    fn test_cache_clear() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        assert!(cache.exists());

        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Clearing non-existent cache should not error
        assert!(!cache.exists());
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    // ===== Hash loading =====

    #[test]
    fn test_load_all_hashes_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Loading hashes before init should return empty map
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    // ===== File metadata updates =====

    #[test]
    fn test_update_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();

        // Verify file was stored (check via list_files)
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.update_file("README.md", "markdown", 50).unwrap();

        // Verify files were stored
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/main.rs", "rust", 150).unwrap();

        // Second update should replace the first
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];

        cache.batch_update_files(&files).unwrap();

        // Verify files were stored
        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    // ===== Statistics =====

    #[test]
    fn test_update_stats() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        // Record files for a test branch
        cache.record_branch_file("src/main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("src/lib.rs", "_default", "hash2", None).unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Stats before init should return zeros
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("main.rs", "Rust", 100).unwrap();
        cache.update_file("lib.rs", "Rust", 200).unwrap();
        cache.update_file("script.py", "Python", 50).unwrap();
        cache.update_file("test.py", "Python", 80).unwrap();

        // Record files for a test branch
        cache.record_branch_file("main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("lib.rs", "_default", "hash2", None).unwrap();
        cache.record_branch_file("script.py", "_default", "hash3", None).unwrap();
        cache.record_branch_file("test.py", "_default", "hash4", None).unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300)); // 100 + 200
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130)); // 50 + 80
    }

    // ===== File listing =====

    #[test]
    fn test_list_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_list_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // Files should be sorted by path
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        // Listing files before init should return empty vec
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    // ===== Branch tracking =====

    #[test]
    fn test_branch_exists() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        assert!(!cache.branch_exists("main").unwrap());

        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Add files to index first (required for batch_record_branch_files)
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];

        cache.batch_record_branch_files(&files, "main", Some("commit123")).unwrap();

        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
    }

    #[test]
    fn test_update_branch_metadata() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();

        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert!(!info.is_dirty);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        cache.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();

        let info = cache.get_branch_info("feature").unwrap();
        assert!(info.is_dirty);
    }

    #[test]
    fn test_find_file_with_hash() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();
        // Add file to index first (required for record_branch_file)
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();

        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    // ===== Config / schema =====

    #[test]
    fn test_config_toml_created() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let config_path = cache.path().join(CONFIG_TOML);
        let config_content = std::fs::read_to_string(&config_path).unwrap();

        // Verify config contains expected sections
        assert!(config_content.contains("[index]"));
        assert!(config_content.contains("[search]"));
        assert!(config_content.contains("[performance]"));
        assert!(config_content.contains("max_file_size"));
    }

    #[test]
    fn test_meta_db_schema() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();

        // Verify tables exist
        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'").unwrap()
            .query_map([], |row| row.get(0)).unwrap()
            .collect::<Result<Vec<_>, _>>().unwrap();

        assert!(tables.contains(&"files".to_string()));
        assert!(tables.contains(&"statistics".to_string()));
        assert!(tables.contains(&"config".to_string()));
        assert!(tables.contains(&"file_branches".to_string()));
        assert!(tables.contains(&"branches".to_string()));
        assert!(tables.contains(&"file_dependencies".to_string()));
        assert!(tables.contains(&"file_exports".to_string()));
    }

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().to_path_buf();

        let cache = CacheManager::new(&cache_path);
        cache.init().unwrap();

        // Spawn multiple threads updating different files
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = cache_path.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(
                            &format!("file_{}.rs", i),
                            "rust",
                            i * 10,
                        )
                        .unwrap();
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }

        let cache = CacheManager::new(&cache_path);
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    // ===== Corruption Detection Tests =====

    #[test]
    fn test_validate_corrupted_database() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Corrupt the database by overwriting it with invalid data
        let db_path = cache.path().join(META_DB);
        let mut file = File::create(&db_path).unwrap();
        file.write_all(b"CORRUPTED DATA").unwrap();

        // Validation should fail due to database corruption
        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        eprintln!("Error message: {}", err_msg);
        assert!(err_msg.contains("corrupted") || err_msg.contains("not a database"));
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Create trigrams.bin with invalid magic bytes
        let trigrams_path = cache.path().join("trigrams.bin");
        let mut file = File::create(&trigrams_path).unwrap();
        file.write_all(b"BADM").unwrap(); // Wrong magic bytes (should be "RFTG")

        // Validation should fail due to invalid magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Create content.bin with invalid magic bytes
        let content_path = cache.path().join("content.bin");
        let mut file = File::create(&content_path).unwrap();
        file.write_all(b"BADM").unwrap(); // Wrong magic bytes (should be "RFCT")

        // Validation should fail due to invalid magic bytes
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());

        cache.init().unwrap();

        // Drop a required table to simulate schema corruption
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        // Validation should fail due to missing required table
        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}