reflex/
cache.rs

1//! Cache management and memory-mapped I/O
2//!
3//! The cache module handles the `.reflex/` directory structure:
4//! - `meta.db`: Metadata, file hashes, and configuration (SQLite)
5//! - `tokens.bin`: Compressed lexical tokens (binary)
6//! - `content.bin`: Memory-mapped file contents (binary)
7//! - `trigrams.bin`: Trigram inverted index (bincode binary)
8//! - `config.toml`: Index settings (TOML text)
9
10use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::io::Write;
15use std::path::{Path, PathBuf};
16
17use crate::models::IndexedFile;
18
/// Default cache directory name
pub const CACHE_DIR: &str = ".reflex";

/// File names within the cache directory
pub const META_DB: &str = "meta.db";         // SQLite metadata, hashes, branch tracking
pub const TOKENS_BIN: &str = "tokens.bin";   // compressed token store (binary, "RFTK" header)
pub const HASHES_JSON: &str = "hashes.json"; // legacy hash store; superseded by meta.db
pub const CONFIG_TOML: &str = "config.toml"; // index settings (TOML text)
27
/// Manages the Reflex cache directory
///
/// A thin handle around the resolved `.reflex/` path; methods open the
/// underlying database/files on demand rather than holding connections.
pub struct CacheManager {
    // Path to `<root>/.reflex` as computed by `new()`
    cache_path: PathBuf,
}
32
33impl CacheManager {
34    /// Create a new cache manager for the given root directory
35    pub fn new(root: impl AsRef<Path>) -> Self {
36        let cache_path = root.as_ref().join(CACHE_DIR);
37        Self { cache_path }
38    }
39
40    /// Initialize the cache directory structure if it doesn't exist
41    pub fn init(&self) -> Result<()> {
42        log::info!("Initializing cache at {:?}", self.cache_path);
43
44        if !self.cache_path.exists() {
45            std::fs::create_dir_all(&self.cache_path)?;
46        }
47
48        // Create meta.db with schema
49        self.init_meta_db()?;
50
51        // Create empty tokens.bin with header
52        self.init_tokens_bin()?;
53
54        // Create default config.toml
55        self.init_config_toml()?;
56
57        // Note: hashes.json is deprecated - hashes are now stored in meta.db
58
59        log::info!("Cache initialized successfully");
60        Ok(())
61    }
62
63    /// Initialize meta.db with SQLite schema
64    fn init_meta_db(&self) -> Result<()> {
65        let db_path = self.cache_path.join(META_DB);
66
67        // Skip if already exists
68        if db_path.exists() {
69            return Ok(());
70        }
71
72        let conn = Connection::open(&db_path)
73            .context("Failed to create meta.db")?;
74
75        // Create files table
76        conn.execute(
77            "CREATE TABLE IF NOT EXISTS files (
78                id INTEGER PRIMARY KEY AUTOINCREMENT,
79                path TEXT NOT NULL UNIQUE,
80                hash TEXT NOT NULL,
81                last_indexed INTEGER NOT NULL,
82                language TEXT NOT NULL,
83                token_count INTEGER DEFAULT 0,
84                line_count INTEGER DEFAULT 0
85            )",
86            [],
87        )?;
88
89        conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
90        conn.execute("CREATE INDEX IF NOT EXISTS idx_files_hash ON files(hash)", [])?;
91
92        // Create statistics table
93        conn.execute(
94            "CREATE TABLE IF NOT EXISTS statistics (
95                key TEXT PRIMARY KEY,
96                value TEXT NOT NULL,
97                updated_at INTEGER NOT NULL
98            )",
99            [],
100        )?;
101
102        // Initialize default statistics
103        let now = chrono::Utc::now().timestamp();
104        conn.execute(
105            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
106            ["total_files", "0", &now.to_string()],
107        )?;
108        conn.execute(
109            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
110            ["cache_version", "1", &now.to_string()],
111        )?;
112
113        // Create config table
114        conn.execute(
115            "CREATE TABLE IF NOT EXISTS config (
116                key TEXT PRIMARY KEY,
117                value TEXT NOT NULL
118            )",
119            [],
120        )?;
121
122        // Create branch tracking tables for git-aware indexing
123        conn.execute(
124            "CREATE TABLE IF NOT EXISTS file_branches (
125                path TEXT NOT NULL,
126                branch TEXT NOT NULL,
127                hash TEXT NOT NULL,
128                commit_sha TEXT,
129                last_indexed INTEGER NOT NULL,
130                PRIMARY KEY (path, branch)
131            )",
132            [],
133        )?;
134
135        conn.execute(
136            "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch, path)",
137            [],
138        )?;
139
140        conn.execute(
141            "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
142            [],
143        )?;
144
145        // Create branches metadata table
146        conn.execute(
147            "CREATE TABLE IF NOT EXISTS branches (
148                branch TEXT PRIMARY KEY,
149                commit_sha TEXT NOT NULL,
150                last_indexed INTEGER NOT NULL,
151                file_count INTEGER DEFAULT 0,
152                is_dirty INTEGER DEFAULT 0
153            )",
154            [],
155        )?;
156
157        log::debug!("Created meta.db with schema");
158        Ok(())
159    }
160
161    /// Initialize tokens.bin with header
162    fn init_tokens_bin(&self) -> Result<()> {
163        let tokens_path = self.cache_path.join(TOKENS_BIN);
164
165        if tokens_path.exists() {
166            return Ok(());
167        }
168
169        let mut file = File::create(&tokens_path)?;
170
171        // Write header: magic bytes + version + compression type + sizes
172        let magic_bytes = b"RFTK"; // Reflex Tokens
173        let version: u32 = 1;
174        let compression_type: u32 = 1; // 1 = zstd
175        let uncompressed_size: u64 = 0;
176        let token_count: u64 = 0;
177        let reserved = [0u8; 8];
178
179        file.write_all(magic_bytes)?;
180        file.write_all(&version.to_le_bytes())?;
181        file.write_all(&compression_type.to_le_bytes())?;
182        file.write_all(&uncompressed_size.to_le_bytes())?;
183        file.write_all(&token_count.to_le_bytes())?;
184        file.write_all(&reserved)?;
185
186        log::debug!("Created empty tokens.bin");
187        Ok(())
188    }
189
    /// Initialize hashes.json with empty map
    ///
    /// DEPRECATED: Hashes are now stored in SQLite (meta.db).
    /// This function is kept for backward compatibility but is not called by init().
    #[deprecated(note = "Hashes are now stored in SQLite")]
    #[allow(dead_code)]
    fn init_hashes_json(&self) -> Result<()> {
        let hashes_path = self.cache_path.join(HASHES_JSON);

        // Never overwrite a hashes.json left behind by an older version.
        if hashes_path.exists() {
            return Ok(());
        }

        // Write a pretty-printed empty JSON object ("{}") as the initial state.
        let empty_map: HashMap<String, String> = HashMap::new();
        let json = serde_json::to_string_pretty(&empty_map)?;
        std::fs::write(&hashes_path, json)?;

        log::debug!("Created empty hashes.json");
        Ok(())
    }
210
211    /// Initialize config.toml with defaults
212    fn init_config_toml(&self) -> Result<()> {
213        let config_path = self.cache_path.join(CONFIG_TOML);
214
215        if config_path.exists() {
216            return Ok(());
217        }
218
219        let default_config = r#"[index]
220languages = []  # Empty = all supported languages
221max_file_size = 10485760  # 10 MB
222follow_symlinks = false
223
224[index.include]
225patterns = []
226
227[index.exclude]
228patterns = []
229
230[search]
231default_limit = 100
232fuzzy_threshold = 0.8
233
234[performance]
235parallel_threads = 0  # 0 = auto (80% of available cores), or set a specific number
236compression_level = 3  # zstd level
237"#;
238
239        std::fs::write(&config_path, default_config)?;
240
241        log::debug!("Created default config.toml");
242        Ok(())
243    }
244
245    /// Check if cache exists and is valid
246    pub fn exists(&self) -> bool {
247        self.cache_path.exists()
248            && self.cache_path.join(META_DB).exists()
249    }
250
251    /// Validate cache integrity and detect corruption
252    ///
253    /// Performs basic integrity checks on the cache:
254    /// - Verifies all required files exist
255    /// - Checks SQLite database can be opened
256    /// - Validates binary file headers (trigrams.bin, content.bin)
257    ///
258    /// Returns Ok(()) if cache is valid, Err with details if corrupted.
259    pub fn validate(&self) -> Result<()> {
260        // Check if cache directory exists
261        if !self.cache_path.exists() {
262            anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
263        }
264
265        // Check meta.db exists and can be opened
266        let db_path = self.cache_path.join(META_DB);
267        if !db_path.exists() {
268            anyhow::bail!("Database file missing: {}", db_path.display());
269        }
270
271        // Try to open database
272        let conn = Connection::open(&db_path)
273            .context("Failed to open meta.db - database may be corrupted")?;
274
275        // Verify schema exists
276        let tables: Result<Vec<String>, _> = conn
277            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
278            .and_then(|mut stmt| {
279                stmt.query_map([], |row| row.get(0))
280                    .map(|rows| rows.collect())
281            })
282            .and_then(|result| result);
283
284        match tables {
285            Ok(table_list) => {
286                // Check for required tables
287                let required_tables = vec!["files", "statistics", "config", "file_branches", "branches"];
288                for table in &required_tables {
289                    if !table_list.iter().any(|t| t == table) {
290                        anyhow::bail!("Required table '{}' missing from database schema", table);
291                    }
292                }
293            }
294            Err(e) => {
295                anyhow::bail!("Failed to read database schema: {}", e);
296            }
297        }
298
299        // Check trigrams.bin if it exists
300        let trigrams_path = self.cache_path.join("trigrams.bin");
301        if trigrams_path.exists() {
302            use std::io::Read;
303
304            match File::open(&trigrams_path) {
305                Ok(mut file) => {
306                    let mut header = [0u8; 4];
307                    match file.read_exact(&mut header) {
308                        Ok(_) => {
309                            // Check magic bytes
310                            if &header != b"RFTG" {
311                                log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
312                                anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
313                            }
314                        }
315                        Err(_) => {
316                            anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
317                        }
318                    }
319                }
320                Err(e) => {
321                    anyhow::bail!("Failed to open trigrams.bin: {}", e);
322                }
323            }
324        }
325
326        // Check content.bin if it exists
327        let content_path = self.cache_path.join("content.bin");
328        if content_path.exists() {
329            use std::io::Read;
330
331            match File::open(&content_path) {
332                Ok(mut file) => {
333                    let mut header = [0u8; 4];
334                    match file.read_exact(&mut header) {
335                        Ok(_) => {
336                            // Check magic bytes
337                            if &header != b"RFCT" {
338                                log::warn!("content.bin has invalid magic bytes - may be corrupted");
339                                anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
340                            }
341                        }
342                        Err(_) => {
343                            anyhow::bail!("content.bin is too small - appears to be corrupted");
344                        }
345                    }
346                }
347                Err(e) => {
348                    anyhow::bail!("Failed to open content.bin: {}", e);
349                }
350            }
351        }
352
353        log::debug!("Cache validation passed");
354        Ok(())
355    }
356
357    /// Get the path to the cache directory
358    pub fn path(&self) -> &Path {
359        &self.cache_path
360    }
361
362    /// Clear the entire cache
363    pub fn clear(&self) -> Result<()> {
364        log::warn!("Clearing cache at {:?}", self.cache_path);
365
366        if self.cache_path.exists() {
367            std::fs::remove_dir_all(&self.cache_path)?;
368        }
369
370        Ok(())
371    }
372
373    /// Load file hashes for incremental indexing from SQLite
374    pub fn load_hashes(&self) -> Result<HashMap<String, String>> {
375        let db_path = self.cache_path.join(META_DB);
376
377        if !db_path.exists() {
378            return Ok(HashMap::new());
379        }
380
381        let conn = Connection::open(&db_path)
382            .context("Failed to open meta.db")?;
383
384        let mut stmt = conn.prepare("SELECT path, hash FROM files")?;
385        let hashes: HashMap<String, String> = stmt.query_map([], |row| {
386            Ok((row.get(0)?, row.get(1)?))
387        })?
388        .collect::<Result<HashMap<_, _>, _>>()?;
389
390        log::debug!("Loaded {} file hashes from SQLite", hashes.len());
391        Ok(hashes)
392    }
393
    /// Save file hashes for incremental indexing
    ///
    /// DEPRECATED: Hashes are now saved directly to SQLite via update_file().
    /// This method is kept for backward compatibility but does nothing.
    /// The `_hashes` argument is intentionally ignored and the call always
    /// succeeds, so legacy callers keep working unchanged.
    #[deprecated(note = "Hashes are now stored in SQLite via update_file()")]
    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
        // No-op: hashes are now persisted to SQLite in update_file()
        Ok(())
    }
403
404    /// Update file metadata in the files table
405    pub fn update_file(&self, path: &str, hash: &str, language: &str, line_count: usize) -> Result<()> {
406        let db_path = self.cache_path.join(META_DB);
407        let conn = Connection::open(&db_path)
408            .context("Failed to open meta.db for file update")?;
409
410        let now = chrono::Utc::now().timestamp();
411
412        conn.execute(
413            "INSERT OR REPLACE INTO files (path, hash, last_indexed, language, line_count)
414             VALUES (?, ?, ?, ?, ?)",
415            [path, hash, &now.to_string(), language, &line_count.to_string()],
416        )?;
417
418        Ok(())
419    }
420
421    /// Batch update multiple files in a single transaction for performance
422    pub fn batch_update_files(&self, files: &[(String, String, String, usize)]) -> Result<()> {
423        let db_path = self.cache_path.join(META_DB);
424        let mut conn = Connection::open(&db_path)
425            .context("Failed to open meta.db for batch update")?;
426
427        let now = chrono::Utc::now().timestamp();
428        let now_str = now.to_string();
429
430        // Use a transaction for batch inserts
431        let tx = conn.transaction()?;
432
433        for (path, hash, language, line_count) in files {
434            tx.execute(
435                "INSERT OR REPLACE INTO files (path, hash, last_indexed, language, line_count)
436                 VALUES (?, ?, ?, ?, ?)",
437                [path.as_str(), hash.as_str(), &now_str, language.as_str(), &line_count.to_string()],
438            )?;
439        }
440
441        tx.commit()?;
442        Ok(())
443    }
444
445    /// Update statistics after indexing by calculating totals from database
446    pub fn update_stats(&self) -> Result<()> {
447        let db_path = self.cache_path.join(META_DB);
448        let conn = Connection::open(&db_path)
449            .context("Failed to open meta.db for stats update")?;
450
451        // Count total files from files table
452        let total_files: usize = conn.query_row(
453            "SELECT COUNT(*) FROM files",
454            [],
455            |row| row.get(0),
456        ).unwrap_or(0);
457
458        let now = chrono::Utc::now().timestamp();
459
460        conn.execute(
461            "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
462            ["total_files", &total_files.to_string(), &now.to_string()],
463        )?;
464
465        log::debug!("Updated statistics: {} files", total_files);
466        Ok(())
467    }
468
469    /// Get list of all indexed files
470    pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
471        let db_path = self.cache_path.join(META_DB);
472
473        if !db_path.exists() {
474            return Ok(Vec::new());
475        }
476
477        let conn = Connection::open(&db_path)
478            .context("Failed to open meta.db")?;
479
480        let mut stmt = conn.prepare(
481            "SELECT path, language, last_indexed FROM files ORDER BY path"
482        )?;
483
484        let files = stmt.query_map([], |row| {
485            let path: String = row.get(0)?;
486            let language: String = row.get(1)?;
487            let last_indexed: i64 = row.get(2)?;
488
489            Ok(IndexedFile {
490                path,
491                language,
492                last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
493                    .unwrap_or_else(chrono::Utc::now)
494                    .to_rfc3339(),
495            })
496        })?
497        .collect::<Result<Vec<_>, _>>()?;
498
499        Ok(files)
500    }
501
502    /// Get statistics about the current cache
503    pub fn stats(&self) -> Result<crate::models::IndexStats> {
504        let db_path = self.cache_path.join(META_DB);
505
506        if !db_path.exists() {
507            // Cache not initialized
508            return Ok(crate::models::IndexStats {
509                total_files: 0,
510                index_size_bytes: 0,
511                last_updated: chrono::Utc::now().to_rfc3339(),
512                files_by_language: std::collections::HashMap::new(),
513                lines_by_language: std::collections::HashMap::new(),
514            });
515        }
516
517        let conn = Connection::open(&db_path)
518            .context("Failed to open meta.db")?;
519
520        // Read total files
521        let total_files: usize = conn.query_row(
522            "SELECT value FROM statistics WHERE key = 'total_files'",
523            [],
524            |row| {
525                let value: String = row.get(0)?;
526                Ok(value.parse().unwrap_or(0))
527            },
528        ).unwrap_or(0);
529
530        // Read last updated timestamp
531        let last_updated: String = conn.query_row(
532            "SELECT updated_at FROM statistics WHERE key = 'total_files'",
533            [],
534            |row| {
535                let timestamp: i64 = row.get(0)?;
536                Ok(chrono::DateTime::from_timestamp(timestamp, 0)
537                    .unwrap_or_else(chrono::Utc::now)
538                    .to_rfc3339())
539            },
540        ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
541
542        // Calculate total cache size (all binary files)
543        let mut index_size_bytes: u64 = 0;
544
545        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
546            let file_path = self.cache_path.join(file_name);
547            if let Ok(metadata) = std::fs::metadata(&file_path) {
548                index_size_bytes += metadata.len();
549            }
550        }
551
552        // Get file count breakdown by language
553        let mut files_by_language = std::collections::HashMap::new();
554        let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
555        let lang_counts = stmt.query_map([], |row| {
556            let language: String = row.get(0)?;
557            let count: i64 = row.get(1)?;
558            Ok((language, count as usize))
559        })?;
560
561        for result in lang_counts {
562            let (language, count) = result?;
563            files_by_language.insert(language, count);
564        }
565
566        // Get line count breakdown by language
567        let mut lines_by_language = std::collections::HashMap::new();
568        let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
569        let line_counts = stmt.query_map([], |row| {
570            let language: String = row.get(0)?;
571            let count: i64 = row.get(1)?;
572            Ok((language, count as usize))
573        })?;
574
575        for result in line_counts {
576            let (language, count) = result?;
577            lines_by_language.insert(language, count);
578        }
579
580        Ok(crate::models::IndexStats {
581            total_files,
582            index_size_bytes,
583            last_updated,
584            files_by_language,
585            lines_by_language,
586        })
587    }
588
589    // ===== Branch-aware indexing methods =====
590
591    /// Record a file's hash for a specific branch
592    pub fn record_branch_file(
593        &self,
594        path: &str,
595        branch: &str,
596        hash: &str,
597        commit_sha: Option<&str>,
598    ) -> Result<()> {
599        let db_path = self.cache_path.join(META_DB);
600        let conn = Connection::open(&db_path)
601            .context("Failed to open meta.db for branch file recording")?;
602
603        let now = chrono::Utc::now().timestamp();
604
605        conn.execute(
606            "INSERT OR REPLACE INTO file_branches (path, branch, hash, commit_sha, last_indexed)
607             VALUES (?, ?, ?, ?, ?)",
608            [
609                path,
610                branch,
611                hash,
612                commit_sha.unwrap_or(""),
613                &now.to_string(),
614            ],
615        )?;
616
617        Ok(())
618    }
619
620    /// Batch record multiple files for a specific branch in a single transaction
621    pub fn batch_record_branch_files(
622        &self,
623        files: &[(String, String)],  // (path, hash)
624        branch: &str,
625        commit_sha: Option<&str>,
626    ) -> Result<()> {
627        let db_path = self.cache_path.join(META_DB);
628        let mut conn = Connection::open(&db_path)
629            .context("Failed to open meta.db for batch branch recording")?;
630
631        let now = chrono::Utc::now().timestamp();
632        let now_str = now.to_string();
633        let commit = commit_sha.unwrap_or("");
634
635        // Use a transaction for batch inserts
636        let tx = conn.transaction()?;
637
638        for (path, hash) in files {
639            tx.execute(
640                "INSERT OR REPLACE INTO file_branches (path, branch, hash, commit_sha, last_indexed)
641                 VALUES (?, ?, ?, ?, ?)",
642                [path.as_str(), branch, hash.as_str(), commit, &now_str],
643            )?;
644        }
645
646        tx.commit()?;
647        Ok(())
648    }
649
650    /// Get all files indexed for a specific branch
651    ///
652    /// Returns a HashMap of path → hash for all files in the branch.
653    pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
654        let db_path = self.cache_path.join(META_DB);
655
656        if !db_path.exists() {
657            return Ok(HashMap::new());
658        }
659
660        let conn = Connection::open(&db_path)
661            .context("Failed to open meta.db")?;
662
663        let mut stmt = conn.prepare("SELECT path, hash FROM file_branches WHERE branch = ?")?;
664        let files: HashMap<String, String> = stmt
665            .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
666            .collect::<Result<HashMap<_, _>, _>>()?;
667
668        log::debug!(
669            "Loaded {} files for branch '{}' from file_branches table",
670            files.len(),
671            branch
672        );
673        Ok(files)
674    }
675
676    /// Check if a branch has any indexed files
677    ///
678    /// Fast existence check using LIMIT 1 for O(1) performance.
679    pub fn branch_exists(&self, branch: &str) -> Result<bool> {
680        let db_path = self.cache_path.join(META_DB);
681
682        if !db_path.exists() {
683            return Ok(false);
684        }
685
686        let conn = Connection::open(&db_path)
687            .context("Failed to open meta.db")?;
688
689        let count: i64 = conn
690            .query_row(
691                "SELECT COUNT(*) FROM file_branches WHERE branch = ? LIMIT 1",
692                [branch],
693                |row| row.get(0),
694            )
695            .unwrap_or(0);
696
697        Ok(count > 0)
698    }
699
700    /// Get branch metadata (commit, last_indexed, file_count, dirty status)
701    pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
702        let db_path = self.cache_path.join(META_DB);
703
704        if !db_path.exists() {
705            anyhow::bail!("Database not initialized");
706        }
707
708        let conn = Connection::open(&db_path)
709            .context("Failed to open meta.db")?;
710
711        let info = conn.query_row(
712            "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE branch = ?",
713            [branch],
714            |row| {
715                Ok(BranchInfo {
716                    branch: branch.to_string(),
717                    commit_sha: row.get(0)?,
718                    last_indexed: row.get(1)?,
719                    file_count: row.get(2)?,
720                    is_dirty: row.get::<_, i64>(3)? != 0,
721                })
722            },
723        )?;
724
725        Ok(info)
726    }
727
728    /// Update branch metadata after indexing
729    pub fn update_branch_metadata(
730        &self,
731        branch: &str,
732        commit_sha: Option<&str>,
733        file_count: usize,
734        is_dirty: bool,
735    ) -> Result<()> {
736        let db_path = self.cache_path.join(META_DB);
737        let conn = Connection::open(&db_path)
738            .context("Failed to open meta.db for branch metadata update")?;
739
740        let now = chrono::Utc::now().timestamp();
741
742        conn.execute(
743            "INSERT OR REPLACE INTO branches (branch, commit_sha, last_indexed, file_count, is_dirty)
744             VALUES (?, ?, ?, ?, ?)",
745            [
746                branch,
747                commit_sha.unwrap_or("unknown"),
748                &now.to_string(),
749                &file_count.to_string(),
750                &(if is_dirty { 1 } else { 0 }).to_string(),
751            ],
752        )?;
753
754        log::debug!(
755            "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
756            branch,
757            commit_sha.unwrap_or("unknown"),
758            file_count,
759            is_dirty
760        );
761        Ok(())
762    }
763
764    /// Find a file with a specific hash (for symbol reuse optimization)
765    ///
766    /// Returns the path and branch where this hash was first seen,
767    /// enabling reuse of parsed symbols across branches.
768    pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
769        let db_path = self.cache_path.join(META_DB);
770
771        if !db_path.exists() {
772            return Ok(None);
773        }
774
775        let conn = Connection::open(&db_path)
776            .context("Failed to open meta.db")?;
777
778        let result = conn
779            .query_row(
780                "SELECT path, branch FROM file_branches WHERE hash = ? LIMIT 1",
781                [hash],
782                |row| Ok((row.get(0)?, row.get(1)?)),
783            )
784            .optional()?;
785
786        Ok(result)
787    }
788}
789
/// Branch metadata information
///
/// Mirrors one row of the `branches` table in meta.db (see
/// `CacheManager::get_branch_info`).
#[derive(Debug, Clone)]
pub struct BranchInfo {
    // Branch name (primary key of the branches table)
    pub branch: String,
    // Commit SHA recorded at index time; "unknown" when unavailable
    pub commit_sha: String,
    // Unix timestamp (seconds) of the last index run for this branch
    pub last_indexed: i64,
    // Number of files recorded for this branch
    pub file_count: usize,
    // Stored as 0/1 in SQLite; presumably flags uncommitted working-tree
    // changes at index time — confirm against callers of update_branch_metadata
    pub is_dirty: bool,
}
799
800// TODO: Implement memory-mapped readers for:
801// - SymbolReader (reads from symbols.bin)
802// - TokenReader (reads from tokens.bin)
803// - MetaReader (reads from meta.db)
804
805#[cfg(test)]
806mod tests {
807    use super::*;
808    use tempfile::TempDir;
809
810    #[test]
811    fn test_cache_init() {
812        let temp = TempDir::new().unwrap();
813        let cache = CacheManager::new(temp.path());
814
815        assert!(!cache.exists());
816        cache.init().unwrap();
817        assert!(cache.exists());
818        assert!(cache.path().exists());
819
820        // Verify all expected files were created
821        assert!(cache.path().join(META_DB).exists());
822        assert!(cache.path().join(TOKENS_BIN).exists());
823        assert!(cache.path().join(CONFIG_TOML).exists());
824    }
825
826    #[test]
827    fn test_cache_init_idempotent() {
828        let temp = TempDir::new().unwrap();
829        let cache = CacheManager::new(temp.path());
830
831        // Initialize twice - should not error
832        cache.init().unwrap();
833        cache.init().unwrap();
834
835        assert!(cache.exists());
836    }
837
838    #[test]
839    fn test_cache_clear() {
840        let temp = TempDir::new().unwrap();
841        let cache = CacheManager::new(temp.path());
842
843        cache.init().unwrap();
844        assert!(cache.exists());
845
846        cache.clear().unwrap();
847        assert!(!cache.exists());
848    }
849
850    #[test]
851    fn test_cache_clear_nonexistent() {
852        let temp = TempDir::new().unwrap();
853        let cache = CacheManager::new(temp.path());
854
855        // Clearing non-existent cache should not error
856        assert!(!cache.exists());
857        cache.clear().unwrap();
858        assert!(!cache.exists());
859    }
860
861    #[test]
862    fn test_load_hashes_empty() {
863        let temp = TempDir::new().unwrap();
864        let cache = CacheManager::new(temp.path());
865
866        cache.init().unwrap();
867        let hashes = cache.load_hashes().unwrap();
868        assert_eq!(hashes.len(), 0);
869    }
870
871    #[test]
872    fn test_load_hashes_before_init() {
873        let temp = TempDir::new().unwrap();
874        let cache = CacheManager::new(temp.path());
875
876        // Loading hashes before init should return empty map
877        let hashes = cache.load_hashes().unwrap();
878        assert_eq!(hashes.len(), 0);
879    }
880
881    #[test]
882    fn test_update_file() {
883        let temp = TempDir::new().unwrap();
884        let cache = CacheManager::new(temp.path());
885
886        cache.init().unwrap();
887        cache.update_file("src/main.rs", "abc123", "rust", 100).unwrap();
888
889        // Verify file was stored
890        let hashes = cache.load_hashes().unwrap();
891        assert_eq!(hashes.get("src/main.rs"), Some(&"abc123".to_string()));
892    }
893
894    #[test]
895    fn test_update_file_multiple() {
896        let temp = TempDir::new().unwrap();
897        let cache = CacheManager::new(temp.path());
898
899        cache.init().unwrap();
900        cache.update_file("src/main.rs", "abc123", "rust", 100).unwrap();
901        cache.update_file("src/lib.rs", "def456", "rust", 200).unwrap();
902        cache.update_file("README.md", "ghi789", "markdown", 50).unwrap();
903
904        let hashes = cache.load_hashes().unwrap();
905        assert_eq!(hashes.len(), 3);
906        assert_eq!(hashes.get("src/main.rs"), Some(&"abc123".to_string()));
907        assert_eq!(hashes.get("src/lib.rs"), Some(&"def456".to_string()));
908        assert_eq!(hashes.get("README.md"), Some(&"ghi789".to_string()));
909    }
910
911    #[test]
912    fn test_update_file_replace() {
913        let temp = TempDir::new().unwrap();
914        let cache = CacheManager::new(temp.path());
915
916        cache.init().unwrap();
917        cache.update_file("src/main.rs", "abc123", "rust", 100).unwrap();
918        cache.update_file("src/main.rs", "xyz999", "rust", 150).unwrap();
919
920        // Second update should replace the first
921        let hashes = cache.load_hashes().unwrap();
922        assert_eq!(hashes.len(), 1);
923        assert_eq!(hashes.get("src/main.rs"), Some(&"xyz999".to_string()));
924    }
925
926    #[test]
927    fn test_batch_update_files() {
928        let temp = TempDir::new().unwrap();
929        let cache = CacheManager::new(temp.path());
930
931        cache.init().unwrap();
932
933        let files = vec![
934            ("src/main.rs".to_string(), "hash1".to_string(), "rust".to_string(), 100),
935            ("src/lib.rs".to_string(), "hash2".to_string(), "rust".to_string(), 200),
936            ("test.py".to_string(), "hash3".to_string(), "python".to_string(), 50),
937        ];
938
939        cache.batch_update_files(&files).unwrap();
940
941        let hashes = cache.load_hashes().unwrap();
942        assert_eq!(hashes.len(), 3);
943        assert_eq!(hashes.get("src/main.rs"), Some(&"hash1".to_string()));
944        assert_eq!(hashes.get("src/lib.rs"), Some(&"hash2".to_string()));
945        assert_eq!(hashes.get("test.py"), Some(&"hash3".to_string()));
946    }
947
948    #[test]
949    fn test_update_stats() {
950        let temp = TempDir::new().unwrap();
951        let cache = CacheManager::new(temp.path());
952
953        cache.init().unwrap();
954        cache.update_file("src/main.rs", "abc123", "rust", 100).unwrap();
955        cache.update_file("src/lib.rs", "def456", "rust", 200).unwrap();
956        cache.update_stats().unwrap();
957
958        let stats = cache.stats().unwrap();
959        assert_eq!(stats.total_files, 2);
960    }
961
962    #[test]
963    fn test_stats_empty_cache() {
964        let temp = TempDir::new().unwrap();
965        let cache = CacheManager::new(temp.path());
966
967        cache.init().unwrap();
968        let stats = cache.stats().unwrap();
969
970        assert_eq!(stats.total_files, 0);
971        assert_eq!(stats.files_by_language.len(), 0);
972    }
973
974    #[test]
975    fn test_stats_before_init() {
976        let temp = TempDir::new().unwrap();
977        let cache = CacheManager::new(temp.path());
978
979        // Stats before init should return zeros
980        let stats = cache.stats().unwrap();
981        assert_eq!(stats.total_files, 0);
982    }
983
984    #[test]
985    fn test_stats_by_language() {
986        let temp = TempDir::new().unwrap();
987        let cache = CacheManager::new(temp.path());
988
989        cache.init().unwrap();
990        cache.update_file("main.rs", "hash1", "rust", 100).unwrap();
991        cache.update_file("lib.rs", "hash2", "rust", 200).unwrap();
992        cache.update_file("script.py", "hash3", "python", 50).unwrap();
993        cache.update_file("test.py", "hash4", "python", 80).unwrap();
994        cache.update_stats().unwrap();
995
996        let stats = cache.stats().unwrap();
997        assert_eq!(stats.files_by_language.get("rust"), Some(&2));
998        assert_eq!(stats.files_by_language.get("python"), Some(&2));
999        assert_eq!(stats.lines_by_language.get("rust"), Some(&300)); // 100 + 200
1000        assert_eq!(stats.lines_by_language.get("python"), Some(&130)); // 50 + 80
1001    }
1002
1003    #[test]
1004    fn test_list_files_empty() {
1005        let temp = TempDir::new().unwrap();
1006        let cache = CacheManager::new(temp.path());
1007
1008        cache.init().unwrap();
1009        let files = cache.list_files().unwrap();
1010        assert_eq!(files.len(), 0);
1011    }
1012
1013    #[test]
1014    fn test_list_files() {
1015        let temp = TempDir::new().unwrap();
1016        let cache = CacheManager::new(temp.path());
1017
1018        cache.init().unwrap();
1019        cache.update_file("src/main.rs", "hash1", "rust", 100).unwrap();
1020        cache.update_file("src/lib.rs", "hash2", "rust", 200).unwrap();
1021
1022        let files = cache.list_files().unwrap();
1023        assert_eq!(files.len(), 2);
1024
1025        // Files should be sorted by path
1026        assert_eq!(files[0].path, "src/lib.rs");
1027        assert_eq!(files[1].path, "src/main.rs");
1028
1029        assert_eq!(files[0].language, "rust");
1030    }
1031
1032    #[test]
1033    fn test_list_files_before_init() {
1034        let temp = TempDir::new().unwrap();
1035        let cache = CacheManager::new(temp.path());
1036
1037        // Listing files before init should return empty vec
1038        let files = cache.list_files().unwrap();
1039        assert_eq!(files.len(), 0);
1040    }
1041
1042    #[test]
1043    fn test_branch_exists() {
1044        let temp = TempDir::new().unwrap();
1045        let cache = CacheManager::new(temp.path());
1046
1047        cache.init().unwrap();
1048
1049        assert!(!cache.branch_exists("main").unwrap());
1050
1051        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();
1052
1053        assert!(cache.branch_exists("main").unwrap());
1054        assert!(!cache.branch_exists("feature-branch").unwrap());
1055    }
1056
1057    #[test]
1058    fn test_record_branch_file() {
1059        let temp = TempDir::new().unwrap();
1060        let cache = CacheManager::new(temp.path());
1061
1062        cache.init().unwrap();
1063        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();
1064
1065        let files = cache.get_branch_files("main").unwrap();
1066        assert_eq!(files.len(), 1);
1067        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
1068    }
1069
1070    #[test]
1071    fn test_get_branch_files_empty() {
1072        let temp = TempDir::new().unwrap();
1073        let cache = CacheManager::new(temp.path());
1074
1075        cache.init().unwrap();
1076        let files = cache.get_branch_files("nonexistent").unwrap();
1077        assert_eq!(files.len(), 0);
1078    }
1079
1080    #[test]
1081    fn test_batch_record_branch_files() {
1082        let temp = TempDir::new().unwrap();
1083        let cache = CacheManager::new(temp.path());
1084
1085        cache.init().unwrap();
1086
1087        let files = vec![
1088            ("src/main.rs".to_string(), "hash1".to_string()),
1089            ("src/lib.rs".to_string(), "hash2".to_string()),
1090            ("README.md".to_string(), "hash3".to_string()),
1091        ];
1092
1093        cache.batch_record_branch_files(&files, "main", Some("commit123")).unwrap();
1094
1095        let branch_files = cache.get_branch_files("main").unwrap();
1096        assert_eq!(branch_files.len(), 3);
1097        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
1098        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
1099        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
1100    }
1101
1102    #[test]
1103    fn test_update_branch_metadata() {
1104        let temp = TempDir::new().unwrap();
1105        let cache = CacheManager::new(temp.path());
1106
1107        cache.init().unwrap();
1108        cache.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();
1109
1110        let info = cache.get_branch_info("main").unwrap();
1111        assert_eq!(info.branch, "main");
1112        assert_eq!(info.commit_sha, "commit123");
1113        assert_eq!(info.file_count, 10);
1114        assert_eq!(info.is_dirty, false);
1115    }
1116
1117    #[test]
1118    fn test_update_branch_metadata_dirty() {
1119        let temp = TempDir::new().unwrap();
1120        let cache = CacheManager::new(temp.path());
1121
1122        cache.init().unwrap();
1123        cache.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();
1124
1125        let info = cache.get_branch_info("feature").unwrap();
1126        assert_eq!(info.is_dirty, true);
1127    }
1128
1129    #[test]
1130    fn test_find_file_with_hash() {
1131        let temp = TempDir::new().unwrap();
1132        let cache = CacheManager::new(temp.path());
1133
1134        cache.init().unwrap();
1135        cache.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();
1136
1137        let result = cache.find_file_with_hash("unique_hash").unwrap();
1138        assert!(result.is_some());
1139
1140        let (path, branch) = result.unwrap();
1141        assert_eq!(path, "src/main.rs");
1142        assert_eq!(branch, "main");
1143    }
1144
1145    #[test]
1146    fn test_find_file_with_hash_not_found() {
1147        let temp = TempDir::new().unwrap();
1148        let cache = CacheManager::new(temp.path());
1149
1150        cache.init().unwrap();
1151
1152        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
1153        assert!(result.is_none());
1154    }
1155
1156    #[test]
1157    fn test_tokens_bin_header() {
1158        let temp = TempDir::new().unwrap();
1159        let cache = CacheManager::new(temp.path());
1160
1161        cache.init().unwrap();
1162
1163        let tokens_path = cache.path().join(TOKENS_BIN);
1164        let contents = std::fs::read(&tokens_path).unwrap();
1165
1166        // Verify magic bytes
1167        assert_eq!(&contents[0..4], b"RFTK");
1168
1169        // Verify version (u32 = 4 bytes)
1170        let version = u32::from_le_bytes([contents[4], contents[5], contents[6], contents[7]]);
1171        assert_eq!(version, 1);
1172    }
1173
1174    #[test]
1175    fn test_config_toml_created() {
1176        let temp = TempDir::new().unwrap();
1177        let cache = CacheManager::new(temp.path());
1178
1179        cache.init().unwrap();
1180
1181        let config_path = cache.path().join(CONFIG_TOML);
1182        let config_content = std::fs::read_to_string(&config_path).unwrap();
1183
1184        // Verify config contains expected sections
1185        assert!(config_content.contains("[index]"));
1186        assert!(config_content.contains("[search]"));
1187        assert!(config_content.contains("[performance]"));
1188        assert!(config_content.contains("max_file_size"));
1189    }
1190
1191    #[test]
1192    fn test_meta_db_schema() {
1193        let temp = TempDir::new().unwrap();
1194        let cache = CacheManager::new(temp.path());
1195
1196        cache.init().unwrap();
1197
1198        let db_path = cache.path().join(META_DB);
1199        let conn = Connection::open(&db_path).unwrap();
1200
1201        // Verify tables exist
1202        let tables: Vec<String> = conn
1203            .prepare("SELECT name FROM sqlite_master WHERE type='table'").unwrap()
1204            .query_map([], |row| row.get(0)).unwrap()
1205            .collect::<Result<Vec<_>, _>>().unwrap();
1206
1207        assert!(tables.contains(&"files".to_string()));
1208        assert!(tables.contains(&"statistics".to_string()));
1209        assert!(tables.contains(&"config".to_string()));
1210        assert!(tables.contains(&"file_branches".to_string()));
1211        assert!(tables.contains(&"branches".to_string()));
1212    }
1213
    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().to_path_buf();

        let cache = CacheManager::new(&cache_path);
        cache.init().unwrap();

        // Spawn multiple threads updating different files.
        // Each thread constructs its own CacheManager over the same directory,
        // so this exercises concurrent writes from independent connections to
        // the shared meta.db.
        // NOTE(review): concurrent SQLite writers can hit SQLITE_BUSY; this test
        // assumes update_file (or the connection setup) retries or serializes
        // such conflicts — confirm, otherwise the unwrap inside the threads may
        // fail intermittently.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = cache_path.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(
                            &format!("file_{}.rs", i),
                            &format!("hash_{}", i),
                            "rust",
                            i * 10,
                        )
                        .unwrap();
                })
            })
            .collect();

        // Propagate any panic from a worker thread into the test.
        for handle in handles {
            handle.join().unwrap();
        }

        // A fresh manager must observe all ten rows once the writers have joined.
        let cache = CacheManager::new(&cache_path);
        let hashes = cache.load_hashes().unwrap();
        assert_eq!(hashes.len(), 10);
    }
1250}