1use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
/// Name of the cache directory that `CacheManager::new` appends to the workspace root.
pub const CACHE_DIR: &str = ".reflex";

/// File name of the SQLite metadata database kept inside the cache directory.
pub const META_DB: &str = "meta.db";

/// File name of the token data file; counted towards the index size reported by `stats`.
pub const TOKENS_BIN: &str = "tokens.bin";

/// File name of a JSON hash file.
///
/// NOTE(review): not referenced by the code visible in this part of the file;
/// presumably a legacy artifact now that hashes are stored in the
/// `file_branches` table - confirm before removing.
pub const HASHES_JSON: &str = "hashes.json";

/// File name of the user-editable configuration written by `init_config_toml`.
pub const CONFIG_TOML: &str = "config.toml";
26
/// Handle to the on-disk cache directory of a workspace.
///
/// The value only stores the path of the cache directory; every operation
/// opens the files it needs on demand, so cloning is cheap and clones are
/// independent of each other.
#[derive(Clone, Debug)]
pub struct CacheManager {
    /// Absolute or relative path of the cache directory (`<root>/.reflex`).
    cache_path: PathBuf,
}
32
33impl CacheManager {
34 pub fn new(root: impl AsRef<Path>) -> Self {
36 let cache_path = root.as_ref().join(CACHE_DIR);
37 Self { cache_path }
38 }
39
40 pub fn init(&self) -> Result<()> {
42 log::info!("Initializing cache at {:?}", self.cache_path);
43
44 if !self.cache_path.exists() {
45 std::fs::create_dir_all(&self.cache_path)?;
46 }
47
48 self.init_meta_db()?;
50
51 self.init_config_toml()?;
53
54 log::info!("Cache initialized successfully");
58 Ok(())
59 }
60
61 fn init_meta_db(&self) -> Result<()> {
63 let db_path = self.cache_path.join(META_DB);
64
65 if db_path.exists() {
67 return Ok(());
68 }
69
70 let conn = Connection::open(&db_path)
71 .context("Failed to create meta.db")?;
72
73 conn.execute(
75 "CREATE TABLE IF NOT EXISTS files (
76 id INTEGER PRIMARY KEY AUTOINCREMENT,
77 path TEXT NOT NULL UNIQUE,
78 last_indexed INTEGER NOT NULL,
79 language TEXT NOT NULL,
80 token_count INTEGER DEFAULT 0,
81 line_count INTEGER DEFAULT 0
82 )",
83 [],
84 )?;
85
86 conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
87
88 conn.execute(
90 "CREATE TABLE IF NOT EXISTS statistics (
91 key TEXT PRIMARY KEY,
92 value TEXT NOT NULL,
93 updated_at INTEGER NOT NULL
94 )",
95 [],
96 )?;
97
98 let now = chrono::Utc::now().timestamp();
100 conn.execute(
101 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
102 ["total_files", "0", &now.to_string()],
103 )?;
104 conn.execute(
105 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
106 ["cache_version", "1", &now.to_string()],
107 )?;
108
109 let schema_hash = env!("CACHE_SCHEMA_HASH");
112 conn.execute(
113 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
114 ["schema_hash", schema_hash, &now.to_string()],
115 )?;
116
117 conn.execute(
119 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
120 ["last_compaction", "0", &now.to_string()],
121 )?;
122
123 conn.execute(
125 "CREATE TABLE IF NOT EXISTS config (
126 key TEXT PRIMARY KEY,
127 value TEXT NOT NULL
128 )",
129 [],
130 )?;
131
132 conn.execute(
134 "CREATE TABLE IF NOT EXISTS file_branches (
135 file_id INTEGER NOT NULL,
136 branch_id INTEGER NOT NULL,
137 hash TEXT NOT NULL,
138 last_indexed INTEGER NOT NULL,
139 PRIMARY KEY (file_id, branch_id),
140 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
141 FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
142 )",
143 [],
144 )?;
145
146 conn.execute(
147 "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
148 [],
149 )?;
150
151 conn.execute(
152 "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
153 [],
154 )?;
155
156 conn.execute(
158 "CREATE TABLE IF NOT EXISTS branches (
159 id INTEGER PRIMARY KEY AUTOINCREMENT,
160 name TEXT NOT NULL UNIQUE,
161 commit_sha TEXT NOT NULL,
162 last_indexed INTEGER NOT NULL,
163 file_count INTEGER DEFAULT 0,
164 is_dirty INTEGER DEFAULT 0
165 )",
166 [],
167 )?;
168
169 conn.execute(
171 "CREATE TABLE IF NOT EXISTS file_dependencies (
172 id INTEGER PRIMARY KEY AUTOINCREMENT,
173 file_id INTEGER NOT NULL,
174 imported_path TEXT NOT NULL,
175 resolved_file_id INTEGER,
176 import_type TEXT NOT NULL,
177 line_number INTEGER NOT NULL,
178 imported_symbols TEXT,
179 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
180 FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
181 )",
182 [],
183 )?;
184
185 conn.execute(
186 "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
187 [],
188 )?;
189
190 conn.execute(
191 "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
192 [],
193 )?;
194
195 conn.execute(
196 "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
197 [],
198 )?;
199
200 conn.execute(
202 "CREATE TABLE IF NOT EXISTS file_exports (
203 id INTEGER PRIMARY KEY AUTOINCREMENT,
204 file_id INTEGER NOT NULL,
205 exported_symbol TEXT,
206 source_path TEXT NOT NULL,
207 resolved_source_id INTEGER,
208 line_number INTEGER NOT NULL,
209 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
210 FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
211 )",
212 [],
213 )?;
214
215 conn.execute(
216 "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
217 [],
218 )?;
219
220 conn.execute(
221 "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
222 [],
223 )?;
224
225 conn.execute(
226 "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
227 [],
228 )?;
229
230 log::debug!("Created meta.db with schema");
231 Ok(())
232 }
233
234 fn init_config_toml(&self) -> Result<()> {
236 let config_path = self.cache_path.join(CONFIG_TOML);
237
238 if config_path.exists() {
239 return Ok(());
240 }
241
242 let default_config = r#"[index]
243languages = [] # Empty = all supported languages
244max_file_size = 10485760 # 10 MB
245follow_symlinks = false
246
247[index.include]
248patterns = []
249
250[index.exclude]
251patterns = []
252
253[search]
254default_limit = 100
255fuzzy_threshold = 0.8
256
257[performance]
258parallel_threads = 0 # 0 = auto (80% of available cores), or set a specific number
259compression_level = 3 # zstd level
260
261[semantic]
262# Semantic query generation using LLMs
263# Translate natural language questions into rfx query commands
264provider = "openrouter" # Options: openai, anthropic, openrouter
265# model = "openai/gpt-4o-mini" # Optional: override provider default model
266# auto_execute = false # Optional: auto-execute queries without confirmation
267"#;
268
269 std::fs::write(&config_path, default_config)?;
270
271 log::debug!("Created default config.toml");
272 Ok(())
273 }
274
275 pub fn exists(&self) -> bool {
277 self.cache_path.exists()
278 && self.cache_path.join(META_DB).exists()
279 }
280
281 pub fn validate(&self) -> Result<()> {
290 let start = std::time::Instant::now();
291
292 if !self.cache_path.exists() {
294 anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
295 }
296
297 let db_path = self.cache_path.join(META_DB);
299 if !db_path.exists() {
300 anyhow::bail!("Database file missing: {}", db_path.display());
301 }
302
303 let conn = Connection::open(&db_path)
305 .context("Failed to open meta.db - database may be corrupted")?;
306
307 let tables: Result<Vec<String>, _> = conn
309 .prepare("SELECT name FROM sqlite_master WHERE type='table'")
310 .and_then(|mut stmt| {
311 stmt.query_map([], |row| row.get(0))
312 .map(|rows| rows.collect())
313 })
314 .and_then(|result| result);
315
316 match tables {
317 Ok(table_list) => {
318 let required_tables = vec!["files", "statistics", "config", "file_branches", "branches", "file_dependencies", "file_exports"];
320 for table in &required_tables {
321 if !table_list.iter().any(|t| t == table) {
322 anyhow::bail!("Required table '{}' missing from database schema", table);
323 }
324 }
325 }
326 Err(e) => {
327 anyhow::bail!("Failed to read database schema: {}", e);
328 }
329 }
330
331 let integrity_result: String = conn
334 .query_row("PRAGMA quick_check", [], |row| row.get(0))?;
335
336 if integrity_result != "ok" {
337 log::warn!("Database integrity check failed: {}", integrity_result);
338 anyhow::bail!(
339 "Database integrity check failed: {}. Cache may be corrupted. \
340 Run 'rfx index' to rebuild cache.",
341 integrity_result
342 );
343 }
344
345 let trigrams_path = self.cache_path.join("trigrams.bin");
347 if trigrams_path.exists() {
348 use std::io::Read;
349
350 match File::open(&trigrams_path) {
351 Ok(mut file) => {
352 let mut header = [0u8; 4];
353 match file.read_exact(&mut header) {
354 Ok(_) => {
355 if &header != b"RFTG" {
357 log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
358 anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
359 }
360 }
361 Err(_) => {
362 anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
363 }
364 }
365 }
366 Err(e) => {
367 anyhow::bail!("Failed to open trigrams.bin: {}", e);
368 }
369 }
370 }
371
372 let content_path = self.cache_path.join("content.bin");
374 if content_path.exists() {
375 use std::io::Read;
376
377 match File::open(&content_path) {
378 Ok(mut file) => {
379 let mut header = [0u8; 4];
380 match file.read_exact(&mut header) {
381 Ok(_) => {
382 if &header != b"RFCT" {
384 log::warn!("content.bin has invalid magic bytes - may be corrupted");
385 anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
386 }
387 }
388 Err(_) => {
389 anyhow::bail!("content.bin is too small - appears to be corrupted");
390 }
391 }
392 }
393 Err(e) => {
394 anyhow::bail!("Failed to open content.bin: {}", e);
395 }
396 }
397 }
398
399 let current_schema_hash = env!("CACHE_SCHEMA_HASH");
401
402 let stored_schema_hash: Option<String> = conn
403 .query_row(
404 "SELECT value FROM statistics WHERE key = 'schema_hash'",
405 [],
406 |row| row.get(0),
407 )
408 .optional()?;
409
410 if let Some(stored_hash) = stored_schema_hash {
411 if stored_hash != current_schema_hash {
412 log::warn!(
413 "Cache schema hash mismatch! Stored: {}, Current: {}",
414 stored_hash,
415 current_schema_hash
416 );
417 anyhow::bail!(
418 "Cache schema version mismatch.\n\
419 \n\
420 - Cache was built with version {}\n\
421 - Current binary expects version {}\n\
422 \n\
423 The cache format may be incompatible with this version of Reflex.\n\
424 Please rebuild the index by running:\n\
425 \n\
426 rfx index\n\
427 \n\
428 This usually happens after upgrading Reflex or making code changes.",
429 stored_hash,
430 current_schema_hash
431 );
432 }
433 } else {
434 log::debug!("No schema_hash found in cache - this cache was created before automatic invalidation was implemented");
435 }
438
439 let elapsed = start.elapsed();
440 log::debug!("Cache validation passed (schema hash: {}, took {:?})", current_schema_hash, elapsed);
441 Ok(())
442 }
443
444 pub fn path(&self) -> &Path {
446 &self.cache_path
447 }
448
449 pub fn workspace_root(&self) -> PathBuf {
451 self.cache_path
452 .parent()
453 .expect(".reflex directory should have a parent")
454 .to_path_buf()
455 }
456
457 pub fn load_index_config(&self) -> Result<crate::models::IndexConfig> {
463 use crate::models::{IndexConfig, Language};
464
465 let config_path = self.cache_path.join(CONFIG_TOML);
466 if !config_path.exists() {
467 return Ok(IndexConfig::default());
468 }
469
470 let raw = std::fs::read_to_string(&config_path)
471 .with_context(|| format!("Failed to read {}", config_path.display()))?;
472
473 let toml_val: toml::Value = toml::from_str(&raw)
474 .with_context(|| format!("Failed to parse {}", config_path.display()))?;
475
476 let mut cfg = IndexConfig::default();
477
478 if let Some(index_tbl) = toml_val.get("index") {
479 if let Some(langs) = index_tbl.get("languages").and_then(|v| v.as_array()) {
480 let parsed: Vec<Language> = langs
481 .iter()
482 .filter_map(|v| v.as_str())
483 .filter_map(|s| Language::from_name(s).or_else(|| {
484 log::warn!("Unknown language '{}' in config.toml [index] section — ignoring", s);
485 None
486 }))
487 .collect();
488 if !parsed.is_empty() {
489 cfg.languages = parsed;
490 }
491 }
492 if let Some(max_size) = index_tbl.get("max_file_size").and_then(|v| v.as_integer()) {
493 cfg.max_file_size = max_size as usize;
494 }
495 if let Some(follow) = index_tbl.get("follow_symlinks").and_then(|v| v.as_bool()) {
496 cfg.follow_symlinks = follow;
497 }
498 if let Some(include) = index_tbl.get("include").and_then(|v| v.get("patterns")).and_then(|v| v.as_array()) {
499 cfg.include_patterns = include.iter().filter_map(|v| v.as_str().map(String::from)).collect();
500 }
501 if let Some(exclude) = index_tbl.get("exclude").and_then(|v| v.get("patterns")).and_then(|v| v.as_array()) {
502 cfg.exclude_patterns = exclude.iter().filter_map(|v| v.as_str().map(String::from)).collect();
503 }
504 }
505
506 if let Some(perf) = toml_val.get("performance") {
507 if let Some(threads) = perf.get("parallel_threads").and_then(|v| v.as_integer()) {
508 cfg.parallel_threads = threads as usize;
509 }
510 }
511
512 log::debug!("Loaded IndexConfig from config.toml: {:?}", cfg);
513 Ok(cfg)
514 }
515
516 pub fn clear(&self) -> Result<()> {
518 log::info!("Clearing cache at {:?}", self.cache_path);
519
520 if self.cache_path.exists() {
521 std::fs::remove_dir_all(&self.cache_path)?;
522 }
523
524 Ok(())
525 }
526
527 pub fn checkpoint_wal(&self) -> Result<()> {
535 let db_path = self.cache_path.join(META_DB);
536
537 if !db_path.exists() {
538 return Ok(());
540 }
541
542 let conn = Connection::open(&db_path)
543 .context("Failed to open meta.db for WAL checkpoint")?;
544
545 conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
549 let busy: i64 = row.get(0)?;
550 let log_pages: i64 = row.get(1)?;
551 let checkpointed: i64 = row.get(2)?;
552 log::debug!(
553 "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
554 busy, log_pages, checkpointed
555 );
556 Ok(())
557 }).context("Failed to execute WAL checkpoint")?;
558
559 log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
560 Ok(())
561 }
562
563 pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
568 let db_path = self.cache_path.join(META_DB);
569
570 if !db_path.exists() {
571 return Ok(HashMap::new());
572 }
573
574 let conn = Connection::open(&db_path)
575 .context("Failed to open meta.db")?;
576
577 let mut stmt = conn.prepare(
581 "SELECT f.path, fb.hash
582 FROM file_branches fb
583 JOIN files f ON fb.file_id = f.id"
584 )?;
585 let hashes: HashMap<String, String> = stmt.query_map([], |row| {
586 Ok((row.get(0)?, row.get(1)?))
587 })?
588 .collect::<Result<HashMap<_, _>, _>>()?;
589
590 log::debug!("Loaded {} file hashes across all branches from SQLite", hashes.len());
591 Ok(hashes)
592 }
593
594 pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
599 let db_path = self.cache_path.join(META_DB);
600
601 if !db_path.exists() {
602 return Ok(HashMap::new());
603 }
604
605 let conn = Connection::open(&db_path)
606 .context("Failed to open meta.db")?;
607
608 let mut stmt = conn.prepare(
610 "SELECT f.path, fb.hash
611 FROM file_branches fb
612 JOIN files f ON fb.file_id = f.id
613 JOIN branches b ON fb.branch_id = b.id
614 WHERE b.name = ?"
615 )?;
616 let hashes: HashMap<String, String> = stmt.query_map([branch], |row| {
617 Ok((row.get(0)?, row.get(1)?))
618 })?
619 .collect::<Result<HashMap<_, _>, _>>()?;
620
621 log::debug!("Loaded {} file hashes for branch '{}' from SQLite", hashes.len(), branch);
622 Ok(hashes)
623 }
624
625 #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
630 pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
631 Ok(())
633 }
634
635 pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
640 let db_path = self.cache_path.join(META_DB);
641 let conn = Connection::open(&db_path)
642 .context("Failed to open meta.db for file update")?;
643
644 let now = chrono::Utc::now().timestamp();
645
646 conn.execute(
647 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
648 VALUES (?, ?, ?, ?)",
649 [path, &now.to_string(), language, &line_count.to_string()],
650 )?;
651
652 Ok(())
653 }
654
655 pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
660 let db_path = self.cache_path.join(META_DB);
661 let mut conn = Connection::open(&db_path)
662 .context("Failed to open meta.db for batch update")?;
663
664 let now = chrono::Utc::now().timestamp();
665 let now_str = now.to_string();
666
667 let tx = conn.transaction()?;
669
670 for (path, language, line_count) in files {
671 tx.execute(
672 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
673 VALUES (?, ?, ?, ?)",
674 [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
675 )?;
676 }
677
678 tx.commit()?;
679 Ok(())
680 }
681
682 pub fn batch_update_files_and_branch(
687 &self,
688 files: &[(String, String, usize)], branch_files: &[(String, String)], branch: &str,
691 commit_sha: Option<&str>,
692 ) -> Result<()> {
693 log::info!("batch_update_files_and_branch: Processing {} files for branch '{}'", files.len(), branch);
694
695 let db_path = self.cache_path.join(META_DB);
696 let mut conn = Connection::open(&db_path)
697 .context("Failed to open meta.db for batch update and branch recording")?;
698
699 let now = chrono::Utc::now().timestamp();
700 let now_str = now.to_string();
701
702 let tx = conn.transaction()?;
704
705 for (path, language, line_count) in files {
707 tx.execute(
708 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
709 VALUES (?, ?, ?, ?)",
710 [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
711 )?;
712 }
713 log::info!("Inserted {} files into files table", files.len());
714
715 let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
717 log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
718
719 let mut inserted = 0;
721 for (path, hash) in branch_files {
722 let file_id: i64 = tx.query_row(
724 "SELECT id FROM files WHERE path = ?",
725 [path.as_str()],
726 |row| row.get(0)
727 ).context(format!("File not found in index after insert: {}", path))?;
728
729 tx.execute(
731 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
732 VALUES (?, ?, ?, ?)",
733 rusqlite::params![file_id, branch_id, hash.as_str(), now],
734 )?;
735 inserted += 1;
736 }
737 log::info!("Inserted {} file_branches entries", inserted);
738
739 tx.commit()?;
741 log::info!("Transaction committed successfully (files + file_branches)");
742
743 let verify_conn = Connection::open(&db_path)
746 .context("Failed to open meta.db for verification")?;
747
748 let actual_file_count: i64 = verify_conn.query_row(
750 "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
751 [files.len()],
752 |row| row.get(0)
753 ).unwrap_or(0);
754
755 let actual_fb_count: i64 = verify_conn.query_row(
757 "SELECT COUNT(*) FROM file_branches fb
758 JOIN branches b ON fb.branch_id = b.id
759 WHERE b.name = ?",
760 [branch],
761 |row| row.get(0)
762 ).unwrap_or(0);
763
764 log::info!(
765 "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
766 actual_file_count,
767 files.len(),
768 actual_fb_count,
769 branch,
770 inserted
771 );
772
773 if actual_file_count < files.len() as i64 {
775 log::warn!(
776 "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
777 files.len(),
778 actual_file_count
779 );
780 }
781 if actual_fb_count < inserted as i64 {
782 log::warn!(
783 "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
784 inserted,
785 branch,
786 actual_fb_count
787 );
788 }
789
790 Ok(())
791 }
792
793 pub fn update_stats(&self, branch: &str) -> Result<()> {
797 let db_path = self.cache_path.join(META_DB);
798 let conn = Connection::open(&db_path)
799 .context("Failed to open meta.db for stats update")?;
800
801 let total_files: usize = conn.query_row(
803 "SELECT COUNT(DISTINCT fb.file_id)
804 FROM file_branches fb
805 JOIN branches b ON fb.branch_id = b.id
806 WHERE b.name = ?",
807 [branch],
808 |row| row.get(0),
809 ).unwrap_or(0);
810
811 let now = chrono::Utc::now().timestamp();
812
813 conn.execute(
814 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
815 ["total_files", &total_files.to_string(), &now.to_string()],
816 )?;
817
818 log::debug!("Updated statistics for branch '{}': {} files", branch, total_files);
819 Ok(())
820 }
821
822 pub fn check_schema_hash(&self) -> Result<bool> {
825 let db_path = self.cache_path.join(META_DB);
826 if !db_path.exists() {
827 return Ok(false);
828 }
829 let conn = Connection::open(&db_path)?;
830 let current = env!("CACHE_SCHEMA_HASH");
831 let stored: Option<String> = conn
832 .query_row(
833 "SELECT value FROM statistics WHERE key = 'schema_hash'",
834 [],
835 |row| row.get(0),
836 )
837 .optional()?;
838 Ok(stored.as_deref() == Some(current))
839 }
840
841 pub fn update_schema_hash(&self) -> Result<()> {
846 let db_path = self.cache_path.join(META_DB);
847 let conn = Connection::open(&db_path)
848 .context("Failed to open meta.db for schema hash update")?;
849
850 let schema_hash = env!("CACHE_SCHEMA_HASH");
851 let now = chrono::Utc::now().timestamp();
852
853 conn.execute(
854 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
855 ["schema_hash", schema_hash, &now.to_string()],
856 )?;
857
858 log::debug!("Updated schema hash to: {}", schema_hash);
859 Ok(())
860 }
861
862 pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
864 let db_path = self.cache_path.join(META_DB);
865
866 if !db_path.exists() {
867 return Ok(Vec::new());
868 }
869
870 let conn = Connection::open(&db_path)
871 .context("Failed to open meta.db")?;
872
873 let mut stmt = conn.prepare(
874 "SELECT path, language, last_indexed FROM files ORDER BY path"
875 )?;
876
877 let files = stmt.query_map([], |row| {
878 let path: String = row.get(0)?;
879 let language: String = row.get(1)?;
880 let last_indexed: i64 = row.get(2)?;
881
882 Ok(IndexedFile {
883 path,
884 language,
885 last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
886 .unwrap_or_else(chrono::Utc::now)
887 .to_rfc3339(),
888 })
889 })?
890 .collect::<Result<Vec<_>, _>>()?;
891
892 Ok(files)
893 }
894
895 pub fn stats(&self) -> Result<crate::models::IndexStats> {
900 let db_path = self.cache_path.join(META_DB);
901
902 if !db_path.exists() {
903 return Ok(crate::models::IndexStats {
905 total_files: 0,
906 index_size_bytes: 0,
907 last_updated: chrono::Utc::now().to_rfc3339(),
908 files_by_language: std::collections::HashMap::new(),
909 lines_by_language: std::collections::HashMap::new(),
910 ..Default::default()
911 });
912 }
913
914 let conn = Connection::open(&db_path)
915 .context("Failed to open meta.db")?;
916
917 let workspace_root = self.workspace_root();
919 let current_branch = if crate::git::is_git_repo(&workspace_root) {
920 crate::git::get_git_state(&workspace_root)
921 .ok()
922 .map(|state| state.branch)
923 } else {
924 Some("_default".to_string())
925 };
926
927 log::debug!("stats(): current_branch = {:?}", current_branch);
928
929 let total_files: usize = if let Some(ref branch) = current_branch {
931 log::debug!("stats(): Counting files for branch '{}'", branch);
932
933 let branches: Vec<(i64, String, i64)> = conn.prepare(
935 "SELECT id, name, file_count FROM branches"
936 )
937 .and_then(|mut stmt| {
938 stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
939 .map(|rows| rows.collect())
940 })
941 .and_then(|result| result)
942 .unwrap_or_default();
943
944 for (id, name, count) in &branches {
945 log::debug!("stats(): Branch ID={}, Name='{}', FileCount={}", id, name, count);
946 }
947
948 let fb_counts: Vec<(String, i64)> = conn.prepare(
950 "SELECT b.name, COUNT(*) FROM file_branches fb
951 JOIN branches b ON fb.branch_id = b.id
952 GROUP BY b.name"
953 )
954 .and_then(|mut stmt| {
955 stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
956 .map(|rows| rows.collect())
957 })
958 .and_then(|result| result)
959 .unwrap_or_default();
960
961 for (name, count) in &fb_counts {
962 log::debug!("stats(): file_branches count for branch '{}': {}", name, count);
963 }
964
965 let count: usize = conn.query_row(
967 "SELECT COUNT(DISTINCT fb.file_id)
968 FROM file_branches fb
969 JOIN branches b ON fb.branch_id = b.id
970 WHERE b.name = ?",
971 [branch],
972 |row| row.get(0),
973 ).unwrap_or(0);
974
975 log::debug!("stats(): Query returned total_files = {}", count);
976 count
977 } else {
978 log::warn!("stats(): No current_branch detected!");
980 0
981 };
982
983 let last_updated: String = conn.query_row(
985 "SELECT updated_at FROM statistics WHERE key = 'total_files'",
986 [],
987 |row| {
988 let timestamp: i64 = row.get(0)?;
989 Ok(chrono::DateTime::from_timestamp(timestamp, 0)
990 .unwrap_or_else(chrono::Utc::now)
991 .to_rfc3339())
992 },
993 ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
994
995 let mut index_size_bytes: u64 = 0;
997
998 for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
999 let file_path = self.cache_path.join(file_name);
1000 if let Ok(metadata) = std::fs::metadata(&file_path) {
1001 index_size_bytes += metadata.len();
1002 }
1003 }
1004
1005 let mut files_by_language = std::collections::HashMap::new();
1007 if let Some(ref branch) = current_branch {
1008 let mut stmt = conn.prepare(
1010 "SELECT f.language, COUNT(DISTINCT f.id)
1011 FROM files f
1012 JOIN file_branches fb ON f.id = fb.file_id
1013 JOIN branches b ON fb.branch_id = b.id
1014 WHERE b.name = ?
1015 GROUP BY f.language"
1016 )?;
1017 let lang_counts = stmt.query_map([branch], |row| {
1018 let language: String = row.get(0)?;
1019 let count: i64 = row.get(1)?;
1020 Ok((language, count as usize))
1021 })?;
1022
1023 for result in lang_counts {
1024 let (language, count) = result?;
1025 files_by_language.insert(language, count);
1026 }
1027 } else {
1028 let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
1030 let lang_counts = stmt.query_map([], |row| {
1031 let language: String = row.get(0)?;
1032 let count: i64 = row.get(1)?;
1033 Ok((language, count as usize))
1034 })?;
1035
1036 for result in lang_counts {
1037 let (language, count) = result?;
1038 files_by_language.insert(language, count);
1039 }
1040 }
1041
1042 let mut lines_by_language = std::collections::HashMap::new();
1044 if let Some(ref branch) = current_branch {
1045 let mut stmt = conn.prepare(
1047 "SELECT f.language, SUM(f.line_count)
1048 FROM files f
1049 JOIN file_branches fb ON f.id = fb.file_id
1050 JOIN branches b ON fb.branch_id = b.id
1051 WHERE b.name = ?
1052 GROUP BY f.language"
1053 )?;
1054 let line_counts = stmt.query_map([branch], |row| {
1055 let language: String = row.get(0)?;
1056 let count: i64 = row.get(1)?;
1057 Ok((language, count as usize))
1058 })?;
1059
1060 for result in line_counts {
1061 let (language, count) = result?;
1062 lines_by_language.insert(language, count);
1063 }
1064 } else {
1065 let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
1067 let line_counts = stmt.query_map([], |row| {
1068 let language: String = row.get(0)?;
1069 let count: i64 = row.get(1)?;
1070 Ok((language, count as usize))
1071 })?;
1072
1073 for result in line_counts {
1074 let (language, count) = result?;
1075 lines_by_language.insert(language, count);
1076 }
1077 }
1078
1079 Ok(crate::models::IndexStats {
1080 total_files,
1081 index_size_bytes,
1082 last_updated,
1083 files_by_language,
1084 lines_by_language,
1085 ..Default::default()
1086 })
1087 }
1088
1089 fn get_or_create_branch_id(&self, conn: &Connection, branch_name: &str, commit_sha: Option<&str>) -> Result<i64> {
1095 let existing_id: Option<i64> = conn
1097 .query_row(
1098 "SELECT id FROM branches WHERE name = ?",
1099 [branch_name],
1100 |row| row.get(0),
1101 )
1102 .optional()?;
1103
1104 if let Some(id) = existing_id {
1105 return Ok(id);
1106 }
1107
1108 let now = chrono::Utc::now().timestamp();
1110 conn.execute(
1111 "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1112 VALUES (?, ?, ?, 0, 0)",
1113 [branch_name, commit_sha.unwrap_or("unknown"), &now.to_string()],
1114 )?;
1115
1116 let id: i64 = conn.last_insert_rowid();
1118 Ok(id)
1119 }
1120
1121 pub fn record_branch_file(
1123 &self,
1124 path: &str,
1125 branch: &str,
1126 hash: &str,
1127 commit_sha: Option<&str>,
1128 ) -> Result<()> {
1129 let db_path = self.cache_path.join(META_DB);
1130 let conn = Connection::open(&db_path)
1131 .context("Failed to open meta.db for branch file recording")?;
1132
1133 let file_id: i64 = conn.query_row(
1135 "SELECT id FROM files WHERE path = ?",
1136 [path],
1137 |row| row.get(0)
1138 ).context(format!("File not found in index: {}", path))?;
1139
1140 let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1142
1143 let now = chrono::Utc::now().timestamp();
1144
1145 conn.execute(
1147 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1148 VALUES (?, ?, ?, ?)",
1149 rusqlite::params![file_id, branch_id, hash, now],
1150 )?;
1151
1152 Ok(())
1153 }
1154
1155 pub fn batch_record_branch_files(
1160 &self,
1161 files: &[(String, String)], branch: &str,
1163 commit_sha: Option<&str>,
1164 ) -> Result<()> {
1165 log::info!("batch_record_branch_files: Processing {} files for branch '{}'", files.len(), branch);
1166
1167 let db_path = self.cache_path.join(META_DB);
1168 let mut conn = Connection::open(&db_path)
1169 .context("Failed to open meta.db for batch branch recording")?;
1170
1171 let now = chrono::Utc::now().timestamp();
1172
1173 let tx = conn.transaction()?;
1175
1176 let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1178 log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1179
1180 let mut inserted = 0;
1181 for (path, hash) in files {
1182 log::trace!("Looking up file_id for path: {}", path);
1184 let file_id: i64 = tx.query_row(
1185 "SELECT id FROM files WHERE path = ?",
1186 [path.as_str()],
1187 |row| row.get(0)
1188 ).context(format!("File not found in index: {}", path))?;
1189 log::trace!("Found file_id={} for path: {}", file_id, path);
1190
1191 tx.execute(
1193 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1194 VALUES (?, ?, ?, ?)",
1195 rusqlite::params![file_id, branch_id, hash.as_str(), now],
1196 )?;
1197 inserted += 1;
1198 }
1199
1200 log::info!("Inserted {} file_branches entries", inserted);
1201 tx.commit()?;
1202 log::info!("Transaction committed successfully");
1203 Ok(())
1204 }
1205
1206 pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1210 let db_path = self.cache_path.join(META_DB);
1211
1212 if !db_path.exists() {
1213 return Ok(HashMap::new());
1214 }
1215
1216 let conn = Connection::open(&db_path)
1217 .context("Failed to open meta.db")?;
1218
1219 let mut stmt = conn.prepare(
1220 "SELECT f.path, fb.hash
1221 FROM file_branches fb
1222 JOIN files f ON fb.file_id = f.id
1223 JOIN branches b ON fb.branch_id = b.id
1224 WHERE b.name = ?"
1225 )?;
1226 let files: HashMap<String, String> = stmt
1227 .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1228 .collect::<Result<HashMap<_, _>, _>>()?;
1229
1230 log::debug!(
1231 "Loaded {} files for branch '{}' from file_branches table",
1232 files.len(),
1233 branch
1234 );
1235 Ok(files)
1236 }
1237
1238 pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1242 let db_path = self.cache_path.join(META_DB);
1243
1244 if !db_path.exists() {
1245 return Ok(false);
1246 }
1247
1248 let conn = Connection::open(&db_path)
1249 .context("Failed to open meta.db")?;
1250
1251 let count: i64 = conn
1252 .query_row(
1253 "SELECT COUNT(*)
1254 FROM file_branches fb
1255 JOIN branches b ON fb.branch_id = b.id
1256 WHERE b.name = ?
1257 LIMIT 1",
1258 [branch],
1259 |row| row.get(0),
1260 )
1261 .unwrap_or(0);
1262
1263 Ok(count > 0)
1264 }
1265
1266 pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1268 let db_path = self.cache_path.join(META_DB);
1269
1270 if !db_path.exists() {
1271 anyhow::bail!("Database not initialized");
1272 }
1273
1274 let conn = Connection::open(&db_path)
1275 .context("Failed to open meta.db")?;
1276
1277 let info = conn.query_row(
1278 "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1279 [branch],
1280 |row| {
1281 Ok(BranchInfo {
1282 branch: branch.to_string(),
1283 commit_sha: row.get(0)?,
1284 last_indexed: row.get(1)?,
1285 file_count: row.get(2)?,
1286 is_dirty: row.get::<_, i64>(3)? != 0,
1287 })
1288 },
1289 )?;
1290
1291 Ok(info)
1292 }
1293
1294 pub fn update_branch_metadata(
1299 &self,
1300 branch: &str,
1301 commit_sha: Option<&str>,
1302 file_count: usize,
1303 is_dirty: bool,
1304 ) -> Result<()> {
1305 let db_path = self.cache_path.join(META_DB);
1306 let conn = Connection::open(&db_path)
1307 .context("Failed to open meta.db for branch metadata update")?;
1308
1309 let now = chrono::Utc::now().timestamp();
1310 let is_dirty_int = if is_dirty { 1 } else { 0 };
1311
1312 let rows_updated = conn.execute(
1314 "UPDATE branches
1315 SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1316 WHERE name = ?",
1317 rusqlite::params![
1318 commit_sha.unwrap_or("unknown"),
1319 now,
1320 file_count,
1321 is_dirty_int,
1322 branch
1323 ],
1324 )?;
1325
1326 if rows_updated == 0 {
1328 conn.execute(
1329 "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1330 VALUES (?, ?, ?, ?, ?)",
1331 rusqlite::params![
1332 branch,
1333 commit_sha.unwrap_or("unknown"),
1334 now,
1335 file_count,
1336 is_dirty_int
1337 ],
1338 )?;
1339 }
1340
1341 log::debug!(
1342 "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1343 branch,
1344 commit_sha.unwrap_or("unknown"),
1345 file_count,
1346 is_dirty
1347 );
1348 Ok(())
1349 }
1350
1351 pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1356 let db_path = self.cache_path.join(META_DB);
1357
1358 if !db_path.exists() {
1359 return Ok(None);
1360 }
1361
1362 let conn = Connection::open(&db_path)
1363 .context("Failed to open meta.db")?;
1364
1365 let result = conn
1366 .query_row(
1367 "SELECT f.path, b.name
1368 FROM file_branches fb
1369 JOIN files f ON fb.file_id = f.id
1370 JOIN branches b ON fb.branch_id = b.id
1371 WHERE fb.hash = ?
1372 LIMIT 1",
1373 [hash],
1374 |row| Ok((row.get(0)?, row.get(1)?)),
1375 )
1376 .optional()?;
1377
1378 Ok(result)
1379 }
1380
1381 pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1385 let db_path = self.cache_path.join(META_DB);
1386
1387 if !db_path.exists() {
1388 return Ok(None);
1389 }
1390
1391 let conn = Connection::open(&db_path)
1392 .context("Failed to open meta.db")?;
1393
1394 let result = conn
1395 .query_row(
1396 "SELECT id FROM files WHERE path = ?",
1397 [path],
1398 |row| row.get(0),
1399 )
1400 .optional()?;
1401
1402 Ok(result)
1403 }
1404
1405 pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1412 let db_path = self.cache_path.join(META_DB);
1413
1414 if !db_path.exists() {
1415 return Ok(HashMap::new());
1416 }
1417
1418 let conn = Connection::open(&db_path)
1419 .context("Failed to open meta.db")?;
1420
1421 const BATCH_SIZE: usize = 900;
1424
1425 let mut results = HashMap::new();
1426
1427 for chunk in paths.chunks(BATCH_SIZE) {
1428 let placeholders = chunk.iter()
1430 .map(|_| "?")
1431 .collect::<Vec<_>>()
1432 .join(", ");
1433
1434 let query = format!("SELECT path, id FROM files WHERE path IN ({})", placeholders);
1435
1436 let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1437 let mut stmt = conn.prepare(&query)?;
1438
1439 let chunk_results = stmt.query_map(rusqlite::params_from_iter(params), |row| {
1440 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1441 })?
1442 .collect::<Result<HashMap<_, _>, _>>()?;
1443
1444 results.extend(chunk_results);
1445 }
1446
1447 log::debug!("Batch loaded {} file IDs (out of {} requested, {} chunks)",
1448 results.len(), paths.len(), paths.len().div_ceil(BATCH_SIZE));
1449 Ok(results)
1450 }
1451
1452 pub fn should_compact(&self) -> Result<bool> {
1459 let db_path = self.cache_path.join(META_DB);
1460
1461 if !db_path.exists() {
1462 return Ok(false);
1464 }
1465
1466 let conn = Connection::open(&db_path)
1467 .context("Failed to open meta.db for compaction check")?;
1468
1469 let last_compaction: i64 = conn
1471 .query_row(
1472 "SELECT value FROM statistics WHERE key = 'last_compaction'",
1473 [],
1474 |row| {
1475 let value: String = row.get(0)?;
1476 Ok(value.parse::<i64>().unwrap_or(0))
1477 },
1478 )
1479 .unwrap_or(0);
1480
1481 let now = chrono::Utc::now().timestamp();
1483
1484 const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1486
1487 let elapsed_secs = now - last_compaction;
1488 let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1489
1490 log::debug!(
1491 "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1492 last_compaction,
1493 now,
1494 elapsed_secs,
1495 should_run
1496 );
1497
1498 Ok(should_run)
1499 }
1500
1501 pub fn update_compaction_timestamp(&self) -> Result<()> {
1505 let db_path = self.cache_path.join(META_DB);
1506 let conn = Connection::open(&db_path)
1507 .context("Failed to open meta.db for compaction timestamp update")?;
1508
1509 let now = chrono::Utc::now().timestamp();
1510
1511 conn.execute(
1512 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1513 ["last_compaction", &now.to_string(), &now.to_string()],
1514 )?;
1515
1516 log::debug!("Updated last_compaction timestamp to: {}", now);
1517 Ok(())
1518 }
1519
1520 pub fn compact(&self) -> Result<crate::models::CompactionReport> {
1531 let start_time = std::time::Instant::now();
1532 log::info!("Starting cache compaction...");
1533
1534 let size_before = self.calculate_cache_size()?;
1536
1537 let deleted_files = self.identify_deleted_files()?;
1539 log::info!("Found {} deleted files to remove from cache", deleted_files.len());
1540
1541 if deleted_files.is_empty() {
1542 log::info!("No deleted files to compact - cache is clean");
1543 self.update_compaction_timestamp()?;
1545
1546 return Ok(crate::models::CompactionReport {
1547 files_removed: 0,
1548 space_saved_bytes: 0,
1549 duration_ms: start_time.elapsed().as_millis() as u64,
1550 });
1551 }
1552
1553 self.delete_files_from_db(&deleted_files)?;
1555 log::info!("Deleted {} files from database", deleted_files.len());
1556
1557 self.vacuum_database()?;
1559 log::info!("Completed VACUUM operation");
1560
1561 let size_after = self.calculate_cache_size()?;
1563 let space_saved = size_before.saturating_sub(size_after);
1564
1565 self.update_compaction_timestamp()?;
1567
1568 let duration_ms = start_time.elapsed().as_millis() as u64;
1569
1570 log::info!(
1571 "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
1572 deleted_files.len(),
1573 space_saved,
1574 space_saved as f64 / 1_048_576.0,
1575 duration_ms
1576 );
1577
1578 Ok(crate::models::CompactionReport {
1579 files_removed: deleted_files.len(),
1580 space_saved_bytes: space_saved,
1581 duration_ms,
1582 })
1583 }
1584
1585 fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1589 let db_path = self.cache_path.join(META_DB);
1590 let conn = Connection::open(&db_path)
1591 .context("Failed to open meta.db for deleted file identification")?;
1592
1593 let workspace_root = self.workspace_root();
1594
1595 let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1597 let files = stmt.query_map([], |row| {
1598 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1599 })?
1600 .collect::<Result<Vec<_>, _>>()?;
1601
1602 log::debug!("Checking {} files for deletion status", files.len());
1603
1604 let mut deleted_file_ids = Vec::new();
1606 for (file_id, file_path) in files {
1607 let full_path = workspace_root.join(&file_path);
1608 if !full_path.exists() {
1609 log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1610 deleted_file_ids.push(file_id);
1611 }
1612 }
1613
1614 Ok(deleted_file_ids)
1615 }
1616
1617 fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1624 if file_ids.is_empty() {
1625 return Ok(());
1626 }
1627
1628 let db_path = self.cache_path.join(META_DB);
1629 let mut conn = Connection::open(&db_path)
1630 .context("Failed to open meta.db for file deletion")?;
1631
1632 let tx = conn.transaction()?;
1633
1634 const BATCH_SIZE: usize = 900;
1636
1637 for chunk in file_ids.chunks(BATCH_SIZE) {
1638 let placeholders = chunk.iter()
1639 .map(|_| "?")
1640 .collect::<Vec<_>>()
1641 .join(", ");
1642
1643 let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1644
1645 let params: Vec<i64> = chunk.to_vec();
1646 tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1647 }
1648
1649 tx.commit()?;
1650 log::debug!("Deleted {} files from database (CASCADE handled related tables)", file_ids.len());
1651 Ok(())
1652 }
1653
1654 fn vacuum_database(&self) -> Result<()> {
1659 let db_path = self.cache_path.join(META_DB);
1660 let conn = Connection::open(&db_path)
1661 .context("Failed to open meta.db for VACUUM")?;
1662
1663 conn.execute("VACUUM", [])?;
1666
1667 log::debug!("VACUUM completed successfully");
1668 Ok(())
1669 }
1670
1671 fn calculate_cache_size(&self) -> Result<u64> {
1679 let mut total_size: u64 = 0;
1680
1681 for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
1682 let file_path = self.cache_path.join(file_name);
1683 if let Ok(metadata) = std::fs::metadata(&file_path) {
1684 total_size += metadata.len();
1685 }
1686 }
1687
1688 Ok(total_size)
1689 }
1690}
1691
/// Metadata stored for one indexed branch, as read from the `branches` table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchInfo {
    /// Branch name the row was looked up by.
    pub branch: String,
    /// Value of the `commit_sha` column.
    pub commit_sha: String,
    /// Value of the `last_indexed` column (a Unix timestamp in seconds when
    /// written by `update_branch_metadata`).
    pub last_indexed: i64,
    /// Value of the `file_count` column.
    pub file_count: usize,
    /// `true` when the stored `is_dirty` integer is non-zero.
    pub is_dirty: bool,
}
1701
#[cfg(test)]
mod tests {
    //! Unit tests for `CacheManager`, each running against a fresh temporary
    //! directory.

    use super::*;
    use tempfile::TempDir;

    /// Creates a `CacheManager` rooted in a fresh temp dir without calling
    /// `init()`. The `TempDir` must be kept alive for the duration of the test.
    fn uninitialized_cache() -> (TempDir, CacheManager) {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        (temp, cache)
    }

    /// Same as `uninitialized_cache`, but with `init()` already applied.
    fn initialized_cache() -> (TempDir, CacheManager) {
        let (temp, cache) = uninitialized_cache();
        cache.init().unwrap();
        (temp, cache)
    }

    /// Replaces the file at `path` with `bytes`. The handle is closed before
    /// returning so later validation never races an open writer.
    fn overwrite_with(path: &Path, bytes: &[u8]) {
        use std::io::Write;

        let mut file = File::create(path).unwrap();
        file.write_all(bytes).unwrap();
    }

    #[test]
    fn test_cache_init() {
        let (_temp, cache) = uninitialized_cache();

        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());

        assert!(cache.path().join(META_DB).exists());
        assert!(cache.path().join(CONFIG_TOML).exists());
    }

    #[test]
    fn test_cache_init_idempotent() {
        let (_temp, cache) = uninitialized_cache();

        cache.init().unwrap();
        // A second init must succeed on an already-initialized cache.
        cache.init().unwrap();

        assert!(cache.exists());
    }

    #[test]
    fn test_cache_clear() {
        let (_temp, cache) = initialized_cache();
        assert!(cache.exists());

        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let (_temp, cache) = uninitialized_cache();

        assert!(!cache.exists());
        // Clearing a cache that was never created must not fail.
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_load_all_hashes_empty() {
        let (_temp, cache) = initialized_cache();

        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let (_temp, cache) = uninitialized_cache();

        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let (_temp, cache) = initialized_cache();

        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_update_file() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.update_file("README.md", "markdown", 50).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/main.rs", "rust", 150).unwrap();

        // Updating the same path twice must not create a second row.
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let (_temp, cache) = initialized_cache();

        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];

        cache.batch_update_files(&files).unwrap();

        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    #[test]
    fn test_update_stats() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        cache.record_branch_file("src/main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("src/lib.rs", "_default", "hash2", None).unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let (_temp, cache) = initialized_cache();

        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let (_temp, cache) = uninitialized_cache();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("main.rs", "Rust", 100).unwrap();
        cache.update_file("lib.rs", "Rust", 200).unwrap();
        cache.update_file("script.py", "Python", 50).unwrap();
        cache.update_file("test.py", "Python", 80).unwrap();

        cache.record_branch_file("main.rs", "_default", "hash1", None).unwrap();
        cache.record_branch_file("lib.rs", "_default", "hash2", None).unwrap();
        cache.record_branch_file("script.py", "_default", "hash3", None).unwrap();
        cache.record_branch_file("test.py", "_default", "hash4", None).unwrap();
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300));
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130));
    }

    #[test]
    fn test_list_files_empty() {
        let (_temp, cache) = initialized_cache();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_list_files() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // The listing is expected in path order, not insertion order.
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let (_temp, cache) = uninitialized_cache();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_branch_exists() {
        let (_temp, cache) = initialized_cache();

        assert!(!cache.branch_exists("main").unwrap());

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let (_temp, cache) = initialized_cache();

        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let (_temp, cache) = initialized_cache();

        // The files must exist in the index before they can be tied to a branch.
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];

        cache.batch_record_branch_files(&files, "main", Some("commit123")).unwrap();

        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
    }

    #[test]
    fn test_update_branch_metadata() {
        let (_temp, cache) = initialized_cache();

        cache.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();

        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert!(!info.is_dirty);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let (_temp, cache) = initialized_cache();

        cache.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();

        let info = cache.get_branch_info("feature").unwrap();
        assert!(info.is_dirty);
    }

    #[test]
    fn test_find_file_with_hash() {
        let (_temp, cache) = initialized_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();

        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let (_temp, cache) = initialized_cache();

        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_config_toml_created() {
        let (_temp, cache) = initialized_cache();

        let config_path = cache.path().join(CONFIG_TOML);
        let config_content = std::fs::read_to_string(&config_path).unwrap();

        assert!(config_content.contains("[index]"));
        assert!(config_content.contains("[search]"));
        assert!(config_content.contains("[performance]"));
        assert!(config_content.contains("max_file_size"));
    }

    #[test]
    fn test_meta_db_schema() {
        let (_temp, cache) = initialized_cache();

        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();

        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'").unwrap()
            .query_map([], |row| row.get(0)).unwrap()
            .collect::<Result<Vec<_>, _>>().unwrap();

        assert!(tables.contains(&"files".to_string()));
        assert!(tables.contains(&"statistics".to_string()));
        assert!(tables.contains(&"config".to_string()));
        assert!(tables.contains(&"file_branches".to_string()));
        assert!(tables.contains(&"branches".to_string()));
        assert!(tables.contains(&"file_dependencies".to_string()));
        assert!(tables.contains(&"file_exports".to_string()));
    }

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let (temp, _cache) = initialized_cache();
        let cache_path = temp.path().to_path_buf();

        // Each thread builds its own CacheManager over the same directory.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = cache_path.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(
                            &format!("file_{}.rs", i),
                            "rust",
                            i * 10,
                        )
                        .unwrap();
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }

        let cache = CacheManager::new(&cache_path);
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    #[test]
    fn test_validate_corrupted_database() {
        let (_temp, cache) = initialized_cache();

        let db_path = cache.path().join(META_DB);
        overwrite_with(&db_path, b"CORRUPTED DATA");

        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("corrupted") || err_msg.contains("not a database"),
            "unexpected error message: {}",
            err_msg
        );
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        let (_temp, cache) = initialized_cache();

        let trigrams_path = cache.path().join("trigrams.bin");
        overwrite_with(&trigrams_path, b"BADM");

        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        let (_temp, cache) = initialized_cache();

        let content_path = cache.path().join("content.bin");
        overwrite_with(&content_path, b"BADM");

        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let (_temp, cache) = initialized_cache();

        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}