1use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
/// Name of the cache directory that `CacheManager::new` joins onto the workspace root.
pub const CACHE_DIR: &str = ".reflex";

/// File name of the SQLite metadata database inside the cache directory.
pub const META_DB: &str = "meta.db";
/// File name counted toward the on-disk index size in `CacheManager::stats`.
pub const TOKENS_BIN: &str = "tokens.bin";
/// NOTE(review): not referenced by the visible code; presumably a legacy hash store — confirm.
pub const HASHES_JSON: &str = "hashes.json";
/// File name of the TOML configuration written by `init_config_toml`.
pub const CONFIG_TOML: &str = "config.toml";
26
/// Handle to the on-disk cache directory of a workspace.
///
/// Holds only the cache directory path; the visible methods open a new SQLite
/// connection to `meta.db` each time they are called.
#[derive(Clone)]
pub struct CacheManager {
    /// Path of the cache directory (`<root>/.reflex`).
    cache_path: PathBuf,
}
32
33impl CacheManager {
34 pub fn new(root: impl AsRef<Path>) -> Self {
36 let cache_path = root.as_ref().join(CACHE_DIR);
37 Self { cache_path }
38 }
39
40 pub fn init(&self) -> Result<()> {
42 log::info!("Initializing cache at {:?}", self.cache_path);
43
44 if !self.cache_path.exists() {
45 std::fs::create_dir_all(&self.cache_path)?;
46 }
47
48 self.init_meta_db()?;
50
51 self.init_config_toml()?;
53
54 log::info!("Cache initialized successfully");
58 Ok(())
59 }
60
61 fn init_meta_db(&self) -> Result<()> {
63 let db_path = self.cache_path.join(META_DB);
64
65 if db_path.exists() {
67 return Ok(());
68 }
69
70 let conn = Connection::open(&db_path).context("Failed to create meta.db")?;
71
72 conn.execute(
74 "CREATE TABLE IF NOT EXISTS files (
75 id INTEGER PRIMARY KEY AUTOINCREMENT,
76 path TEXT NOT NULL UNIQUE,
77 last_indexed INTEGER NOT NULL,
78 language TEXT NOT NULL,
79 token_count INTEGER DEFAULT 0,
80 line_count INTEGER DEFAULT 0
81 )",
82 [],
83 )?;
84
85 conn.execute(
86 "CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)",
87 [],
88 )?;
89
90 conn.execute(
92 "CREATE TABLE IF NOT EXISTS statistics (
93 key TEXT PRIMARY KEY,
94 value TEXT NOT NULL,
95 updated_at INTEGER NOT NULL
96 )",
97 [],
98 )?;
99
100 let now = chrono::Utc::now().timestamp();
102 conn.execute(
103 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
104 ["total_files", "0", &now.to_string()],
105 )?;
106 conn.execute(
107 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
108 ["cache_version", "1", &now.to_string()],
109 )?;
110
111 let schema_hash = env!("CACHE_SCHEMA_HASH");
114 conn.execute(
115 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
116 ["schema_hash", schema_hash, &now.to_string()],
117 )?;
118
119 conn.execute(
121 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
122 ["last_compaction", "0", &now.to_string()],
123 )?;
124
125 conn.execute(
127 "CREATE TABLE IF NOT EXISTS config (
128 key TEXT PRIMARY KEY,
129 value TEXT NOT NULL
130 )",
131 [],
132 )?;
133
134 conn.execute(
136 "CREATE TABLE IF NOT EXISTS file_branches (
137 file_id INTEGER NOT NULL,
138 branch_id INTEGER NOT NULL,
139 hash TEXT NOT NULL,
140 last_indexed INTEGER NOT NULL,
141 PRIMARY KEY (file_id, branch_id),
142 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
143 FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
144 )",
145 [],
146 )?;
147
148 conn.execute(
149 "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
150 [],
151 )?;
152
153 conn.execute(
154 "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
155 [],
156 )?;
157
158 conn.execute(
160 "CREATE TABLE IF NOT EXISTS branches (
161 id INTEGER PRIMARY KEY AUTOINCREMENT,
162 name TEXT NOT NULL UNIQUE,
163 commit_sha TEXT NOT NULL,
164 last_indexed INTEGER NOT NULL,
165 file_count INTEGER DEFAULT 0,
166 is_dirty INTEGER DEFAULT 0
167 )",
168 [],
169 )?;
170
171 conn.execute(
173 "CREATE TABLE IF NOT EXISTS file_dependencies (
174 id INTEGER PRIMARY KEY AUTOINCREMENT,
175 file_id INTEGER NOT NULL,
176 imported_path TEXT NOT NULL,
177 resolved_file_id INTEGER,
178 import_type TEXT NOT NULL,
179 line_number INTEGER NOT NULL,
180 imported_symbols TEXT,
181 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
182 FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
183 )",
184 [],
185 )?;
186
187 conn.execute(
188 "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
189 [],
190 )?;
191
192 conn.execute(
193 "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
194 [],
195 )?;
196
197 conn.execute(
198 "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
199 [],
200 )?;
201
202 conn.execute(
204 "CREATE TABLE IF NOT EXISTS file_exports (
205 id INTEGER PRIMARY KEY AUTOINCREMENT,
206 file_id INTEGER NOT NULL,
207 exported_symbol TEXT,
208 source_path TEXT NOT NULL,
209 resolved_source_id INTEGER,
210 line_number INTEGER NOT NULL,
211 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
212 FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
213 )",
214 [],
215 )?;
216
217 conn.execute(
218 "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
219 [],
220 )?;
221
222 conn.execute(
223 "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
224 [],
225 )?;
226
227 conn.execute(
228 "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
229 [],
230 )?;
231
232 log::debug!("Created meta.db with schema");
233 Ok(())
234 }
235
236 fn init_config_toml(&self) -> Result<()> {
238 let config_path = self.cache_path.join(CONFIG_TOML);
239
240 if config_path.exists() {
241 return Ok(());
242 }
243
244 let default_config = r#"[index]
245languages = [] # Empty = all supported languages
246max_file_size = 10485760 # 10 MB
247follow_symlinks = false
248
249[index.include]
250patterns = []
251
252[index.exclude]
253patterns = []
254
255[search]
256default_limit = 100
257fuzzy_threshold = 0.8
258
259[performance]
260parallel_threads = 0 # 0 = auto (80% of available cores), or set a specific number
261compression_level = 3 # zstd level
262
263[semantic]
264# Semantic query generation using LLMs
265# Translate natural language questions into rfx query commands
266provider = "openrouter" # Options: openai, anthropic, openrouter
267# model = "openai/gpt-4o-mini" # Optional: override provider default model
268# auto_execute = false # Optional: auto-execute queries without confirmation
269"#;
270
271 std::fs::write(&config_path, default_config)?;
272
273 log::debug!("Created default config.toml");
274 Ok(())
275 }
276
277 pub fn exists(&self) -> bool {
279 self.cache_path.exists() && self.cache_path.join(META_DB).exists()
280 }
281
282 pub fn validate(&self) -> Result<()> {
291 let start = std::time::Instant::now();
292
293 if !self.cache_path.exists() {
295 anyhow::bail!(
296 "Cache directory does not exist: {}",
297 self.cache_path.display()
298 );
299 }
300
301 let db_path = self.cache_path.join(META_DB);
303 if !db_path.exists() {
304 anyhow::bail!("Database file missing: {}", db_path.display());
305 }
306
307 let conn = Connection::open(&db_path)
309 .context("Failed to open meta.db - database may be corrupted")?;
310
311 let tables: Result<Vec<String>, _> = conn
313 .prepare("SELECT name FROM sqlite_master WHERE type='table'")
314 .and_then(|mut stmt| {
315 stmt.query_map([], |row| row.get(0))
316 .map(|rows| rows.collect())
317 })
318 .and_then(|result| result);
319
320 match tables {
321 Ok(table_list) => {
322 let required_tables = vec![
324 "files",
325 "statistics",
326 "config",
327 "file_branches",
328 "branches",
329 "file_dependencies",
330 "file_exports",
331 ];
332 for table in &required_tables {
333 if !table_list.iter().any(|t| t == table) {
334 anyhow::bail!("Required table '{}' missing from database schema", table);
335 }
336 }
337 }
338 Err(e) => {
339 anyhow::bail!("Failed to read database schema: {}", e);
340 }
341 }
342
343 let integrity_result: String =
346 conn.query_row("PRAGMA quick_check", [], |row| row.get(0))?;
347
348 if integrity_result != "ok" {
349 log::warn!("Database integrity check failed: {}", integrity_result);
350 anyhow::bail!(
351 "Database integrity check failed: {}. Cache may be corrupted. \
352 Run 'rfx index' to rebuild cache.",
353 integrity_result
354 );
355 }
356
357 let trigrams_path = self.cache_path.join("trigrams.bin");
359 if trigrams_path.exists() {
360 use std::io::Read;
361
362 match File::open(&trigrams_path) {
363 Ok(mut file) => {
364 let mut header = [0u8; 4];
365 match file.read_exact(&mut header) {
366 Ok(_) => {
367 if &header != b"RFTG" {
369 log::warn!(
370 "trigrams.bin has invalid magic bytes - may be corrupted"
371 );
372 anyhow::bail!(
373 "trigrams.bin appears to be corrupted (invalid magic bytes)"
374 );
375 }
376 }
377 Err(_) => {
378 anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
379 }
380 }
381 }
382 Err(e) => {
383 anyhow::bail!("Failed to open trigrams.bin: {}", e);
384 }
385 }
386 }
387
388 let content_path = self.cache_path.join("content.bin");
390 if content_path.exists() {
391 use std::io::Read;
392
393 match File::open(&content_path) {
394 Ok(mut file) => {
395 let mut header = [0u8; 4];
396 match file.read_exact(&mut header) {
397 Ok(_) => {
398 if &header != b"RFCT" {
400 log::warn!(
401 "content.bin has invalid magic bytes - may be corrupted"
402 );
403 anyhow::bail!(
404 "content.bin appears to be corrupted (invalid magic bytes)"
405 );
406 }
407 }
408 Err(_) => {
409 anyhow::bail!("content.bin is too small - appears to be corrupted");
410 }
411 }
412 }
413 Err(e) => {
414 anyhow::bail!("Failed to open content.bin: {}", e);
415 }
416 }
417 }
418
419 let current_schema_hash = env!("CACHE_SCHEMA_HASH");
421
422 let stored_schema_hash: Option<String> = conn
423 .query_row(
424 "SELECT value FROM statistics WHERE key = 'schema_hash'",
425 [],
426 |row| row.get(0),
427 )
428 .optional()?;
429
430 if let Some(stored_hash) = stored_schema_hash {
431 if stored_hash != current_schema_hash {
432 log::warn!(
433 "Cache schema hash mismatch! Stored: {}, Current: {}",
434 stored_hash,
435 current_schema_hash
436 );
437 anyhow::bail!(
438 "Cache schema version mismatch.\n\
439 \n\
440 - Cache was built with version {}\n\
441 - Current binary expects version {}\n\
442 \n\
443 The cache format may be incompatible with this version of Reflex.\n\
444 Please rebuild the index by running:\n\
445 \n\
446 rfx index\n\
447 \n\
448 This usually happens after upgrading Reflex or making code changes.",
449 stored_hash,
450 current_schema_hash
451 );
452 }
453 } else {
454 log::debug!(
455 "No schema_hash found in cache - this cache was created before automatic invalidation was implemented"
456 );
457 }
460
461 let elapsed = start.elapsed();
462 log::debug!(
463 "Cache validation passed (schema hash: {}, took {:?})",
464 current_schema_hash,
465 elapsed
466 );
467 Ok(())
468 }
469
470 pub fn path(&self) -> &Path {
472 &self.cache_path
473 }
474
475 pub fn workspace_root(&self) -> PathBuf {
477 self.cache_path
478 .parent()
479 .expect(".reflex directory should have a parent")
480 .to_path_buf()
481 }
482
483 pub fn load_index_config(&self) -> Result<crate::models::IndexConfig> {
489 use crate::models::{IndexConfig, Language};
490
491 let config_path = self.cache_path.join(CONFIG_TOML);
492 if !config_path.exists() {
493 return Ok(IndexConfig::default());
494 }
495
496 let raw = std::fs::read_to_string(&config_path)
497 .with_context(|| format!("Failed to read {}", config_path.display()))?;
498
499 let toml_val: toml::Value = toml::from_str(&raw)
500 .with_context(|| format!("Failed to parse {}", config_path.display()))?;
501
502 let mut cfg = IndexConfig::default();
503
504 if let Some(index_tbl) = toml_val.get("index") {
505 if let Some(langs) = index_tbl.get("languages").and_then(|v| v.as_array()) {
506 let parsed: Vec<Language> = langs
507 .iter()
508 .filter_map(|v| v.as_str())
509 .filter_map(|s| {
510 Language::from_name(s).or_else(|| {
511 log::warn!(
512 "Unknown language '{}' in config.toml [index] section — ignoring",
513 s
514 );
515 None
516 })
517 })
518 .collect();
519 if !parsed.is_empty() {
520 cfg.languages = parsed;
521 }
522 }
523 if let Some(max_size) = index_tbl.get("max_file_size").and_then(|v| v.as_integer()) {
524 cfg.max_file_size = max_size as usize;
525 }
526 if let Some(follow) = index_tbl.get("follow_symlinks").and_then(|v| v.as_bool()) {
527 cfg.follow_symlinks = follow;
528 }
529 if let Some(include) = index_tbl
530 .get("include")
531 .and_then(|v| v.get("patterns"))
532 .and_then(|v| v.as_array())
533 {
534 cfg.include_patterns = include
535 .iter()
536 .filter_map(|v| v.as_str().map(String::from))
537 .collect();
538 }
539 if let Some(exclude) = index_tbl
540 .get("exclude")
541 .and_then(|v| v.get("patterns"))
542 .and_then(|v| v.as_array())
543 {
544 cfg.exclude_patterns = exclude
545 .iter()
546 .filter_map(|v| v.as_str().map(String::from))
547 .collect();
548 }
549 }
550
551 if let Some(perf) = toml_val.get("performance") {
552 if let Some(threads) = perf.get("parallel_threads").and_then(|v| v.as_integer()) {
553 cfg.parallel_threads = threads as usize;
554 }
555 }
556
557 log::debug!("Loaded IndexConfig from config.toml: {:?}", cfg);
558 Ok(cfg)
559 }
560
561 pub fn clear(&self) -> Result<()> {
563 log::info!("Clearing cache at {:?}", self.cache_path);
564
565 if self.cache_path.exists() {
566 std::fs::remove_dir_all(&self.cache_path)?;
567 }
568
569 Ok(())
570 }
571
572 pub fn checkpoint_wal(&self) -> Result<()> {
580 let db_path = self.cache_path.join(META_DB);
581
582 if !db_path.exists() {
583 return Ok(());
585 }
586
587 let conn =
588 Connection::open(&db_path).context("Failed to open meta.db for WAL checkpoint")?;
589
590 conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
594 let busy: i64 = row.get(0)?;
595 let log_pages: i64 = row.get(1)?;
596 let checkpointed: i64 = row.get(2)?;
597 log::debug!(
598 "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
599 busy,
600 log_pages,
601 checkpointed
602 );
603 Ok(())
604 })
605 .context("Failed to execute WAL checkpoint")?;
606
607 log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
608 Ok(())
609 }
610
611 pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
616 let db_path = self.cache_path.join(META_DB);
617
618 if !db_path.exists() {
619 return Ok(HashMap::new());
620 }
621
622 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
623
624 let mut stmt = conn.prepare(
628 "SELECT f.path, fb.hash
629 FROM file_branches fb
630 JOIN files f ON fb.file_id = f.id",
631 )?;
632 let hashes: HashMap<String, String> = stmt
633 .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
634 .collect::<Result<HashMap<_, _>, _>>()?;
635
636 log::debug!(
637 "Loaded {} file hashes across all branches from SQLite",
638 hashes.len()
639 );
640 Ok(hashes)
641 }
642
643 pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
648 let db_path = self.cache_path.join(META_DB);
649
650 if !db_path.exists() {
651 return Ok(HashMap::new());
652 }
653
654 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
655
656 let mut stmt = conn.prepare(
658 "SELECT f.path, fb.hash
659 FROM file_branches fb
660 JOIN files f ON fb.file_id = f.id
661 JOIN branches b ON fb.branch_id = b.id
662 WHERE b.name = ?",
663 )?;
664 let hashes: HashMap<String, String> = stmt
665 .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
666 .collect::<Result<HashMap<_, _>, _>>()?;
667
668 log::debug!(
669 "Loaded {} file hashes for branch '{}' from SQLite",
670 hashes.len(),
671 branch
672 );
673 Ok(hashes)
674 }
675
/// No-op: ignores `_hashes` and always returns `Ok(())`.
///
/// Per the deprecation note, hashes are stored in the `file_branches` table via
/// `record_branch_file()` instead.
#[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
    Ok(())
}
685
686 pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
691 let db_path = self.cache_path.join(META_DB);
692 let conn = Connection::open(&db_path).context("Failed to open meta.db for file update")?;
693
694 let now = chrono::Utc::now().timestamp();
695
696 conn.execute(
697 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
698 VALUES (?, ?, ?, ?)",
699 [path, &now.to_string(), language, &line_count.to_string()],
700 )?;
701
702 Ok(())
703 }
704
705 pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
710 let db_path = self.cache_path.join(META_DB);
711 let mut conn =
712 Connection::open(&db_path).context("Failed to open meta.db for batch update")?;
713
714 let now = chrono::Utc::now().timestamp();
715 let now_str = now.to_string();
716
717 let tx = conn.transaction()?;
719
720 for (path, language, line_count) in files {
721 tx.execute(
722 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
723 VALUES (?, ?, ?, ?)",
724 [
725 path.as_str(),
726 &now_str,
727 language.as_str(),
728 &line_count.to_string(),
729 ],
730 )?;
731 }
732
733 tx.commit()?;
734 Ok(())
735 }
736
737 pub fn batch_update_files_and_branch(
742 &self,
743 files: &[(String, String, usize)], branch_files: &[(String, String)], branch: &str,
746 commit_sha: Option<&str>,
747 ) -> Result<()> {
748 log::info!(
749 "batch_update_files_and_branch: Processing {} files for branch '{}'",
750 files.len(),
751 branch
752 );
753
754 let db_path = self.cache_path.join(META_DB);
755 let mut conn = Connection::open(&db_path)
756 .context("Failed to open meta.db for batch update and branch recording")?;
757
758 let now = chrono::Utc::now().timestamp();
759 let now_str = now.to_string();
760
761 let tx = conn.transaction()?;
763
764 for (path, language, line_count) in files {
766 tx.execute(
767 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
768 VALUES (?, ?, ?, ?)",
769 [
770 path.as_str(),
771 &now_str,
772 language.as_str(),
773 &line_count.to_string(),
774 ],
775 )?;
776 }
777 log::info!("Inserted {} files into files table", files.len());
778
779 let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
781 log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
782
783 let mut inserted = 0;
785 for (path, hash) in branch_files {
786 let file_id: i64 = tx
788 .query_row(
789 "SELECT id FROM files WHERE path = ?",
790 [path.as_str()],
791 |row| row.get(0),
792 )
793 .context(format!("File not found in index after insert: {}", path))?;
794
795 tx.execute(
797 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
798 VALUES (?, ?, ?, ?)",
799 rusqlite::params![file_id, branch_id, hash.as_str(), now],
800 )?;
801 inserted += 1;
802 }
803 log::info!("Inserted {} file_branches entries", inserted);
804
805 tx.commit()?;
807 log::info!("Transaction committed successfully (files + file_branches)");
808
809 let verify_conn =
812 Connection::open(&db_path).context("Failed to open meta.db for verification")?;
813
814 let actual_file_count: i64 = verify_conn.query_row(
816 "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
817 [files.len()],
818 |row| row.get(0)
819 ).unwrap_or(0);
820
821 let actual_fb_count: i64 = verify_conn
823 .query_row(
824 "SELECT COUNT(*) FROM file_branches fb
825 JOIN branches b ON fb.branch_id = b.id
826 WHERE b.name = ?",
827 [branch],
828 |row| row.get(0),
829 )
830 .unwrap_or(0);
831
832 log::info!(
833 "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
834 actual_file_count,
835 files.len(),
836 actual_fb_count,
837 branch,
838 inserted
839 );
840
841 if actual_file_count < files.len() as i64 {
843 log::warn!(
844 "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
845 files.len(),
846 actual_file_count
847 );
848 }
849 if actual_fb_count < inserted as i64 {
850 log::warn!(
851 "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
852 inserted,
853 branch,
854 actual_fb_count
855 );
856 }
857
858 Ok(())
859 }
860
861 pub fn update_stats(&self, branch: &str) -> Result<()> {
865 let db_path = self.cache_path.join(META_DB);
866 let conn = Connection::open(&db_path).context("Failed to open meta.db for stats update")?;
867
868 let total_files: usize = conn
870 .query_row(
871 "SELECT COUNT(DISTINCT fb.file_id)
872 FROM file_branches fb
873 JOIN branches b ON fb.branch_id = b.id
874 WHERE b.name = ?",
875 [branch],
876 |row| row.get(0),
877 )
878 .unwrap_or(0);
879
880 let now = chrono::Utc::now().timestamp();
881
882 conn.execute(
883 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
884 ["total_files", &total_files.to_string(), &now.to_string()],
885 )?;
886
887 log::debug!(
888 "Updated statistics for branch '{}': {} files",
889 branch,
890 total_files
891 );
892 Ok(())
893 }
894
895 pub fn check_schema_hash(&self) -> Result<bool> {
898 let db_path = self.cache_path.join(META_DB);
899 if !db_path.exists() {
900 return Ok(false);
901 }
902 let conn = Connection::open(&db_path)?;
903 let current = env!("CACHE_SCHEMA_HASH");
904 let stored: Option<String> = conn
905 .query_row(
906 "SELECT value FROM statistics WHERE key = 'schema_hash'",
907 [],
908 |row| row.get(0),
909 )
910 .optional()?;
911 Ok(stored.as_deref() == Some(current))
912 }
913
914 pub fn update_schema_hash(&self) -> Result<()> {
919 let db_path = self.cache_path.join(META_DB);
920 let conn =
921 Connection::open(&db_path).context("Failed to open meta.db for schema hash update")?;
922
923 let schema_hash = env!("CACHE_SCHEMA_HASH");
924 let now = chrono::Utc::now().timestamp();
925
926 conn.execute(
927 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
928 ["schema_hash", schema_hash, &now.to_string()],
929 )?;
930
931 log::debug!("Updated schema hash to: {}", schema_hash);
932 Ok(())
933 }
934
935 pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
937 let db_path = self.cache_path.join(META_DB);
938
939 if !db_path.exists() {
940 return Ok(Vec::new());
941 }
942
943 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
944
945 let mut stmt =
946 conn.prepare("SELECT path, language, last_indexed FROM files ORDER BY path")?;
947
948 let files = stmt
949 .query_map([], |row| {
950 let path: String = row.get(0)?;
951 let language: String = row.get(1)?;
952 let last_indexed: i64 = row.get(2)?;
953
954 Ok(IndexedFile {
955 path,
956 language,
957 last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
958 .unwrap_or_else(chrono::Utc::now)
959 .to_rfc3339(),
960 })
961 })?
962 .collect::<Result<Vec<_>, _>>()?;
963
964 Ok(files)
965 }
966
967 pub fn stats(&self) -> Result<crate::models::IndexStats> {
972 let db_path = self.cache_path.join(META_DB);
973
974 if !db_path.exists() {
975 return Ok(crate::models::IndexStats {
977 total_files: 0,
978 index_size_bytes: 0,
979 last_updated: chrono::Utc::now().to_rfc3339(),
980 files_by_language: std::collections::HashMap::new(),
981 lines_by_language: std::collections::HashMap::new(),
982 ..Default::default()
983 });
984 }
985
986 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
987
988 let workspace_root = self.workspace_root();
990 let current_branch = if crate::git::is_git_repo(&workspace_root) {
991 crate::git::get_git_state(&workspace_root)
992 .ok()
993 .map(|state| state.branch)
994 } else {
995 Some("_default".to_string())
996 };
997
998 log::debug!("stats(): current_branch = {:?}", current_branch);
999
1000 let total_files: usize = if let Some(ref branch) = current_branch {
1002 log::debug!("stats(): Counting files for branch '{}'", branch);
1003
1004 let branches: Vec<(i64, String, i64)> = conn
1006 .prepare("SELECT id, name, file_count FROM branches")
1007 .and_then(|mut stmt| {
1008 stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
1009 .map(|rows| rows.collect())
1010 })
1011 .and_then(|result| result)
1012 .unwrap_or_default();
1013
1014 for (id, name, count) in &branches {
1015 log::debug!(
1016 "stats(): Branch ID={}, Name='{}', FileCount={}",
1017 id,
1018 name,
1019 count
1020 );
1021 }
1022
1023 let fb_counts: Vec<(String, i64)> = conn
1025 .prepare(
1026 "SELECT b.name, COUNT(*) FROM file_branches fb
1027 JOIN branches b ON fb.branch_id = b.id
1028 GROUP BY b.name",
1029 )
1030 .and_then(|mut stmt| {
1031 stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
1032 .map(|rows| rows.collect())
1033 })
1034 .and_then(|result| result)
1035 .unwrap_or_default();
1036
1037 for (name, count) in &fb_counts {
1038 log::debug!(
1039 "stats(): file_branches count for branch '{}': {}",
1040 name,
1041 count
1042 );
1043 }
1044
1045 let count: usize = conn
1047 .query_row(
1048 "SELECT COUNT(DISTINCT fb.file_id)
1049 FROM file_branches fb
1050 JOIN branches b ON fb.branch_id = b.id
1051 WHERE b.name = ?",
1052 [branch],
1053 |row| row.get(0),
1054 )
1055 .unwrap_or(0);
1056
1057 log::debug!("stats(): Query returned total_files = {}", count);
1058 count
1059 } else {
1060 log::warn!("stats(): No current_branch detected!");
1062 0
1063 };
1064
1065 let last_updated: String = conn
1067 .query_row(
1068 "SELECT updated_at FROM statistics WHERE key = 'total_files'",
1069 [],
1070 |row| {
1071 let timestamp: i64 = row.get(0)?;
1072 Ok(chrono::DateTime::from_timestamp(timestamp, 0)
1073 .unwrap_or_else(chrono::Utc::now)
1074 .to_rfc3339())
1075 },
1076 )
1077 .unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());
1078
1079 let mut index_size_bytes: u64 = 0;
1081
1082 for file_name in [
1083 META_DB,
1084 TOKENS_BIN,
1085 CONFIG_TOML,
1086 "content.bin",
1087 "trigrams.bin",
1088 ] {
1089 let file_path = self.cache_path.join(file_name);
1090 if let Ok(metadata) = std::fs::metadata(&file_path) {
1091 index_size_bytes += metadata.len();
1092 }
1093 }
1094
1095 let mut files_by_language = std::collections::HashMap::new();
1097 if let Some(ref branch) = current_branch {
1098 let mut stmt = conn.prepare(
1100 "SELECT f.language, COUNT(DISTINCT f.id)
1101 FROM files f
1102 JOIN file_branches fb ON f.id = fb.file_id
1103 JOIN branches b ON fb.branch_id = b.id
1104 WHERE b.name = ?
1105 GROUP BY f.language",
1106 )?;
1107 let lang_counts = stmt.query_map([branch], |row| {
1108 let language: String = row.get(0)?;
1109 let count: i64 = row.get(1)?;
1110 Ok((language, count as usize))
1111 })?;
1112
1113 for result in lang_counts {
1114 let (language, count) = result?;
1115 files_by_language.insert(language, count);
1116 }
1117 } else {
1118 let mut stmt =
1120 conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
1121 let lang_counts = stmt.query_map([], |row| {
1122 let language: String = row.get(0)?;
1123 let count: i64 = row.get(1)?;
1124 Ok((language, count as usize))
1125 })?;
1126
1127 for result in lang_counts {
1128 let (language, count) = result?;
1129 files_by_language.insert(language, count);
1130 }
1131 }
1132
1133 let mut lines_by_language = std::collections::HashMap::new();
1135 if let Some(ref branch) = current_branch {
1136 let mut stmt = conn.prepare(
1138 "SELECT f.language, SUM(f.line_count)
1139 FROM files f
1140 JOIN file_branches fb ON f.id = fb.file_id
1141 JOIN branches b ON fb.branch_id = b.id
1142 WHERE b.name = ?
1143 GROUP BY f.language",
1144 )?;
1145 let line_counts = stmt.query_map([branch], |row| {
1146 let language: String = row.get(0)?;
1147 let count: i64 = row.get(1)?;
1148 Ok((language, count as usize))
1149 })?;
1150
1151 for result in line_counts {
1152 let (language, count) = result?;
1153 lines_by_language.insert(language, count);
1154 }
1155 } else {
1156 let mut stmt =
1158 conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
1159 let line_counts = stmt.query_map([], |row| {
1160 let language: String = row.get(0)?;
1161 let count: i64 = row.get(1)?;
1162 Ok((language, count as usize))
1163 })?;
1164
1165 for result in line_counts {
1166 let (language, count) = result?;
1167 lines_by_language.insert(language, count);
1168 }
1169 }
1170
1171 Ok(crate::models::IndexStats {
1172 total_files,
1173 index_size_bytes,
1174 last_updated,
1175 files_by_language,
1176 lines_by_language,
1177 ..Default::default()
1178 })
1179 }
1180
1181 fn get_or_create_branch_id(
1187 &self,
1188 conn: &Connection,
1189 branch_name: &str,
1190 commit_sha: Option<&str>,
1191 ) -> Result<i64> {
1192 let existing_id: Option<i64> = conn
1194 .query_row(
1195 "SELECT id FROM branches WHERE name = ?",
1196 [branch_name],
1197 |row| row.get(0),
1198 )
1199 .optional()?;
1200
1201 if let Some(id) = existing_id {
1202 return Ok(id);
1203 }
1204
1205 let now = chrono::Utc::now().timestamp();
1207 conn.execute(
1208 "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1209 VALUES (?, ?, ?, 0, 0)",
1210 [
1211 branch_name,
1212 commit_sha.unwrap_or("unknown"),
1213 &now.to_string(),
1214 ],
1215 )?;
1216
1217 let id: i64 = conn.last_insert_rowid();
1219 Ok(id)
1220 }
1221
1222 pub fn record_branch_file(
1224 &self,
1225 path: &str,
1226 branch: &str,
1227 hash: &str,
1228 commit_sha: Option<&str>,
1229 ) -> Result<()> {
1230 let db_path = self.cache_path.join(META_DB);
1231 let conn = Connection::open(&db_path)
1232 .context("Failed to open meta.db for branch file recording")?;
1233
1234 let file_id: i64 = conn
1236 .query_row("SELECT id FROM files WHERE path = ?", [path], |row| {
1237 row.get(0)
1238 })
1239 .context(format!("File not found in index: {}", path))?;
1240
1241 let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1243
1244 let now = chrono::Utc::now().timestamp();
1245
1246 conn.execute(
1248 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1249 VALUES (?, ?, ?, ?)",
1250 rusqlite::params![file_id, branch_id, hash, now],
1251 )?;
1252
1253 Ok(())
1254 }
1255
1256 pub fn batch_record_branch_files(
1261 &self,
1262 files: &[(String, String)], branch: &str,
1264 commit_sha: Option<&str>,
1265 ) -> Result<()> {
1266 log::info!(
1267 "batch_record_branch_files: Processing {} files for branch '{}'",
1268 files.len(),
1269 branch
1270 );
1271
1272 let db_path = self.cache_path.join(META_DB);
1273 let mut conn = Connection::open(&db_path)
1274 .context("Failed to open meta.db for batch branch recording")?;
1275
1276 let now = chrono::Utc::now().timestamp();
1277
1278 let tx = conn.transaction()?;
1280
1281 let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1283 log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1284
1285 let mut inserted = 0;
1286 for (path, hash) in files {
1287 log::trace!("Looking up file_id for path: {}", path);
1289 let file_id: i64 = tx
1290 .query_row(
1291 "SELECT id FROM files WHERE path = ?",
1292 [path.as_str()],
1293 |row| row.get(0),
1294 )
1295 .context(format!("File not found in index: {}", path))?;
1296 log::trace!("Found file_id={} for path: {}", file_id, path);
1297
1298 tx.execute(
1300 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1301 VALUES (?, ?, ?, ?)",
1302 rusqlite::params![file_id, branch_id, hash.as_str(), now],
1303 )?;
1304 inserted += 1;
1305 }
1306
1307 log::info!("Inserted {} file_branches entries", inserted);
1308 tx.commit()?;
1309 log::info!("Transaction committed successfully");
1310 Ok(())
1311 }
1312
1313 pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1317 let db_path = self.cache_path.join(META_DB);
1318
1319 if !db_path.exists() {
1320 return Ok(HashMap::new());
1321 }
1322
1323 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1324
1325 let mut stmt = conn.prepare(
1326 "SELECT f.path, fb.hash
1327 FROM file_branches fb
1328 JOIN files f ON fb.file_id = f.id
1329 JOIN branches b ON fb.branch_id = b.id
1330 WHERE b.name = ?",
1331 )?;
1332 let files: HashMap<String, String> = stmt
1333 .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1334 .collect::<Result<HashMap<_, _>, _>>()?;
1335
1336 log::debug!(
1337 "Loaded {} files for branch '{}' from file_branches table",
1338 files.len(),
1339 branch
1340 );
1341 Ok(files)
1342 }
1343
1344 pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1348 let db_path = self.cache_path.join(META_DB);
1349
1350 if !db_path.exists() {
1351 return Ok(false);
1352 }
1353
1354 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1355
1356 let count: i64 = conn
1357 .query_row(
1358 "SELECT COUNT(*)
1359 FROM file_branches fb
1360 JOIN branches b ON fb.branch_id = b.id
1361 WHERE b.name = ?
1362 LIMIT 1",
1363 [branch],
1364 |row| row.get(0),
1365 )
1366 .unwrap_or(0);
1367
1368 Ok(count > 0)
1369 }
1370
1371 pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1373 let db_path = self.cache_path.join(META_DB);
1374
1375 if !db_path.exists() {
1376 anyhow::bail!("Database not initialized");
1377 }
1378
1379 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1380
1381 let info = conn.query_row(
1382 "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1383 [branch],
1384 |row| {
1385 Ok(BranchInfo {
1386 branch: branch.to_string(),
1387 commit_sha: row.get(0)?,
1388 last_indexed: row.get(1)?,
1389 file_count: row.get(2)?,
1390 is_dirty: row.get::<_, i64>(3)? != 0,
1391 })
1392 },
1393 )?;
1394
1395 Ok(info)
1396 }
1397
1398 pub fn update_branch_metadata(
1403 &self,
1404 branch: &str,
1405 commit_sha: Option<&str>,
1406 file_count: usize,
1407 is_dirty: bool,
1408 ) -> Result<()> {
1409 let db_path = self.cache_path.join(META_DB);
1410 let conn = Connection::open(&db_path)
1411 .context("Failed to open meta.db for branch metadata update")?;
1412
1413 let now = chrono::Utc::now().timestamp();
1414 let is_dirty_int = if is_dirty { 1 } else { 0 };
1415
1416 let rows_updated = conn.execute(
1418 "UPDATE branches
1419 SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1420 WHERE name = ?",
1421 rusqlite::params![
1422 commit_sha.unwrap_or("unknown"),
1423 now,
1424 file_count,
1425 is_dirty_int,
1426 branch
1427 ],
1428 )?;
1429
1430 if rows_updated == 0 {
1432 conn.execute(
1433 "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1434 VALUES (?, ?, ?, ?, ?)",
1435 rusqlite::params![
1436 branch,
1437 commit_sha.unwrap_or("unknown"),
1438 now,
1439 file_count,
1440 is_dirty_int
1441 ],
1442 )?;
1443 }
1444
1445 log::debug!(
1446 "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1447 branch,
1448 commit_sha.unwrap_or("unknown"),
1449 file_count,
1450 is_dirty
1451 );
1452 Ok(())
1453 }
1454
1455 pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1460 let db_path = self.cache_path.join(META_DB);
1461
1462 if !db_path.exists() {
1463 return Ok(None);
1464 }
1465
1466 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1467
1468 let result = conn
1469 .query_row(
1470 "SELECT f.path, b.name
1471 FROM file_branches fb
1472 JOIN files f ON fb.file_id = f.id
1473 JOIN branches b ON fb.branch_id = b.id
1474 WHERE fb.hash = ?
1475 LIMIT 1",
1476 [hash],
1477 |row| Ok((row.get(0)?, row.get(1)?)),
1478 )
1479 .optional()?;
1480
1481 Ok(result)
1482 }
1483
1484 pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1488 let db_path = self.cache_path.join(META_DB);
1489
1490 if !db_path.exists() {
1491 return Ok(None);
1492 }
1493
1494 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1495
1496 let result = conn
1497 .query_row("SELECT id FROM files WHERE path = ?", [path], |row| {
1498 row.get(0)
1499 })
1500 .optional()?;
1501
1502 Ok(result)
1503 }
1504
1505 pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1512 let db_path = self.cache_path.join(META_DB);
1513
1514 if !db_path.exists() {
1515 return Ok(HashMap::new());
1516 }
1517
1518 let conn = Connection::open(&db_path).context("Failed to open meta.db")?;
1519
1520 const BATCH_SIZE: usize = 900;
1523
1524 let mut results = HashMap::new();
1525
1526 for chunk in paths.chunks(BATCH_SIZE) {
1527 let placeholders = chunk.iter().map(|_| "?").collect::<Vec<_>>().join(", ");
1529
1530 let query = format!(
1531 "SELECT path, id FROM files WHERE path IN ({})",
1532 placeholders
1533 );
1534
1535 let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1536 let mut stmt = conn.prepare(&query)?;
1537
1538 let chunk_results = stmt
1539 .query_map(rusqlite::params_from_iter(params), |row| {
1540 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1541 })?
1542 .collect::<Result<HashMap<_, _>, _>>()?;
1543
1544 results.extend(chunk_results);
1545 }
1546
1547 log::debug!(
1548 "Batch loaded {} file IDs (out of {} requested, {} chunks)",
1549 results.len(),
1550 paths.len(),
1551 paths.len().div_ceil(BATCH_SIZE)
1552 );
1553 Ok(results)
1554 }
1555
1556 pub fn should_compact(&self) -> Result<bool> {
1563 let db_path = self.cache_path.join(META_DB);
1564
1565 if !db_path.exists() {
1566 return Ok(false);
1568 }
1569
1570 let conn =
1571 Connection::open(&db_path).context("Failed to open meta.db for compaction check")?;
1572
1573 let last_compaction: i64 = conn
1575 .query_row(
1576 "SELECT value FROM statistics WHERE key = 'last_compaction'",
1577 [],
1578 |row| {
1579 let value: String = row.get(0)?;
1580 Ok(value.parse::<i64>().unwrap_or(0))
1581 },
1582 )
1583 .unwrap_or(0);
1584
1585 let now = chrono::Utc::now().timestamp();
1587
1588 const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1590
1591 let elapsed_secs = now - last_compaction;
1592 let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1593
1594 log::debug!(
1595 "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1596 last_compaction,
1597 now,
1598 elapsed_secs,
1599 should_run
1600 );
1601
1602 Ok(should_run)
1603 }
1604
1605 pub fn update_compaction_timestamp(&self) -> Result<()> {
1609 let db_path = self.cache_path.join(META_DB);
1610 let conn = Connection::open(&db_path)
1611 .context("Failed to open meta.db for compaction timestamp update")?;
1612
1613 let now = chrono::Utc::now().timestamp();
1614
1615 conn.execute(
1616 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1617 ["last_compaction", &now.to_string(), &now.to_string()],
1618 )?;
1619
1620 log::debug!("Updated last_compaction timestamp to: {}", now);
1621 Ok(())
1622 }
1623
1624 pub fn compact(&self) -> Result<crate::models::CompactionReport> {
1635 let start_time = std::time::Instant::now();
1636 log::info!("Starting cache compaction...");
1637
1638 let size_before = self.calculate_cache_size()?;
1640
1641 let deleted_files = self.identify_deleted_files()?;
1643 log::info!(
1644 "Found {} deleted files to remove from cache",
1645 deleted_files.len()
1646 );
1647
1648 if deleted_files.is_empty() {
1649 log::info!("No deleted files to compact - cache is clean");
1650 self.update_compaction_timestamp()?;
1652
1653 return Ok(crate::models::CompactionReport {
1654 files_removed: 0,
1655 space_saved_bytes: 0,
1656 duration_ms: start_time.elapsed().as_millis() as u64,
1657 });
1658 }
1659
1660 self.delete_files_from_db(&deleted_files)?;
1662 log::info!("Deleted {} files from database", deleted_files.len());
1663
1664 self.vacuum_database()?;
1666 log::info!("Completed VACUUM operation");
1667
1668 let size_after = self.calculate_cache_size()?;
1670 let space_saved = size_before.saturating_sub(size_after);
1671
1672 self.update_compaction_timestamp()?;
1674
1675 let duration_ms = start_time.elapsed().as_millis() as u64;
1676
1677 log::info!(
1678 "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
1679 deleted_files.len(),
1680 space_saved,
1681 space_saved as f64 / 1_048_576.0,
1682 duration_ms
1683 );
1684
1685 Ok(crate::models::CompactionReport {
1686 files_removed: deleted_files.len(),
1687 space_saved_bytes: space_saved,
1688 duration_ms,
1689 })
1690 }
1691
1692 fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1696 let db_path = self.cache_path.join(META_DB);
1697 let conn = Connection::open(&db_path)
1698 .context("Failed to open meta.db for deleted file identification")?;
1699
1700 let workspace_root = self.workspace_root();
1701
1702 let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1704 let files = stmt
1705 .query_map([], |row| {
1706 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1707 })?
1708 .collect::<Result<Vec<_>, _>>()?;
1709
1710 log::debug!("Checking {} files for deletion status", files.len());
1711
1712 let mut deleted_file_ids = Vec::new();
1714 for (file_id, file_path) in files {
1715 let full_path = workspace_root.join(&file_path);
1716 if !full_path.exists() {
1717 log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1718 deleted_file_ids.push(file_id);
1719 }
1720 }
1721
1722 Ok(deleted_file_ids)
1723 }
1724
1725 fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1732 if file_ids.is_empty() {
1733 return Ok(());
1734 }
1735
1736 let db_path = self.cache_path.join(META_DB);
1737 let mut conn =
1738 Connection::open(&db_path).context("Failed to open meta.db for file deletion")?;
1739
1740 let tx = conn.transaction()?;
1741
1742 const BATCH_SIZE: usize = 900;
1744
1745 for chunk in file_ids.chunks(BATCH_SIZE) {
1746 let placeholders = chunk.iter().map(|_| "?").collect::<Vec<_>>().join(", ");
1747
1748 let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1749
1750 let params: Vec<i64> = chunk.to_vec();
1751 tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1752 }
1753
1754 tx.commit()?;
1755 log::debug!(
1756 "Deleted {} files from database (CASCADE handled related tables)",
1757 file_ids.len()
1758 );
1759 Ok(())
1760 }
1761
1762 fn vacuum_database(&self) -> Result<()> {
1767 let db_path = self.cache_path.join(META_DB);
1768 let conn = Connection::open(&db_path).context("Failed to open meta.db for VACUUM")?;
1769
1770 conn.execute("VACUUM", [])?;
1773
1774 log::debug!("VACUUM completed successfully");
1775 Ok(())
1776 }
1777
1778 fn calculate_cache_size(&self) -> Result<u64> {
1786 let mut total_size: u64 = 0;
1787
1788 for file_name in [
1789 META_DB,
1790 TOKENS_BIN,
1791 CONFIG_TOML,
1792 "content.bin",
1793 "trigrams.bin",
1794 ] {
1795 let file_path = self.cache_path.join(file_name);
1796 if let Ok(metadata) = std::fs::metadata(&file_path) {
1797 total_size += metadata.len();
1798 }
1799 }
1800
1801 Ok(total_size)
1802 }
1803}
1804
/// Metadata stored for one indexed branch, as read from the `branches` table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchInfo {
    /// Name of the branch this record describes.
    pub branch: String,
    /// Commit the branch pointed at when it was recorded; the writer stores
    /// the literal `"unknown"` when no commit was supplied.
    pub commit_sha: String,
    /// Unix timestamp (seconds) of the last metadata update for the branch.
    pub last_indexed: i64,
    /// Number of files recorded for the branch.
    pub file_count: usize,
    /// Dirty flag as supplied by the indexer.
    /// NOTE(review): presumably "working tree had uncommitted changes" - confirm.
    pub is_dirty: bool,
}
1814
#[cfg(test)]
mod tests {
    //! Unit tests for `CacheManager`, each running against its own temporary
    //! workspace directory.

    use super::*;
    use tempfile::TempDir;

    /// Creates a temporary workspace and a cache manager for it without
    /// initializing the cache.
    ///
    /// The `TempDir` is returned as well because dropping it deletes the
    /// directory; callers must keep it alive for the whole test.
    fn new_cache() -> (TempDir, CacheManager) {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        (temp, cache)
    }

    /// Like [`new_cache`], but with the cache already initialized.
    fn init_cache() -> (TempDir, CacheManager) {
        let (temp, cache) = new_cache();
        cache.init().unwrap();
        (temp, cache)
    }

    /// Replaces the file at `path` with `bytes`, closing it before returning
    /// so that later reads see the final content.
    fn overwrite_with(path: &Path, bytes: &[u8]) {
        use std::io::Write;

        let mut file = File::create(path).unwrap();
        file.write_all(bytes).unwrap();
    }

    #[test]
    fn test_cache_init() {
        let (_temp, cache) = new_cache();

        assert!(!cache.exists());
        cache.init().unwrap();
        assert!(cache.exists());
        assert!(cache.path().exists());

        assert!(cache.path().join(META_DB).exists());
        assert!(cache.path().join(CONFIG_TOML).exists());
    }

    #[test]
    fn test_cache_init_idempotent() {
        let (_temp, cache) = new_cache();

        // A second init on an existing cache must succeed as well.
        cache.init().unwrap();
        cache.init().unwrap();

        assert!(cache.exists());
    }

    #[test]
    fn test_cache_clear() {
        let (_temp, cache) = init_cache();
        assert!(cache.exists());

        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let (_temp, cache) = new_cache();

        assert!(!cache.exists());
        // Clearing a cache that was never created must not fail.
        cache.clear().unwrap();
        assert!(!cache.exists());
    }

    #[test]
    fn test_load_all_hashes_empty() {
        let (_temp, cache) = init_cache();

        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let (_temp, cache) = new_cache();

        // Without a database the call yields an empty result, not an error.
        let hashes = cache.load_all_hashes().unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let (_temp, cache) = init_cache();

        let hashes = cache.load_hashes_for_branch("main").unwrap();
        assert_eq!(hashes.len(), 0);
    }

    #[test]
    fn test_update_file() {
        let (_temp, cache) = init_cache();
        cache.update_file("src/main.rs", "rust", 100).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let (_temp, cache) = init_cache();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();
        cache.update_file("README.md", "markdown", 50).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let (_temp, cache) = init_cache();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/main.rs", "rust", 150).unwrap();

        // Updating the same path twice must not create a second entry.
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let (_temp, cache) = init_cache();

        let files = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];

        cache.batch_update_files(&files).unwrap();

        let stored_files = cache.list_files().unwrap();
        assert_eq!(stored_files.len(), 3);
    }

    #[test]
    fn test_update_stats() {
        let (_temp, cache) = init_cache();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        for (path, hash) in [("src/main.rs", "hash1"), ("src/lib.rs", "hash2")] {
            cache
                .record_branch_file(path, "_default", hash, None)
                .unwrap();
        }
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let (_temp, cache) = init_cache();

        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let (_temp, cache) = new_cache();

        // Without a database the stats are empty rather than an error.
        let stats = cache.stats().unwrap();
        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let (_temp, cache) = init_cache();
        cache.update_file("main.rs", "Rust", 100).unwrap();
        cache.update_file("lib.rs", "Rust", 200).unwrap();
        cache.update_file("script.py", "Python", 50).unwrap();
        cache.update_file("test.py", "Python", 80).unwrap();

        let recorded = [
            ("main.rs", "hash1"),
            ("lib.rs", "hash2"),
            ("script.py", "hash3"),
            ("test.py", "hash4"),
        ];
        for (path, hash) in recorded {
            cache
                .record_branch_file(path, "_default", hash, None)
                .unwrap();
        }
        cache.update_stats("_default").unwrap();

        let stats = cache.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        // Line counts are summed per language: 100 + 200 and 50 + 80.
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300));
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130));
    }

    #[test]
    fn test_list_files_empty() {
        let (_temp, cache) = init_cache();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_list_files() {
        let (_temp, cache) = init_cache();
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // The listing is expected in path order, not insertion order.
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let (_temp, cache) = new_cache();

        // Without a database the listing is empty rather than an error.
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_branch_exists() {
        let (_temp, cache) = init_cache();

        assert!(!cache.branch_exists("main").unwrap());

        // A branch exists once at least one file has been recorded for it.
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache
            .record_branch_file("src/main.rs", "main", "hash1", Some("commit123"))
            .unwrap();

        assert!(cache.branch_exists("main").unwrap());
        assert!(!cache.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let (_temp, cache) = init_cache();

        // The file must be present in `files` before it can be recorded.
        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache
            .record_branch_file("src/main.rs", "main", "hash1", Some("commit123"))
            .unwrap();

        let files = cache.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let (_temp, cache) = init_cache();

        let files = cache.get_branch_files("nonexistent").unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let (_temp, cache) = init_cache();

        // Register the files first so that their ids can be resolved.
        let file_metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        cache.batch_update_files(&file_metadata).unwrap();

        let files = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];

        cache
            .batch_record_branch_files(&files, "main", Some("commit123"))
            .unwrap();

        let branch_files = cache.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        for (path, hash) in &files {
            assert_eq!(branch_files.get(path), Some(hash));
        }
    }

    #[test]
    fn test_update_branch_metadata() {
        let (_temp, cache) = init_cache();
        cache
            .update_branch_metadata("main", Some("commit123"), 10, false)
            .unwrap();

        let info = cache.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert!(!info.is_dirty);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let (_temp, cache) = init_cache();
        cache
            .update_branch_metadata("feature", Some("commit456"), 5, true)
            .unwrap();

        let info = cache.get_branch_info("feature").unwrap();
        assert!(info.is_dirty);
    }

    #[test]
    fn test_find_file_with_hash() {
        let (_temp, cache) = init_cache();

        cache.update_file("src/main.rs", "rust", 100).unwrap();
        cache
            .record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123"))
            .unwrap();

        let result = cache.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let (_temp, cache) = init_cache();

        let result = cache.find_file_with_hash("nonexistent_hash").unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_config_toml_created() {
        let (_temp, cache) = init_cache();

        let config_path = cache.path().join(CONFIG_TOML);
        let config_content = std::fs::read_to_string(&config_path).unwrap();

        for expected in ["[index]", "[search]", "[performance]", "max_file_size"] {
            assert!(
                config_content.contains(expected),
                "config.toml is missing {:?}",
                expected
            );
        }
    }

    #[test]
    fn test_meta_db_schema() {
        let (_temp, cache) = init_cache();

        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();

        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'")
            .unwrap()
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        let expected_tables = [
            "files",
            "statistics",
            "config",
            "file_branches",
            "branches",
            "file_dependencies",
            "file_exports",
        ];
        for table in expected_tables {
            assert!(
                tables.iter().any(|name| name == table),
                "missing table {:?}",
                table
            );
        }
    }

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let (temp, _cache) = init_cache();
        let workspace = temp.path().to_path_buf();

        // Every thread uses its own manager for the same workspace.
        let handles: Vec<_> = (0..10)
            .map(|i| {
                let path = workspace.clone();
                thread::spawn(move || {
                    let cache = CacheManager::new(&path);
                    cache
                        .update_file(&format!("file_{}.rs", i), "rust", i * 10)
                        .unwrap();
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }

        let cache = CacheManager::new(&workspace);
        let files = cache.list_files().unwrap();
        assert_eq!(files.len(), 10);
    }

    #[test]
    fn test_validate_corrupted_database() {
        let (_temp, cache) = init_cache();

        // Replace the database with bytes that are not a valid SQLite file.
        overwrite_with(&cache.path().join(META_DB), b"CORRUPTED DATA");

        let result = cache.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("corrupted") || err_msg.contains("not a database"),
            "unexpected error message: {}",
            err_msg
        );
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        let (_temp, cache) = init_cache();

        // Four bytes that validation is expected to reject for this file.
        overwrite_with(&cache.path().join("trigrams.bin"), b"BADM");

        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        let (_temp, cache) = init_cache();

        // Four bytes that validation is expected to reject for this file.
        overwrite_with(&cache.path().join("content.bin"), b"BADM");

        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let (_temp, cache) = init_cache();

        // Drop a required table to simulate an incomplete schema.
        let db_path = cache.path().join(META_DB);
        let conn = Connection::open(&db_path).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        let result = cache.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}