1use anyhow::{Context, Result};
11use rusqlite::{Connection, OptionalExtension};
12use std::collections::HashMap;
13use std::fs::File;
14use std::path::{Path, PathBuf};
15
16use crate::models::IndexedFile;
17
/// Name of the per-workspace cache directory created under the workspace root.
pub const CACHE_DIR: &str = ".reflex";

/// SQLite database holding file, branch, and dependency metadata.
pub const META_DB: &str = "meta.db";
/// Binary token index file inside the cache directory.
pub const TOKENS_BIN: &str = "tokens.bin";
/// JSON hash store — presumably legacy, given `save_hashes` is deprecated in
/// favor of the `file_branches` table; confirm before removing.
pub const HASHES_JSON: &str = "hashes.json";
/// User-editable cache configuration file.
pub const CONFIG_TOML: &str = "config.toml";

/// Handle to the on-disk `.reflex` cache: creation, validation, and all
/// metadata reads/writes backed by `meta.db`.
#[derive(Clone)]
pub struct CacheManager {
    // Path to the `.reflex` directory for this workspace (workspace root + CACHE_DIR).
    cache_path: PathBuf,
}
32
33impl CacheManager {
34 pub fn new(root: impl AsRef<Path>) -> Self {
36 let cache_path = root.as_ref().join(CACHE_DIR);
37 Self { cache_path }
38 }
39
40 pub fn init(&self) -> Result<()> {
42 log::info!("Initializing cache at {:?}", self.cache_path);
43
44 if !self.cache_path.exists() {
45 std::fs::create_dir_all(&self.cache_path)?;
46 }
47
48 self.init_meta_db()?;
50
51 self.init_config_toml()?;
53
54 log::info!("Cache initialized successfully");
58 Ok(())
59 }
60
61 fn init_meta_db(&self) -> Result<()> {
63 let db_path = self.cache_path.join(META_DB);
64
65 if db_path.exists() {
67 return Ok(());
68 }
69
70 let conn = Connection::open(&db_path)
71 .context("Failed to create meta.db")?;
72
73 conn.execute(
75 "CREATE TABLE IF NOT EXISTS files (
76 id INTEGER PRIMARY KEY AUTOINCREMENT,
77 path TEXT NOT NULL UNIQUE,
78 last_indexed INTEGER NOT NULL,
79 language TEXT NOT NULL,
80 token_count INTEGER DEFAULT 0,
81 line_count INTEGER DEFAULT 0
82 )",
83 [],
84 )?;
85
86 conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(path)", [])?;
87
88 conn.execute(
90 "CREATE TABLE IF NOT EXISTS statistics (
91 key TEXT PRIMARY KEY,
92 value TEXT NOT NULL,
93 updated_at INTEGER NOT NULL
94 )",
95 [],
96 )?;
97
98 let now = chrono::Utc::now().timestamp();
100 conn.execute(
101 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
102 ["total_files", "0", &now.to_string()],
103 )?;
104 conn.execute(
105 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
106 ["cache_version", "1", &now.to_string()],
107 )?;
108
109 let schema_hash = env!("CACHE_SCHEMA_HASH");
112 conn.execute(
113 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
114 ["schema_hash", schema_hash, &now.to_string()],
115 )?;
116
117 conn.execute(
119 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
120 ["last_compaction", "0", &now.to_string()],
121 )?;
122
123 conn.execute(
125 "CREATE TABLE IF NOT EXISTS config (
126 key TEXT PRIMARY KEY,
127 value TEXT NOT NULL
128 )",
129 [],
130 )?;
131
132 conn.execute(
134 "CREATE TABLE IF NOT EXISTS file_branches (
135 file_id INTEGER NOT NULL,
136 branch_id INTEGER NOT NULL,
137 hash TEXT NOT NULL,
138 last_indexed INTEGER NOT NULL,
139 PRIMARY KEY (file_id, branch_id),
140 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
141 FOREIGN KEY (branch_id) REFERENCES branches(id) ON DELETE CASCADE
142 )",
143 [],
144 )?;
145
146 conn.execute(
147 "CREATE INDEX IF NOT EXISTS idx_branch_lookup ON file_branches(branch_id, file_id)",
148 [],
149 )?;
150
151 conn.execute(
152 "CREATE INDEX IF NOT EXISTS idx_hash_lookup ON file_branches(hash)",
153 [],
154 )?;
155
156 conn.execute(
158 "CREATE TABLE IF NOT EXISTS branches (
159 id INTEGER PRIMARY KEY AUTOINCREMENT,
160 name TEXT NOT NULL UNIQUE,
161 commit_sha TEXT NOT NULL,
162 last_indexed INTEGER NOT NULL,
163 file_count INTEGER DEFAULT 0,
164 is_dirty INTEGER DEFAULT 0
165 )",
166 [],
167 )?;
168
169 conn.execute(
171 "CREATE TABLE IF NOT EXISTS file_dependencies (
172 id INTEGER PRIMARY KEY AUTOINCREMENT,
173 file_id INTEGER NOT NULL,
174 imported_path TEXT NOT NULL,
175 resolved_file_id INTEGER,
176 import_type TEXT NOT NULL,
177 line_number INTEGER NOT NULL,
178 imported_symbols TEXT,
179 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
180 FOREIGN KEY (resolved_file_id) REFERENCES files(id) ON DELETE SET NULL
181 )",
182 [],
183 )?;
184
185 conn.execute(
186 "CREATE INDEX IF NOT EXISTS idx_deps_file ON file_dependencies(file_id)",
187 [],
188 )?;
189
190 conn.execute(
191 "CREATE INDEX IF NOT EXISTS idx_deps_resolved ON file_dependencies(resolved_file_id)",
192 [],
193 )?;
194
195 conn.execute(
196 "CREATE INDEX IF NOT EXISTS idx_deps_type ON file_dependencies(import_type)",
197 [],
198 )?;
199
200 conn.execute(
202 "CREATE TABLE IF NOT EXISTS file_exports (
203 id INTEGER PRIMARY KEY AUTOINCREMENT,
204 file_id INTEGER NOT NULL,
205 exported_symbol TEXT,
206 source_path TEXT NOT NULL,
207 resolved_source_id INTEGER,
208 line_number INTEGER NOT NULL,
209 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE,
210 FOREIGN KEY (resolved_source_id) REFERENCES files(id) ON DELETE SET NULL
211 )",
212 [],
213 )?;
214
215 conn.execute(
216 "CREATE INDEX IF NOT EXISTS idx_exports_file ON file_exports(file_id)",
217 [],
218 )?;
219
220 conn.execute(
221 "CREATE INDEX IF NOT EXISTS idx_exports_resolved ON file_exports(resolved_source_id)",
222 [],
223 )?;
224
225 conn.execute(
226 "CREATE INDEX IF NOT EXISTS idx_exports_symbol ON file_exports(exported_symbol)",
227 [],
228 )?;
229
230 log::debug!("Created meta.db with schema");
231 Ok(())
232 }
233
234 fn init_config_toml(&self) -> Result<()> {
236 let config_path = self.cache_path.join(CONFIG_TOML);
237
238 if config_path.exists() {
239 return Ok(());
240 }
241
242 let default_config = r#"[index]
243languages = [] # Empty = all supported languages
244max_file_size = 10485760 # 10 MB
245follow_symlinks = false
246
247[index.include]
248patterns = []
249
250[index.exclude]
251patterns = []
252
253[search]
254default_limit = 100
255fuzzy_threshold = 0.8
256
257[performance]
258parallel_threads = 0 # 0 = auto (80% of available cores), or set a specific number
259compression_level = 3 # zstd level
260
261[semantic]
262# Semantic query generation using LLMs
263# Translate natural language questions into rfx query commands
264provider = "groq" # Options: openai, anthropic, groq
265# model = "llama-3.3-70b-versatile" # Optional: override provider default model
266# auto_execute = false # Optional: auto-execute queries without confirmation
267"#;
268
269 std::fs::write(&config_path, default_config)?;
270
271 log::debug!("Created default config.toml");
272 Ok(())
273 }
274
275 pub fn exists(&self) -> bool {
277 self.cache_path.exists()
278 && self.cache_path.join(META_DB).exists()
279 }
280
281 pub fn validate(&self) -> Result<()> {
290 let start = std::time::Instant::now();
291
292 if !self.cache_path.exists() {
294 anyhow::bail!("Cache directory does not exist: {}", self.cache_path.display());
295 }
296
297 let db_path = self.cache_path.join(META_DB);
299 if !db_path.exists() {
300 anyhow::bail!("Database file missing: {}", db_path.display());
301 }
302
303 let conn = Connection::open(&db_path)
305 .context("Failed to open meta.db - database may be corrupted")?;
306
307 let tables: Result<Vec<String>, _> = conn
309 .prepare("SELECT name FROM sqlite_master WHERE type='table'")
310 .and_then(|mut stmt| {
311 stmt.query_map([], |row| row.get(0))
312 .map(|rows| rows.collect())
313 })
314 .and_then(|result| result);
315
316 match tables {
317 Ok(table_list) => {
318 let required_tables = vec!["files", "statistics", "config", "file_branches", "branches", "file_dependencies", "file_exports"];
320 for table in &required_tables {
321 if !table_list.iter().any(|t| t == table) {
322 anyhow::bail!("Required table '{}' missing from database schema", table);
323 }
324 }
325 }
326 Err(e) => {
327 anyhow::bail!("Failed to read database schema: {}", e);
328 }
329 }
330
331 let integrity_result: String = conn
334 .query_row("PRAGMA quick_check", [], |row| row.get(0))?;
335
336 if integrity_result != "ok" {
337 log::warn!("Database integrity check failed: {}", integrity_result);
338 anyhow::bail!(
339 "Database integrity check failed: {}. Cache may be corrupted. \
340 Run 'rfx index' to rebuild cache.",
341 integrity_result
342 );
343 }
344
345 let trigrams_path = self.cache_path.join("trigrams.bin");
347 if trigrams_path.exists() {
348 use std::io::Read;
349
350 match File::open(&trigrams_path) {
351 Ok(mut file) => {
352 let mut header = [0u8; 4];
353 match file.read_exact(&mut header) {
354 Ok(_) => {
355 if &header != b"RFTG" {
357 log::warn!("trigrams.bin has invalid magic bytes - may be corrupted");
358 anyhow::bail!("trigrams.bin appears to be corrupted (invalid magic bytes)");
359 }
360 }
361 Err(_) => {
362 anyhow::bail!("trigrams.bin is too small - appears to be corrupted");
363 }
364 }
365 }
366 Err(e) => {
367 anyhow::bail!("Failed to open trigrams.bin: {}", e);
368 }
369 }
370 }
371
372 let content_path = self.cache_path.join("content.bin");
374 if content_path.exists() {
375 use std::io::Read;
376
377 match File::open(&content_path) {
378 Ok(mut file) => {
379 let mut header = [0u8; 4];
380 match file.read_exact(&mut header) {
381 Ok(_) => {
382 if &header != b"RFCT" {
384 log::warn!("content.bin has invalid magic bytes - may be corrupted");
385 anyhow::bail!("content.bin appears to be corrupted (invalid magic bytes)");
386 }
387 }
388 Err(_) => {
389 anyhow::bail!("content.bin is too small - appears to be corrupted");
390 }
391 }
392 }
393 Err(e) => {
394 anyhow::bail!("Failed to open content.bin: {}", e);
395 }
396 }
397 }
398
399 let current_schema_hash = env!("CACHE_SCHEMA_HASH");
401
402 let stored_schema_hash: Option<String> = conn
403 .query_row(
404 "SELECT value FROM statistics WHERE key = 'schema_hash'",
405 [],
406 |row| row.get(0),
407 )
408 .optional()?;
409
410 if let Some(stored_hash) = stored_schema_hash {
411 if stored_hash != current_schema_hash {
412 log::warn!(
413 "Cache schema hash mismatch! Stored: {}, Current: {}",
414 stored_hash,
415 current_schema_hash
416 );
417 anyhow::bail!(
418 "Cache schema version mismatch.\n\
419 \n\
420 - Cache was built with version {}\n\
421 - Current binary expects version {}\n\
422 \n\
423 The cache format may be incompatible with this version of Reflex.\n\
424 Please rebuild the index by running:\n\
425 \n\
426 rfx index\n\
427 \n\
428 This usually happens after upgrading Reflex or making code changes.",
429 stored_hash,
430 current_schema_hash
431 );
432 }
433 } else {
434 log::debug!("No schema_hash found in cache - this cache was created before automatic invalidation was implemented");
435 }
438
439 let elapsed = start.elapsed();
440 log::debug!("Cache validation passed (schema hash: {}, took {:?})", current_schema_hash, elapsed);
441 Ok(())
442 }
443
444 pub fn path(&self) -> &Path {
446 &self.cache_path
447 }
448
449 pub fn workspace_root(&self) -> PathBuf {
451 self.cache_path
452 .parent()
453 .expect(".reflex directory should have a parent")
454 .to_path_buf()
455 }
456
457 pub fn clear(&self) -> Result<()> {
459 log::warn!("Clearing cache at {:?}", self.cache_path);
460
461 if self.cache_path.exists() {
462 std::fs::remove_dir_all(&self.cache_path)?;
463 }
464
465 Ok(())
466 }
467
468 pub fn checkpoint_wal(&self) -> Result<()> {
476 let db_path = self.cache_path.join(META_DB);
477
478 if !db_path.exists() {
479 return Ok(());
481 }
482
483 let conn = Connection::open(&db_path)
484 .context("Failed to open meta.db for WAL checkpoint")?;
485
486 conn.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |row| {
490 let busy: i64 = row.get(0)?;
491 let log_pages: i64 = row.get(1)?;
492 let checkpointed: i64 = row.get(2)?;
493 log::debug!(
494 "WAL checkpoint completed: busy={}, log_pages={}, checkpointed_pages={}",
495 busy, log_pages, checkpointed
496 );
497 Ok(())
498 }).context("Failed to execute WAL checkpoint")?;
499
500 log::debug!("Executed WAL checkpoint (TRUNCATE) on meta.db");
501 Ok(())
502 }
503
504 pub fn load_all_hashes(&self) -> Result<HashMap<String, String>> {
509 let db_path = self.cache_path.join(META_DB);
510
511 if !db_path.exists() {
512 return Ok(HashMap::new());
513 }
514
515 let conn = Connection::open(&db_path)
516 .context("Failed to open meta.db")?;
517
518 let mut stmt = conn.prepare(
522 "SELECT f.path, fb.hash
523 FROM file_branches fb
524 JOIN files f ON fb.file_id = f.id"
525 )?;
526 let hashes: HashMap<String, String> = stmt.query_map([], |row| {
527 Ok((row.get(0)?, row.get(1)?))
528 })?
529 .collect::<Result<HashMap<_, _>, _>>()?;
530
531 log::debug!("Loaded {} file hashes across all branches from SQLite", hashes.len());
532 Ok(hashes)
533 }
534
535 pub fn load_hashes_for_branch(&self, branch: &str) -> Result<HashMap<String, String>> {
540 let db_path = self.cache_path.join(META_DB);
541
542 if !db_path.exists() {
543 return Ok(HashMap::new());
544 }
545
546 let conn = Connection::open(&db_path)
547 .context("Failed to open meta.db")?;
548
549 let mut stmt = conn.prepare(
551 "SELECT f.path, fb.hash
552 FROM file_branches fb
553 JOIN files f ON fb.file_id = f.id
554 JOIN branches b ON fb.branch_id = b.id
555 WHERE b.name = ?"
556 )?;
557 let hashes: HashMap<String, String> = stmt.query_map([branch], |row| {
558 Ok((row.get(0)?, row.get(1)?))
559 })?
560 .collect::<Result<HashMap<_, _>, _>>()?;
561
562 log::debug!("Loaded {} file hashes for branch '{}' from SQLite", hashes.len(), branch);
563 Ok(hashes)
564 }
565
    /// Legacy no-op retained for API compatibility. Hashes are now persisted
    /// per-branch in the `file_branches` table (see `record_branch_file()` /
    /// `batch_record_branch_files()`), so there is nothing to save here.
    #[deprecated(note = "Hashes are now stored in file_branches table via record_branch_file()")]
    pub fn save_hashes(&self, _hashes: &HashMap<String, String>) -> Result<()> {
        // Intentionally empty: silently succeeding keeps older callers working
        // during the migration away from hashes.json.
        Ok(())
    }
575
576 pub fn update_file(&self, path: &str, language: &str, line_count: usize) -> Result<()> {
581 let db_path = self.cache_path.join(META_DB);
582 let conn = Connection::open(&db_path)
583 .context("Failed to open meta.db for file update")?;
584
585 let now = chrono::Utc::now().timestamp();
586
587 conn.execute(
588 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
589 VALUES (?, ?, ?, ?)",
590 [path, &now.to_string(), language, &line_count.to_string()],
591 )?;
592
593 Ok(())
594 }
595
596 pub fn batch_update_files(&self, files: &[(String, String, usize)]) -> Result<()> {
601 let db_path = self.cache_path.join(META_DB);
602 let mut conn = Connection::open(&db_path)
603 .context("Failed to open meta.db for batch update")?;
604
605 let now = chrono::Utc::now().timestamp();
606 let now_str = now.to_string();
607
608 let tx = conn.transaction()?;
610
611 for (path, language, line_count) in files {
612 tx.execute(
613 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
614 VALUES (?, ?, ?, ?)",
615 [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
616 )?;
617 }
618
619 tx.commit()?;
620 Ok(())
621 }
622
623 pub fn batch_update_files_and_branch(
628 &self,
629 files: &[(String, String, usize)], branch_files: &[(String, String)], branch: &str,
632 commit_sha: Option<&str>,
633 ) -> Result<()> {
634 log::info!("batch_update_files_and_branch: Processing {} files for branch '{}'", files.len(), branch);
635
636 let db_path = self.cache_path.join(META_DB);
637 let mut conn = Connection::open(&db_path)
638 .context("Failed to open meta.db for batch update and branch recording")?;
639
640 let now = chrono::Utc::now().timestamp();
641 let now_str = now.to_string();
642
643 let tx = conn.transaction()?;
645
646 for (path, language, line_count) in files {
648 tx.execute(
649 "INSERT OR REPLACE INTO files (path, last_indexed, language, line_count)
650 VALUES (?, ?, ?, ?)",
651 [path.as_str(), &now_str, language.as_str(), &line_count.to_string()],
652 )?;
653 }
654 log::info!("Inserted {} files into files table", files.len());
655
656 let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
658 log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
659
660 let mut inserted = 0;
662 for (path, hash) in branch_files {
663 let file_id: i64 = tx.query_row(
665 "SELECT id FROM files WHERE path = ?",
666 [path.as_str()],
667 |row| row.get(0)
668 ).context(format!("File not found in index after insert: {}", path))?;
669
670 tx.execute(
672 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
673 VALUES (?, ?, ?, ?)",
674 rusqlite::params![file_id, branch_id, hash.as_str(), now],
675 )?;
676 inserted += 1;
677 }
678 log::info!("Inserted {} file_branches entries", inserted);
679
680 tx.commit()?;
682 log::info!("Transaction committed successfully (files + file_branches)");
683
684 let verify_conn = Connection::open(&db_path)
687 .context("Failed to open meta.db for verification")?;
688
689 let actual_file_count: i64 = verify_conn.query_row(
691 "SELECT COUNT(*) FROM files WHERE path IN (SELECT path FROM files ORDER BY id DESC LIMIT ?)",
692 [files.len()],
693 |row| row.get(0)
694 ).unwrap_or(0);
695
696 let actual_fb_count: i64 = verify_conn.query_row(
698 "SELECT COUNT(*) FROM file_branches fb
699 JOIN branches b ON fb.branch_id = b.id
700 WHERE b.name = ?",
701 [branch],
702 |row| row.get(0)
703 ).unwrap_or(0);
704
705 log::info!(
706 "Post-commit verification: {} files in files table (expected {}), {} file_branches entries for '{}' (expected {})",
707 actual_file_count,
708 files.len(),
709 actual_fb_count,
710 branch,
711 inserted
712 );
713
714 if actual_file_count < files.len() as i64 {
716 log::warn!(
717 "MISMATCH: Expected {} files in database, but only found {}! Data may not have persisted.",
718 files.len(),
719 actual_file_count
720 );
721 }
722 if actual_fb_count < inserted as i64 {
723 log::warn!(
724 "MISMATCH: Expected {} file_branches entries for branch '{}', but only found {}! Data may not have persisted.",
725 inserted,
726 branch,
727 actual_fb_count
728 );
729 }
730
731 Ok(())
732 }
733
734 pub fn update_stats(&self, branch: &str) -> Result<()> {
738 let db_path = self.cache_path.join(META_DB);
739 let conn = Connection::open(&db_path)
740 .context("Failed to open meta.db for stats update")?;
741
742 let total_files: usize = conn.query_row(
744 "SELECT COUNT(DISTINCT fb.file_id)
745 FROM file_branches fb
746 JOIN branches b ON fb.branch_id = b.id
747 WHERE b.name = ?",
748 [branch],
749 |row| row.get(0),
750 ).unwrap_or(0);
751
752 let now = chrono::Utc::now().timestamp();
753
754 conn.execute(
755 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
756 ["total_files", &total_files.to_string(), &now.to_string()],
757 )?;
758
759 log::debug!("Updated statistics for branch '{}': {} files", branch, total_files);
760 Ok(())
761 }
762
763 pub fn update_schema_hash(&self) -> Result<()> {
768 let db_path = self.cache_path.join(META_DB);
769 let conn = Connection::open(&db_path)
770 .context("Failed to open meta.db for schema hash update")?;
771
772 let schema_hash = env!("CACHE_SCHEMA_HASH");
773 let now = chrono::Utc::now().timestamp();
774
775 conn.execute(
776 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
777 ["schema_hash", schema_hash, &now.to_string()],
778 )?;
779
780 log::debug!("Updated schema hash to: {}", schema_hash);
781 Ok(())
782 }
783
784 pub fn list_files(&self) -> Result<Vec<IndexedFile>> {
786 let db_path = self.cache_path.join(META_DB);
787
788 if !db_path.exists() {
789 return Ok(Vec::new());
790 }
791
792 let conn = Connection::open(&db_path)
793 .context("Failed to open meta.db")?;
794
795 let mut stmt = conn.prepare(
796 "SELECT path, language, last_indexed FROM files ORDER BY path"
797 )?;
798
799 let files = stmt.query_map([], |row| {
800 let path: String = row.get(0)?;
801 let language: String = row.get(1)?;
802 let last_indexed: i64 = row.get(2)?;
803
804 Ok(IndexedFile {
805 path,
806 language,
807 last_indexed: chrono::DateTime::from_timestamp(last_indexed, 0)
808 .unwrap_or_else(chrono::Utc::now)
809 .to_rfc3339(),
810 })
811 })?
812 .collect::<Result<Vec<_>, _>>()?;
813
814 Ok(files)
815 }
816
    /// Build an `IndexStats` snapshot scoped to the current branch.
    ///
    /// Branch resolution: the current git branch when the workspace is a git
    /// repository (None if reading git state fails), otherwise the synthetic
    /// "_default" branch. When no branch can be determined, per-language
    /// counts fall back to unscoped queries over the whole `files` table and
    /// `total_files` is reported as 0.
    ///
    /// Returns an all-zero snapshot when the database does not exist yet.
    pub fn stats(&self) -> Result<crate::models::IndexStats> {
        let db_path = self.cache_path.join(META_DB);

        if !db_path.exists() {
            // No cache yet: report an empty index rather than erroring.
            return Ok(crate::models::IndexStats {
                total_files: 0,
                index_size_bytes: 0,
                last_updated: chrono::Utc::now().to_rfc3339(),
                files_by_language: std::collections::HashMap::new(),
                lines_by_language: std::collections::HashMap::new(),
            });
        }

        let conn = Connection::open(&db_path)
            .context("Failed to open meta.db")?;

        // Determine the branch to report on; a failed git-state read yields
        // None, routing the queries below to their unscoped fallbacks.
        let workspace_root = self.workspace_root();
        let current_branch = if crate::git::is_git_repo(&workspace_root) {
            crate::git::get_git_state(&workspace_root)
                .ok()
                .map(|state| state.branch)
        } else {
            Some("_default".to_string())
        };

        log::debug!("stats(): current_branch = {:?}", current_branch);

        let total_files: usize = if let Some(ref branch) = current_branch {
            log::debug!("stats(): Counting files for branch '{}'", branch);

            // Diagnostic dump of the branches table (debug logging only;
            // does not feed the returned stats).
            let branches: Vec<(i64, String, i64)> = conn.prepare(
                "SELECT id, name, file_count FROM branches"
            )
            .and_then(|mut stmt| {
                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
                    .map(|rows| rows.collect())
            })
            .and_then(|result| result)
            .unwrap_or_default();

            for (id, name, count) in &branches {
                log::debug!("stats(): Branch ID={}, Name='{}', FileCount={}", id, name, count);
            }

            // Diagnostic dump of per-branch file_branches row counts
            // (debug logging only).
            let fb_counts: Vec<(String, i64)> = conn.prepare(
                "SELECT b.name, COUNT(*) FROM file_branches fb
                 JOIN branches b ON fb.branch_id = b.id
                 GROUP BY b.name"
            )
            .and_then(|mut stmt| {
                stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
                    .map(|rows| rows.collect())
            })
            .and_then(|result| result)
            .unwrap_or_default();

            for (name, count) in &fb_counts {
                log::debug!("stats(): file_branches count for branch '{}': {}", name, count);
            }

            // The figure actually returned: distinct files on this branch.
            let count: usize = conn.query_row(
                "SELECT COUNT(DISTINCT fb.file_id)
                 FROM file_branches fb
                 JOIN branches b ON fb.branch_id = b.id
                 WHERE b.name = ?",
                [branch],
                |row| row.get(0),
            ).unwrap_or(0);

            log::debug!("stats(): Query returned total_files = {}", count);
            count
        } else {
            log::warn!("stats(): No current_branch detected!");
            0
        };

        // Timestamp of the last statistics refresh, RFC 3339; defaults to
        // "now" when the row is missing or unreadable.
        let last_updated: String = conn.query_row(
            "SELECT updated_at FROM statistics WHERE key = 'total_files'",
            [],
            |row| {
                let timestamp: i64 = row.get(0)?;
                Ok(chrono::DateTime::from_timestamp(timestamp, 0)
                    .unwrap_or_else(chrono::Utc::now)
                    .to_rfc3339())
            },
        ).unwrap_or_else(|_| chrono::Utc::now().to_rfc3339());

        // Sum the on-disk size of every cache artifact that exists.
        let mut index_size_bytes: u64 = 0;

        for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
            let file_path = self.cache_path.join(file_name);
            if let Ok(metadata) = std::fs::metadata(&file_path) {
                index_size_bytes += metadata.len();
            }
        }

        // Per-language file counts, branch-scoped when a branch is known.
        let mut files_by_language = std::collections::HashMap::new();
        if let Some(ref branch) = current_branch {
            let mut stmt = conn.prepare(
                "SELECT f.language, COUNT(DISTINCT f.id)
                 FROM files f
                 JOIN file_branches fb ON f.id = fb.file_id
                 JOIN branches b ON fb.branch_id = b.id
                 WHERE b.name = ?
                 GROUP BY f.language"
            )?;
            let lang_counts = stmt.query_map([branch], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;

            for result in lang_counts {
                let (language, count) = result?;
                files_by_language.insert(language, count);
            }
        } else {
            // No branch context: count across the entire files table.
            let mut stmt = conn.prepare("SELECT language, COUNT(*) FROM files GROUP BY language")?;
            let lang_counts = stmt.query_map([], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;

            for result in lang_counts {
                let (language, count) = result?;
                files_by_language.insert(language, count);
            }
        }

        // Per-language line totals, same branch scoping as above.
        let mut lines_by_language = std::collections::HashMap::new();
        if let Some(ref branch) = current_branch {
            let mut stmt = conn.prepare(
                "SELECT f.language, SUM(f.line_count)
                 FROM files f
                 JOIN file_branches fb ON f.id = fb.file_id
                 JOIN branches b ON fb.branch_id = b.id
                 WHERE b.name = ?
                 GROUP BY f.language"
            )?;
            let line_counts = stmt.query_map([branch], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;

            for result in line_counts {
                let (language, count) = result?;
                lines_by_language.insert(language, count);
            }
        } else {
            let mut stmt = conn.prepare("SELECT language, SUM(line_count) FROM files GROUP BY language")?;
            let line_counts = stmt.query_map([], |row| {
                let language: String = row.get(0)?;
                let count: i64 = row.get(1)?;
                Ok((language, count as usize))
            })?;

            for result in line_counts {
                let (language, count) = result?;
                lines_by_language.insert(language, count);
            }
        }

        Ok(crate::models::IndexStats {
            total_files,
            index_size_bytes,
            last_updated,
            files_by_language,
            lines_by_language,
        })
    }
1008
1009 fn get_or_create_branch_id(&self, conn: &Connection, branch_name: &str, commit_sha: Option<&str>) -> Result<i64> {
1015 let existing_id: Option<i64> = conn
1017 .query_row(
1018 "SELECT id FROM branches WHERE name = ?",
1019 [branch_name],
1020 |row| row.get(0),
1021 )
1022 .optional()?;
1023
1024 if let Some(id) = existing_id {
1025 return Ok(id);
1026 }
1027
1028 let now = chrono::Utc::now().timestamp();
1030 conn.execute(
1031 "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1032 VALUES (?, ?, ?, 0, 0)",
1033 [branch_name, commit_sha.unwrap_or("unknown"), &now.to_string()],
1034 )?;
1035
1036 let id: i64 = conn.last_insert_rowid();
1038 Ok(id)
1039 }
1040
1041 pub fn record_branch_file(
1043 &self,
1044 path: &str,
1045 branch: &str,
1046 hash: &str,
1047 commit_sha: Option<&str>,
1048 ) -> Result<()> {
1049 let db_path = self.cache_path.join(META_DB);
1050 let conn = Connection::open(&db_path)
1051 .context("Failed to open meta.db for branch file recording")?;
1052
1053 let file_id: i64 = conn.query_row(
1055 "SELECT id FROM files WHERE path = ?",
1056 [path],
1057 |row| row.get(0)
1058 ).context(format!("File not found in index: {}", path))?;
1059
1060 let branch_id = self.get_or_create_branch_id(&conn, branch, commit_sha)?;
1062
1063 let now = chrono::Utc::now().timestamp();
1064
1065 conn.execute(
1067 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1068 VALUES (?, ?, ?, ?)",
1069 rusqlite::params![file_id, branch_id, hash, now],
1070 )?;
1071
1072 Ok(())
1073 }
1074
1075 pub fn batch_record_branch_files(
1080 &self,
1081 files: &[(String, String)], branch: &str,
1083 commit_sha: Option<&str>,
1084 ) -> Result<()> {
1085 log::info!("batch_record_branch_files: Processing {} files for branch '{}'", files.len(), branch);
1086
1087 let db_path = self.cache_path.join(META_DB);
1088 let mut conn = Connection::open(&db_path)
1089 .context("Failed to open meta.db for batch branch recording")?;
1090
1091 let now = chrono::Utc::now().timestamp();
1092
1093 let tx = conn.transaction()?;
1095
1096 let branch_id = self.get_or_create_branch_id(&tx, branch, commit_sha)?;
1098 log::debug!("Got branch_id={} for branch '{}'", branch_id, branch);
1099
1100 let mut inserted = 0;
1101 for (path, hash) in files {
1102 log::trace!("Looking up file_id for path: {}", path);
1104 let file_id: i64 = tx.query_row(
1105 "SELECT id FROM files WHERE path = ?",
1106 [path.as_str()],
1107 |row| row.get(0)
1108 ).context(format!("File not found in index: {}", path))?;
1109 log::trace!("Found file_id={} for path: {}", file_id, path);
1110
1111 tx.execute(
1113 "INSERT OR REPLACE INTO file_branches (file_id, branch_id, hash, last_indexed)
1114 VALUES (?, ?, ?, ?)",
1115 rusqlite::params![file_id, branch_id, hash.as_str(), now],
1116 )?;
1117 inserted += 1;
1118 }
1119
1120 log::info!("Inserted {} file_branches entries", inserted);
1121 tx.commit()?;
1122 log::info!("Transaction committed successfully");
1123 Ok(())
1124 }
1125
1126 pub fn get_branch_files(&self, branch: &str) -> Result<HashMap<String, String>> {
1130 let db_path = self.cache_path.join(META_DB);
1131
1132 if !db_path.exists() {
1133 return Ok(HashMap::new());
1134 }
1135
1136 let conn = Connection::open(&db_path)
1137 .context("Failed to open meta.db")?;
1138
1139 let mut stmt = conn.prepare(
1140 "SELECT f.path, fb.hash
1141 FROM file_branches fb
1142 JOIN files f ON fb.file_id = f.id
1143 JOIN branches b ON fb.branch_id = b.id
1144 WHERE b.name = ?"
1145 )?;
1146 let files: HashMap<String, String> = stmt
1147 .query_map([branch], |row| Ok((row.get(0)?, row.get(1)?)))?
1148 .collect::<Result<HashMap<_, _>, _>>()?;
1149
1150 log::debug!(
1151 "Loaded {} files for branch '{}' from file_branches table",
1152 files.len(),
1153 branch
1154 );
1155 Ok(files)
1156 }
1157
1158 pub fn branch_exists(&self, branch: &str) -> Result<bool> {
1162 let db_path = self.cache_path.join(META_DB);
1163
1164 if !db_path.exists() {
1165 return Ok(false);
1166 }
1167
1168 let conn = Connection::open(&db_path)
1169 .context("Failed to open meta.db")?;
1170
1171 let count: i64 = conn
1172 .query_row(
1173 "SELECT COUNT(*)
1174 FROM file_branches fb
1175 JOIN branches b ON fb.branch_id = b.id
1176 WHERE b.name = ?
1177 LIMIT 1",
1178 [branch],
1179 |row| row.get(0),
1180 )
1181 .unwrap_or(0);
1182
1183 Ok(count > 0)
1184 }
1185
1186 pub fn get_branch_info(&self, branch: &str) -> Result<BranchInfo> {
1188 let db_path = self.cache_path.join(META_DB);
1189
1190 if !db_path.exists() {
1191 anyhow::bail!("Database not initialized");
1192 }
1193
1194 let conn = Connection::open(&db_path)
1195 .context("Failed to open meta.db")?;
1196
1197 let info = conn.query_row(
1198 "SELECT commit_sha, last_indexed, file_count, is_dirty FROM branches WHERE name = ?",
1199 [branch],
1200 |row| {
1201 Ok(BranchInfo {
1202 branch: branch.to_string(),
1203 commit_sha: row.get(0)?,
1204 last_indexed: row.get(1)?,
1205 file_count: row.get(2)?,
1206 is_dirty: row.get::<_, i64>(3)? != 0,
1207 })
1208 },
1209 )?;
1210
1211 Ok(info)
1212 }
1213
1214 pub fn update_branch_metadata(
1219 &self,
1220 branch: &str,
1221 commit_sha: Option<&str>,
1222 file_count: usize,
1223 is_dirty: bool,
1224 ) -> Result<()> {
1225 let db_path = self.cache_path.join(META_DB);
1226 let conn = Connection::open(&db_path)
1227 .context("Failed to open meta.db for branch metadata update")?;
1228
1229 let now = chrono::Utc::now().timestamp();
1230 let is_dirty_int = if is_dirty { 1 } else { 0 };
1231
1232 let rows_updated = conn.execute(
1234 "UPDATE branches
1235 SET commit_sha = ?, last_indexed = ?, file_count = ?, is_dirty = ?
1236 WHERE name = ?",
1237 rusqlite::params![
1238 commit_sha.unwrap_or("unknown"),
1239 now,
1240 file_count,
1241 is_dirty_int,
1242 branch
1243 ],
1244 )?;
1245
1246 if rows_updated == 0 {
1248 conn.execute(
1249 "INSERT INTO branches (name, commit_sha, last_indexed, file_count, is_dirty)
1250 VALUES (?, ?, ?, ?, ?)",
1251 rusqlite::params![
1252 branch,
1253 commit_sha.unwrap_or("unknown"),
1254 now,
1255 file_count,
1256 is_dirty_int
1257 ],
1258 )?;
1259 }
1260
1261 log::debug!(
1262 "Updated branch metadata for '{}': commit={}, files={}, dirty={}",
1263 branch,
1264 commit_sha.unwrap_or("unknown"),
1265 file_count,
1266 is_dirty
1267 );
1268 Ok(())
1269 }
1270
1271 pub fn find_file_with_hash(&self, hash: &str) -> Result<Option<(String, String)>> {
1276 let db_path = self.cache_path.join(META_DB);
1277
1278 if !db_path.exists() {
1279 return Ok(None);
1280 }
1281
1282 let conn = Connection::open(&db_path)
1283 .context("Failed to open meta.db")?;
1284
1285 let result = conn
1286 .query_row(
1287 "SELECT f.path, b.name
1288 FROM file_branches fb
1289 JOIN files f ON fb.file_id = f.id
1290 JOIN branches b ON fb.branch_id = b.id
1291 WHERE fb.hash = ?
1292 LIMIT 1",
1293 [hash],
1294 |row| Ok((row.get(0)?, row.get(1)?)),
1295 )
1296 .optional()?;
1297
1298 Ok(result)
1299 }
1300
1301 pub fn get_file_id(&self, path: &str) -> Result<Option<i64>> {
1305 let db_path = self.cache_path.join(META_DB);
1306
1307 if !db_path.exists() {
1308 return Ok(None);
1309 }
1310
1311 let conn = Connection::open(&db_path)
1312 .context("Failed to open meta.db")?;
1313
1314 let result = conn
1315 .query_row(
1316 "SELECT id FROM files WHERE path = ?",
1317 [path],
1318 |row| row.get(0),
1319 )
1320 .optional()?;
1321
1322 Ok(result)
1323 }
1324
1325 pub fn batch_get_file_ids(&self, paths: &[String]) -> Result<HashMap<String, i64>> {
1332 let db_path = self.cache_path.join(META_DB);
1333
1334 if !db_path.exists() {
1335 return Ok(HashMap::new());
1336 }
1337
1338 let conn = Connection::open(&db_path)
1339 .context("Failed to open meta.db")?;
1340
1341 const BATCH_SIZE: usize = 900;
1344
1345 let mut results = HashMap::new();
1346
1347 for chunk in paths.chunks(BATCH_SIZE) {
1348 let placeholders = chunk.iter()
1350 .map(|_| "?")
1351 .collect::<Vec<_>>()
1352 .join(", ");
1353
1354 let query = format!("SELECT path, id FROM files WHERE path IN ({})", placeholders);
1355
1356 let params: Vec<&str> = chunk.iter().map(|s| s.as_str()).collect();
1357 let mut stmt = conn.prepare(&query)?;
1358
1359 let chunk_results = stmt.query_map(rusqlite::params_from_iter(params), |row| {
1360 Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
1361 })?
1362 .collect::<Result<HashMap<_, _>, _>>()?;
1363
1364 results.extend(chunk_results);
1365 }
1366
1367 log::debug!("Batch loaded {} file IDs (out of {} requested, {} chunks)",
1368 results.len(), paths.len(), paths.len().div_ceil(BATCH_SIZE));
1369 Ok(results)
1370 }
1371
1372 pub fn should_compact(&self) -> Result<bool> {
1379 let db_path = self.cache_path.join(META_DB);
1380
1381 if !db_path.exists() {
1382 return Ok(false);
1384 }
1385
1386 let conn = Connection::open(&db_path)
1387 .context("Failed to open meta.db for compaction check")?;
1388
1389 let last_compaction: i64 = conn
1391 .query_row(
1392 "SELECT value FROM statistics WHERE key = 'last_compaction'",
1393 [],
1394 |row| {
1395 let value: String = row.get(0)?;
1396 Ok(value.parse::<i64>().unwrap_or(0))
1397 },
1398 )
1399 .unwrap_or(0);
1400
1401 let now = chrono::Utc::now().timestamp();
1403
1404 const COMPACTION_THRESHOLD_SECS: i64 = 86400;
1406
1407 let elapsed_secs = now - last_compaction;
1408 let should_run = elapsed_secs >= COMPACTION_THRESHOLD_SECS;
1409
1410 log::debug!(
1411 "Compaction check: last={}, now={}, elapsed={}s, should_compact={}",
1412 last_compaction,
1413 now,
1414 elapsed_secs,
1415 should_run
1416 );
1417
1418 Ok(should_run)
1419 }
1420
1421 pub fn update_compaction_timestamp(&self) -> Result<()> {
1425 let db_path = self.cache_path.join(META_DB);
1426 let conn = Connection::open(&db_path)
1427 .context("Failed to open meta.db for compaction timestamp update")?;
1428
1429 let now = chrono::Utc::now().timestamp();
1430
1431 conn.execute(
1432 "INSERT OR REPLACE INTO statistics (key, value, updated_at) VALUES (?, ?, ?)",
1433 ["last_compaction", &now.to_string(), &now.to_string()],
1434 )?;
1435
1436 log::debug!("Updated last_compaction timestamp to: {}", now);
1437 Ok(())
1438 }
1439
1440 pub fn compact(&self) -> Result<crate::models::CompactionReport> {
1451 let start_time = std::time::Instant::now();
1452 log::info!("Starting cache compaction...");
1453
1454 let size_before = self.calculate_cache_size()?;
1456
1457 let deleted_files = self.identify_deleted_files()?;
1459 log::info!("Found {} deleted files to remove from cache", deleted_files.len());
1460
1461 if deleted_files.is_empty() {
1462 log::info!("No deleted files to compact - cache is clean");
1463 self.update_compaction_timestamp()?;
1465
1466 return Ok(crate::models::CompactionReport {
1467 files_removed: 0,
1468 space_saved_bytes: 0,
1469 duration_ms: start_time.elapsed().as_millis() as u64,
1470 });
1471 }
1472
1473 self.delete_files_from_db(&deleted_files)?;
1475 log::info!("Deleted {} files from database", deleted_files.len());
1476
1477 self.vacuum_database()?;
1479 log::info!("Completed VACUUM operation");
1480
1481 let size_after = self.calculate_cache_size()?;
1483 let space_saved = size_before.saturating_sub(size_after);
1484
1485 self.update_compaction_timestamp()?;
1487
1488 let duration_ms = start_time.elapsed().as_millis() as u64;
1489
1490 log::info!(
1491 "Cache compaction completed: {} files removed, {} bytes saved ({:.2} MB), took {}ms",
1492 deleted_files.len(),
1493 space_saved,
1494 space_saved as f64 / 1_048_576.0,
1495 duration_ms
1496 );
1497
1498 Ok(crate::models::CompactionReport {
1499 files_removed: deleted_files.len(),
1500 space_saved_bytes: space_saved,
1501 duration_ms,
1502 })
1503 }
1504
1505 fn identify_deleted_files(&self) -> Result<Vec<i64>> {
1509 let db_path = self.cache_path.join(META_DB);
1510 let conn = Connection::open(&db_path)
1511 .context("Failed to open meta.db for deleted file identification")?;
1512
1513 let workspace_root = self.workspace_root();
1514
1515 let mut stmt = conn.prepare("SELECT id, path FROM files")?;
1517 let files = stmt.query_map([], |row| {
1518 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
1519 })?
1520 .collect::<Result<Vec<_>, _>>()?;
1521
1522 log::debug!("Checking {} files for deletion status", files.len());
1523
1524 let mut deleted_file_ids = Vec::new();
1526 for (file_id, file_path) in files {
1527 let full_path = workspace_root.join(&file_path);
1528 if !full_path.exists() {
1529 log::trace!("File no longer exists: {} (id={})", file_path, file_id);
1530 deleted_file_ids.push(file_id);
1531 }
1532 }
1533
1534 Ok(deleted_file_ids)
1535 }
1536
1537 fn delete_files_from_db(&self, file_ids: &[i64]) -> Result<()> {
1544 if file_ids.is_empty() {
1545 return Ok(());
1546 }
1547
1548 let db_path = self.cache_path.join(META_DB);
1549 let mut conn = Connection::open(&db_path)
1550 .context("Failed to open meta.db for file deletion")?;
1551
1552 let tx = conn.transaction()?;
1553
1554 const BATCH_SIZE: usize = 900;
1556
1557 for chunk in file_ids.chunks(BATCH_SIZE) {
1558 let placeholders = chunk.iter()
1559 .map(|_| "?")
1560 .collect::<Vec<_>>()
1561 .join(", ");
1562
1563 let delete_query = format!("DELETE FROM files WHERE id IN ({})", placeholders);
1564
1565 let params: Vec<i64> = chunk.to_vec();
1566 tx.execute(&delete_query, rusqlite::params_from_iter(params))?;
1567 }
1568
1569 tx.commit()?;
1570 log::debug!("Deleted {} files from database (CASCADE handled related tables)", file_ids.len());
1571 Ok(())
1572 }
1573
1574 fn vacuum_database(&self) -> Result<()> {
1579 let db_path = self.cache_path.join(META_DB);
1580 let conn = Connection::open(&db_path)
1581 .context("Failed to open meta.db for VACUUM")?;
1582
1583 conn.execute("VACUUM", [])?;
1586
1587 log::debug!("VACUUM completed successfully");
1588 Ok(())
1589 }
1590
1591 fn calculate_cache_size(&self) -> Result<u64> {
1599 let mut total_size: u64 = 0;
1600
1601 for file_name in [META_DB, TOKENS_BIN, CONFIG_TOML, "content.bin", "trigrams.bin"] {
1602 let file_path = self.cache_path.join(file_name);
1603 if let Ok(metadata) = std::fs::metadata(&file_path) {
1604 total_size += metadata.len();
1605 }
1606 }
1607
1608 Ok(total_size)
1609 }
1610}
1611
/// Snapshot of a branch's indexing state as stored in the `branches`
/// table of meta.db.
#[derive(Debug, Clone)]
pub struct BranchInfo {
    // Branch name (e.g. "main", or the "_default" pseudo-branch used
    // by the tests for non-git workspaces — confirm against callers).
    pub branch: String,
    // Commit SHA recorded at index time; "unknown" when unavailable
    // (see update_branch_metadata).
    pub commit_sha: String,
    // Unix timestamp (seconds) of the last indexing run for this branch.
    pub last_indexed: i64,
    // Number of files recorded for this branch.
    pub file_count: usize,
    // Whether the working tree was dirty when the branch was indexed.
    pub is_dirty: bool,
}
1621
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build a CacheManager rooted in a fresh temporary directory.
    /// The returned TempDir must stay alive for the test's duration.
    fn setup() -> (TempDir, CacheManager) {
        let tmp = TempDir::new().unwrap();
        let mgr = CacheManager::new(tmp.path());
        (tmp, mgr)
    }

    #[test]
    fn test_cache_init() {
        let (_tmp, mgr) = setup();

        assert!(!mgr.exists());
        mgr.init().unwrap();
        assert!(mgr.exists());
        assert!(mgr.path().exists());

        assert!(mgr.path().join(META_DB).exists());
        assert!(mgr.path().join(CONFIG_TOML).exists());
    }

    #[test]
    fn test_cache_init_idempotent() {
        let (_tmp, mgr) = setup();

        // A second init must not fail or destroy anything.
        mgr.init().unwrap();
        mgr.init().unwrap();

        assert!(mgr.exists());
    }

    #[test]
    fn test_cache_clear() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        assert!(mgr.exists());

        mgr.clear().unwrap();
        assert!(!mgr.exists());
    }

    #[test]
    fn test_cache_clear_nonexistent() {
        let (_tmp, mgr) = setup();

        // Clearing a cache that was never created is a no-op.
        assert!(!mgr.exists());
        mgr.clear().unwrap();
        assert!(!mgr.exists());
    }

    #[test]
    fn test_load_all_hashes_empty() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        assert_eq!(mgr.load_all_hashes().unwrap().len(), 0);
    }

    #[test]
    fn test_load_all_hashes_before_init() {
        let (_tmp, mgr) = setup();

        // No DB on disk yet: result is simply empty, not an error.
        assert_eq!(mgr.load_all_hashes().unwrap().len(), 0);
    }

    #[test]
    fn test_load_hashes_for_branch_empty() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        assert_eq!(mgr.load_hashes_for_branch("main").unwrap().len(), 0);
    }

    #[test]
    fn test_update_file() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("src/main.rs", "rust", 100).unwrap();

        let files = mgr.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_update_file_multiple() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.update_file("src/lib.rs", "rust", 200).unwrap();
        mgr.update_file("README.md", "markdown", 50).unwrap();

        assert_eq!(mgr.list_files().unwrap().len(), 3);
    }

    #[test]
    fn test_update_file_replace() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        // Same path twice: second call replaces rather than duplicates.
        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.update_file("src/main.rs", "rust", 150).unwrap();

        let files = mgr.list_files().unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].path, "src/main.rs");
    }

    #[test]
    fn test_batch_update_files() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();

        let batch = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("test.py".to_string(), "python".to_string(), 50),
        ];
        mgr.batch_update_files(&batch).unwrap();

        assert_eq!(mgr.list_files().unwrap().len(), 3);
    }

    #[test]
    fn test_update_stats() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.update_file("src/lib.rs", "rust", 200).unwrap();

        mgr.record_branch_file("src/main.rs", "_default", "hash1", None).unwrap();
        mgr.record_branch_file("src/lib.rs", "_default", "hash2", None).unwrap();
        mgr.update_stats("_default").unwrap();

        assert_eq!(mgr.stats().unwrap().total_files, 2);
    }

    #[test]
    fn test_stats_empty_cache() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        let stats = mgr.stats().unwrap();

        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.files_by_language.len(), 0);
    }

    #[test]
    fn test_stats_before_init() {
        let (_tmp, mgr) = setup();

        // No cache on disk yet: stats default to zero.
        assert_eq!(mgr.stats().unwrap().total_files, 0);
    }

    #[test]
    fn test_stats_by_language() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("main.rs", "Rust", 100).unwrap();
        mgr.update_file("lib.rs", "Rust", 200).unwrap();
        mgr.update_file("script.py", "Python", 50).unwrap();
        mgr.update_file("test.py", "Python", 80).unwrap();

        mgr.record_branch_file("main.rs", "_default", "hash1", None).unwrap();
        mgr.record_branch_file("lib.rs", "_default", "hash2", None).unwrap();
        mgr.record_branch_file("script.py", "_default", "hash3", None).unwrap();
        mgr.record_branch_file("test.py", "_default", "hash4", None).unwrap();
        mgr.update_stats("_default").unwrap();

        let stats = mgr.stats().unwrap();
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&2));
        assert_eq!(stats.lines_by_language.get("Rust"), Some(&300));
        assert_eq!(stats.lines_by_language.get("Python"), Some(&130));
    }

    #[test]
    fn test_list_files_empty() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        assert_eq!(mgr.list_files().unwrap().len(), 0);
    }

    #[test]
    fn test_list_files() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.update_file("src/lib.rs", "rust", 200).unwrap();

        let files = mgr.list_files().unwrap();
        assert_eq!(files.len(), 2);

        // Listing is ordered by path.
        assert_eq!(files[0].path, "src/lib.rs");
        assert_eq!(files[1].path, "src/main.rs");

        assert_eq!(files[0].language, "rust");
    }

    #[test]
    fn test_list_files_before_init() {
        let (_tmp, mgr) = setup();

        assert_eq!(mgr.list_files().unwrap().len(), 0);
    }

    #[test]
    fn test_branch_exists() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();

        assert!(!mgr.branch_exists("main").unwrap());

        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        assert!(mgr.branch_exists("main").unwrap());
        assert!(!mgr.branch_exists("feature-branch").unwrap());
    }

    #[test]
    fn test_record_branch_file() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.record_branch_file("src/main.rs", "main", "hash1", Some("commit123")).unwrap();

        let files = mgr.get_branch_files("main").unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files.get("src/main.rs"), Some(&"hash1".to_string()));
    }

    #[test]
    fn test_get_branch_files_empty() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        assert_eq!(mgr.get_branch_files("nonexistent").unwrap().len(), 0);
    }

    #[test]
    fn test_batch_record_branch_files() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();

        // Files must be present in `files` before branch rows reference them.
        let metadata = vec![
            ("src/main.rs".to_string(), "rust".to_string(), 100),
            ("src/lib.rs".to_string(), "rust".to_string(), 200),
            ("README.md".to_string(), "markdown".to_string(), 50),
        ];
        mgr.batch_update_files(&metadata).unwrap();

        let hashed = vec![
            ("src/main.rs".to_string(), "hash1".to_string()),
            ("src/lib.rs".to_string(), "hash2".to_string()),
            ("README.md".to_string(), "hash3".to_string()),
        ];
        mgr.batch_record_branch_files(&hashed, "main", Some("commit123")).unwrap();

        let branch_files = mgr.get_branch_files("main").unwrap();
        assert_eq!(branch_files.len(), 3);
        assert_eq!(branch_files.get("src/main.rs"), Some(&"hash1".to_string()));
        assert_eq!(branch_files.get("src/lib.rs"), Some(&"hash2".to_string()));
        assert_eq!(branch_files.get("README.md"), Some(&"hash3".to_string()));
    }

    #[test]
    fn test_update_branch_metadata() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_branch_metadata("main", Some("commit123"), 10, false).unwrap();

        let info = mgr.get_branch_info("main").unwrap();
        assert_eq!(info.branch, "main");
        assert_eq!(info.commit_sha, "commit123");
        assert_eq!(info.file_count, 10);
        assert_eq!(info.is_dirty, false);
    }

    #[test]
    fn test_update_branch_metadata_dirty() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_branch_metadata("feature", Some("commit456"), 5, true).unwrap();

        assert_eq!(mgr.get_branch_info("feature").unwrap().is_dirty, true);
    }

    #[test]
    fn test_find_file_with_hash() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();
        mgr.update_file("src/main.rs", "rust", 100).unwrap();
        mgr.record_branch_file("src/main.rs", "main", "unique_hash", Some("commit123")).unwrap();

        let result = mgr.find_file_with_hash("unique_hash").unwrap();
        assert!(result.is_some());

        let (path, branch) = result.unwrap();
        assert_eq!(path, "src/main.rs");
        assert_eq!(branch, "main");
    }

    #[test]
    fn test_find_file_with_hash_not_found() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();

        assert!(mgr.find_file_with_hash("nonexistent_hash").unwrap().is_none());
    }

    #[test]
    fn test_config_toml_created() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();

        let config = std::fs::read_to_string(mgr.path().join(CONFIG_TOML)).unwrap();

        assert!(config.contains("[index]"));
        assert!(config.contains("[search]"));
        assert!(config.contains("[performance]"));
        assert!(config.contains("max_file_size"));
    }

    #[test]
    fn test_meta_db_schema() {
        let (_tmp, mgr) = setup();

        mgr.init().unwrap();

        let conn = Connection::open(mgr.path().join(META_DB)).unwrap();

        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'").unwrap()
            .query_map([], |row| row.get(0)).unwrap()
            .collect::<Result<Vec<_>, _>>().unwrap();

        for expected in [
            "files",
            "statistics",
            "config",
            "file_branches",
            "branches",
            "file_dependencies",
            "file_exports",
        ] {
            assert!(tables.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_concurrent_file_updates() {
        use std::thread;

        let tmp = TempDir::new().unwrap();
        let root = tmp.path().to_path_buf();

        CacheManager::new(&root).init().unwrap();

        // Ten threads, each with its own CacheManager over the same cache.
        let workers: Vec<_> = (0..10)
            .map(|i| {
                let root = root.clone();
                thread::spawn(move || {
                    CacheManager::new(&root)
                        .update_file(&format!("file_{}.rs", i), "rust", i * 10)
                        .unwrap();
                })
            })
            .collect();

        for worker in workers {
            worker.join().unwrap();
        }

        assert_eq!(CacheManager::new(&root).list_files().unwrap().len(), 10);
    }

    #[test]
    fn test_validate_corrupted_database() {
        use std::io::Write;

        let (_tmp, mgr) = setup();
        mgr.init().unwrap();

        // Clobber meta.db with bytes that are not a SQLite database.
        let mut f = File::create(mgr.path().join(META_DB)).unwrap();
        f.write_all(b"CORRUPTED DATA").unwrap();

        let result = mgr.validate();
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        eprintln!("Error message: {}", err_msg);
        assert!(err_msg.contains("corrupted") || err_msg.contains("not a database"));
    }

    #[test]
    fn test_validate_corrupted_trigrams() {
        use std::io::Write;

        let (_tmp, mgr) = setup();
        mgr.init().unwrap();

        // Write a bogus magic header into trigrams.bin.
        let mut f = File::create(mgr.path().join("trigrams.bin")).unwrap();
        f.write_all(b"BADM").unwrap();

        let result = mgr.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("trigrams.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_corrupted_content() {
        use std::io::Write;

        let (_tmp, mgr) = setup();
        mgr.init().unwrap();

        // Write a bogus magic header into content.bin.
        let mut f = File::create(mgr.path().join("content.bin")).unwrap();
        f.write_all(b"BADM").unwrap();

        let result = mgr.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("content.bin") && err.contains("corrupted"));
    }

    #[test]
    fn test_validate_missing_schema_table() {
        let (_tmp, mgr) = setup();
        mgr.init().unwrap();

        // Drop a required table to simulate schema corruption.
        let conn = Connection::open(mgr.path().join(META_DB)).unwrap();
        conn.execute("DROP TABLE files", []).unwrap();

        let result = mgr.validate();
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(err.contains("files") && err.contains("missing"));
    }
}