1use anyhow::{Context, Result};
9use rusqlite::{Connection, OptionalExtension};
10use std::path::Path;
11
12use crate::models::SearchResult;
13
14#[cfg(test)]
15use crate::models::{Language, Span, SymbolKind};
16
/// Handle to the symbol cache stored in the `meta.db` SQLite database.
///
/// The handle only remembers where the database file lives; each operation
/// opens its own connection to that file.
#[derive(Debug, Clone)]
pub struct SymbolCache {
    /// Location of the `meta.db` file (the cache directory joined with `meta.db`).
    db_path: std::path::PathBuf,
}
21
22impl SymbolCache {
23 pub fn open(cache_dir: &Path) -> Result<Self> {
25 let db_path = cache_dir.join("meta.db");
26
27 if !db_path.exists() {
28 anyhow::bail!("Cache not initialized - run 'rfx index' first");
29 }
30
31 let cache = Self { db_path };
32 cache.init_schema()?;
33
34 Ok(cache)
35 }
36
37 fn init_schema(&self) -> Result<()> {
39 let conn = Connection::open(&self.db_path).context("Failed to open meta.db")?;
40
41 let uses_file_id: bool = conn
43 .query_row(
44 "SELECT COUNT(*) FROM pragma_table_info('symbols') WHERE name='file_id'",
45 [],
46 |row| row.get::<_, i64>(0),
47 )
48 .unwrap_or(0)
49 > 0;
50
51 if !uses_file_id {
52 let table_exists: bool = conn
54 .query_row(
55 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='symbols'",
56 [],
57 |row| row.get::<_, i64>(0),
58 )
59 .unwrap_or(0)
60 > 0;
61
62 if table_exists {
63 log::warn!("Symbol cache schema outdated - migrating to file_id-based schema");
64 conn.execute("DROP TABLE IF EXISTS symbols", [])?;
65 }
66 }
67
68 conn.execute(
70 "CREATE TABLE IF NOT EXISTS symbols (
71 file_id INTEGER NOT NULL,
72 file_hash TEXT NOT NULL,
73 symbols_json TEXT NOT NULL,
74 last_cached INTEGER NOT NULL,
75 PRIMARY KEY (file_id, file_hash),
76 FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
77 )",
78 [],
79 )?;
80
81 conn.execute(
82 "CREATE INDEX IF NOT EXISTS idx_symbols_file_id ON symbols(file_id)",
83 [],
84 )?;
85
86 conn.execute(
87 "CREATE INDEX IF NOT EXISTS idx_symbols_hash ON symbols(file_hash)",
88 [],
89 )?;
90
91 log::debug!("Symbol cache schema initialized (file_id-based)");
92 Ok(())
93 }
94
95 pub fn get(&self, file_path: &str, file_hash: &str) -> Result<Option<Vec<SearchResult>>> {
97 let conn = Connection::open(&self.db_path)?;
98
99 let file_id: Option<i64> = conn
101 .query_row("SELECT id FROM files WHERE path = ?", [file_path], |row| {
102 row.get(0)
103 })
104 .optional()?;
105
106 let Some(file_id) = file_id else {
107 log::debug!("Symbol cache MISS: {} (file not in index)", file_path);
108 return Ok(None);
109 };
110
111 let symbols_json: Option<String> = conn
112 .query_row(
113 "SELECT symbols_json FROM symbols WHERE file_id = ? AND file_hash = ?",
114 [&file_id.to_string(), file_hash],
115 |row| row.get(0),
116 )
117 .optional()?;
118
119 match symbols_json {
120 Some(json) => {
121 let mut symbols: Vec<SearchResult> =
122 serde_json::from_str(&json).context("Failed to deserialize cached symbols")?;
123
124 for symbol in &mut symbols {
126 symbol.path = file_path.to_string();
127 }
128
129 log::debug!(
130 "Symbol cache HIT: {} ({} symbols)",
131 file_path,
132 symbols.len()
133 );
134 Ok(Some(symbols))
135 }
136 None => {
137 log::debug!("Symbol cache MISS: {}", file_path);
138 Ok(None)
139 }
140 }
141 }
142
143 pub fn batch_get(
152 &self,
153 files: &[(String, String)],
154 ) -> Result<Vec<(String, Option<Vec<SearchResult>>)>> {
155 if files.is_empty() {
156 return Ok(Vec::new());
157 }
158
159 let conn = Connection::open(&self.db_path)?;
160
161 let mut file_id_stmt = conn.prepare("SELECT id FROM files WHERE path = ?")?;
163 let mut symbols_stmt =
164 conn.prepare("SELECT symbols_json FROM symbols WHERE file_id = ? AND file_hash = ?")?;
165
166 let mut results = Vec::with_capacity(files.len());
167 let mut hits = 0;
168 let mut misses = 0;
169
170 for (file_path, file_hash) in files {
171 let file_id: Option<i64> = file_id_stmt
173 .query_row([file_path.as_str()], |row| row.get(0))
174 .optional()?;
175
176 let symbols = if let Some(file_id) = file_id {
177 let symbols_json: Option<String> = symbols_stmt
178 .query_row([&file_id.to_string(), file_hash.as_str()], |row| row.get(0))
179 .optional()?;
180
181 match symbols_json {
182 Some(json) => {
183 match serde_json::from_str::<Vec<SearchResult>>(&json) {
184 Ok(mut symbols) => {
185 for symbol in &mut symbols {
187 symbol.path = file_path.clone();
188 }
189 hits += 1;
190 Some(symbols)
191 }
192 Err(e) => {
193 log::warn!(
194 "Failed to deserialize cached symbols for {}: {}",
195 file_path,
196 e
197 );
198 misses += 1;
199 None
200 }
201 }
202 }
203 None => {
204 misses += 1;
205 None
206 }
207 }
208 } else {
209 misses += 1;
210 None
211 };
212
213 results.push((file_path.clone(), symbols));
214 }
215
216 log::debug!(
217 "Batch symbol cache: {} hits, {} misses ({} total)",
218 hits,
219 misses,
220 files.len()
221 );
222 Ok(results)
223 }
224
225 pub fn batch_get_with_kind(
238 &self,
239 file_ids: &[(i64, String, String)], kind_filter: Option<crate::models::SymbolKind>,
241 ) -> Result<std::collections::HashMap<i64, Vec<SearchResult>>> {
242 use std::collections::HashMap;
243
244 if file_ids.is_empty() {
245 return Ok(HashMap::new());
246 }
247
248 let conn = Connection::open(&self.db_path)?;
249
250 const BATCH_SIZE: usize = 900;
253
254 let file_info: HashMap<i64, (String, String)> = file_ids
256 .iter()
257 .map(|(id, hash, path)| (*id, (hash.clone(), path.clone())))
258 .collect();
259
260 let kind_for_filtering = kind_filter.clone();
262
263 let mut cache_map: HashMap<i64, Vec<SearchResult>> = HashMap::new();
265 let mut hits = 0;
266
267 for chunk in file_ids.chunks(BATCH_SIZE) {
268 let id_placeholders = chunk.iter().map(|_| "?").collect::<Vec<_>>().join(", ");
270
271 let query = format!(
273 "SELECT file_id, symbols_json
274 FROM symbols
275 WHERE file_id IN ({})",
276 id_placeholders
277 );
278
279 let params: Vec<Box<dyn rusqlite::ToSql>> = chunk
281 .iter()
282 .map(|(id, _, _)| Box::new(*id) as Box<dyn rusqlite::ToSql>)
283 .collect();
284
285 let mut stmt = conn.prepare(&query)?;
287 let param_refs: Vec<&dyn rusqlite::ToSql> = params.iter().map(|p| p.as_ref()).collect();
288 let rows = stmt.query_map(param_refs.as_slice(), |row| {
289 Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?))
290 })?;
291
292 for row_result in rows {
293 let (file_id, symbols_json) = row_result?;
294
295 if let Some((_hash, file_path)) = file_info.get(&file_id) {
297 match serde_json::from_str::<Vec<SearchResult>>(&symbols_json) {
300 Ok(mut symbols) => {
301 for symbol in &mut symbols {
303 symbol.path = file_path.clone();
304 }
305
306 if let Some(ref filter_kind) = kind_for_filtering {
310 symbols.retain(|s| &s.kind == filter_kind);
311 }
312
313 cache_map.insert(file_id, symbols);
314 hits += 1;
315 }
316 Err(e) => {
317 log::warn!(
318 "Failed to deserialize cached symbols for file_id {}: {}",
319 file_id,
320 e
321 );
322 }
323 }
324 }
325 }
326 }
327
328 let misses = file_ids.len() - hits;
329
330 if kind_for_filtering.is_some() {
331 log::debug!(
332 "Batch symbol cache with Rust-side kind filter: {} hits, {} misses ({} total, {} chunks)",
333 hits,
334 misses,
335 file_ids.len(),
336 (file_ids.len() + BATCH_SIZE - 1) / BATCH_SIZE
337 );
338 } else {
339 log::debug!(
340 "Batch symbol cache: {} hits, {} misses ({} total, {} chunks)",
341 hits,
342 misses,
343 file_ids.len(),
344 (file_ids.len() + BATCH_SIZE - 1) / BATCH_SIZE
345 );
346 }
347
348 Ok(cache_map)
349 }
350
351 pub fn set(&self, file_path: &str, file_hash: &str, symbols: &[SearchResult]) -> Result<()> {
353 let conn = Connection::open(&self.db_path)?;
354
355 let file_id: i64 = conn
357 .query_row("SELECT id FROM files WHERE path = ?", [file_path], |row| {
358 row.get(0)
359 })
360 .context(format!("File not found in index: {}", file_path))?;
361
362 let symbols_without_path: Vec<_> = symbols
364 .iter()
365 .map(|s| {
366 let mut s = s.clone();
367 s.path = String::new(); s
369 })
370 .collect();
371
372 let symbols_json =
373 serde_json::to_string(&symbols_without_path).context("Failed to serialize symbols")?;
374
375 let now = chrono::Utc::now().timestamp();
376
377 conn.execute(
378 "INSERT OR REPLACE INTO symbols (file_id, file_hash, symbols_json, last_cached)
379 VALUES (?, ?, ?, ?)",
380 [
381 &file_id.to_string(),
382 file_hash,
383 &symbols_json,
384 &now.to_string(),
385 ],
386 )?;
387
388 log::debug!("Cached {} symbols for {}", symbols.len(), file_path);
389 Ok(())
390 }
391
392 pub fn batch_set(&self, entries: &[(String, String, Vec<SearchResult>)]) -> Result<()> {
394 let mut conn = Connection::open(&self.db_path)?;
395 let tx = conn.transaction()?;
396
397 let now = chrono::Utc::now().timestamp();
398 let now_str = now.to_string();
399
400 for (file_path, file_hash, symbols) in entries {
401 let file_id: i64 = tx
403 .query_row(
404 "SELECT id FROM files WHERE path = ?",
405 [file_path.as_str()],
406 |row| row.get(0),
407 )
408 .context(format!("File not found in index: {}", file_path))?;
409
410 let symbols_without_path: Vec<_> = symbols
412 .iter()
413 .map(|s| {
414 let mut s = s.clone();
415 s.path = String::new();
416 s
417 })
418 .collect();
419
420 let symbols_json = serde_json::to_string(&symbols_without_path)
421 .context("Failed to serialize symbols")?;
422
423 tx.execute(
425 "INSERT OR REPLACE INTO symbols (file_id, file_hash, symbols_json, last_cached)
426 VALUES (?, ?, ?, ?)",
427 [
428 &file_id.to_string(),
429 file_hash.as_str(),
430 &symbols_json,
431 &now_str,
432 ],
433 )?;
434 }
435
436 tx.commit()?;
437 log::debug!("Batch cached symbols for {} files", entries.len());
438 Ok(())
439 }
440
441 pub fn clear(&self) -> Result<()> {
443 let conn = Connection::open(&self.db_path)?;
444 conn.execute("DELETE FROM symbols", [])?;
445 log::info!("Cleared symbol cache");
446 Ok(())
447 }
448
449 pub fn stats(&self) -> Result<SymbolCacheStats> {
451 let conn = Connection::open(&self.db_path)?;
452
453 let total_files: usize = conn
454 .query_row("SELECT COUNT(DISTINCT file_id) FROM symbols", [], |row| {
455 row.get(0)
456 })
457 .unwrap_or(0);
458
459 let total_entries: usize = conn
460 .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
461 .unwrap_or(0);
462
463 let cache_size_bytes: u64 = conn
465 .query_row("SELECT SUM(LENGTH(symbols_json)) FROM symbols", [], |row| {
466 row.get(0)
467 })
468 .unwrap_or(0);
469
470 Ok(SymbolCacheStats {
471 total_files,
472 total_entries,
473 cache_size_bytes,
474 })
475 }
476
477 pub fn cleanup_stale(&self) -> Result<usize> {
485 let conn = Connection::open(&self.db_path)?;
486
487 let removed = conn.execute(
488 "DELETE FROM symbols WHERE file_id NOT IN (SELECT id FROM files)",
489 [],
490 )?;
491
492 if removed > 0 {
493 log::info!("Removed {} stale symbol cache entries", removed);
494 }
495
496 Ok(removed)
497 }
498}
499
/// Summary numbers describing the contents of the symbol cache.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SymbolCacheStats {
    /// Number of distinct files that have at least one cache row.
    pub total_files: usize,
    /// Total number of cache rows (a file may have one row per content hash).
    pub total_entries: usize,
    /// Sum over all rows of the SQLite `LENGTH` of the cached `symbols_json` payload.
    pub cache_size_bytes: u64,
}
507
508#[cfg(test)]
509mod tests {
510 use super::*;
511 use crate::cache::CacheManager;
512 use tempfile::TempDir;
513
514 #[test]
515 fn test_symbol_cache_init() {
516 let temp = TempDir::new().unwrap();
517 let cache_mgr = CacheManager::new(temp.path());
518 cache_mgr.init().unwrap();
519
520 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
521 let stats = symbol_cache.stats().unwrap();
522 assert_eq!(stats.total_files, 0);
523 }
524
525 #[test]
526 fn test_symbol_cache_set_get() {
527 let temp = TempDir::new().unwrap();
528 let cache_mgr = CacheManager::new(temp.path());
529 cache_mgr.init().unwrap();
530
531 cache_mgr.update_file("test.rs", "rust", 100).unwrap();
533
534 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
535
536 let symbols = vec![SearchResult::new(
537 "test.rs".to_string(),
538 Language::Rust,
539 SymbolKind::Function,
540 Some("test_fn".to_string()),
541 Span::new(1, 0, 5, 0),
542 None,
543 "fn test_fn() {}".to_string(),
544 )];
545
546 symbol_cache.set("test.rs", "hash123", &symbols).unwrap();
548
549 let cached = symbol_cache.get("test.rs", "hash123").unwrap();
551 assert!(cached.is_some());
552 assert_eq!(cached.as_ref().unwrap().len(), 1);
553 assert_eq!(cached.unwrap()[0].symbol.as_deref(), Some("test_fn"));
554 }
555
556 #[test]
557 fn test_symbol_cache_hash_mismatch() {
558 let temp = TempDir::new().unwrap();
559 let cache_mgr = CacheManager::new(temp.path());
560 cache_mgr.init().unwrap();
561
562 cache_mgr.update_file("test.rs", "rust", 100).unwrap();
564
565 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
566
567 let symbols = vec![SearchResult::new(
568 "test.rs".to_string(),
569 Language::Rust,
570 SymbolKind::Function,
571 Some("test_fn".to_string()),
572 Span::new(1, 0, 5, 0),
573 None,
574 "fn test_fn() {}".to_string(),
575 )];
576
577 symbol_cache.set("test.rs", "hash123", &symbols).unwrap();
579
580 let cached = symbol_cache.get("test.rs", "hash456").unwrap();
582 assert!(cached.is_none());
583 }
584
585 #[test]
586 fn test_symbol_cache_batch_set() {
587 let temp = TempDir::new().unwrap();
588 let cache_mgr = CacheManager::new(temp.path());
589 cache_mgr.init().unwrap();
590
591 cache_mgr.update_file("file1.rs", "rust", 100).unwrap();
593 cache_mgr.update_file("file2.rs", "rust", 200).unwrap();
594
595 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
596
597 let entries = vec![
598 (
599 "file1.rs".to_string(),
600 "hash1".to_string(),
601 vec![SearchResult::new(
602 "file1.rs".to_string(),
603 Language::Rust,
604 SymbolKind::Function,
605 Some("fn1".to_string()),
606 Span::new(1, 0, 5, 0),
607 None,
608 "fn fn1() {}".to_string(),
609 )],
610 ),
611 (
612 "file2.rs".to_string(),
613 "hash2".to_string(),
614 vec![SearchResult::new(
615 "file2.rs".to_string(),
616 Language::Rust,
617 SymbolKind::Function,
618 Some("fn2".to_string()),
619 Span::new(1, 0, 5, 0),
620 None,
621 "fn fn2() {}".to_string(),
622 )],
623 ),
624 ];
625
626 symbol_cache.batch_set(&entries).unwrap();
627
628 let stats = symbol_cache.stats().unwrap();
629 assert_eq!(stats.total_files, 2);
630
631 let cached1 = symbol_cache.get("file1.rs", "hash1").unwrap();
632 assert!(cached1.is_some());
633
634 let cached2 = symbol_cache.get("file2.rs", "hash2").unwrap();
635 assert!(cached2.is_some());
636 }
637
638 #[test]
639 fn test_symbol_cache_batch_get() {
640 let temp = TempDir::new().unwrap();
641 let cache_mgr = CacheManager::new(temp.path());
642 cache_mgr.init().unwrap();
643
644 cache_mgr.update_file("file1.rs", "rust", 100).unwrap();
646 cache_mgr.update_file("file2.rs", "rust", 200).unwrap();
647 cache_mgr.update_file("file3.rs", "rust", 300).unwrap();
648
649 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
650
651 let entries = vec![
653 (
654 "file1.rs".to_string(),
655 "hash1".to_string(),
656 vec![SearchResult::new(
657 "file1.rs".to_string(),
658 Language::Rust,
659 SymbolKind::Function,
660 Some("fn1".to_string()),
661 Span::new(1, 0, 5, 0),
662 None,
663 "fn fn1() {}".to_string(),
664 )],
665 ),
666 (
667 "file2.rs".to_string(),
668 "hash2".to_string(),
669 vec![SearchResult::new(
670 "file2.rs".to_string(),
671 Language::Rust,
672 SymbolKind::Struct,
673 Some("Struct2".to_string()),
674 Span::new(1, 0, 5, 0),
675 None,
676 "struct Struct2 {}".to_string(),
677 )],
678 ),
679 (
680 "file3.rs".to_string(),
681 "hash3".to_string(),
682 vec![SearchResult::new(
683 "file3.rs".to_string(),
684 Language::Rust,
685 SymbolKind::Enum,
686 Some("Enum3".to_string()),
687 Span::new(1, 0, 5, 0),
688 None,
689 "enum Enum3 {}".to_string(),
690 )],
691 ),
692 ];
693
694 symbol_cache.batch_set(&entries).unwrap();
695
696 let lookup = vec![
698 ("file1.rs".to_string(), "hash1".to_string()),
699 ("file2.rs".to_string(), "hash2".to_string()),
700 ("file3.rs".to_string(), "hash3".to_string()),
701 ];
702
703 let results = symbol_cache.batch_get(&lookup).unwrap();
704 assert_eq!(results.len(), 3);
705
706 assert!(results[0].1.is_some());
708 assert_eq!(
709 results[0].1.as_ref().unwrap()[0].symbol.as_deref(),
710 Some("fn1")
711 );
712
713 assert!(results[1].1.is_some());
714 assert_eq!(
715 results[1].1.as_ref().unwrap()[0].symbol.as_deref(),
716 Some("Struct2")
717 );
718
719 assert!(results[2].1.is_some());
720 assert_eq!(
721 results[2].1.as_ref().unwrap()[0].symbol.as_deref(),
722 Some("Enum3")
723 );
724
725 let mixed_lookup = vec![
727 ("file1.rs".to_string(), "hash1".to_string()), ("nonexistent.rs".to_string(), "hash999".to_string()), ("file2.rs".to_string(), "wrong_hash".to_string()), ("file3.rs".to_string(), "hash3".to_string()), ];
732
733 let mixed_results = symbol_cache.batch_get(&mixed_lookup).unwrap();
734 assert_eq!(mixed_results.len(), 4);
735
736 assert!(mixed_results[0].1.is_some()); assert!(mixed_results[1].1.is_none()); assert!(mixed_results[2].1.is_none()); assert!(mixed_results[3].1.is_some()); let empty_results = symbol_cache.batch_get(&[]).unwrap();
743 assert_eq!(empty_results.len(), 0);
744 }
745
746 #[test]
747 fn test_symbol_cache_clear() {
748 let temp = TempDir::new().unwrap();
749 let cache_mgr = CacheManager::new(temp.path());
750 cache_mgr.init().unwrap();
751
752 cache_mgr.update_file("test.rs", "rust", 100).unwrap();
754
755 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
756
757 let symbols = vec![SearchResult::new(
758 "test.rs".to_string(),
759 Language::Rust,
760 SymbolKind::Function,
761 Some("test_fn".to_string()),
762 Span::new(1, 0, 5, 0),
763 None,
764 "fn test_fn() {}".to_string(),
765 )];
766
767 symbol_cache.set("test.rs", "hash123", &symbols).unwrap();
768
769 let stats_before = symbol_cache.stats().unwrap();
770 assert_eq!(stats_before.total_files, 1);
771
772 symbol_cache.clear().unwrap();
773
774 let stats_after = symbol_cache.stats().unwrap();
775 assert_eq!(stats_after.total_files, 0);
776 }
777
778 #[test]
779 fn test_symbol_cache_cleanup_stale() {
780 let temp = TempDir::new().unwrap();
781 let cache_mgr = CacheManager::new(temp.path());
782 cache_mgr.init().unwrap();
783
784 cache_mgr.update_file("exists.rs", "rust", 100).unwrap();
786 cache_mgr
787 .record_branch_file("exists.rs", "main", "hash1", None)
788 .unwrap();
789
790 cache_mgr.update_file("deleted.rs", "rust", 200).unwrap();
792
793 let symbol_cache = SymbolCache::open(cache_mgr.path()).unwrap();
794
795 let symbols = vec![SearchResult::new(
797 "test.rs".to_string(),
798 Language::Rust,
799 SymbolKind::Function,
800 Some("test_fn".to_string()),
801 Span::new(1, 0, 5, 0),
802 None,
803 "fn test_fn() {}".to_string(),
804 )];
805
806 symbol_cache.set("exists.rs", "hash1", &symbols).unwrap();
807 symbol_cache.set("deleted.rs", "hash2", &symbols).unwrap();
808
809 let stats_before = symbol_cache.stats().unwrap();
810 assert_eq!(stats_before.total_files, 2);
811
812 let conn = rusqlite::Connection::open(cache_mgr.path().join("meta.db")).unwrap();
816 conn.execute("DELETE FROM files WHERE path = 'deleted.rs'", [])
817 .unwrap();
818
819 let removed = symbol_cache.cleanup_stale().unwrap();
821 assert_eq!(removed, 0); let stats_after = symbol_cache.stats().unwrap();
824 assert_eq!(stats_after.total_files, 1);
825
826 let cached = symbol_cache.get("exists.rs", "hash1").unwrap();
828 assert!(cached.is_some());
829
830 let cached2 = symbol_cache.get("deleted.rs", "hash2").unwrap();
832 assert!(cached2.is_none());
833 }
834}