1use crate::embed::Embedder;
2use crate::parser::{DefKind, LanguageId, ParsedDef, ParsedFile, ParsedImport, ParsedRef, RefKind};
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::path::{Path, PathBuf};
6use std::time::UNIX_EPOCH;
7
8pub fn is_relevant_source_event(event: ¬ify::Event) -> bool {
11 use notify::EventKind::*;
12 if matches!(event.kind, Access(_) | Other) {
13 return false;
14 }
15 for path in &event.paths {
16 if path.components().any(|c| c.as_os_str() == ".sift") {
17 continue;
18 }
19 if LanguageId::from_path(path).is_some() {
20 return true;
21 }
22 }
23 false
24}
25
/// Four-byte tag that `CodeIndex::save` writes at the start of an index file;
/// `CodeIndex::load` checks for it to recognise the V2 layout.
const V2_MAGIC: &[u8; 4] = b"siV2";

/// Identifier of a symbol; used as an index into `CodeIndex::symbols`.
pub type SymbolId = usize;
30
/// A single definition recorded in the index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Symbol {
    /// Position of this symbol in `CodeIndex::symbols` (assigned when the
    /// symbol is pushed, and reassigned when the symbol list is compacted).
    pub id: SymbolId,
    /// Name of the definition, copied from `ParsedDef::name`.
    pub name: String,
    /// Kind of definition, copied from `ParsedDef::kind`.
    pub kind: DefKind,
    /// Path of the file the definition was parsed from.
    pub file: PathBuf,
    /// First line of the definition (`ParsedDef::start_line`).
    pub line: usize,
    /// Last line of the definition (`ParsedDef::end_line`).
    pub end_line: usize,
    /// Documentation text attached to the definition, if the parser found any.
    pub doc: Option<String>,
    /// Embedding vector for this symbol; `None` until one has been computed
    /// or carried over from an older index.
    pub embedding: Option<Vec<f32>>,
}
42
/// A reference found in a file, stored as a caller -> callee edge.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CallEdge {
    /// Name of the symbol whose line range contains `caller_line`; stays an
    /// empty string when no such symbol is found in `caller_file`.
    pub caller_name: String,
    /// File in which the reference occurs.
    pub caller_file: PathBuf,
    /// Line of the reference (`ParsedRef::line`).
    pub caller_line: usize,
    /// Name being referenced (`ParsedRef::name`).
    pub callee_name: String,
}
50
/// An import statement found in a file, plus the symbol it was resolved to.
///
/// All `resolved_*` fields are `None` until import resolution finds a symbol
/// whose name equals `symbol_name`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportEdge {
    /// File that contains the import.
    pub file: PathBuf,
    /// Imported name exactly as reported by the parser (`ParsedImport::name`).
    pub symbol_name: String,
    /// Id of the symbol the import resolved to.
    pub resolved_to: Option<SymbolId>,
    /// File of the resolved symbol.
    pub resolved_file: Option<PathBuf>,
    /// Start line of the resolved symbol.
    pub resolved_line: Option<usize>,
    /// Lower-cased `Debug` rendering of the resolved symbol's kind.
    pub resolved_kind: Option<String>,
}
60
/// In-memory index of symbols, call edges and imports for a source tree.
///
/// The serialized form contains every field except the two lookup maps, which
/// are marked `#[serde(skip)]` and rebuilt from `symbols` after loading.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeIndex {
    /// All known symbols; a symbol's id is its position in this vector.
    pub symbols: Vec<Symbol>,
    /// One edge per reference reported by the parser.
    pub calls: Vec<CallEdge>,
    /// One edge per import reported by the parser.
    pub imports: Vec<ImportEdge>,
    /// Paths of the indexed files, without duplicates.
    pub files: Vec<PathBuf>,
    /// Root directory the index was built for; `relative_path` strips it.
    pub root: PathBuf,

    /// Modification time recorded per file; `classify_files` compares these
    /// values against a freshly collected map.
    pub file_mtimes: HashMap<PathBuf, u64>,

    /// Symbol ids grouped by symbol name (not serialized).
    #[serde(skip)]
    by_name: HashMap<String, Vec<SymbolId>>,
    /// Symbol ids grouped by defining file (not serialized).
    #[serde(skip)]
    by_file: HashMap<PathBuf, Vec<SymbolId>>,
}
79
/// Legacy (V1) serialized layout of the index.
///
/// Compared with `CodeIndex` it has no `file_mtimes` field and its lookup maps
/// are part of the serialized data. `CodeIndex::load` falls back to this layout
/// when the file does not start with `V2_MAGIC`.
#[derive(Serialize, Deserialize)]
struct CodeIndexV1 {
    pub symbols: Vec<Symbol>,
    pub calls: Vec<CallEdge>,
    pub imports: Vec<ImportEdge>,
    pub files: Vec<PathBuf>,
    pub root: PathBuf,
    // Unlike in `CodeIndex`, these two maps are serialized here.
    by_name: HashMap<String, Vec<SymbolId>>,
    by_file: HashMap<PathBuf, Vec<SymbolId>>,
}
91
92impl From<CodeIndexV1> for CodeIndex {
93 fn from(old: CodeIndexV1) -> Self {
94 CodeIndex {
95 symbols: old.symbols,
96 calls: old.calls,
97 imports: old.imports,
98 files: old.files,
99 root: old.root,
100 file_mtimes: HashMap::new(),
101 by_name: old.by_name,
102 by_file: old.by_file,
103 }
104 }
105}
106
107impl CodeIndex {
108 pub fn build(
109 parsed: Vec<ParsedFile>,
110 root: &Path,
111 embedder: Option<&dyn Embedder>,
112 ) -> Self {
113 let root = root.to_path_buf();
114 let mut idx = CodeIndex {
115 symbols: Vec::new(),
116 calls: Vec::new(),
117 imports: Vec::new(),
118 files: Vec::new(),
119 file_mtimes: HashMap::new(),
120 by_name: HashMap::new(),
121 by_file: HashMap::new(),
122 root,
123 };
124
125 for pf in &parsed {
126 idx.add_file(pf);
127 }
128
129 if let Some(embedder) = embedder {
130 idx.compute_embeddings(embedder);
131 }
132
133 idx.resolve_caller_names();
134 idx.resolve_imports();
135 idx
136 }
137
138 fn compute_embeddings(&mut self, embedder: &dyn Embedder) {
139 let texts: Vec<String> = self
140 .symbols
141 .iter()
142 .map(|s| {
143 let mut t = format!("{}: {:?}", s.name, s.kind);
144 if let Some(ref doc) = s.doc {
145 t.push('\n');
146 t.push_str(doc);
147 }
148 t
149 })
150 .collect();
151 let text_refs: Vec<&str> = texts.iter().map(|s| s.as_str()).collect();
152 if text_refs.is_empty() {
153 return;
154 }
155 match embedder.embed(&text_refs) {
156 Ok(embeddings) => {
157 for (sym, emb) in self.symbols.iter_mut().zip(embeddings) {
158 sym.embedding = Some(emb);
159 }
160 }
161 Err(e) => {
162 eprintln!("warn: embedding computation failed: {:#}", e);
163 }
164 }
165 }
166
167 fn add_file(&mut self, pf: &ParsedFile) {
168 if !self.files.contains(&pf.path) {
169 self.files.push(pf.path.clone());
170 }
171
172 for def in &pf.definitions {
173 let id = self.symbols.len();
174 self.symbols.push(Symbol {
175 id,
176 name: def.name.clone(),
177 kind: def.kind,
178 file: pf.path.clone(),
179 line: def.start_line,
180 end_line: def.end_line,
181 doc: def.doc.clone(),
182 embedding: None,
183 });
184 self.by_name
185 .entry(def.name.clone())
186 .or_default()
187 .push(id);
188 self.by_file
189 .entry(pf.path.clone())
190 .or_default()
191 .push(id);
192 }
193
194 for rf in &pf.references {
195 self.calls.push(CallEdge {
196 caller_name: String::new(),
197 caller_file: pf.path.clone(),
198 caller_line: rf.line,
199 callee_name: rf.name.clone(),
200 });
201 }
202
203 for imp in &pf.imports {
204 self.imports.push(ImportEdge {
205 file: pf.path.clone(),
206 symbol_name: imp.name.clone(),
207 resolved_to: None,
208 resolved_file: None,
209 resolved_line: None,
210 resolved_kind: None,
211 });
212 }
213 }
214
215 fn resolve_caller_names(&mut self) {
216 for call in &mut self.calls {
217 let Some(sym_ids) = self.by_file.get(&call.caller_file) else {
218 continue;
219 };
220 for &sym_id in sym_ids {
221 let Some(sym) = self.symbols.get(sym_id) else {
222 continue;
223 };
224 if sym.line <= call.caller_line && call.caller_line <= sym.end_line {
225 call.caller_name = sym.name.clone();
226 break;
227 }
228 }
229 }
230 }
231
232 fn resolve_imports(&mut self) {
233 for imp in &mut self.imports {
234 let Some(sym_ids) = self.by_name.get(&imp.symbol_name) else {
235 continue;
236 };
237 let resolved = sym_ids
239 .iter()
240 .filter_map(|id| self.symbols.get(*id))
241 .find(|s| s.file != imp.file)
242 .or_else(|| {
243 sym_ids
244 .iter()
245 .filter_map(|id| self.symbols.get(*id))
246 .next()
247 });
248 if let Some(sym) = resolved {
249 imp.resolved_to = Some(sym.id);
250 imp.resolved_file = Some(sym.file.clone());
251 imp.resolved_line = Some(sym.line);
252 imp.resolved_kind = Some(format!("{:?}", sym.kind).to_lowercase());
253 }
254 }
255 }
256
257 pub fn collect_mtimes(root: &Path) -> HashMap<PathBuf, u64> {
260 let mut mtimes = HashMap::new();
261 let walk = ignore::WalkBuilder::new(root)
262 .standard_filters(true)
263 .build();
264 for entry in walk {
265 let Ok(entry) = entry else { continue };
266 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
267 continue;
268 }
269 let path = entry.path();
270 if path.components().any(|c| c.as_os_str() == "target") {
271 continue;
272 }
273 if LanguageId::from_path(path).is_none() {
274 continue;
275 }
276 if let Ok(meta) = path.metadata() {
277 if let Ok(mtime) = meta.modified() {
278 if let Ok(dur) = mtime.duration_since(UNIX_EPOCH) {
279 mtimes.insert(path.to_path_buf(), dur.as_millis() as u64);
280 }
281 }
282 }
283 }
284 mtimes
285 }
286
287 pub fn classify_files(&self, current: &HashMap<PathBuf, u64>) -> (Vec<PathBuf>, Vec<PathBuf>) {
290 let mut unchanged = Vec::new();
291 let mut changed = Vec::new();
292
293 let mut seen = std::collections::HashSet::new();
295 for (path, mtime) in current {
296 seen.insert(path.clone());
297 match self.file_mtimes.get(path) {
298 Some(stored) if *stored == *mtime => unchanged.push(path.clone()),
299 _ => changed.push(path.clone()),
300 }
301 }
302 for path in self.file_mtimes.keys() {
304 if !seen.contains(path) {
305 changed.push(path.clone());
306 }
307 }
308
309 (unchanged, changed)
310 }
311
312 pub fn reconstruct_parsed_file(&self, path: &Path) -> ParsedFile {
315 let language = LanguageId::from_path(path).unwrap_or(LanguageId::Rust);
316 let definitions: Vec<ParsedDef> = self
317 .by_file
318 .get(path)
319 .map(|ids| {
320 ids.iter()
321 .filter_map(|id| self.symbols.get(*id))
322 .map(|s| ParsedDef {
323 name: s.name.clone(),
324 kind: s.kind,
325 start_line: s.line,
326 end_line: s.end_line,
327 doc: s.doc.clone(),
328 })
329 .collect()
330 })
331 .unwrap_or_default();
332 let references: Vec<ParsedRef> = self
333 .calls
334 .iter()
335 .filter(|c| c.caller_file == path)
336 .map(|c| ParsedRef {
337 name: c.callee_name.clone(),
338 kind: RefKind::Call,
339 line: c.caller_line,
340 })
341 .collect();
342 let imports: Vec<ParsedImport> = self
343 .imports
344 .iter()
345 .filter(|i| i.file == path)
346 .map(|i| ParsedImport {
347 name: i.symbol_name.clone(),
348 })
349 .collect();
350 ParsedFile {
351 path: path.to_path_buf(),
352 language,
353 definitions,
354 references,
355 imports,
356 }
357 }
358
359 fn map_push<K: std::hash::Hash + Eq + Clone>(
361 map: &mut HashMap<K, Vec<SymbolId>>,
362 key: &K,
363 id: SymbolId,
364 ) {
365 match map.get_mut(key) {
366 Some(v) => v.push(id),
367 None => {
368 map.insert(key.clone(), vec![id]);
369 }
370 }
371 }
372
373 pub fn apply_changes(&mut self, changed: &[PathBuf], new_parsed: &[ParsedFile]) {
378 let changed_set: std::collections::HashSet<&Path> =
379 changed.iter().map(|p| p.as_path()).collect();
380
381 let changed_names = self.rebuild_symbols_and_maps(&changed_set, new_parsed);
382 self.rebuild_calls_from(&changed_set, new_parsed);
383 self.rebuild_imports_from(&changed_set, new_parsed);
384 self.rebuild_files_from(&changed_set, new_parsed);
385
386 self.resolve_caller_names_incremental(&changed_set);
387 self.resolve_imports_incremental(&changed_set, &changed_names);
388 }
389
390 fn rebuild_symbols_and_maps(
393 &mut self,
394 changed_set: &std::collections::HashSet<&Path>,
395 new_parsed: &[ParsedFile],
396 ) -> std::collections::HashSet<String> {
397 let old_symbols = std::mem::take(&mut self.symbols);
398 let n = old_symbols.len();
399
400 let mut offset = vec![0usize; n];
403 let mut removed = vec![false; n];
404 let mut removed_count = 0;
405 let mut new_symbols = Vec::with_capacity(n);
406 let mut changed_names: std::collections::HashSet<String> =
407 std::collections::HashSet::new();
408
409 for (i, mut sym) in old_symbols.into_iter().enumerate() {
410 offset[i] = removed_count;
411 if changed_set.contains(sym.file.as_path()) {
412 changed_names.insert(sym.name);
413 removed[i] = true;
414 removed_count += 1;
415 } else {
416 let id = new_symbols.len();
417 sym.id = id;
418 new_symbols.push(sym);
419 }
420 }
421
422 let old_by_name = std::mem::take(&mut self.by_name);
425 let mut new_by_name: HashMap<String, Vec<SymbolId>> = old_by_name
426 .into_iter()
427 .map(|(name, ids)| {
428 let ids: Vec<SymbolId> = ids
429 .into_iter()
430 .filter_map(|old_id| {
431 if removed[old_id] {
432 None
433 } else {
434 Some(old_id - offset[old_id])
435 }
436 })
437 .collect();
438 (name, ids)
439 })
440 .filter(|(_, ids)| !ids.is_empty())
441 .collect();
442
443 let old_by_file = std::mem::take(&mut self.by_file);
444 let mut new_by_file: HashMap<PathBuf, Vec<SymbolId>> = old_by_file
445 .into_iter()
446 .map(|(file, ids)| {
447 let ids: Vec<SymbolId> = ids
448 .into_iter()
449 .filter_map(|old_id| {
450 if removed[old_id] {
451 None
452 } else {
453 Some(old_id - offset[old_id])
454 }
455 })
456 .collect();
457 (file, ids)
458 })
459 .filter(|(_, ids)| !ids.is_empty())
460 .collect();
461
462 for pf in new_parsed {
464 for def in &pf.definitions {
465 changed_names.insert(def.name.clone());
466 let id = new_symbols.len();
467 new_symbols.push(Symbol {
468 id,
469 name: def.name.clone(),
470 kind: def.kind,
471 file: pf.path.clone(),
472 line: def.start_line,
473 end_line: def.end_line,
474 doc: def.doc.clone(),
475 embedding: None,
476 });
477 Self::map_push(&mut new_by_name, &def.name, id);
478 Self::map_push(&mut new_by_file, &pf.path, id);
479 }
480 }
481
482 self.symbols = new_symbols;
483 self.by_name = new_by_name;
484 self.by_file = new_by_file;
485 changed_names
486 }
487
488 fn rebuild_calls_from(
490 &mut self,
491 changed_set: &std::collections::HashSet<&Path>,
492 new_parsed: &[ParsedFile],
493 ) {
494 let old_calls = std::mem::take(&mut self.calls);
495 let mut new_calls = Vec::with_capacity(old_calls.len());
496 for call in old_calls {
497 if !changed_set.contains(call.caller_file.as_path()) {
498 new_calls.push(call);
499 }
500 }
501 for pf in new_parsed {
502 for rf in &pf.references {
503 new_calls.push(CallEdge {
504 caller_name: String::new(),
505 caller_file: pf.path.clone(),
506 caller_line: rf.line,
507 callee_name: rf.name.clone(),
508 });
509 }
510 }
511 self.calls = new_calls;
512 }
513
514 fn rebuild_imports_from(
516 &mut self,
517 changed_set: &std::collections::HashSet<&Path>,
518 new_parsed: &[ParsedFile],
519 ) {
520 let old_imports = std::mem::take(&mut self.imports);
521 let mut new_imports = Vec::with_capacity(old_imports.len());
522 for imp in old_imports {
523 if !changed_set.contains(imp.file.as_path()) {
524 new_imports.push(imp);
525 }
526 }
527 for pf in new_parsed {
528 for imp in &pf.imports {
529 new_imports.push(ImportEdge {
530 file: pf.path.clone(),
531 symbol_name: imp.name.clone(),
532 resolved_to: None,
533 resolved_file: None,
534 resolved_line: None,
535 resolved_kind: None,
536 });
537 }
538 }
539 self.imports = new_imports;
540 }
541
542 fn rebuild_files_from(
544 &mut self,
545 changed_set: &std::collections::HashSet<&Path>,
546 new_parsed: &[ParsedFile],
547 ) {
548 let old_files = std::mem::take(&mut self.files);
549 let mut new_files = Vec::with_capacity(old_files.len());
550 for f in old_files {
551 if !changed_set.contains(f.as_path()) {
552 new_files.push(f);
553 }
554 }
555 for pf in new_parsed {
556 if !new_files.contains(&pf.path) {
557 new_files.push(pf.path.clone());
558 }
559 }
560 self.files = new_files;
561 }
562
563 fn resolve_caller_names_incremental(&mut self, changed: &std::collections::HashSet<&Path>) {
565 for call in &mut self.calls {
566 if !changed.contains(call.caller_file.as_path()) {
567 continue;
568 }
569 call.caller_name = String::new();
570 let Some(sym_ids) = self.by_file.get(&call.caller_file) else {
571 continue;
572 };
573 for &sym_id in sym_ids {
574 let Some(sym) = self.symbols.get(sym_id) else {
575 continue;
576 };
577 if sym.line <= call.caller_line && call.caller_line <= sym.end_line {
578 call.caller_name = sym.name.clone();
579 break;
580 }
581 }
582 }
583 }
584
585 fn resolve_imports_incremental(
588 &mut self,
589 changed: &std::collections::HashSet<&Path>,
590 changed_names: &std::collections::HashSet<String>,
591 ) {
592 for imp in &mut self.imports {
593 if !changed.contains(imp.file.as_path()) && !changed_names.contains(&imp.symbol_name)
594 {
595 continue;
596 }
597 imp.resolved_to = None;
598 imp.resolved_file = None;
599 imp.resolved_line = None;
600 imp.resolved_kind = None;
601 let Some(sym_ids) = self.by_name.get(&imp.symbol_name) else {
602 continue;
603 };
604 let resolved = sym_ids
605 .iter()
606 .filter_map(|id| self.symbols.get(*id))
607 .find(|s| s.file != imp.file)
608 .or_else(|| {
609 sym_ids
610 .iter()
611 .filter_map(|id| self.symbols.get(*id))
612 .next()
613 });
614 if let Some(sym) = resolved {
615 imp.resolved_to = Some(sym.id);
616 imp.resolved_file = Some(sym.file.clone());
617 imp.resolved_line = Some(sym.line);
618 imp.resolved_kind = Some(format!("{:?}", sym.kind).to_lowercase());
619 }
620 }
621 }
622
623 pub fn compute_missing_embeddings(&mut self, embedder: &dyn Embedder) {
625 let new_ids: Vec<usize> = self
626 .symbols
627 .iter()
628 .enumerate()
629 .filter(|(_, s)| s.embedding.is_none())
630 .map(|(i, _)| i)
631 .collect();
632 if new_ids.is_empty() {
633 return;
634 }
635 let texts: Vec<String> = new_ids
636 .iter()
637 .map(|&id| {
638 let s = &self.symbols[id];
639 let mut t = format!("{}: {:?}", s.name, s.kind);
640 if let Some(ref doc) = s.doc {
641 t.push('\n');
642 t.push_str(doc);
643 }
644 t
645 })
646 .collect();
647 let text_refs: Vec<&str> = texts.iter().map(|s| s.as_str()).collect();
648 if let Ok(embeddings) = embedder.embed(&text_refs) {
649 for (&id, emb) in new_ids.iter().zip(embeddings) {
650 self.symbols[id].embedding = Some(emb);
651 }
652 }
653 }
654
655 pub fn preserve_embeddings(&mut self, old: &CodeIndex) {
658 let old_lookup: std::collections::HashMap<(&str, &Path, usize), &[f32]> = old
659 .symbols
660 .iter()
661 .filter_map(|s| s.embedding.as_deref().map(|emb| ((s.name.as_str(), &*s.file, s.line), emb)))
662 .collect();
663 for sym in &mut self.symbols {
664 if sym.embedding.is_none() {
665 if let Some(emb) = old_lookup.get(&(sym.name.as_str(), &*sym.file, sym.line)) {
666 sym.embedding = Some(emb.to_vec());
667 }
668 }
669 }
670 }
671
672 pub fn save(&self, path: &Path) -> anyhow::Result<()> {
673 if let Some(parent) = path.parent() {
674 std::fs::create_dir_all(parent)?;
675 }
676 let mut buf = V2_MAGIC.to_vec();
677 buf.extend(bincode::serialize(self)?);
678 let tmp = path.with_extension("tmp");
681 std::fs::write(&tmp, &buf)?;
682 std::fs::rename(&tmp, path)?;
683 Ok(())
684 }
685
686 fn rebuild_maps(&mut self) {
689 for (i, sym) in self.symbols.iter().enumerate() {
690 Self::map_push(&mut self.by_name, &sym.name, i);
691 Self::map_push(&mut self.by_file, &sym.file, i);
692 }
693 }
694
695 pub fn load(path: &Path) -> anyhow::Result<Self> {
696 let bytes = std::fs::read(path)?;
697 if bytes.len() >= 4 && &bytes[..4] == V2_MAGIC {
698 let mut idx: CodeIndex = bincode::deserialize(&bytes[4..])?;
699 idx.rebuild_maps();
700 return Ok(idx);
701 }
702 let old: CodeIndexV1 = bincode::deserialize(&bytes)?;
704 Ok(old.into())
705 }
706
707 pub fn find_symbols_by_name(&self, name: &str) -> Vec<&Symbol> {
708 self.by_name
709 .get(name)
710 .map(|ids| ids.iter().filter_map(|id| self.symbols.get(*id)).collect())
711 .unwrap_or_default()
712 }
713
714 pub fn find_symbols_by_pattern(&self, pattern: &str) -> Vec<&Symbol> {
715 let lower = pattern.to_lowercase();
716 self.symbols
717 .iter()
718 .filter(|s| s.name.to_lowercase().contains(&lower))
719 .collect()
720 }
721
722 pub fn find_calls_to(&self, name: &str) -> Vec<&CallEdge> {
723 self.calls
724 .iter()
725 .filter(|c| c.callee_name == name)
726 .collect()
727 }
728
729 pub fn find_calls_by(&self, name: &str) -> Vec<&CallEdge> {
730 self.calls
731 .iter()
732 .filter(|c| c.caller_name == name)
733 .collect()
734 }
735
736 pub fn find_implementations(&self, name: &str) -> Vec<&Symbol> {
737 self.symbols
738 .iter()
739 .filter(|s| s.kind == DefKind::Impl && s.name == name)
740 .collect()
741 }
742
743 pub fn find_symbols_in_file(&self, file: &Path) -> Vec<&Symbol> {
744 self.by_file
745 .get(file)
746 .map(|ids| ids.iter().filter_map(|id| self.symbols.get(*id)).collect())
747 .unwrap_or_default()
748 }
749
750 pub fn relative_path(&self, path: &Path) -> String {
751 path.strip_prefix(&self.root)
752 .unwrap_or(path)
753 .to_string_lossy()
754 .to_string()
755 }
756
757 pub fn find_imports_in_file(&self, file: &Path) -> Vec<&ImportEdge> {
758 self.imports
759 .iter()
760 .filter(|i| i.file == file)
761 .collect()
762 }
763
764 pub fn find_importers_of(&self, name: &str) -> Vec<&ImportEdge> {
765 self.imports
766 .iter()
767 .filter(|i| {
768 i.resolved_to
769 .and_then(|id| self.symbols.get(id))
770 .is_some_and(|s| s.name == name)
771 })
772 .collect()
773 }
774
775 pub fn semantic_search(
776 &self,
777 query_embed: &[f32],
778 k: usize,
779 ) -> Vec<(f64, &Symbol)> {
780 let mut scores: Vec<(f64, &Symbol)> = self
781 .symbols
782 .iter()
783 .filter_map(|s| s.embedding.as_ref().map(|e| (cosine_similarity(query_embed, e), s)))
784 .collect();
785 scores.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
786 scores.truncate(k);
787 scores
788 }
789}
790
/// Cosine similarity of two vectors, computed in `f64`.
///
/// Returns `0.0` when either vector has zero norm (including empty input).
/// The dot product covers only the common prefix when the lengths differ,
/// while each norm covers its whole vector.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f64 {
    let norm = |v: &[f32]| -> f64 {
        v.iter()
            .map(|&x| f64::from(x) * f64::from(x))
            .sum::<f64>()
            .sqrt()
    };

    let (norm_a, norm_b) = (norm(a), norm(b));
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    let dot: f64 = a
        .iter()
        .zip(b)
        .map(|(&x, &y)| f64::from(x) * f64::from(y))
        .sum();
    dot / (norm_a * norm_b)
}
801
802#[cfg(test)]
803mod tests {
804 use super::*;
805 use crate::parser::{DefKind, ParsedDef, ParsedFile, ParsedImport, ParsedRef, RefKind};
806
807 fn make_file(
808 path: &str,
809 defs: Vec<(&str, DefKind, usize, usize)>,
810 refs: Vec<(&str, usize)>,
811 imports: Vec<&str>,
812 ) -> ParsedFile {
813 ParsedFile {
814 path: PathBuf::from(path),
815 language: crate::parser::LanguageId::Rust,
816 definitions: defs
817 .into_iter()
818 .map(|(name, kind, start_line, end_line)| ParsedDef {
819 name: name.to_string(),
820 kind,
821 start_line,
822 end_line,
823 doc: None,
824 })
825 .collect(),
826 references: refs
827 .into_iter()
828 .map(|(name, line)| ParsedRef {
829 name: name.to_string(),
830 kind: RefKind::Call,
831 line,
832 })
833 .collect(),
834 imports: imports
835 .into_iter()
836 .map(|name| ParsedImport {
837 name: name.to_string(),
838 })
839 .collect(),
840 }
841 }
842
843 #[test]
844 fn test_build_empty_index() {
845 let index = CodeIndex::build(vec![], Path::new("/root"), None);
846 assert_eq!(index.symbols.len(), 0);
847 assert_eq!(index.calls.len(), 0);
848 assert_eq!(index.imports.len(), 0);
849 assert_eq!(index.files.len(), 0);
850 }
851
852 #[test]
853 fn test_build_index_with_symbols() {
854 let files = vec![make_file(
855 "src/main.rs",
856 vec![("main", DefKind::Function, 1, 5)],
857 vec![],
858 vec![],
859 )];
860 let index = CodeIndex::build(files, Path::new("/root"), None);
861 assert_eq!(index.symbols.len(), 1);
862 assert_eq!(index.symbols[0].name, "main");
863 assert_eq!(index.symbols[0].kind, DefKind::Function);
864 assert_eq!(index.symbols[0].line, 1);
865 assert_eq!(index.symbols[0].end_line, 5);
866 }
867
868 #[test]
869 fn test_find_symbols_by_name() {
870 let files = vec![make_file(
871 "src/lib.rs",
872 vec![("foo", DefKind::Function, 1, 3), ("bar", DefKind::Function, 5, 7)],
873 vec![],
874 vec![],
875 )];
876 let index = CodeIndex::build(files, Path::new("/root"), None);
877 let found = index.find_symbols_by_name("foo");
878 assert_eq!(found.len(), 1);
879 assert_eq!(found[0].name, "foo");
880 }
881
882 #[test]
883 fn test_find_symbols_by_pattern() {
884 let files = vec![make_file(
885 "src/lib.rs",
886 vec![
887 ("calculate_revenue", DefKind::Function, 1, 3),
888 ("calculate_expenses", DefKind::Function, 5, 7),
889 ("print_report", DefKind::Function, 9, 11),
890 ],
891 vec![],
892 vec![],
893 )];
894 let index = CodeIndex::build(files, Path::new("/root"), None);
895 let found = index.find_symbols_by_pattern("calculate");
896 assert_eq!(found.len(), 2);
897 }
898
899 #[test]
900 fn test_calls_are_recorded() {
901 let files = vec![make_file(
902 "src/main.rs",
903 vec![("run", DefKind::Function, 1, 10)],
904 vec![("helper", 3),("other", 5)],
905 vec![],
906 )];
907 let index = CodeIndex::build(files, Path::new("/root"), None);
908 assert_eq!(index.calls.len(), 2);
909 }
910
911 #[test]
912 fn test_imports_are_recorded() {
913 let files = vec![make_file(
914 "src/main.rs",
915 vec![],
916 vec![],
917 vec!["HashMap", "Vec"],
918 )];
919 let index = CodeIndex::build(files, Path::new("/root"), None);
920 assert_eq!(index.imports.len(), 2);
921 assert_eq!(index.imports[0].symbol_name, "HashMap");
922 assert!(index.imports[0].resolved_to.is_none());
924 }
925
926 #[test]
927 fn test_import_resolution() {
928 let files = vec![
929 make_file(
930 "src/lib.rs",
931 vec![("HashMap", DefKind::Struct, 10, 30)],
932 vec![],
933 vec![],
934 ),
935 make_file(
936 "src/main.rs",
937 vec![("main", DefKind::Function, 1, 5)],
938 vec![],
939 vec!["HashMap"],
940 ),
941 ];
942 let index = CodeIndex::build(files, Path::new("/root"), None);
943 let imports = index.find_imports_in_file(Path::new("src/main.rs"));
944 assert_eq!(imports.len(), 1);
945 let imp = imports[0];
946 assert!(imp.resolved_to.is_some());
947 assert_eq!(imp.resolved_file.as_deref(), Some(Path::new("src/lib.rs")));
948 assert_eq!(imp.resolved_line, Some(10));
949 assert_eq!(imp.resolved_kind.as_deref(), Some("struct"));
950 }
951
952 #[test]
953 fn test_save_and_load_roundtrip() -> anyhow::Result<()> {
954 let files = vec![make_file(
955 "src/main.rs",
956 vec![("main", DefKind::Function, 1, 10)],
957 vec![("helper", 5)],
958 vec!["std::fs"],
959 )];
960 let index = CodeIndex::build(files, Path::new("/root"), None);
961
962 let tmp = std::env::temp_dir().join("sift_test_index.bin");
963 index.save(&tmp)?;
964 let loaded = CodeIndex::load(&tmp)?;
965 std::fs::remove_file(&tmp)?;
966
967 assert_eq!(loaded.symbols.len(), 1);
968 assert_eq!(loaded.symbols[0].name, "main");
969 assert_eq!(loaded.calls.len(), 1);
970 assert_eq!(loaded.imports.len(), 1);
971 assert_eq!(loaded.imports[0].resolved_to, None);
973 assert!(loaded.file_mtimes.is_empty());
975 Ok(())
976 }
977
978 #[test]
979 fn test_save_and_load_with_mtimes() -> anyhow::Result<()> {
980 let files = vec![make_file(
981 "src/main.rs",
982 vec![("main", DefKind::Function, 1, 10)],
983 vec![],
984 vec![],
985 )];
986 let mut index = CodeIndex::build(files, Path::new("/root"), None);
987 index.file_mtimes =
988 vec![(PathBuf::from("src/main.rs"), 42)].into_iter().collect();
989
990 let tmp = std::env::temp_dir().join("sift_test_mtimes.bin");
991 index.save(&tmp)?;
992 let raw = std::fs::read(&tmp)?;
994 assert_eq!(&raw[..4], V2_MAGIC);
995 assert!(!tmp.with_extension("tmp").exists());
997 let loaded = CodeIndex::load(&tmp)?;
999 std::fs::remove_file(&tmp)?;
1000
1001 assert_eq!(loaded.file_mtimes.len(), 1);
1002 assert_eq!(loaded.file_mtimes.get(Path::new("src/main.rs")), Some(&42));
1003 Ok(())
1004 }
1005
1006 #[test]
1007 fn test_load_old_format_without_mtimes() -> anyhow::Result<()> {
1008 let pf = make_file(
1010 "src/main.rs",
1011 vec![("main", DefKind::Function, 1, 10)],
1012 vec![("helper", 5)],
1013 vec!["std::fs"],
1014 );
1015 let old = CodeIndexV1 {
1016 symbols: pf.definitions.iter().map(|d| Symbol {
1017 id: 0,
1018 name: d.name.clone(),
1019 kind: d.kind,
1020 file: pf.path.clone(),
1021 line: d.start_line,
1022 end_line: d.end_line,
1023 doc: d.doc.clone(),
1024 embedding: None,
1025 }).collect(),
1026 calls: pf.references.iter().map(|r| CallEdge {
1027 caller_name: String::new(),
1028 caller_file: pf.path.clone(),
1029 caller_line: r.line,
1030 callee_name: r.name.clone(),
1031 }).collect(),
1032 imports: pf.imports.iter().map(|i| ImportEdge {
1033 file: pf.path.clone(),
1034 symbol_name: i.name.clone(),
1035 resolved_to: None,
1036 resolved_file: None,
1037 resolved_line: None,
1038 resolved_kind: None,
1039 }).collect(),
1040 files: vec![pf.path.clone()],
1041 root: PathBuf::from("/root"),
1042 by_name: HashMap::from([("main".into(), vec![0])]),
1043 by_file: HashMap::from([(pf.path, vec![0])]),
1044 };
1045
1046 let tmp = std::env::temp_dir().join("sift_test_v1_index.bin");
1047 let bytes = bincode::serialize(&old)?;
1048 std::fs::write(&tmp, bytes)?; let loaded = CodeIndex::load(&tmp)?;
1051 std::fs::remove_file(&tmp)?;
1052
1053 assert_eq!(loaded.symbols.len(), 1);
1054 assert_eq!(loaded.symbols[0].name, "main");
1055 assert!(loaded.file_mtimes.is_empty(), "V1 load should produce empty file_mtimes");
1056 Ok(())
1057 }
1058
1059 #[test]
1060 fn test_multiple_files_index() {
1061 let files = vec![
1062 make_file(
1063 "src/main.rs",
1064 vec![("main", DefKind::Function, 1, 10)],
1065 vec![("helper", 3)],
1066 vec![],
1067 ),
1068 make_file(
1069 "src/helper.rs",
1070 vec![("helper", DefKind::Function, 1, 5)],
1071 vec![],
1072 vec![],
1073 ),
1074 ];
1075 let index = CodeIndex::build(files, Path::new("/root"), None);
1076 assert_eq!(index.symbols.len(), 2);
1077 assert_eq!(index.files.len(), 2);
1078 }
1079
1080 #[test]
1081 fn test_find_implementations() {
1082 let files = vec![make_file(
1083 "src/main.rs",
1084 vec![
1085 ("Iterator", DefKind::Trait, 1, 3),
1086 ("Iterator", DefKind::Impl, 5, 20),
1087 ],
1088 vec![],
1089 vec![],
1090 )];
1091 let index = CodeIndex::build(files, Path::new("/root"), None);
1092 let impls = index.find_implementations("Iterator");
1093 assert_eq!(impls.len(), 1);
1094 assert_eq!(impls[0].kind, DefKind::Impl);
1095 }
1096
1097 #[test]
1098 fn test_relative_path() {
1099 let files = vec![make_file(
1100 "/root/src/main.rs",
1101 vec![],
1102 vec![],
1103 vec![],
1104 )];
1105 let index = CodeIndex::build(files, Path::new("/root"), None);
1106 assert_eq!(index.relative_path(Path::new("/root/src/main.rs")), "src/main.rs");
1107 }
1108
1109 #[test]
1110 fn test_classify_files_unchanged() {
1111 let files = vec![make_file(
1112 "src/main.rs",
1113 vec![("foo", DefKind::Function, 1, 10)],
1114 vec![],
1115 vec![],
1116 )];
1117 let mut index = CodeIndex::build(files, Path::new("/root"), None);
1118 index.file_mtimes = vec![(PathBuf::from("src/main.rs"), 1000)].into_iter().collect();
1119
1120 let current = vec![(PathBuf::from("src/main.rs"), 1000)].into_iter().collect();
1121 let (unchanged, changed) = index.classify_files(¤t);
1122 assert_eq!(unchanged.len(), 1);
1123 assert_eq!(changed.len(), 0);
1124 }
1125
1126 #[test]
1127 fn test_classify_files_changed_mtime() {
1128 let files = vec![make_file(
1129 "src/main.rs",
1130 vec![("foo", DefKind::Function, 1, 10)],
1131 vec![],
1132 vec![],
1133 )];
1134 let mut index = CodeIndex::build(files, Path::new("/root"), None);
1135 index.file_mtimes = vec![(PathBuf::from("src/main.rs"), 1000)].into_iter().collect();
1136
1137 let current = vec![(PathBuf::from("src/main.rs"), 2000)].into_iter().collect();
1138 let (unchanged, changed) = index.classify_files(¤t);
1139 assert_eq!(unchanged.len(), 0);
1140 assert_eq!(changed.len(), 1);
1141 }
1142
1143 #[test]
1144 fn test_classify_files_new_and_deleted() {
1145 let mut index = CodeIndex::build(vec![], Path::new("/root"), None);
1146 index.file_mtimes = vec![(PathBuf::from("deleted.rs"), 1000)].into_iter().collect();
1147
1148 let current = vec![(PathBuf::from("new.rs"), 2000)].into_iter().collect();
1149 let (unchanged, changed) = index.classify_files(¤t);
1150 assert_eq!(unchanged.len(), 0);
1151 assert!(changed.contains(&PathBuf::from("new.rs")));
1152 assert!(changed.contains(&PathBuf::from("deleted.rs")));
1153 }
1154
1155 #[test]
1156 fn test_reconstruct_parsed_file() {
1157 let files = vec![make_file(
1158 "src/main.rs",
1159 vec![
1160 ("foo", DefKind::Function, 1, 10),
1161 ("Bar", DefKind::Struct, 15, 25),
1162 ],
1163 vec![("helper", 5)],
1164 vec!["std::collections::HashMap"],
1165 )];
1166 let index = CodeIndex::build(files, Path::new("/root"), None);
1167
1168 let pf = index.reconstruct_parsed_file(Path::new("src/main.rs"));
1169 assert_eq!(pf.path, PathBuf::from("src/main.rs"));
1170 assert_eq!(pf.definitions.len(), 2);
1171 assert_eq!(pf.definitions[0].name, "foo");
1172 assert_eq!(pf.definitions[0].kind, DefKind::Function);
1173 assert_eq!(pf.definitions[1].name, "Bar");
1174 assert_eq!(pf.definitions[1].kind, DefKind::Struct);
1175 assert_eq!(pf.references.len(), 1);
1176 assert_eq!(pf.references[0].name, "helper");
1177 assert_eq!(pf.imports.len(), 1);
1178 assert_eq!(pf.imports[0].name, "std::collections::HashMap");
1179 }
1180
1181 #[test]
1182 fn test_reconstruct_parsed_file_empty() {
1183 let index = CodeIndex::build(vec![], Path::new("/root"), None);
1184 let pf = index.reconstruct_parsed_file(Path::new("nonexistent.rs"));
1185 assert_eq!(pf.definitions.len(), 0);
1186 assert_eq!(pf.references.len(), 0);
1187 assert_eq!(pf.imports.len(), 0);
1188 }
1189
1190 #[test]
1191 fn test_preserve_embeddings() {
1192 let mut new_index = CodeIndex::build(vec![], Path::new("/root"), None);
1193 new_index.symbols.push(Symbol {
1194 id: 0,
1195 name: "foo".into(),
1196 kind: DefKind::Function,
1197 file: PathBuf::from("src/lib.rs"),
1198 line: 1,
1199 end_line: 10,
1200 doc: None,
1201 embedding: None,
1202 });
1203
1204 let mut old_index = CodeIndex::build(vec![], Path::new("/root"), None);
1205 old_index.symbols.push(Symbol {
1206 id: 0,
1207 name: "foo".into(),
1208 kind: DefKind::Function,
1209 file: PathBuf::from("src/lib.rs"),
1210 line: 1,
1211 end_line: 10,
1212 doc: None,
1213 embedding: Some(vec![0.1, 0.2, 0.3]),
1214 });
1215
1216 new_index.preserve_embeddings(&old_index);
1217 assert_eq!(
1218 new_index.symbols[0].embedding,
1219 Some(vec![0.1, 0.2, 0.3]),
1220 );
1221 }
1222
/// When the old index only knows a symbol named `foo` and the new index holds `bar`,
/// `preserve_embeddings` leaves the new symbol without an embedding.
#[test]
fn test_preserve_embeddings_no_match() {
    // Both symbols share kind, file and line span; only the name and embedding vary.
    let symbol_named = |name: &str, embedding: Option<Vec<f32>>| Symbol {
        id: 0,
        name: name.to_owned(),
        kind: DefKind::Function,
        file: PathBuf::from("src/lib.rs"),
        line: 1,
        end_line: 10,
        doc: None,
        embedding,
    };

    let mut new_index = CodeIndex::build(vec![], Path::new("/root"), None);
    new_index.symbols.push(symbol_named("bar", None));

    let mut old_index = CodeIndex::build(vec![], Path::new("/root"), None);
    old_index
        .symbols
        .push(symbol_named("foo", Some(vec![0.1, 0.2, 0.3])));

    new_index.preserve_embeddings(&old_index);

    let carried_over = &new_index.symbols[0].embedding;
    assert!(carried_over.is_none());
}
1252
1253 fn make_event(kind: notify::EventKind, paths: Vec<PathBuf>) -> notify::Event {
1258 notify::Event { kind, paths, attrs: notify::event::EventAttributes::default() }
1259 }
1260
/// `Access(_)` and `Other` events are rejected even when they touch a source file.
#[test]
fn test_is_relevant_source_event_skips_access_and_other() {
    use notify::event::AccessKind;
    use notify::EventKind;

    let source_path = PathBuf::from("src/main.rs");
    let ignored_kinds = [EventKind::Access(AccessKind::Any), EventKind::Other];

    ignored_kinds.iter().for_each(|kind| {
        let event = make_event(kind.clone(), vec![source_path.clone()]);
        let relevant = is_relevant_source_event(&event);
        assert!(!relevant, "should skip {:?}", kind);
    });
}
1272
/// A content-modification event on a Rust source file is reported as relevant.
#[test]
fn test_is_relevant_source_event_allows_modify() {
    use notify::event::{DataChange, ModifyKind};

    let content_change = notify::EventKind::Modify(ModifyKind::Data(DataChange::Content));
    let touched = vec![PathBuf::from("src/main.rs")];

    let event = make_event(content_change, touched);
    assert!(is_relevant_source_event(&event));
}
1281
/// A file-creation event on a Rust source file is reported as relevant.
#[test]
fn test_is_relevant_source_event_allows_create() {
    use notify::event::CreateKind;

    let file_created = notify::EventKind::Create(CreateKind::File);
    let touched = vec![PathBuf::from("src/lib.rs")];

    let event = make_event(file_created, touched);
    assert!(is_relevant_source_event(&event));
}
1290
/// An event whose only path is `.sift/index.bin` is not relevant.
#[test]
fn test_is_relevant_source_event_skips_dot_sift() {
    use notify::event::CreateKind;

    let file_created = notify::EventKind::Create(CreateKind::File);
    let index_artifact = PathBuf::from(".sift/index.bin");

    let event = make_event(file_created, vec![index_artifact]);
    let relevant = is_relevant_source_event(&event);
    assert!(!relevant);
}
1299
/// An event whose only path is a `Makefile` is not relevant.
#[test]
fn test_is_relevant_source_event_skips_non_source() {
    use notify::event::CreateKind;

    let file_created = notify::EventKind::Create(CreateKind::File);
    let non_source = PathBuf::from("Makefile");

    let event = make_event(file_created, vec![non_source]);
    let relevant = is_relevant_source_event(&event);
    assert!(!relevant);
}
1308
/// An event carrying `README.md` followed by `src/main.rs` is reported as relevant:
/// a source file later in the path list still counts.
#[test]
fn test_is_relevant_source_event_multiple_paths() {
    use notify::event::{DataChange, ModifyKind};

    let content_change = notify::EventKind::Modify(ModifyKind::Data(DataChange::Content));
    let touched = vec![PathBuf::from("README.md"), PathBuf::from("src/main.rs")];

    let event = make_event(content_change, touched);
    assert!(is_relevant_source_event(&event));
}
1317
/// `collect_mtimes` reports the `.rs` and `.py` files placed directly under the root,
/// and reports neither `README.md` nor the `.rs` file inside `target/`.
///
/// # Errors
/// Returns an error if the on-disk fixture cannot be created.
#[test]
fn test_collect_mtimes_finds_source_files() -> anyhow::Result<()> {
    // The system temp dir is shared between processes and users, so a fixed directory
    // name can collide with a concurrent run of this suite. Suffix it with our PID.
    let dir = std::env::temp_dir().join(format!("sift_test_mtimes_find_{}", std::process::id()));
    let _ = std::fs::remove_dir_all(&dir);

    // Build the fixture as one fallible step so a partial failure is still cleaned up.
    let populate = || -> std::io::Result<()> {
        std::fs::create_dir_all(&dir)?;
        std::fs::write(dir.join("main.rs"), "fn main() {}")?;
        std::fs::write(dir.join("lib.py"), "def foo(): pass")?;
        std::fs::write(dir.join("README.md"), "# docs")?;
        std::fs::create_dir(dir.join("target"))?;
        std::fs::write(dir.join("target").join("out.rs"), "fn build() {}")?;
        Ok(())
    };
    if let Err(err) = populate() {
        let _ = std::fs::remove_dir_all(&dir);
        return Err(err.into());
    }

    let mtimes = CodeIndex::collect_mtimes(&dir);
    // Remove the fixture before asserting so a failed assertion leaves nothing behind.
    let _ = std::fs::remove_dir_all(&dir);

    assert!(mtimes.contains_key(&dir.join("main.rs")), "should find main.rs");
    assert!(mtimes.contains_key(&dir.join("lib.py")), "should find lib.py");
    assert!(!mtimes.contains_key(&dir.join("README.md")), "should skip non-source");
    assert!(!mtimes.contains_key(&dir.join("target").join("out.rs")), "should skip target/");
    assert_eq!(mtimes.len(), 2);
    Ok(())
}
1344
/// `collect_mtimes` returns an entry for each of two source files written one after the
/// other, and the later-written file's mtime is not older than the earlier one's.
///
/// # Errors
/// Returns an error if the on-disk fixture cannot be created.
#[test]
fn test_collect_mtimes_returns_valid_timestamps() -> anyhow::Result<()> {
    // The system temp dir is shared between processes and users, so a fixed directory
    // name can collide with a concurrent run of this suite. Suffix it with our PID.
    let dir = std::env::temp_dir().join(format!("sift_test_mtimes_ts_{}", std::process::id()));
    let _ = std::fs::remove_dir_all(&dir);

    // Build the fixture as one fallible step so a partial failure is still cleaned up.
    let populate = || -> std::io::Result<()> {
        std::fs::create_dir_all(&dir)?;
        std::fs::write(dir.join("a.rs"), "fn a() {}")?;
        // Short pause so b.rs is written strictly after a.rs.
        std::thread::sleep(std::time::Duration::from_millis(5));
        std::fs::write(dir.join("b.rs"), "fn b() {}")?;
        Ok(())
    };
    if let Err(err) = populate() {
        let _ = std::fs::remove_dir_all(&dir);
        return Err(err.into());
    }

    let mtimes = CodeIndex::collect_mtimes(&dir);
    // Remove the fixture before asserting so a failed assertion leaves nothing behind.
    let _ = std::fs::remove_dir_all(&dir);

    assert_eq!(mtimes.len(), 2);
    let mtime_a = mtimes.get(&dir.join("a.rs")).expect("a.rs should exist");
    let mtime_b = mtimes.get(&dir.join("b.rs")).expect("b.rs should exist");
    assert!(*mtime_b >= *mtime_a, "b.rs (written later) should have >= mtime than a.rs");
    Ok(())
}
1364
/// `collect_mtimes` on a directory containing no files returns an empty map.
///
/// # Errors
/// Returns an error if the empty fixture directory cannot be created.
#[test]
fn test_collect_mtimes_empty_dir() -> anyhow::Result<()> {
    // The system temp dir is shared between processes and users, so a fixed directory
    // name can collide with a concurrent run of this suite. Suffix it with our PID.
    let dir = std::env::temp_dir().join(format!("sift_test_mtimes_empty_{}", std::process::id()));
    let _ = std::fs::remove_dir_all(&dir);
    if let Err(err) = std::fs::create_dir_all(&dir) {
        // `create_dir_all` may have created part of the path before failing.
        let _ = std::fs::remove_dir_all(&dir);
        return Err(err.into());
    }

    let mtimes = CodeIndex::collect_mtimes(&dir);
    // Remove the fixture before asserting so a failed assertion leaves nothing behind.
    let _ = std::fs::remove_dir_all(&dir);

    assert!(mtimes.is_empty());
    Ok(())
}
1377}