1use super::convert::{convert_symbol_kind, convert_visibility};
7use super::patterns::{
8 GO_IMPORT, JAVA_IMPORT, JS_IMPORT, JS_IMPORT_MULTILINE, JS_REQUIRE, PYTHON_FROM_IMPORT,
9 PYTHON_IMPORT, RUST_USE,
10};
11use super::types::{
12 DepGraph, FileEntry, FileId, Import, IndexSymbol, Language, Span, SymbolId, SymbolIndex,
13};
14use crate::parser::{Language as ParserLanguage, Parser};
15use crate::SymbolKind;
16use ignore::WalkBuilder;
17use once_cell::sync::Lazy;
18use rayon::prelude::*;
19use regex::Regex;
20use std::cell::RefCell;
21use std::collections::{HashMap, HashSet};
22use std::fs;
23use std::path::{Path, PathBuf};
24use std::time::{SystemTime, UNIX_EPOCH};
25use thiserror::Error;
26
27thread_local! {
29 static THREAD_PARSER: RefCell<Parser> = RefCell::new(Parser::new());
30}
31
32#[derive(Error, Debug)]
34pub enum BuildError {
35 #[error("IO error: {0}")]
36 Io(#[from] std::io::Error),
37
38 #[error("Parse error in {file}: {message}")]
39 Parse { file: String, message: String },
40
41 #[error("Repository not found: {0}")]
42 RepoNotFound(PathBuf),
43
44 #[error("Git error: {0}")]
45 Git(String),
46}
47
48static IDENT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[A-Za-z_][A-Za-z0-9_]*").unwrap());
49
50static COMMON_KEYWORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
51 [
52 "if",
53 "else",
54 "for",
55 "while",
56 "return",
57 "break",
58 "continue",
59 "match",
60 "case",
61 "switch",
62 "default",
63 "try",
64 "catch",
65 "throw",
66 "finally",
67 "yield",
68 "await",
69 "async",
70 "new",
71 "in",
72 "of",
73 "do",
74 "fn",
75 "function",
76 "def",
77 "class",
78 "struct",
79 "enum",
80 "trait",
81 "interface",
82 "type",
83 "impl",
84 "let",
85 "var",
86 "const",
87 "static",
88 "public",
89 "private",
90 "protected",
91 "internal",
92 "use",
93 "import",
94 "from",
95 "package",
96 "module",
97 "export",
98 "super",
99 "self",
100 "this",
101 "crate",
102 "pub",
103 "mod",
104 ]
105 .into_iter()
106 .collect()
107});
108
109#[derive(Debug, Clone)]
111pub struct BuildOptions {
112 pub respect_gitignore: bool,
114 pub max_file_size: u64,
116 pub include_extensions: Vec<String>,
118 pub exclude_dirs: Vec<String>,
120 pub compute_pagerank: bool,
122}
123
124impl Default for BuildOptions {
125 fn default() -> Self {
126 Self {
127 respect_gitignore: true,
128 max_file_size: 10 * 1024 * 1024, include_extensions: vec![],
130 exclude_dirs: vec![
131 "node_modules".into(),
132 ".git".into(),
133 "target".into(),
134 "build".into(),
135 "dist".into(),
136 "__pycache__".into(),
137 ".venv".into(),
138 "venv".into(),
139 ],
140 compute_pagerank: true,
141 }
142 }
143}
144
145pub struct IndexBuilder {
147 repo_root: PathBuf,
149 options: BuildOptions,
151}
152
153impl IndexBuilder {
154 pub fn new(repo_root: impl AsRef<Path>) -> Self {
156 Self { repo_root: repo_root.as_ref().to_path_buf(), options: BuildOptions::default() }
157 }
158
159 pub fn with_options(mut self, options: BuildOptions) -> Self {
161 self.options = options;
162 self
163 }
164
165 #[must_use = "index should be used for context queries or saved to disk"]
175 pub fn build(&self) -> Result<(SymbolIndex, DepGraph), BuildError> {
176 use std::time::Instant;
177
178 if !self.repo_root.exists() {
179 return Err(BuildError::RepoNotFound(self.repo_root.clone()));
180 }
181
182 let repo_name = self
183 .repo_root
184 .file_name()
185 .and_then(|n| n.to_str())
186 .unwrap_or("unknown")
187 .to_owned();
188
189 let t0 = Instant::now();
191 let files = self.collect_files()?;
192 let collect_time = t0.elapsed();
193 log::info!("Found {} files to index", files.len());
194
195 let t1 = Instant::now();
197 let parsed_files = self.parse_files_parallel(&files)?;
198 let parse_time = t1.elapsed();
199 log::info!("Parsed {} files", parsed_files.len());
200
201 let show_timing = std::env::var("INFINILOOM_TIMING").is_ok();
203 if show_timing {
204 log::info!(" [timing] collect: {:?}", collect_time);
205 log::info!(" [timing] parse: {:?}", parse_time);
206 }
207
208 let mut index = SymbolIndex::new();
210 index.repo_name = repo_name;
211 index.created_at = SystemTime::now()
212 .duration_since(UNIX_EPOCH)
213 .map(|d| d.as_secs())
214 .unwrap_or(0);
215
216 index.commit_hash = self.get_current_commit();
218
219 let mut symbol_id_counter = 0u32;
221 let mut file_path_to_id: HashMap<String, u32> = HashMap::new();
222 let mut symbol_calls: Vec<(u32, Vec<String>)> = Vec::new();
223 let mut symbol_parents: Vec<(u32, String)> = Vec::new();
224
225 for (file_id, parsed) in parsed_files.into_iter().enumerate() {
226 let file_id = file_id as u32;
227 file_path_to_id.insert(parsed.path.clone(), file_id);
228
229 let symbol_start = symbol_id_counter;
230
231 for sym in parsed.symbols {
233 index.symbols.push(IndexSymbol {
234 id: SymbolId::new(symbol_id_counter),
235 name: sym.name.clone(),
236 kind: convert_symbol_kind(sym.kind),
237 file_id: FileId::new(file_id),
238 span: Span::new(sym.start_line, 0, sym.end_line, 0),
239 signature: sym.signature,
240 parent: None, visibility: convert_visibility(sym.visibility),
242 docstring: sym.docstring,
243 });
244 if !sym.calls.is_empty() {
246 symbol_calls.push((symbol_id_counter, sym.calls));
247 }
248 if let Some(parent_name) = sym.parent {
250 symbol_parents.push((symbol_id_counter, parent_name));
251 }
252 symbol_id_counter += 1;
253 }
254
255 index.files.push(FileEntry {
256 id: FileId::new(file_id),
257 path: parsed.path,
258 language: parsed.language,
259 content_hash: parsed.content_hash,
260 symbols: symbol_start..symbol_id_counter,
261 imports: parsed.imports,
262 lines: parsed.lines,
263 tokens: parsed.tokens,
264 });
265 }
266
267 let t2 = Instant::now();
269 index.rebuild_lookups();
270 let lookup_time = t2.elapsed();
271
272 for (symbol_id, parent_name) in &symbol_parents {
274 let symbol = &index.symbols[*symbol_id as usize];
276 let file_id = symbol.file_id;
277 if let Some(parent_sym) = index
278 .symbols
279 .iter()
280 .find(|s| s.file_id == file_id && s.name == *parent_name && s.kind.is_scope())
281 {
282 index.symbols[*symbol_id as usize].parent = Some(parent_sym.id);
283 }
284 }
285
286 let t3 = Instant::now();
288 let mut graph = DepGraph::new();
289 self.build_graph(&index, &file_path_to_id, &symbol_calls, &mut graph);
290 let graph_time = t3.elapsed();
291
292 let mut pagerank_time = std::time::Duration::ZERO;
294 if self.options.compute_pagerank {
295 let t4 = Instant::now();
296 self.compute_pagerank(&index, &mut graph);
297 pagerank_time = t4.elapsed();
298 }
299
300 if show_timing {
301 log::info!(" [timing] lookups: {:?}", lookup_time);
302 log::info!(" [timing] graph: {:?}", graph_time);
303 log::info!(" [timing] pagerank: {:?}", pagerank_time);
304 }
305
306 Ok((index, graph))
307 }
308
309 fn collect_files(&self) -> Result<Vec<PathBuf>, BuildError> {
311 let mut files = Vec::new();
312 let exclude_dirs = self.options.exclude_dirs.clone();
314
315 let walker = WalkBuilder::new(&self.repo_root)
317 .hidden(false) .git_ignore(self.options.respect_gitignore)
319 .git_global(self.options.respect_gitignore)
320 .git_exclude(self.options.respect_gitignore)
321 .filter_entry(move |entry| {
322 let path = entry.path();
323 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
325 if name == ".git" {
326 return false;
327 }
328 if path.is_dir() && exclude_dirs.iter().any(|dir| dir == name) {
330 return false;
331 }
332 if path.is_dir() && name.starts_with('.') {
334 return false;
335 }
336 }
337 true
338 })
339 .build();
340
341 for entry in walker.flatten() {
342 let path = entry.path();
343 if path.is_file() && self.should_index_file(path) {
344 files.push(path.to_path_buf());
345 }
346 }
347
348 Ok(files)
349 }
350
351 fn should_index_file(&self, path: &Path) -> bool {
352 if let Ok(metadata) = fs::metadata(path) {
354 if metadata.len() > self.options.max_file_size {
355 return false;
356 }
357 }
358
359 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
361 let lang = Language::from_extension(ext);
362
363 if lang == Language::Unknown {
364 return false;
365 }
366
367 if !self.options.include_extensions.is_empty()
369 && !self
370 .options
371 .include_extensions
372 .iter()
373 .any(|entry| entry == ext)
374 {
375 return false;
376 }
377
378 true
379 }
380
381 fn parse_files_parallel(&self, files: &[PathBuf]) -> Result<Vec<ParsedFile>, BuildError> {
383 let results: Vec<Result<ParsedFile, BuildError>> =
384 files.par_iter().map(|path| self.parse_file(path)).collect();
385
386 let mut parsed = Vec::with_capacity(results.len());
388 for result in results {
389 match result {
390 Ok(f) => parsed.push(f),
391 Err(e) => log::warn!("Failed to parse file: {}", e),
392 }
393 }
394
395 Ok(parsed)
396 }
397
398 fn parse_file(&self, path: &Path) -> Result<ParsedFile, BuildError> {
400 let content = fs::read_to_string(path)?;
401 let relative_path = path
402 .strip_prefix(&self.repo_root)
403 .unwrap_or(path)
404 .to_string_lossy()
405 .to_string();
406
407 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
408 let language = Language::from_extension(ext);
409
410 let content_hash = blake3::hash(content.as_bytes());
412
413 let lines = content.lines().count() as u32;
415
416 let tokens = (content.len() / 4) as u32;
418
419 let parser_lang = match language {
421 Language::Rust => Some(ParserLanguage::Rust),
422 Language::Python => Some(ParserLanguage::Python),
423 Language::JavaScript => Some(ParserLanguage::JavaScript),
424 Language::TypeScript => Some(ParserLanguage::TypeScript),
425 Language::Go => Some(ParserLanguage::Go),
426 Language::Java => Some(ParserLanguage::Java),
427 Language::C => Some(ParserLanguage::C),
428 Language::Cpp => Some(ParserLanguage::Cpp),
429 Language::CSharp => Some(ParserLanguage::CSharp),
430 Language::Ruby => Some(ParserLanguage::Ruby),
431 Language::Bash => Some(ParserLanguage::Bash),
432 Language::Php => Some(ParserLanguage::Php),
433 Language::Kotlin => Some(ParserLanguage::Kotlin),
434 Language::Swift => Some(ParserLanguage::Swift),
435 Language::Scala => Some(ParserLanguage::Scala),
436 Language::Haskell => Some(ParserLanguage::Haskell),
437 Language::Elixir => Some(ParserLanguage::Elixir),
438 Language::Clojure => Some(ParserLanguage::Clojure),
439 Language::OCaml => Some(ParserLanguage::OCaml),
440 Language::Lua => Some(ParserLanguage::Lua),
441 Language::R => Some(ParserLanguage::R),
442 Language::Unknown => None,
443 };
444
445 let mut symbols = Vec::new();
446 let imports = self.extract_imports(&content, language);
447
448 if let Some(lang) = parser_lang {
449 THREAD_PARSER.with(|parser_cell| {
451 let mut parser = parser_cell.borrow_mut();
452 if let Ok(parsed_symbols) = parser.parse(&content, lang) {
453 for sym in parsed_symbols {
454 symbols.push(ParsedSymbol {
455 name: sym.name,
456 kind: sym.kind,
457 start_line: sym.start_line,
458 end_line: sym.end_line,
459 signature: sym.signature,
460 docstring: sym.docstring,
461 parent: sym.parent,
462 visibility: sym.visibility,
463 calls: sym.calls,
464 });
465 }
466 }
467 });
468 }
469
470 Ok(ParsedFile {
471 path: relative_path,
472 language,
473 content_hash: *content_hash.as_bytes(),
474 lines,
475 tokens,
476 symbols,
477 imports,
478 })
479 }
480
481 fn extract_imports(&self, content: &str, language: Language) -> Vec<Import> {
483 let mut imports = Vec::new();
484
485 if matches!(language, Language::JavaScript | Language::TypeScript) {
486 use std::collections::HashSet;
487
488 let mut seen_sources: HashSet<String> = HashSet::new();
489
490 let patterns: &[(&Regex, bool)] = &[(&JS_IMPORT, true), (&JS_REQUIRE, true)];
492 for (line_num, line) in content.lines().enumerate() {
493 for (re, check_external) in patterns {
494 if let Some(captures) = re.captures(line) {
495 if let Some(source) = captures.get(1) {
496 let source_str = source.as_str().to_owned();
497 if !seen_sources.insert(source_str.clone()) {
498 continue;
499 }
500 let is_external = if *check_external {
501 !source_str.starts_with('.')
502 && !source_str.starts_with('/')
503 && !source_str.starts_with("src/")
504 } else {
505 false
506 };
507 imports.push(Import {
508 source: source_str,
509 resolved_file: None,
510 symbols: vec![],
511 span: Span::new(line_num as u32 + 1, 0, line_num as u32 + 1, 0),
512 is_external,
513 });
514 }
515 }
516 }
517 }
518
519 for caps in JS_IMPORT_MULTILINE.captures_iter(content) {
521 if let Some(source) = caps.get(1) {
522 let source_str = source.as_str().to_owned();
523 if !seen_sources.insert(source_str.clone()) {
524 continue;
525 }
526 let line_num = content[..source.start()].matches('\n').count() as u32 + 1;
527 let is_external = !source_str.starts_with('.')
528 && !source_str.starts_with('/')
529 && !source_str.starts_with("src/");
530 imports.push(Import {
531 source: source_str,
532 resolved_file: None,
533 symbols: vec![],
534 span: Span::new(line_num, 0, line_num, 0),
535 is_external,
536 });
537 }
538 }
539
540 return imports;
541 }
542
543 let patterns: &[(&Regex, bool)] = match language {
545 Language::Python => &[(&PYTHON_IMPORT, false), (&PYTHON_FROM_IMPORT, false)],
546 Language::Rust => &[(&RUST_USE, false)],
547 Language::Go => &[(&GO_IMPORT, true)],
548 Language::Java => &[(&JAVA_IMPORT, false)],
549 _ => return imports, };
551
552 for (line_num, line) in content.lines().enumerate() {
553 for (re, check_external) in patterns {
554 if let Some(captures) = re.captures(line) {
555 if let Some(source) = captures.get(1) {
556 let source_str = source.as_str().to_owned();
557 let is_external = if *check_external {
558 !source_str.starts_with('.')
560 && !source_str.starts_with('/')
561 && !source_str.starts_with("src/")
562 } else {
563 false
564 };
565
566 imports.push(Import {
567 source: source_str,
568 resolved_file: None,
569 symbols: vec![],
570 span: Span::new(line_num as u32 + 1, 0, line_num as u32 + 1, 0),
571 is_external,
572 });
573 }
574 }
575 }
576 }
577
578 imports
579 }
580
581 fn build_graph(
583 &self,
584 index: &SymbolIndex,
585 file_path_to_id: &HashMap<String, u32>,
586 symbol_calls: &[(u32, Vec<String>)],
587 graph: &mut DepGraph,
588 ) {
589 let mut file_imports_by_file: HashMap<u32, Vec<u32>> = HashMap::new();
590
591 for file in &index.files {
593 for import in &file.imports {
594 if let Some(resolved_id) =
596 self.resolve_import(&import.source, &file.path, file_path_to_id)
597 {
598 graph.add_file_import(file.id.as_u32(), resolved_id);
599 file_imports_by_file
600 .entry(file.id.as_u32())
601 .or_default()
602 .push(resolved_id);
603 }
604 }
605 }
606
607 let mut symbol_name_to_ids: HashMap<&str, Vec<u32>> = HashMap::new();
609 for sym in &index.symbols {
610 symbol_name_to_ids
611 .entry(&sym.name)
612 .or_default()
613 .push(sym.id.as_u32());
614 }
615
616 for (caller_id, call_names) in symbol_calls {
618 let caller = &index.symbols[*caller_id as usize];
619 let caller_file_id = caller.file_id;
620 let imported_file_ids = file_imports_by_file
621 .get(&caller_file_id.as_u32())
622 .map(|ids| ids.iter().copied().collect::<HashSet<u32>>());
623
624 for call_name in call_names {
625 if let Some(callee_ids) = symbol_name_to_ids.get(call_name.as_str()) {
626 let callee_id = callee_ids
628 .iter()
629 .find(|&&id| index.symbols[id as usize].file_id == caller_file_id)
630 .or_else(|| {
631 imported_file_ids.as_ref().and_then(|imports| {
632 callee_ids.iter().find(|&&id| {
633 imports.contains(&index.symbols[id as usize].file_id.as_u32())
634 })
635 })
636 })
637 .or_else(|| callee_ids.first())
638 .copied();
639
640 if let Some(callee_id) = callee_id {
641 if callee_id != *caller_id {
643 graph.add_call(*caller_id, callee_id);
644 }
645 }
646 }
647 }
648 }
649
650 self.add_symbol_reference_edges(index, &file_imports_by_file, &symbol_name_to_ids, graph);
651 }
652
653 fn add_symbol_reference_edges(
654 &self,
655 index: &SymbolIndex,
656 file_imports_by_file: &HashMap<u32, Vec<u32>>,
657 symbol_name_to_ids: &HashMap<&str, Vec<u32>>,
658 graph: &mut DepGraph,
659 ) {
660 let mut added: HashSet<(u32, u32)> = HashSet::new();
661
662 for file in &index.files {
663 let content = match fs::read_to_string(self.repo_root.join(&file.path)) {
664 Ok(content) => content,
665 Err(_) => continue,
666 };
667
668 let imported_file_ids = file_imports_by_file
669 .get(&file.id.as_u32())
670 .map(|ids| ids.iter().copied().collect::<HashSet<u32>>());
671
672 for (line_idx, line) in content.lines().enumerate() {
673 if self.should_skip_reference_line(line, file.language) {
674 continue;
675 }
676 let line_no = line_idx as u32 + 1;
677 let referencer = match index.find_symbol_at_line(file.id, line_no) {
678 Some(symbol) => symbol,
679 None => continue,
680 };
681
682 for mat in IDENT_RE.find_iter(line) {
683 let name = mat.as_str();
684 if name.len() <= 1 || self.is_reference_keyword(name) {
685 continue;
686 }
687 if referencer.span.start_line == line_no && referencer.name == name {
688 continue;
689 }
690
691 let target_id = symbol_name_to_ids.get(name).and_then(|candidate_ids| {
692 candidate_ids
693 .iter()
694 .find(|&&id| index.symbols[id as usize].file_id == file.id)
695 .or_else(|| {
696 imported_file_ids.as_ref().and_then(|imports| {
697 candidate_ids.iter().find(|&&id| {
698 imports
699 .contains(&index.symbols[id as usize].file_id.as_u32())
700 })
701 })
702 })
703 .or_else(|| candidate_ids.first())
704 .copied()
705 });
706
707 if let Some(target_id) = target_id {
708 if target_id != referencer.id.as_u32()
709 && added.insert((referencer.id.as_u32(), target_id))
710 {
711 graph.add_symbol_ref(referencer.id.as_u32(), target_id);
712 }
713 }
714 }
715 }
716 }
717 }
718
719 fn should_skip_reference_line(&self, line: &str, language: Language) -> bool {
720 let trimmed = line.trim_start();
721 if trimmed.is_empty() {
722 return true;
723 }
724
725 let comment_prefixes: &[&str] = match language {
726 Language::Python | Language::R => &["#"],
727 Language::Bash => &["#"],
728 Language::Ruby => &["#"],
729 Language::Lua => &["--"],
730 Language::JavaScript
731 | Language::TypeScript
732 | Language::C
733 | Language::Cpp
734 | Language::CSharp
735 | Language::Go
736 | Language::Java
737 | Language::Php
738 | Language::Kotlin
739 | Language::Swift
740 | Language::Scala => &["//"],
741 _ => &["//"],
742 };
743
744 if comment_prefixes.iter().any(|p| trimmed.starts_with(p)) {
745 return true;
746 }
747
748 let import_prefixes: &[&str] = match language {
749 Language::Python => &["import ", "from "],
750 Language::Rust => &["use "],
751 Language::Go => &["import "],
752 Language::Java => &["import "],
753 Language::JavaScript | Language::TypeScript => &["import ", "export ", "require("],
754 _ => &[],
755 };
756
757 import_prefixes.iter().any(|p| trimmed.starts_with(p))
758 }
759
760 fn is_reference_keyword(&self, name: &str) -> bool {
761 COMMON_KEYWORDS.contains(name)
762 }
763
764 fn resolve_import(
771 &self,
772 source: &str,
773 importing_file: &str,
774 file_path_to_id: &HashMap<String, u32>,
775 ) -> Option<u32> {
776 if source.starts_with("./") || source.starts_with("../") {
778 let import_dir = Path::new(importing_file).parent().unwrap_or(Path::new(""));
779
780 let relative_source = source.strip_prefix("./").unwrap_or(source);
782
783 let resolved = import_dir.join(relative_source);
785 let resolved_str = resolved.to_string_lossy();
786 let resolved_str = resolved_str.as_ref();
787
788 let relative_candidates = [
790 resolved_str.to_owned(),
791 format!("{}.ts", resolved_str),
792 format!("{}.js", resolved_str),
793 format!("{}.tsx", resolved_str),
794 format!("{}.jsx", resolved_str),
795 format!("{}/index.ts", resolved_str),
796 format!("{}/index.js", resolved_str),
797 format!("{}.py", resolved_str),
798 format!("{}/__init__.py", resolved_str),
799 ];
800
801 for candidate in relative_candidates {
802 let normalized = self.normalize_path(&candidate);
804 if let Some(&id) = file_path_to_id.get(&normalized) {
805 return Some(id);
806 }
807 }
808 }
809
810 let candidates = [
812 source.to_owned(),
813 format!("{}.rs", source.replace("::", "/")),
814 format!("{}/mod.rs", source.replace("::", "/")),
815 format!("{}.py", source.replace(".", "/")),
816 format!("{}/__init__.py", source.replace(".", "/")),
817 format!("{}.ts", source),
818 format!("{}.js", source),
819 format!("{}/index.ts", source),
820 format!("{}/index.js", source),
821 format!("src/{}.rs", source.replace("::", "/")),
822 format!("src/{}.py", source.replace(".", "/")),
823 format!("src/{}.ts", source),
824 format!("src/{}.js", source),
825 ];
826
827 for candidate in candidates {
828 if let Some(&id) = file_path_to_id.get(&candidate) {
829 return Some(id);
830 }
831 }
832
833 None
834 }
835
836 fn normalize_path(&self, path: &str) -> String {
838 let mut parts: Vec<&str> = Vec::new();
839 for part in path.split('/') {
840 match part {
841 "" | "." => continue,
842 ".." => {
843 parts.pop();
844 },
845 _ => parts.push(part),
846 }
847 }
848 parts.join("/")
849 }
850
851 fn compute_pagerank(&self, index: &SymbolIndex, graph: &mut DepGraph) {
853 self.compute_file_pagerank(index, graph);
855
856 self.compute_symbol_pagerank(index, graph);
858 }
859
860 fn compute_file_pagerank(&self, index: &SymbolIndex, graph: &mut DepGraph) {
862 let n = index.files.len();
863 if n == 0 {
864 return;
865 }
866
867 let damping = 0.85f32;
868 let iterations = 20;
869 let initial_rank = 1.0 / n as f32;
870
871 let mut ranks: Vec<f32> = vec![initial_rank; n];
872 let mut new_ranks: Vec<f32> = vec![0.0; n];
873
874 let mut outgoing: Vec<Vec<u32>> = vec![vec![]; n];
876 for &(from, to) in &graph.file_imports {
877 if (from as usize) < n && (to as usize) < n {
878 outgoing[from as usize].push(to);
879 }
880 }
881
882 for _ in 0..iterations {
883 for r in &mut new_ranks {
885 *r = (1.0 - damping) / n as f32;
886 }
887
888 for (i, neighbors) in outgoing.iter().enumerate() {
890 if !neighbors.is_empty() {
891 let contribution = damping * ranks[i] / neighbors.len() as f32;
892 for &j in neighbors {
893 new_ranks[j as usize] += contribution;
894 }
895 } else {
896 let contribution = damping * ranks[i] / n as f32;
898 for r in &mut new_ranks {
899 *r += contribution;
900 }
901 }
902 }
903
904 std::mem::swap(&mut ranks, &mut new_ranks);
905 }
906
907 graph.file_pagerank = ranks;
908 }
909
910 fn compute_symbol_pagerank(&self, index: &SymbolIndex, graph: &mut DepGraph) {
912 let n = index.symbols.len();
913 if n == 0 {
914 graph.symbol_pagerank = Vec::new();
915 return;
916 }
917
918 let damping = 0.85f32;
919 let iterations = 20;
920 let initial_rank = 1.0 / n as f32;
921
922 let mut ranks: Vec<f32> = vec![initial_rank; n];
923 let mut new_ranks: Vec<f32> = vec![0.0; n];
924
925 let mut outgoing: Vec<Vec<u32>> = vec![vec![]; n];
928 for &(caller, callee) in &graph.calls {
929 if (caller as usize) < n && (callee as usize) < n {
930 outgoing[caller as usize].push(callee);
931 }
932 }
933
934 for &(from, to) in &graph.symbol_refs {
936 if (from as usize) < n && (to as usize) < n {
937 if !outgoing[from as usize].contains(&to) {
939 outgoing[from as usize].push(to);
940 }
941 }
942 }
943
944 for _ in 0..iterations {
945 for r in &mut new_ranks {
947 *r = (1.0 - damping) / n as f32;
948 }
949
950 for (i, neighbors) in outgoing.iter().enumerate() {
952 if !neighbors.is_empty() {
953 let contribution = damping * ranks[i] / neighbors.len() as f32;
954 for &j in neighbors {
955 new_ranks[j as usize] += contribution;
956 }
957 } else {
958 let contribution = damping * ranks[i] / n as f32;
960 for r in &mut new_ranks {
961 *r += contribution;
962 }
963 }
964 }
965
966 std::mem::swap(&mut ranks, &mut new_ranks);
967 }
968
969 graph.symbol_pagerank = ranks;
970 }
971
972 fn get_current_commit(&self) -> Option<String> {
974 let git_head = self.repo_root.join(".git/HEAD");
975 if let Ok(content) = fs::read_to_string(&git_head) {
976 if content.starts_with("ref: ") {
977 let ref_path = content.trim_start_matches("ref: ").trim();
979 let ref_file = self.repo_root.join(".git").join(ref_path);
980 if let Ok(hash) = fs::read_to_string(&ref_file) {
981 return Some(hash.trim().to_owned());
982 }
983 } else {
984 return Some(content.trim().to_owned());
986 }
987 }
988 None
989 }
990}
991
992struct ParsedFile {
994 path: String,
995 language: Language,
996 content_hash: [u8; 32],
997 lines: u32,
998 tokens: u32,
999 symbols: Vec<ParsedSymbol>,
1000 imports: Vec<Import>,
1001}
1002
1003struct ParsedSymbol {
1005 name: String,
1006 kind: SymbolKind,
1007 start_line: u32,
1008 end_line: u32,
1009 signature: Option<String>,
1010 docstring: Option<String>,
1011 parent: Option<String>,
1012 visibility: crate::types::Visibility,
1013 calls: Vec<String>,
1014}
1015
1016#[cfg(test)]
1017mod tests {
1018 use super::*;
1019 use std::fs;
1020 use tempfile::TempDir;
1021
1022 #[test]
1023 fn test_build_simple_index() {
1024 let tmp = TempDir::new().unwrap();
1025
1026 fs::write(
1028 tmp.path().join("main.rs"),
1029 r#"fn main() {
1030 println!("Hello, world!");
1031 helper();
1032}
1033
1034fn helper() {
1035 // Do something
1036}
1037"#,
1038 )
1039 .unwrap();
1040
1041 fs::write(
1042 tmp.path().join("lib.rs"),
1043 r#"pub mod utils;
1044
1045pub fn public_fn() {}
1046"#,
1047 )
1048 .unwrap();
1049
1050 let builder = IndexBuilder::new(tmp.path());
1052 let (index, graph) = builder.build().unwrap();
1053
1054 assert_eq!(
1056 index.files.len(),
1057 2,
1058 "Expected 2 files, found {:?}",
1059 index.files.iter().map(|f| &f.path).collect::<Vec<_>>()
1060 );
1061
1062 assert!(
1064 index.symbols.len() >= 3,
1065 "Expected at least 3 symbols, got {}",
1066 index.symbols.len()
1067 );
1068
1069 assert!(index.get_file("main.rs").is_some(), "main.rs not found in index");
1071 assert!(index.get_file("lib.rs").is_some(), "lib.rs not found in index");
1072
1073 assert_eq!(graph.file_pagerank.len(), 2);
1075 }
1076
1077 #[test]
1078 fn test_symbol_reference_edges() {
1079 let tmp = TempDir::new().unwrap();
1080
1081 fs::write(
1082 tmp.path().join("lib.rs"),
1083 r#"pub struct Foo;
1084"#,
1085 )
1086 .unwrap();
1087
1088 fs::write(
1089 tmp.path().join("main.rs"),
1090 r#"mod lib;
1091
1092fn main() {
1093 let _value: Foo;
1094}
1095"#,
1096 )
1097 .unwrap();
1098
1099 let builder = IndexBuilder::new(tmp.path());
1100 let (index, graph) = builder.build().unwrap();
1101
1102 let foo = index.find_symbols("Foo");
1103 let main = index.find_symbols("main");
1104 assert!(!foo.is_empty(), "Expected Foo symbol");
1105 assert!(!main.is_empty(), "Expected main symbol");
1106
1107 let foo_id = foo[0].id.as_u32();
1108 let main_id = main[0].id.as_u32();
1109
1110 let referencers = graph.get_referencers(foo_id);
1111 assert!(referencers.contains(&main_id), "Expected main to reference Foo");
1112 }
1113
1114 #[test]
1115 fn test_language_detection() {
1116 assert_eq!(Language::from_extension("rs"), Language::Rust);
1118 assert_eq!(Language::from_extension("py"), Language::Python);
1119 assert_eq!(Language::from_extension("ts"), Language::TypeScript);
1120 assert_eq!(Language::from_extension("tsx"), Language::TypeScript);
1121 assert_eq!(Language::from_extension("go"), Language::Go);
1122 assert_eq!(Language::from_extension("java"), Language::Java);
1123 assert_eq!(Language::from_extension("js"), Language::JavaScript);
1124 assert_eq!(Language::from_extension("c"), Language::C);
1125 assert_eq!(Language::from_extension("cpp"), Language::Cpp);
1126 assert_eq!(Language::from_extension("cs"), Language::CSharp);
1127 assert_eq!(Language::from_extension("rb"), Language::Ruby);
1128 assert_eq!(Language::from_extension("sh"), Language::Bash);
1129 assert_eq!(Language::from_extension("php"), Language::Php);
1131 assert_eq!(Language::from_extension("kt"), Language::Kotlin);
1132 assert_eq!(Language::from_extension("swift"), Language::Swift);
1133 assert_eq!(Language::from_extension("scala"), Language::Scala);
1134 assert_eq!(Language::from_extension("hs"), Language::Haskell);
1135 assert_eq!(Language::from_extension("ex"), Language::Elixir);
1136 assert_eq!(Language::from_extension("clj"), Language::Clojure);
1137 assert_eq!(Language::from_extension("ml"), Language::OCaml);
1138 assert_eq!(Language::from_extension("lua"), Language::Lua);
1139 assert_eq!(Language::from_extension("r"), Language::R);
1140 }
1141}