1use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rayon::prelude::*;
12use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
13use streaming_iterator::StreamingIterator;
14use tree_sitter::{Parser, Query, QueryCursor};
15
16use crate::languages;
17use crate::walk;
18
19#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
23pub struct RepoGraph {
24 pub files: Vec<FileNode>,
26 pub edges: Vec<(u32, u32, u32)>,
28 pub base_ranks: Vec<f32>,
30 pub callers: Vec<Vec<u32>>,
32 pub callees: Vec<Vec<u32>>,
34 pub def_edges: Vec<(DefId, DefId, u32)>,
36 pub def_ranks: Vec<f32>,
38 pub def_callers: Vec<Vec<DefId>>,
40 pub def_callees: Vec<Vec<DefId>>,
42 pub def_offsets: Vec<usize>,
44 pub alpha: f32,
46}
47
48#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
50pub struct FileNode {
51 pub path: String,
53 pub defs: Vec<Definition>,
55 pub imports: Vec<ImportRef>,
57}
58
59#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
61pub struct Definition {
62 pub name: String,
64 pub kind: String,
66 pub start_line: u32,
68 pub end_line: u32,
70 pub scope: String,
72 pub signature: Option<String>,
74 pub start_byte: u32,
76 pub end_byte: u32,
78 pub calls: Vec<CallRef>,
80}
81
82#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
84pub struct ImportRef {
85 pub raw_path: String,
87 pub resolved_idx: Option<u32>,
89}
90
91pub type DefId = (u32, u16);
93
94#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
96pub struct CallRef {
97 pub name: String,
99 pub byte_offset: u32,
101 pub resolved: Option<DefId>,
103}
104
105const DAMPING: f32 = 0.85;
109
110const EPSILON: f32 = 1e-6;
112
113const MAX_ITERATIONS: usize = 100;
115
116const MAX_NEIGHBORS: usize = 5;
118
119const CHARS_PER_TOKEN: usize = 4;
121
122fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
128 let (lang, query_str): (tree_sitter::Language, &str) = match ext {
129 "rs" => (
130 tree_sitter_rust::LANGUAGE.into(),
131 "(use_declaration) @import",
132 ),
133 "py" | "pyi" => (
134 tree_sitter_python::LANGUAGE.into(),
135 concat!(
136 "(import_statement) @import\n",
137 "(import_from_statement) @import",
138 ),
139 ),
140 "js" | "jsx" => (
141 tree_sitter_javascript::LANGUAGE.into(),
142 "(import_statement source: (string) @import_path) @import",
143 ),
144 "ts" => (
145 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
146 "(import_statement source: (string) @import_path) @import",
147 ),
148 "tsx" => (
149 tree_sitter_typescript::LANGUAGE_TSX.into(),
150 "(import_statement source: (string) @import_path) @import",
151 ),
152 "go" => (
153 tree_sitter_go::LANGUAGE.into(),
154 "(import_spec path: (interpreted_string_literal) @import_path) @import",
155 ),
156 "rb" => (
158 tree_sitter_ruby::LANGUAGE.into(),
159 "(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
160 ),
161 _ => return None,
162 };
163 let query = match Query::new(&lang, query_str) {
164 Ok(q) => q,
165 Err(e) => {
166 tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
167 return None;
168 }
169 };
170 Some((lang, query))
171}
172
173fn extract_imports(
175 source: &str,
176 lang: &tree_sitter::Language,
177 import_query: &Query,
178) -> Vec<String> {
179 let mut parser = Parser::new();
180 if parser.set_language(lang).is_err() {
181 return vec![];
182 }
183 let Some(tree) = parser.parse(source, None) else {
184 return vec![];
185 };
186
187 let mut cursor = QueryCursor::new();
188 let mut imports = Vec::new();
189 let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
190
191 while let Some(m) = matches.next() {
192 let mut import_path_text = None;
194 let mut import_text = None;
195
196 for cap in m.captures {
197 let cap_name = &import_query.capture_names()[cap.index as usize];
198 let text = &source[cap.node.start_byte()..cap.node.end_byte()];
199 if *cap_name == "import_path" {
200 import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
201 } else if *cap_name == "import" {
202 import_text = Some(text);
203 }
204 }
205
206 if let Some(path) = import_path_text {
207 imports.push(path.to_string());
208 } else if let Some(text) = import_text {
209 imports.push(text.to_string());
210 }
211 }
212
213 imports
214}
215
216fn resolve_rust_import(
223 raw: &str,
224 file_path: &Path,
225 root: &Path,
226 file_index: &HashMap<PathBuf, usize>,
227) -> Option<usize> {
228 let trimmed = raw
230 .trim()
231 .trim_start_matches("use ")
232 .trim_end_matches(';')
233 .trim();
234
235 let segments: Vec<&str> = trimmed.split("::").collect();
236 if segments.is_empty() {
237 return None;
238 }
239
240 let (base, skip) = match segments[0] {
242 "crate" => {
243 let mut dir = file_path.parent();
247 let crate_root = loop {
248 match dir {
249 Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
250 Some(d) => dir = d.parent(),
251 None => break root.join("src"), }
253 };
254 (crate_root, 1)
255 }
256 "self" => {
257 let dir = file_path.parent()?;
258 (dir.to_path_buf(), 1)
259 }
260 "super" => {
261 let dir = file_path.parent()?.parent()?;
262 (dir.to_path_buf(), 1)
263 }
264 _ => return None,
266 };
267
268 let path_segments = &segments[skip..];
272 for end in (1..=path_segments.len()).rev() {
273 let mut candidate = base.clone();
274 for seg in &path_segments[..end] {
275 let clean = seg.split('{').next().unwrap_or(seg).trim();
277 if !clean.is_empty() {
278 candidate.push(clean);
279 }
280 }
281
282 let as_file = candidate.with_extension("rs");
284 if let Some(&idx) = file_index.get(&as_file) {
285 return Some(idx);
286 }
287
288 let as_mod = candidate.join("mod.rs");
290 if let Some(&idx) = file_index.get(&as_mod) {
291 return Some(idx);
292 }
293 }
294
295 None
296}
297
298fn resolve_import(
300 raw: &str,
301 ext: &str,
302 file_path: &Path,
303 root: &Path,
304 file_index: &HashMap<PathBuf, usize>,
305) -> Option<usize> {
306 match ext {
307 "rs" => resolve_rust_import(raw, file_path, root, file_index),
308 "py" | "pyi" => resolve_python_import(raw, root, file_index),
309 "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
310 _ => None,
312 }
313}
314
315fn resolve_python_import(
319 raw: &str,
320 root: &Path,
321 file_index: &HashMap<PathBuf, usize>,
322) -> Option<usize> {
323 let module_path = if let Some(rest) = raw.strip_prefix("from ") {
324 rest.split_whitespace().next()?
325 } else if let Some(rest) = raw.strip_prefix("import ") {
326 rest.split_whitespace().next()?
327 } else {
328 return None;
329 };
330
331 let rel_path: PathBuf = module_path.split('.').collect();
332 for ext in ["py", "pyi"] {
333 let as_file = root.join(&rel_path).with_extension(ext);
334 if let Some(&idx) = file_index.get(&as_file) {
335 return Some(idx);
336 }
337 }
338
339 for init_name in ["__init__.py", "__init__.pyi"] {
340 let as_init = root.join(&rel_path).join(init_name);
341 if let Some(&idx) = file_index.get(&as_init) {
342 return Some(idx);
343 }
344 }
345
346 None
347}
348
349fn resolve_js_import(
353 raw: &str,
354 file_path: &Path,
355 file_index: &HashMap<PathBuf, usize>,
356) -> Option<usize> {
357 if !raw.starts_with('.') {
358 return None;
359 }
360
361 let dir = file_path.parent()?;
362 let candidate = dir.join(raw);
363
364 for ext in &["js", "jsx", "ts", "tsx"] {
365 let with_ext = candidate.with_extension(ext);
366 if let Some(&idx) = file_index.get(&with_ext) {
367 return Some(idx);
368 }
369 }
370
371 for ext in &["js", "jsx", "ts", "tsx"] {
372 let index_file = candidate.join("index").with_extension(ext);
373 if let Some(&idx) = file_index.get(&index_file) {
374 return Some(idx);
375 }
376 }
377
378 None
379}
380
381fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
385 let mut parser = Parser::new();
386 if parser.set_language(&config.language).is_err() {
387 return vec![];
388 }
389 let Some(tree) = parser.parse(source, None) else {
390 return vec![];
391 };
392
393 let mut cursor = QueryCursor::new();
394 let mut defs = Vec::new();
395 let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
396
397 while let Some(m) = matches.next() {
398 let mut name = String::new();
399 let mut def_node = None;
400
401 for cap in m.captures {
402 let cap_name = &config.query.capture_names()[cap.index as usize];
403 if *cap_name == "name" {
404 name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
405 } else if *cap_name == "def" {
406 def_node = Some(cap.node);
407 }
408 }
409
410 if let Some(node) = def_node {
411 let scope = crate::chunk::build_scope_chain(node, source);
412 let signature = crate::chunk::extract_signature(node, source);
413 #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
414 let start_line = node.start_position().row as u32 + 1;
415 #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
416 let end_line = node.end_position().row as u32 + 1;
417 #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
418 let start_byte = node.start_byte() as u32;
419 #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
420 let end_byte = node.end_byte() as u32;
421 defs.push(Definition {
422 name,
423 kind: node.kind().to_string(),
424 start_line,
425 end_line,
426 scope,
427 signature,
428 start_byte,
429 end_byte,
430 calls: vec![],
431 });
432 }
433 }
434
435 defs
436}
437
438fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
446 let mut parser = Parser::new();
447 if parser.set_language(&call_config.language).is_err() {
448 return;
449 }
450 let Some(tree) = parser.parse(source, None) else {
451 return;
452 };
453
454 let mut cursor = QueryCursor::new();
455 let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
456
457 while let Some(m) = matches.next() {
458 let mut callee_name = None;
459 let mut call_byte = 0u32;
460
461 for cap in m.captures {
462 let cap_name = &call_config.query.capture_names()[cap.index as usize];
463 if *cap_name == "callee" {
464 callee_name = Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
465 #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
466 {
467 call_byte = cap.node.start_byte() as u32;
468 }
469 }
470 }
471
472 if let Some(name) = callee_name {
473 if let Some(def) = defs
475 .iter_mut()
476 .find(|d| d.start_byte <= call_byte && call_byte < d.end_byte)
477 {
478 if def.name != name {
480 def.calls.push(CallRef {
481 name,
482 byte_offset: call_byte,
483 resolved: None,
484 });
485 }
486 }
487 }
489 }
490}
491
492fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
494 let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
495 for (file_idx, file) in files.iter().enumerate() {
496 for (def_idx, def) in file.defs.iter().enumerate() {
497 #[expect(clippy::cast_possible_truncation)]
498 let did: DefId = (file_idx as u32, def_idx as u16);
499 index.entry(def.name.clone()).or_default().push(did);
500 }
501 }
502 index
503}
504
505fn resolve_calls(files: &mut [FileNode], def_index: &HashMap<String, Vec<DefId>>) {
512 let imported_files: Vec<std::collections::HashSet<u32>> = files
514 .iter()
515 .map(|f| {
516 f.imports
517 .iter()
518 .filter_map(|imp| imp.resolved_idx)
519 .collect()
520 })
521 .collect();
522
523 for file_idx in 0..files.len() {
524 for def_idx in 0..files[file_idx].defs.len() {
525 for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
526 let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
527
528 let Some(candidates) = def_index.get(&call_name) else {
529 continue;
530 };
531
532 #[expect(clippy::cast_possible_truncation)]
534 let file_idx_u32 = file_idx as u32;
535 if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
536 files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
537 continue;
538 }
539
540 if let Some(&did) = candidates
542 .iter()
543 .find(|(f, _)| imported_files[file_idx].contains(f))
544 {
545 files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
546 }
547 }
549 }
550 }
551}
552
553fn def_offsets(files: &[FileNode]) -> Vec<usize> {
555 let mut offsets = Vec::with_capacity(files.len() + 1);
556 offsets.push(0);
557 for file in files {
558 offsets.push(offsets.last().unwrap() + file.defs.len());
559 }
560 offsets
561}
562
563fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
565 offsets[did.0 as usize] + did.1 as usize
566}
567
568fn build_def_neighbor_lists(
570 n: usize,
571 edges: &[(u32, u32, u32)],
572 offsets: &[usize],
573) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
574 let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
575 let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
576
577 for &(src, dst, w) in edges {
578 let (s, d) = (src as usize, dst as usize);
579 if s < n && d < n {
580 incoming[d].push((src, w));
581 outgoing[s].push((dst, w));
582 }
583 }
584
585 let to_def_id = |flat: u32| -> DefId {
587 let flat_usize = flat as usize;
588 let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
589 let def_idx = flat_usize - offsets[file_idx];
590 #[expect(clippy::cast_possible_truncation)]
591 (file_idx as u32, def_idx as u16)
592 };
593
594 let callers = incoming
595 .into_iter()
596 .map(|mut v| {
597 v.sort_by_key(|b| std::cmp::Reverse(b.1));
598 v.truncate(MAX_NEIGHBORS);
599 v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
600 })
601 .collect();
602
603 let callees = outgoing
604 .into_iter()
605 .map(|mut v| {
606 v.sort_by_key(|b| std::cmp::Reverse(b.1));
607 v.truncate(MAX_NEIGHBORS);
608 v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
609 })
610 .collect();
611
612 (callers, callees)
613}
614
615#[expect(
624 clippy::cast_precision_loss,
625 reason = "node count fits comfortably in f32"
626)]
627fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
628 if n == 0 {
629 return vec![];
630 }
631
632 let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
634 let mut out_weight: Vec<f32> = vec![0.0; n];
635
636 for &(src, dst, w) in edges {
637 let (s, d) = (src as usize, dst as usize);
638 if s < n && d < n {
639 #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
640 let wf = f64::from(w) as f32;
641 out_edges[s].push((d, wf));
642 out_weight[s] += wf;
643 }
644 }
645
646 let bias: Vec<f32> = if let Some(idx) = focus {
650 let uniform = 1.0 / n as f32;
651 let mut b = vec![0.3 * uniform; n];
652 if idx < n {
653 b[idx] += 0.7;
654 }
655 let sum: f32 = b.iter().sum();
657 for v in &mut b {
658 *v /= sum;
659 }
660 b
661 } else {
662 vec![1.0 / n as f32; n]
663 };
664
665 let mut rank = vec![1.0 / n as f32; n];
666 let mut next_rank = vec![0.0_f32; n];
667
668 for _ in 0..MAX_ITERATIONS {
669 let dangling: f32 = rank
671 .iter()
672 .enumerate()
673 .filter(|&(i, _)| out_edges[i].is_empty())
674 .map(|(_, &r)| r)
675 .sum();
676
677 for (i, nr) in next_rank.iter_mut().enumerate() {
679 *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
680 }
681
682 for (src, edges_list) in out_edges.iter().enumerate() {
683 if edges_list.is_empty() {
684 continue;
685 }
686 let src_rank = rank[src];
687 let total_w = out_weight[src];
688 for &(dst, w) in edges_list {
689 next_rank[dst] += DAMPING * src_rank * (w / total_w);
690 }
691 }
692
693 let diff: f32 = rank
695 .iter()
696 .zip(next_rank.iter())
697 .map(|(a, b)| (a - b).abs())
698 .sum();
699
700 std::mem::swap(&mut rank, &mut next_rank);
701
702 if diff < EPSILON {
703 break;
704 }
705 }
706
707 rank
708}
709
710struct DefGraphData {
714 def_edges: Vec<(DefId, DefId, u32)>,
715 def_ranks: Vec<f32>,
716 def_callers: Vec<Vec<DefId>>,
717 def_callees: Vec<Vec<DefId>>,
718 offsets: Vec<usize>,
719 base_ranks: Vec<f32>,
720 file_edges: Vec<(u32, u32, u32)>,
721}
722
723fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
725 let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
727 for (file_idx, file) in files.iter().enumerate() {
728 for (def_idx, def) in file.defs.iter().enumerate() {
729 #[expect(clippy::cast_possible_truncation)]
730 let caller_id: DefId = (file_idx as u32, def_idx as u16);
731 for call in &def.calls {
732 if let Some(callee_id) = call.resolved {
733 *def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
734 }
735 }
736 }
737 }
738 let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
739 .into_iter()
740 .map(|((src, dst), w)| (src, dst, w))
741 .collect();
742
743 let offsets = def_offsets(files);
745 let n_defs = *offsets.last().unwrap_or(&0);
746
747 let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
748 .iter()
749 .map(|(src, dst, w)| {
750 #[expect(clippy::cast_possible_truncation)]
751 (
752 flatten_def_id(&offsets, *src) as u32,
753 flatten_def_id(&offsets, *dst) as u32,
754 *w,
755 )
756 })
757 .collect();
758
759 let def_ranks = pagerank(n_defs, &flat_def_edges, None);
760
761 let base_ranks: Vec<f32> = files
763 .iter()
764 .enumerate()
765 .map(|(i, file)| {
766 let start = offsets[i];
767 let end = start + file.defs.len();
768 def_ranks[start..end].iter().sum()
769 })
770 .collect();
771
772 let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
774 for &(src, dst, w) in &def_edges {
775 let src_file = src.0;
776 let dst_file = dst.0;
777 if src_file != dst_file {
778 *file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
779 }
780 }
781 let file_edges: Vec<(u32, u32, u32)> = file_edge_map
782 .into_iter()
783 .map(|((src, dst), w)| (src, dst, w))
784 .collect();
785
786 let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
788
789 DefGraphData {
790 def_edges,
791 def_ranks,
792 def_callers,
793 def_callees,
794 offsets,
795 base_ranks,
796 file_edges,
797 }
798}
799
800pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
810 let root = root.canonicalize().map_err(|e| crate::Error::Io {
811 path: root.display().to_string(),
812 source: e,
813 })?;
814
815 let mut walk_options = walk::WalkOptions::default();
816 if let Some((_, config)) = crate::cache::config::find_config(&root) {
817 walk_options.ignore_patterns = config.ignore.patterns;
818 }
819 let all_files = walk::collect_files_with_options(&root, &walk_options);
820
821 let raw_inputs: Vec<(PathBuf, String, String, String)> = all_files
827 .par_iter()
828 .filter_map(|path| {
829 let ext = path
830 .extension()
831 .and_then(|e| e.to_str())
832 .unwrap_or_default()
833 .to_string();
834 if languages::config_for_extension(&ext).is_none()
835 && import_query_for_extension(&ext).is_none()
836 {
837 return None;
838 }
839 let source = std::fs::read_to_string(path).ok()?;
840 let rel_path = path
841 .strip_prefix(&root)
842 .unwrap_or(path)
843 .display()
844 .to_string();
845 Some((path.clone(), rel_path, ext, source))
846 })
847 .collect();
848
849 let mut file_index: HashMap<PathBuf, usize> = HashMap::with_capacity(raw_inputs.len());
853 let mut files: Vec<FileNode> = Vec::with_capacity(raw_inputs.len());
854 let mut raw_sources: Vec<(usize, String, String)> = Vec::with_capacity(raw_inputs.len());
855 for (idx, (abs_path, rel_path, ext, source)) in raw_inputs.into_iter().enumerate() {
856 file_index.insert(abs_path, idx);
857 files.push(FileNode {
858 path: rel_path,
859 defs: vec![],
860 imports: vec![],
861 });
862 raw_sources.push((idx, ext, source));
863 }
864
865 files
872 .par_iter_mut()
873 .zip(raw_sources.par_iter())
874 .for_each(|(file, (_, ext, source))| {
875 if let Some(config) = languages::config_for_extension(ext) {
876 file.defs = extract_definitions(source, &config);
877 }
878 if let Some((lang, import_query)) = import_query_for_extension(ext) {
879 let raw_imports = extract_imports(source, &lang, &import_query);
880 let file_path = root.join(&file.path);
881 file.imports = raw_imports
882 .into_iter()
883 .map(|raw| {
884 let resolved_idx =
885 resolve_import(&raw, ext, &file_path, &root, &file_index)
886 .and_then(|i| u32::try_from(i).ok());
887 ImportRef {
888 raw_path: raw,
889 resolved_idx,
890 }
891 })
892 .collect();
893 }
894 });
895
896 files
900 .par_iter_mut()
901 .zip(raw_sources.par_iter())
902 .for_each(|(file, (_, ext, source))| {
903 if let Some(call_config) = languages::call_query_for_extension(ext) {
904 extract_calls(source, &call_config, &mut file.defs);
905 }
906 });
907
908 let def_index = build_def_index(&files);
910 resolve_calls(&mut files, &def_index);
911
912 let graph_data = compute_def_graph(&files);
914
915 let n = files.len();
917 let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
918
919 #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
921 let density = if n > 1 {
922 graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
923 } else {
924 0.0
925 };
926 let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
927
928 Ok(RepoGraph {
929 files,
930 edges: graph_data.file_edges,
931 base_ranks: graph_data.base_ranks,
932 callers,
933 callees,
934 def_edges: graph_data.def_edges,
935 def_ranks: graph_data.def_ranks,
936 def_callers: graph_data.def_callers,
937 def_callees: graph_data.def_callees,
938 def_offsets: graph_data.offsets,
939 alpha,
940 })
941}
942
943impl RepoGraph {
944 #[must_use]
946 pub fn def_rank(&self, did: DefId) -> f32 {
947 let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
948 self.def_ranks.get(flat).copied().unwrap_or(0.0)
949 }
950
951 #[must_use]
953 pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
954 for (file_idx, file) in self.files.iter().enumerate() {
955 if file.path == file_path {
956 for (def_idx, def) in file.defs.iter().enumerate() {
957 if def.name == def_name {
958 #[expect(clippy::cast_possible_truncation)]
959 return Some((file_idx as u32, def_idx as u16));
960 }
961 }
962 }
963 }
964 None
965 }
966}
967
968fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
970 let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
971 let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
972
973 for &(src, dst, w) in edges {
974 let (s, d) = (src as usize, dst as usize);
975 if s < n && d < n {
976 incoming[d].push((src, w));
977 outgoing[s].push((dst, w));
978 }
979 }
980
981 let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
983 lists
984 .iter_mut()
985 .map(|list| {
986 list.sort_by_key(|b| std::cmp::Reverse(b.1));
987 list.iter()
988 .take(MAX_NEIGHBORS)
989 .map(|(idx, _)| *idx)
990 .collect()
991 })
992 .collect()
993 };
994
995 (trim(&mut incoming), trim(&mut outgoing))
996}
997
998#[must_use]
1013pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
1014 let n = graph.files.len();
1015 if n == 0 {
1016 return String::new();
1017 }
1018
1019 let ranks = if focus.is_some() {
1021 pagerank(n, &graph.edges, focus)
1022 } else {
1023 graph.base_ranks.clone()
1024 };
1025
1026 let mut sorted: Vec<usize> = (0..n).collect();
1028 sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
1029
1030 let mut output = String::new();
1031 let mut used_tokens = 0;
1032 let max_chars = max_tokens * CHARS_PER_TOKEN;
1033
1034 for (rank_pos, &file_idx) in sorted.iter().enumerate() {
1035 if used_tokens >= max_tokens {
1036 break;
1037 }
1038
1039 let file = &graph.files[file_idx];
1040 let score = ranks[file_idx];
1041 #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
1042 let percentile = (rank_pos as f32) / (n as f32);
1043
1044 let section = if percentile < 0.1 {
1045 render_tier0(graph, file_idx, file, score)
1046 } else if percentile < 0.3 {
1047 render_tier1(file, score)
1048 } else if percentile < 0.7 {
1049 render_tier2(file, score)
1050 } else {
1051 render_tier3(file)
1052 };
1053
1054 let section_chars = section.len();
1055 if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
1056 let path_line = format!("{}\n", file.path);
1058 let path_tokens = path_line.len() / CHARS_PER_TOKEN;
1059 if used_tokens + path_tokens <= max_tokens {
1060 output.push_str(&path_line);
1061 }
1062 break;
1063 }
1064
1065 output.push_str(§ion);
1066 used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
1067 }
1068
1069 output
1070}
1071
1072fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
1074 let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1075
1076 if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
1078 let _ = write!(out, " called by: ");
1079 let names: Vec<&str> = graph.callers[file_idx]
1080 .iter()
1081 .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1082 .collect();
1083 let _ = writeln!(out, "{}", names.join(", "));
1084 }
1085
1086 if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
1088 let _ = write!(out, " calls: ");
1089 let names: Vec<&str> = graph.callees[file_idx]
1090 .iter()
1091 .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1092 .collect();
1093 let _ = writeln!(out, "{}", names.join(", "));
1094 }
1095
1096 for def in &file.defs {
1098 let scope_prefix = if def.scope.is_empty() {
1099 String::new()
1100 } else {
1101 format!("{} > ", def.scope)
1102 };
1103 if let Some(sig) = &def.signature {
1104 let _ = writeln!(out, " {scope_prefix}{} {sig}", def.kind);
1105 } else {
1106 let _ = writeln!(out, " {scope_prefix}{} {}", def.kind, def.name);
1107 }
1108 }
1109 let _ = writeln!(out);
1110 out
1111}
1112
1113fn render_tier1(file: &FileNode, score: f32) -> String {
1115 let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1116 for def in &file.defs {
1117 if let Some(sig) = &def.signature {
1118 let _ = writeln!(out, " {sig}");
1119 } else {
1120 let _ = writeln!(out, " {} {}", def.kind, def.name);
1121 }
1122 }
1123 let _ = writeln!(out);
1124 out
1125}
1126
1127fn render_tier2(file: &FileNode, score: f32) -> String {
1129 let mut out = format!("{} (rank: {score:.4})", file.path);
1130 if !file.defs.is_empty() {
1131 let names: Vec<String> = file
1132 .defs
1133 .iter()
1134 .map(|d| format!("{}:{}", d.kind, d.name))
1135 .collect();
1136 let _ = write!(out, " -- {}", names.join(", "));
1137 }
1138 let _ = writeln!(out);
1139 out
1140}
1141
1142fn render_tier3(file: &FileNode) -> String {
1144 format!("{}\n", file.path)
1145}
1146
1147#[cfg(test)]
1150mod tests {
1151 use super::*;
1152
1153 #[test]
1154 fn test_pagerank_simple() {
1155 let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
1157 let ranks = pagerank(3, &edges, None);
1158
1159 assert_eq!(ranks.len(), 3);
1161 let sum: f32 = ranks.iter().sum();
1162 assert!(
1163 (sum - 1.0).abs() < 0.01,
1164 "ranks should sum to ~1.0, got {sum}"
1165 );
1166
1167 let expected = 1.0 / 3.0;
1169 for (i, &r) in ranks.iter().enumerate() {
1170 assert!(
1171 (r - expected).abs() < 0.05,
1172 "rank[{i}] = {r}, expected ~{expected}"
1173 );
1174 }
1175 }
1176
1177 #[test]
1178 fn test_pagerank_star() {
1179 let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
1181 let ranks = pagerank(4, &edges, None);
1182
1183 assert_eq!(ranks.len(), 4);
1184 let max_idx = ranks
1186 .iter()
1187 .enumerate()
1188 .max_by(|a, b| a.1.total_cmp(b.1))
1189 .unwrap()
1190 .0;
1191 assert_eq!(max_idx, 3, "node 3 should have highest rank");
1192 assert!(
1193 ranks[3] > ranks[0],
1194 "rank[3]={} should be > rank[0]={}",
1195 ranks[3],
1196 ranks[0]
1197 );
1198 }
1199
1200 #[test]
1201 fn test_pagerank_topic_sensitive() {
1202 let edges = vec![(0, 1, 1), (1, 2, 1)];
1204 let uniform_ranks = pagerank(3, &edges, None);
1205 let biased_ranks = pagerank(3, &edges, Some(0));
1206
1207 assert!(
1209 biased_ranks[0] > uniform_ranks[0],
1210 "focused rank[0]={} should be > uniform rank[0]={}",
1211 biased_ranks[0],
1212 uniform_ranks[0]
1213 );
1214 }
1215
1216 #[test]
1217 fn test_pagerank_empty() {
1218 let ranks = pagerank(0, &[], None);
1219 assert!(ranks.is_empty());
1220 }
1221
1222 #[test]
1223 fn test_render_tiers() {
1224 let files: Vec<FileNode> = (0..10)
1226 .map(|i| FileNode {
1227 path: format!("src/file_{i}.rs"),
1228 defs: vec![Definition {
1229 name: format!("func_{i}"),
1230 kind: "function_item".to_string(),
1231 start_line: 1,
1232 end_line: 5,
1233 scope: String::new(),
1234 signature: Some(format!("func_{i}(x: i32) -> i32")),
1235 start_byte: 0,
1236 end_byte: 0,
1237 calls: vec![],
1238 }],
1239 imports: vec![],
1240 })
1241 .collect();
1242
1243 let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
1245 let base_ranks = pagerank(10, &edges, None);
1246 let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
1247
1248 let graph = RepoGraph {
1249 files,
1250 edges,
1251 base_ranks,
1252 callers: top_callers,
1253 callees: top_callees,
1254 def_edges: vec![],
1255 def_ranks: vec![],
1256 def_callers: vec![],
1257 def_callees: vec![],
1258 def_offsets: vec![0],
1259 alpha: 0.5,
1260 };
1261
1262 let full = render(&graph, 10_000, None);
1264 assert!(
1265 full.contains("file_0"),
1266 "output should contain the top-ranked file"
1267 );
1268 assert!(
1270 full.contains("## src/file_0.rs"),
1271 "top file should have tier 0 heading"
1272 );
1273
1274 let small = render(&graph, 10, None);
1276 assert!(
1277 !small.is_empty(),
1278 "even tiny budget should produce some output"
1279 );
1280 let full_lines = full.lines().count();
1282 let small_lines = small.lines().count();
1283 assert!(
1284 small_lines < full_lines,
1285 "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
1286 );
1287 }
1288
1289 #[test]
1290 fn test_render_empty_graph() {
1291 let graph = RepoGraph {
1292 files: vec![],
1293 edges: vec![],
1294 base_ranks: vec![],
1295 callers: vec![],
1296 callees: vec![],
1297 def_edges: vec![],
1298 def_ranks: vec![],
1299 def_callers: vec![],
1300 def_callees: vec![],
1301 def_offsets: vec![0],
1302 alpha: 0.5,
1303 };
1304 let output = render(&graph, 1000, None);
1305 assert!(output.is_empty(), "empty graph should render empty string");
1306 }
1307
1308 #[test]
1309 fn test_build_graph_on_fixtures() {
1310 let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
1311 .parent()
1312 .unwrap()
1313 .parent()
1314 .unwrap()
1315 .join("tests")
1316 .join("fixtures");
1317
1318 let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
1319
1320 assert!(
1322 !graph.files.is_empty(),
1323 "graph should contain files from fixtures"
1324 );
1325
1326 let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
1328 assert!(rs_file.is_some(), "should find sample.rs");
1329 let rs_file = rs_file.unwrap();
1330 assert!(
1331 !rs_file.defs.is_empty(),
1332 "sample.rs should have definitions"
1333 );
1334 assert!(
1335 rs_file.defs.iter().any(|d| d.name == "hello"),
1336 "should find 'hello' function in sample.rs"
1337 );
1338
1339 let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
1341 assert!(py_file.is_some(), "should find sample.py");
1342 let py_file = py_file.unwrap();
1343 assert!(
1344 !py_file.defs.is_empty(),
1345 "sample.py should have definitions"
1346 );
1347 assert!(
1348 py_file.defs.iter().any(|d| d.name == "greet"),
1349 "should find 'greet' function in sample.py"
1350 );
1351
1352 assert_eq!(graph.base_ranks.len(), graph.files.len());
1354 let sum: f32 = graph.base_ranks.iter().sum();
1355 assert!(
1356 (sum - 1.0).abs() < 0.01,
1357 "PageRank scores should sum to ~1.0, got {sum}"
1358 );
1359 }
1360
1361 #[test]
1362 fn test_extract_imports_rust() {
1363 let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
1364 let (lang, query) = import_query_for_extension("rs").unwrap();
1365 let imports = extract_imports(source, &lang, &query);
1366 assert_eq!(imports.len(), 2);
1367 assert!(imports[0].contains("crate::foo::bar"));
1368 }
1369
1370 #[test]
1371 fn test_extract_imports_python_stub() {
1372 let source = "from typing import Protocol\nimport pkg.types\n";
1373 let (lang, query) = import_query_for_extension("pyi").unwrap();
1374 let imports = extract_imports(source, &lang, &query);
1375 assert_eq!(imports.len(), 2);
1376 assert!(imports[0].contains("from typing import Protocol"));
1377 assert!(imports[1].contains("import pkg.types"));
1378 }
1379
1380 #[test]
1381 fn test_resolve_python_import_to_stub_file() {
1382 let root = PathBuf::from("/project");
1383 let mut file_index = HashMap::new();
1384 file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
1385
1386 let result = resolve_python_import("import pkg.types", &root, &file_index);
1387 assert_eq!(result, Some(1));
1388 }
1389
1390 #[test]
1391 fn test_resolve_rust_crate_import() {
1392 let root = PathBuf::from("/project");
1393 let file_path = PathBuf::from("/project/src/main.rs");
1394 let mut file_index = HashMap::new();
1395 file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1396 file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1397
1398 let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1399 assert_eq!(result, Some(1));
1400 }
1401
1402 #[test]
1403 fn test_resolve_rust_external_crate_dropped() {
1404 let root = PathBuf::from("/project");
1405 let file_path = PathBuf::from("/project/src/main.rs");
1406 let file_index = HashMap::new();
1407
1408 let result = resolve_rust_import(
1409 "use std::collections::HashMap;",
1410 &file_path,
1411 &root,
1412 &file_index,
1413 );
1414 assert_eq!(result, None, "external crate imports should be dropped");
1415 }
1416
1417 #[test]
1418 fn test_neighbor_lists() {
1419 let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1421 let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1422
1423 assert!(incoming[2].contains(&0));
1425 assert!(incoming[2].contains(&1));
1426
1427 assert!(outgoing[0].contains(&1));
1429 assert!(outgoing[0].contains(&2));
1430 }
1431
1432 #[test]
1433 #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1434 fn test_full_repo_map() {
1435 use std::time::Instant;
1436
1437 let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1438 .parent()
1439 .unwrap()
1440 .parent()
1441 .unwrap();
1442
1443 let t0 = Instant::now();
1445 let graph = build_graph(root).expect("build_graph on ripvec root");
1446 let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1447
1448 let t1 = Instant::now();
1450 let rendered = render(&graph, 2000, None);
1451 let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1452
1453 let t2 = Instant::now();
1455 let focus_idx = graph
1456 .base_ranks
1457 .iter()
1458 .enumerate()
1459 .max_by(|a, b| a.1.total_cmp(b.1))
1460 .map(|(i, _)| i);
1461 let focused = render(&graph, 2000, focus_idx);
1462 let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1463
1464 eprintln!("\n=== Repo Map Performance ===");
1465 eprintln!(
1466 "Files: {}, Edges: {}, Defs: {}",
1467 graph.files.len(),
1468 graph.edges.len(),
1469 graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1470 );
1471 eprintln!("build_graph: {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1472 eprintln!(
1473 "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1474 rendered.len(),
1475 rendered.len() / 4
1476 );
1477 eprintln!(
1478 "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1479 focused.len(),
1480 focused.len() / 4
1481 );
1482
1483 eprintln!("\nTop 5 by PageRank:");
1484 let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1485 ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1486 for (i, rank) in ranked.iter().take(5) {
1487 eprintln!(" {:.4} {}", rank, graph.files[*i].path);
1488 }
1489
1490 eprintln!("\n=== Default Render ===\n{rendered}");
1491 eprintln!(
1492 "\n=== Focused Render (on {}) ===\n{focused}",
1493 focus_idx
1494 .map(|i| graph.files[i].path.as_str())
1495 .unwrap_or("none")
1496 );
1497 }
1498}