1use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
12use streaming_iterator::StreamingIterator;
13use tree_sitter::{Parser, Query, QueryCursor};
14
15use crate::languages;
16use crate::walk;
17
18#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
22pub struct RepoGraph {
23 pub files: Vec<FileNode>,
25 pub edges: Vec<(u32, u32, u32)>,
27 pub base_ranks: Vec<f32>,
29 pub callers: Vec<Vec<u32>>,
31 pub callees: Vec<Vec<u32>>,
33 pub def_edges: Vec<(DefId, DefId, u32)>,
35 pub def_ranks: Vec<f32>,
37 pub def_callers: Vec<Vec<DefId>>,
39 pub def_callees: Vec<Vec<DefId>>,
41 pub def_offsets: Vec<usize>,
43 pub alpha: f32,
45}
46
47#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
49pub struct FileNode {
50 pub path: String,
52 pub defs: Vec<Definition>,
54 pub imports: Vec<ImportRef>,
56}
57
58#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
60pub struct Definition {
61 pub name: String,
63 pub kind: String,
65 pub start_line: u32,
67 pub end_line: u32,
69 pub scope: String,
71 pub signature: Option<String>,
73 pub start_byte: u32,
75 pub end_byte: u32,
77 pub calls: Vec<CallRef>,
79}
80
81#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
83pub struct ImportRef {
84 pub raw_path: String,
86 pub resolved_idx: Option<u32>,
88}
89
90pub type DefId = (u32, u16);
92
93#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
95pub struct CallRef {
96 pub name: String,
98 pub byte_offset: u32,
100 pub resolved: Option<DefId>,
102}
103
104const DAMPING: f32 = 0.85;
108
109const EPSILON: f32 = 1e-6;
111
112const MAX_ITERATIONS: usize = 100;
114
115const MAX_NEIGHBORS: usize = 5;
117
118const CHARS_PER_TOKEN: usize = 4;
120
121fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
127 let (lang, query_str): (tree_sitter::Language, &str) = match ext {
128 "rs" => (
129 tree_sitter_rust::LANGUAGE.into(),
130 "(use_declaration) @import",
131 ),
132 "py" | "pyi" => (
133 tree_sitter_python::LANGUAGE.into(),
134 concat!(
135 "(import_statement) @import\n",
136 "(import_from_statement) @import",
137 ),
138 ),
139 "js" | "jsx" => (
140 tree_sitter_javascript::LANGUAGE.into(),
141 "(import_statement source: (string) @import_path) @import",
142 ),
143 "ts" => (
144 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
145 "(import_statement source: (string) @import_path) @import",
146 ),
147 "tsx" => (
148 tree_sitter_typescript::LANGUAGE_TSX.into(),
149 "(import_statement source: (string) @import_path) @import",
150 ),
151 "go" => (
152 tree_sitter_go::LANGUAGE.into(),
153 "(import_spec path: (interpreted_string_literal) @import_path) @import",
154 ),
155 "rb" => (
157 tree_sitter_ruby::LANGUAGE.into(),
158 "(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
159 ),
160 _ => return None,
161 };
162 let query = match Query::new(&lang, query_str) {
163 Ok(q) => q,
164 Err(e) => {
165 tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
166 return None;
167 }
168 };
169 Some((lang, query))
170}
171
172fn extract_imports(
174 source: &str,
175 lang: &tree_sitter::Language,
176 import_query: &Query,
177) -> Vec<String> {
178 let mut parser = Parser::new();
179 if parser.set_language(lang).is_err() {
180 return vec![];
181 }
182 let Some(tree) = parser.parse(source, None) else {
183 return vec![];
184 };
185
186 let mut cursor = QueryCursor::new();
187 let mut imports = Vec::new();
188 let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
189
190 while let Some(m) = matches.next() {
191 let mut import_path_text = None;
193 let mut import_text = None;
194
195 for cap in m.captures {
196 let cap_name = &import_query.capture_names()[cap.index as usize];
197 let text = &source[cap.node.start_byte()..cap.node.end_byte()];
198 if *cap_name == "import_path" {
199 import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
200 } else if *cap_name == "import" {
201 import_text = Some(text);
202 }
203 }
204
205 if let Some(path) = import_path_text {
206 imports.push(path.to_string());
207 } else if let Some(text) = import_text {
208 imports.push(text.to_string());
209 }
210 }
211
212 imports
213}
214
215fn resolve_rust_import(
222 raw: &str,
223 file_path: &Path,
224 root: &Path,
225 file_index: &HashMap<PathBuf, usize>,
226) -> Option<usize> {
227 let trimmed = raw
229 .trim()
230 .trim_start_matches("use ")
231 .trim_end_matches(';')
232 .trim();
233
234 let segments: Vec<&str> = trimmed.split("::").collect();
235 if segments.is_empty() {
236 return None;
237 }
238
239 let (base, skip) = match segments[0] {
241 "crate" => {
242 let mut dir = file_path.parent();
246 let crate_root = loop {
247 match dir {
248 Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
249 Some(d) => dir = d.parent(),
250 None => break root.join("src"), }
252 };
253 (crate_root, 1)
254 }
255 "self" => {
256 let dir = file_path.parent()?;
257 (dir.to_path_buf(), 1)
258 }
259 "super" => {
260 let dir = file_path.parent()?.parent()?;
261 (dir.to_path_buf(), 1)
262 }
263 _ => return None,
265 };
266
267 let path_segments = &segments[skip..];
271 for end in (1..=path_segments.len()).rev() {
272 let mut candidate = base.clone();
273 for seg in &path_segments[..end] {
274 let clean = seg.split('{').next().unwrap_or(seg).trim();
276 if !clean.is_empty() {
277 candidate.push(clean);
278 }
279 }
280
281 let as_file = candidate.with_extension("rs");
283 if let Some(&idx) = file_index.get(&as_file) {
284 return Some(idx);
285 }
286
287 let as_mod = candidate.join("mod.rs");
289 if let Some(&idx) = file_index.get(&as_mod) {
290 return Some(idx);
291 }
292 }
293
294 None
295}
296
297fn resolve_import(
299 raw: &str,
300 ext: &str,
301 file_path: &Path,
302 root: &Path,
303 file_index: &HashMap<PathBuf, usize>,
304) -> Option<usize> {
305 match ext {
306 "rs" => resolve_rust_import(raw, file_path, root, file_index),
307 "py" | "pyi" => resolve_python_import(raw, root, file_index),
308 "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
309 _ => None,
311 }
312}
313
314fn resolve_python_import(
318 raw: &str,
319 root: &Path,
320 file_index: &HashMap<PathBuf, usize>,
321) -> Option<usize> {
322 let module_path = if let Some(rest) = raw.strip_prefix("from ") {
323 rest.split_whitespace().next()?
324 } else if let Some(rest) = raw.strip_prefix("import ") {
325 rest.split_whitespace().next()?
326 } else {
327 return None;
328 };
329
330 let rel_path: PathBuf = module_path.split('.').collect();
331 for ext in ["py", "pyi"] {
332 let as_file = root.join(&rel_path).with_extension(ext);
333 if let Some(&idx) = file_index.get(&as_file) {
334 return Some(idx);
335 }
336 }
337
338 for init_name in ["__init__.py", "__init__.pyi"] {
339 let as_init = root.join(&rel_path).join(init_name);
340 if let Some(&idx) = file_index.get(&as_init) {
341 return Some(idx);
342 }
343 }
344
345 None
346}
347
348fn resolve_js_import(
352 raw: &str,
353 file_path: &Path,
354 file_index: &HashMap<PathBuf, usize>,
355) -> Option<usize> {
356 if !raw.starts_with('.') {
357 return None;
358 }
359
360 let dir = file_path.parent()?;
361 let candidate = dir.join(raw);
362
363 for ext in &["js", "jsx", "ts", "tsx"] {
364 let with_ext = candidate.with_extension(ext);
365 if let Some(&idx) = file_index.get(&with_ext) {
366 return Some(idx);
367 }
368 }
369
370 for ext in &["js", "jsx", "ts", "tsx"] {
371 let index_file = candidate.join("index").with_extension(ext);
372 if let Some(&idx) = file_index.get(&index_file) {
373 return Some(idx);
374 }
375 }
376
377 None
378}
379
380fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
384 let mut parser = Parser::new();
385 if parser.set_language(&config.language).is_err() {
386 return vec![];
387 }
388 let Some(tree) = parser.parse(source, None) else {
389 return vec![];
390 };
391
392 let mut cursor = QueryCursor::new();
393 let mut defs = Vec::new();
394 let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
395
396 while let Some(m) = matches.next() {
397 let mut name = String::new();
398 let mut def_node = None;
399
400 for cap in m.captures {
401 let cap_name = &config.query.capture_names()[cap.index as usize];
402 if *cap_name == "name" {
403 name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
404 } else if *cap_name == "def" {
405 def_node = Some(cap.node);
406 }
407 }
408
409 if let Some(node) = def_node {
410 let scope = crate::chunk::build_scope_chain(node, source);
411 let signature = crate::chunk::extract_signature(node, source);
412 #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
413 let start_line = node.start_position().row as u32 + 1;
414 #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
415 let end_line = node.end_position().row as u32 + 1;
416 #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
417 let start_byte = node.start_byte() as u32;
418 #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
419 let end_byte = node.end_byte() as u32;
420 defs.push(Definition {
421 name,
422 kind: node.kind().to_string(),
423 start_line,
424 end_line,
425 scope,
426 signature,
427 start_byte,
428 end_byte,
429 calls: vec![],
430 });
431 }
432 }
433
434 defs
435}
436
437fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
445 let mut parser = Parser::new();
446 if parser.set_language(&call_config.language).is_err() {
447 return;
448 }
449 let Some(tree) = parser.parse(source, None) else {
450 return;
451 };
452
453 let mut cursor = QueryCursor::new();
454 let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
455
456 while let Some(m) = matches.next() {
457 let mut callee_name = None;
458 let mut call_byte = 0u32;
459
460 for cap in m.captures {
461 let cap_name = &call_config.query.capture_names()[cap.index as usize];
462 if *cap_name == "callee" {
463 callee_name = Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
464 #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
465 {
466 call_byte = cap.node.start_byte() as u32;
467 }
468 }
469 }
470
471 if let Some(name) = callee_name {
472 if let Some(def) = defs
474 .iter_mut()
475 .find(|d| d.start_byte <= call_byte && call_byte < d.end_byte)
476 {
477 if def.name != name {
479 def.calls.push(CallRef {
480 name,
481 byte_offset: call_byte,
482 resolved: None,
483 });
484 }
485 }
486 }
488 }
489}
490
491fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
493 let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
494 for (file_idx, file) in files.iter().enumerate() {
495 for (def_idx, def) in file.defs.iter().enumerate() {
496 #[expect(clippy::cast_possible_truncation)]
497 let did: DefId = (file_idx as u32, def_idx as u16);
498 index.entry(def.name.clone()).or_default().push(did);
499 }
500 }
501 index
502}
503
504fn resolve_calls(files: &mut [FileNode], def_index: &HashMap<String, Vec<DefId>>) {
511 let imported_files: Vec<std::collections::HashSet<u32>> = files
513 .iter()
514 .map(|f| {
515 f.imports
516 .iter()
517 .filter_map(|imp| imp.resolved_idx)
518 .collect()
519 })
520 .collect();
521
522 for file_idx in 0..files.len() {
523 for def_idx in 0..files[file_idx].defs.len() {
524 for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
525 let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
526
527 let Some(candidates) = def_index.get(&call_name) else {
528 continue;
529 };
530
531 #[expect(clippy::cast_possible_truncation)]
533 let file_idx_u32 = file_idx as u32;
534 if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
535 files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
536 continue;
537 }
538
539 if let Some(&did) = candidates
541 .iter()
542 .find(|(f, _)| imported_files[file_idx].contains(f))
543 {
544 files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
545 }
546 }
548 }
549 }
550}
551
552fn def_offsets(files: &[FileNode]) -> Vec<usize> {
554 let mut offsets = Vec::with_capacity(files.len() + 1);
555 offsets.push(0);
556 for file in files {
557 offsets.push(offsets.last().unwrap() + file.defs.len());
558 }
559 offsets
560}
561
562fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
564 offsets[did.0 as usize] + did.1 as usize
565}
566
567fn build_def_neighbor_lists(
569 n: usize,
570 edges: &[(u32, u32, u32)],
571 offsets: &[usize],
572) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
573 let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
574 let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
575
576 for &(src, dst, w) in edges {
577 let (s, d) = (src as usize, dst as usize);
578 if s < n && d < n {
579 incoming[d].push((src, w));
580 outgoing[s].push((dst, w));
581 }
582 }
583
584 let to_def_id = |flat: u32| -> DefId {
586 let flat_usize = flat as usize;
587 let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
588 let def_idx = flat_usize - offsets[file_idx];
589 #[expect(clippy::cast_possible_truncation)]
590 (file_idx as u32, def_idx as u16)
591 };
592
593 let callers = incoming
594 .into_iter()
595 .map(|mut v| {
596 v.sort_by_key(|b| std::cmp::Reverse(b.1));
597 v.truncate(MAX_NEIGHBORS);
598 v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
599 })
600 .collect();
601
602 let callees = outgoing
603 .into_iter()
604 .map(|mut v| {
605 v.sort_by_key(|b| std::cmp::Reverse(b.1));
606 v.truncate(MAX_NEIGHBORS);
607 v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
608 })
609 .collect();
610
611 (callers, callees)
612}
613
614#[expect(
623 clippy::cast_precision_loss,
624 reason = "node count fits comfortably in f32"
625)]
626fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
627 if n == 0 {
628 return vec![];
629 }
630
631 let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
633 let mut out_weight: Vec<f32> = vec![0.0; n];
634
635 for &(src, dst, w) in edges {
636 let (s, d) = (src as usize, dst as usize);
637 if s < n && d < n {
638 #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
639 let wf = f64::from(w) as f32;
640 out_edges[s].push((d, wf));
641 out_weight[s] += wf;
642 }
643 }
644
645 let bias: Vec<f32> = if let Some(idx) = focus {
649 let uniform = 1.0 / n as f32;
650 let mut b = vec![0.3 * uniform; n];
651 if idx < n {
652 b[idx] += 0.7;
653 }
654 let sum: f32 = b.iter().sum();
656 for v in &mut b {
657 *v /= sum;
658 }
659 b
660 } else {
661 vec![1.0 / n as f32; n]
662 };
663
664 let mut rank = vec![1.0 / n as f32; n];
665 let mut next_rank = vec![0.0_f32; n];
666
667 for _ in 0..MAX_ITERATIONS {
668 let dangling: f32 = rank
670 .iter()
671 .enumerate()
672 .filter(|&(i, _)| out_edges[i].is_empty())
673 .map(|(_, &r)| r)
674 .sum();
675
676 for (i, nr) in next_rank.iter_mut().enumerate() {
678 *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
679 }
680
681 for (src, edges_list) in out_edges.iter().enumerate() {
682 if edges_list.is_empty() {
683 continue;
684 }
685 let src_rank = rank[src];
686 let total_w = out_weight[src];
687 for &(dst, w) in edges_list {
688 next_rank[dst] += DAMPING * src_rank * (w / total_w);
689 }
690 }
691
692 let diff: f32 = rank
694 .iter()
695 .zip(next_rank.iter())
696 .map(|(a, b)| (a - b).abs())
697 .sum();
698
699 std::mem::swap(&mut rank, &mut next_rank);
700
701 if diff < EPSILON {
702 break;
703 }
704 }
705
706 rank
707}
708
709struct DefGraphData {
713 def_edges: Vec<(DefId, DefId, u32)>,
714 def_ranks: Vec<f32>,
715 def_callers: Vec<Vec<DefId>>,
716 def_callees: Vec<Vec<DefId>>,
717 offsets: Vec<usize>,
718 base_ranks: Vec<f32>,
719 file_edges: Vec<(u32, u32, u32)>,
720}
721
722fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
724 let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
726 for (file_idx, file) in files.iter().enumerate() {
727 for (def_idx, def) in file.defs.iter().enumerate() {
728 #[expect(clippy::cast_possible_truncation)]
729 let caller_id: DefId = (file_idx as u32, def_idx as u16);
730 for call in &def.calls {
731 if let Some(callee_id) = call.resolved {
732 *def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
733 }
734 }
735 }
736 }
737 let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
738 .into_iter()
739 .map(|((src, dst), w)| (src, dst, w))
740 .collect();
741
742 let offsets = def_offsets(files);
744 let n_defs = *offsets.last().unwrap_or(&0);
745
746 let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
747 .iter()
748 .map(|(src, dst, w)| {
749 #[expect(clippy::cast_possible_truncation)]
750 (
751 flatten_def_id(&offsets, *src) as u32,
752 flatten_def_id(&offsets, *dst) as u32,
753 *w,
754 )
755 })
756 .collect();
757
758 let def_ranks = pagerank(n_defs, &flat_def_edges, None);
759
760 let base_ranks: Vec<f32> = files
762 .iter()
763 .enumerate()
764 .map(|(i, file)| {
765 let start = offsets[i];
766 let end = start + file.defs.len();
767 def_ranks[start..end].iter().sum()
768 })
769 .collect();
770
771 let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
773 for &(src, dst, w) in &def_edges {
774 let src_file = src.0;
775 let dst_file = dst.0;
776 if src_file != dst_file {
777 *file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
778 }
779 }
780 let file_edges: Vec<(u32, u32, u32)> = file_edge_map
781 .into_iter()
782 .map(|((src, dst), w)| (src, dst, w))
783 .collect();
784
785 let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
787
788 DefGraphData {
789 def_edges,
790 def_ranks,
791 def_callers,
792 def_callees,
793 offsets,
794 base_ranks,
795 file_edges,
796 }
797}
798
799pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
809 let root = root.canonicalize().map_err(|e| crate::Error::Io {
810 path: root.display().to_string(),
811 source: e,
812 })?;
813
814 let mut walk_options = walk::WalkOptions::default();
815 if let Some((_, config)) = crate::cache::config::find_config(&root) {
816 walk_options.ignore_patterns = config.ignore.patterns;
817 }
818 let all_files = walk::collect_files_with_options(&root, &walk_options);
819
820 let mut file_index: HashMap<PathBuf, usize> = HashMap::new();
822 let mut files: Vec<FileNode> = Vec::new();
823 let mut raw_sources: Vec<(usize, String, String)> = Vec::new(); for path in &all_files {
826 let ext = path
827 .extension()
828 .and_then(|e| e.to_str())
829 .unwrap_or_default()
830 .to_string();
831
832 if languages::config_for_extension(&ext).is_none()
834 && import_query_for_extension(&ext).is_none()
835 {
836 continue;
837 }
838
839 let Ok(source) = std::fs::read_to_string(path) else {
840 continue; };
842
843 let rel_path = path
844 .strip_prefix(&root)
845 .unwrap_or(path)
846 .display()
847 .to_string();
848
849 let idx = files.len();
850 file_index.insert(path.clone(), idx);
851 files.push(FileNode {
852 path: rel_path,
853 defs: vec![],
854 imports: vec![],
855 });
856 raw_sources.push((idx, ext, source));
857 }
858
859 for (idx, ext, source) in &raw_sources {
861 if let Some(config) = languages::config_for_extension(ext) {
863 files[*idx].defs = extract_definitions(source, &config);
864 }
865
866 if let Some((lang, import_query)) = import_query_for_extension(ext) {
868 let raw_imports = extract_imports(source, &lang, &import_query);
869 let file_path = root.join(&files[*idx].path);
870
871 files[*idx].imports = raw_imports
872 .into_iter()
873 .map(|raw| {
874 let resolved_idx = resolve_import(&raw, ext, &file_path, &root, &file_index)
875 .and_then(|i| u32::try_from(i).ok());
876 ImportRef {
877 raw_path: raw,
878 resolved_idx,
879 }
880 })
881 .collect();
882 }
883 }
884
885 for (idx, ext, source) in &raw_sources {
887 if let Some(call_config) = languages::call_query_for_extension(ext) {
888 extract_calls(source, &call_config, &mut files[*idx].defs);
889 }
890 }
891
892 let def_index = build_def_index(&files);
894 resolve_calls(&mut files, &def_index);
895
896 let graph_data = compute_def_graph(&files);
898
899 let n = files.len();
901 let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
902
903 #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
905 let density = if n > 1 {
906 graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
907 } else {
908 0.0
909 };
910 let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
911
912 Ok(RepoGraph {
913 files,
914 edges: graph_data.file_edges,
915 base_ranks: graph_data.base_ranks,
916 callers,
917 callees,
918 def_edges: graph_data.def_edges,
919 def_ranks: graph_data.def_ranks,
920 def_callers: graph_data.def_callers,
921 def_callees: graph_data.def_callees,
922 def_offsets: graph_data.offsets,
923 alpha,
924 })
925}
926
927impl RepoGraph {
928 #[must_use]
930 pub fn def_rank(&self, did: DefId) -> f32 {
931 let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
932 self.def_ranks.get(flat).copied().unwrap_or(0.0)
933 }
934
935 #[must_use]
937 pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
938 for (file_idx, file) in self.files.iter().enumerate() {
939 if file.path == file_path {
940 for (def_idx, def) in file.defs.iter().enumerate() {
941 if def.name == def_name {
942 #[expect(clippy::cast_possible_truncation)]
943 return Some((file_idx as u32, def_idx as u16));
944 }
945 }
946 }
947 }
948 None
949 }
950}
951
952fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
954 let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
955 let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
956
957 for &(src, dst, w) in edges {
958 let (s, d) = (src as usize, dst as usize);
959 if s < n && d < n {
960 incoming[d].push((src, w));
961 outgoing[s].push((dst, w));
962 }
963 }
964
965 let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
967 lists
968 .iter_mut()
969 .map(|list| {
970 list.sort_by_key(|b| std::cmp::Reverse(b.1));
971 list.iter()
972 .take(MAX_NEIGHBORS)
973 .map(|(idx, _)| *idx)
974 .collect()
975 })
976 .collect()
977 };
978
979 (trim(&mut incoming), trim(&mut outgoing))
980}
981
982#[must_use]
997pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
998 let n = graph.files.len();
999 if n == 0 {
1000 return String::new();
1001 }
1002
1003 let ranks = if focus.is_some() {
1005 pagerank(n, &graph.edges, focus)
1006 } else {
1007 graph.base_ranks.clone()
1008 };
1009
1010 let mut sorted: Vec<usize> = (0..n).collect();
1012 sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
1013
1014 let mut output = String::new();
1015 let mut used_tokens = 0;
1016 let max_chars = max_tokens * CHARS_PER_TOKEN;
1017
1018 for (rank_pos, &file_idx) in sorted.iter().enumerate() {
1019 if used_tokens >= max_tokens {
1020 break;
1021 }
1022
1023 let file = &graph.files[file_idx];
1024 let score = ranks[file_idx];
1025 #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
1026 let percentile = (rank_pos as f32) / (n as f32);
1027
1028 let section = if percentile < 0.1 {
1029 render_tier0(graph, file_idx, file, score)
1030 } else if percentile < 0.3 {
1031 render_tier1(file, score)
1032 } else if percentile < 0.7 {
1033 render_tier2(file, score)
1034 } else {
1035 render_tier3(file)
1036 };
1037
1038 let section_chars = section.len();
1039 if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
1040 let path_line = format!("{}\n", file.path);
1042 let path_tokens = path_line.len() / CHARS_PER_TOKEN;
1043 if used_tokens + path_tokens <= max_tokens {
1044 output.push_str(&path_line);
1045 }
1046 break;
1047 }
1048
1049 output.push_str(§ion);
1050 used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
1051 }
1052
1053 output
1054}
1055
1056fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
1058 let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1059
1060 if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
1062 let _ = write!(out, " called by: ");
1063 let names: Vec<&str> = graph.callers[file_idx]
1064 .iter()
1065 .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1066 .collect();
1067 let _ = writeln!(out, "{}", names.join(", "));
1068 }
1069
1070 if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
1072 let _ = write!(out, " calls: ");
1073 let names: Vec<&str> = graph.callees[file_idx]
1074 .iter()
1075 .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1076 .collect();
1077 let _ = writeln!(out, "{}", names.join(", "));
1078 }
1079
1080 for def in &file.defs {
1082 let scope_prefix = if def.scope.is_empty() {
1083 String::new()
1084 } else {
1085 format!("{} > ", def.scope)
1086 };
1087 if let Some(sig) = &def.signature {
1088 let _ = writeln!(out, " {scope_prefix}{} {sig}", def.kind);
1089 } else {
1090 let _ = writeln!(out, " {scope_prefix}{} {}", def.kind, def.name);
1091 }
1092 }
1093 let _ = writeln!(out);
1094 out
1095}
1096
1097fn render_tier1(file: &FileNode, score: f32) -> String {
1099 let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1100 for def in &file.defs {
1101 if let Some(sig) = &def.signature {
1102 let _ = writeln!(out, " {sig}");
1103 } else {
1104 let _ = writeln!(out, " {} {}", def.kind, def.name);
1105 }
1106 }
1107 let _ = writeln!(out);
1108 out
1109}
1110
1111fn render_tier2(file: &FileNode, score: f32) -> String {
1113 let mut out = format!("{} (rank: {score:.4})", file.path);
1114 if !file.defs.is_empty() {
1115 let names: Vec<String> = file
1116 .defs
1117 .iter()
1118 .map(|d| format!("{}:{}", d.kind, d.name))
1119 .collect();
1120 let _ = write!(out, " -- {}", names.join(", "));
1121 }
1122 let _ = writeln!(out);
1123 out
1124}
1125
1126fn render_tier3(file: &FileNode) -> String {
1128 format!("{}\n", file.path)
1129}
1130
1131#[cfg(test)]
1134mod tests {
1135 use super::*;
1136
1137 #[test]
1138 fn test_pagerank_simple() {
1139 let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
1141 let ranks = pagerank(3, &edges, None);
1142
1143 assert_eq!(ranks.len(), 3);
1145 let sum: f32 = ranks.iter().sum();
1146 assert!(
1147 (sum - 1.0).abs() < 0.01,
1148 "ranks should sum to ~1.0, got {sum}"
1149 );
1150
1151 let expected = 1.0 / 3.0;
1153 for (i, &r) in ranks.iter().enumerate() {
1154 assert!(
1155 (r - expected).abs() < 0.05,
1156 "rank[{i}] = {r}, expected ~{expected}"
1157 );
1158 }
1159 }
1160
1161 #[test]
1162 fn test_pagerank_star() {
1163 let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
1165 let ranks = pagerank(4, &edges, None);
1166
1167 assert_eq!(ranks.len(), 4);
1168 let max_idx = ranks
1170 .iter()
1171 .enumerate()
1172 .max_by(|a, b| a.1.total_cmp(b.1))
1173 .unwrap()
1174 .0;
1175 assert_eq!(max_idx, 3, "node 3 should have highest rank");
1176 assert!(
1177 ranks[3] > ranks[0],
1178 "rank[3]={} should be > rank[0]={}",
1179 ranks[3],
1180 ranks[0]
1181 );
1182 }
1183
1184 #[test]
1185 fn test_pagerank_topic_sensitive() {
1186 let edges = vec![(0, 1, 1), (1, 2, 1)];
1188 let uniform_ranks = pagerank(3, &edges, None);
1189 let biased_ranks = pagerank(3, &edges, Some(0));
1190
1191 assert!(
1193 biased_ranks[0] > uniform_ranks[0],
1194 "focused rank[0]={} should be > uniform rank[0]={}",
1195 biased_ranks[0],
1196 uniform_ranks[0]
1197 );
1198 }
1199
1200 #[test]
1201 fn test_pagerank_empty() {
1202 let ranks = pagerank(0, &[], None);
1203 assert!(ranks.is_empty());
1204 }
1205
1206 #[test]
1207 fn test_render_tiers() {
1208 let files: Vec<FileNode> = (0..10)
1210 .map(|i| FileNode {
1211 path: format!("src/file_{i}.rs"),
1212 defs: vec![Definition {
1213 name: format!("func_{i}"),
1214 kind: "function_item".to_string(),
1215 start_line: 1,
1216 end_line: 5,
1217 scope: String::new(),
1218 signature: Some(format!("func_{i}(x: i32) -> i32")),
1219 start_byte: 0,
1220 end_byte: 0,
1221 calls: vec![],
1222 }],
1223 imports: vec![],
1224 })
1225 .collect();
1226
1227 let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
1229 let base_ranks = pagerank(10, &edges, None);
1230 let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
1231
1232 let graph = RepoGraph {
1233 files,
1234 edges,
1235 base_ranks,
1236 callers: top_callers,
1237 callees: top_callees,
1238 def_edges: vec![],
1239 def_ranks: vec![],
1240 def_callers: vec![],
1241 def_callees: vec![],
1242 def_offsets: vec![0],
1243 alpha: 0.5,
1244 };
1245
1246 let full = render(&graph, 10_000, None);
1248 assert!(
1249 full.contains("file_0"),
1250 "output should contain the top-ranked file"
1251 );
1252 assert!(
1254 full.contains("## src/file_0.rs"),
1255 "top file should have tier 0 heading"
1256 );
1257
1258 let small = render(&graph, 10, None);
1260 assert!(
1261 !small.is_empty(),
1262 "even tiny budget should produce some output"
1263 );
1264 let full_lines = full.lines().count();
1266 let small_lines = small.lines().count();
1267 assert!(
1268 small_lines < full_lines,
1269 "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
1270 );
1271 }
1272
1273 #[test]
1274 fn test_render_empty_graph() {
1275 let graph = RepoGraph {
1276 files: vec![],
1277 edges: vec![],
1278 base_ranks: vec![],
1279 callers: vec![],
1280 callees: vec![],
1281 def_edges: vec![],
1282 def_ranks: vec![],
1283 def_callers: vec![],
1284 def_callees: vec![],
1285 def_offsets: vec![0],
1286 alpha: 0.5,
1287 };
1288 let output = render(&graph, 1000, None);
1289 assert!(output.is_empty(), "empty graph should render empty string");
1290 }
1291
1292 #[test]
1293 fn test_build_graph_on_fixtures() {
1294 let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
1295 .parent()
1296 .unwrap()
1297 .parent()
1298 .unwrap()
1299 .join("tests")
1300 .join("fixtures");
1301
1302 let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
1303
1304 assert!(
1306 !graph.files.is_empty(),
1307 "graph should contain files from fixtures"
1308 );
1309
1310 let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
1312 assert!(rs_file.is_some(), "should find sample.rs");
1313 let rs_file = rs_file.unwrap();
1314 assert!(
1315 !rs_file.defs.is_empty(),
1316 "sample.rs should have definitions"
1317 );
1318 assert!(
1319 rs_file.defs.iter().any(|d| d.name == "hello"),
1320 "should find 'hello' function in sample.rs"
1321 );
1322
1323 let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
1325 assert!(py_file.is_some(), "should find sample.py");
1326 let py_file = py_file.unwrap();
1327 assert!(
1328 !py_file.defs.is_empty(),
1329 "sample.py should have definitions"
1330 );
1331 assert!(
1332 py_file.defs.iter().any(|d| d.name == "greet"),
1333 "should find 'greet' function in sample.py"
1334 );
1335
1336 assert_eq!(graph.base_ranks.len(), graph.files.len());
1338 let sum: f32 = graph.base_ranks.iter().sum();
1339 assert!(
1340 (sum - 1.0).abs() < 0.01,
1341 "PageRank scores should sum to ~1.0, got {sum}"
1342 );
1343 }
1344
1345 #[test]
1346 fn test_extract_imports_rust() {
1347 let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
1348 let (lang, query) = import_query_for_extension("rs").unwrap();
1349 let imports = extract_imports(source, &lang, &query);
1350 assert_eq!(imports.len(), 2);
1351 assert!(imports[0].contains("crate::foo::bar"));
1352 }
1353
1354 #[test]
1355 fn test_extract_imports_python_stub() {
1356 let source = "from typing import Protocol\nimport pkg.types\n";
1357 let (lang, query) = import_query_for_extension("pyi").unwrap();
1358 let imports = extract_imports(source, &lang, &query);
1359 assert_eq!(imports.len(), 2);
1360 assert!(imports[0].contains("from typing import Protocol"));
1361 assert!(imports[1].contains("import pkg.types"));
1362 }
1363
1364 #[test]
1365 fn test_resolve_python_import_to_stub_file() {
1366 let root = PathBuf::from("/project");
1367 let mut file_index = HashMap::new();
1368 file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
1369
1370 let result = resolve_python_import("import pkg.types", &root, &file_index);
1371 assert_eq!(result, Some(1));
1372 }
1373
1374 #[test]
1375 fn test_resolve_rust_crate_import() {
1376 let root = PathBuf::from("/project");
1377 let file_path = PathBuf::from("/project/src/main.rs");
1378 let mut file_index = HashMap::new();
1379 file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1380 file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1381
1382 let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1383 assert_eq!(result, Some(1));
1384 }
1385
1386 #[test]
1387 fn test_resolve_rust_external_crate_dropped() {
1388 let root = PathBuf::from("/project");
1389 let file_path = PathBuf::from("/project/src/main.rs");
1390 let file_index = HashMap::new();
1391
1392 let result = resolve_rust_import(
1393 "use std::collections::HashMap;",
1394 &file_path,
1395 &root,
1396 &file_index,
1397 );
1398 assert_eq!(result, None, "external crate imports should be dropped");
1399 }
1400
1401 #[test]
1402 fn test_neighbor_lists() {
1403 let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1405 let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1406
1407 assert!(incoming[2].contains(&0));
1409 assert!(incoming[2].contains(&1));
1410
1411 assert!(outgoing[0].contains(&1));
1413 assert!(outgoing[0].contains(&2));
1414 }
1415
1416 #[test]
1417 #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1418 fn test_full_repo_map() {
1419 use std::time::Instant;
1420
1421 let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1422 .parent()
1423 .unwrap()
1424 .parent()
1425 .unwrap();
1426
1427 let t0 = Instant::now();
1429 let graph = build_graph(root).expect("build_graph on ripvec root");
1430 let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1431
1432 let t1 = Instant::now();
1434 let rendered = render(&graph, 2000, None);
1435 let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1436
1437 let t2 = Instant::now();
1439 let focus_idx = graph
1440 .base_ranks
1441 .iter()
1442 .enumerate()
1443 .max_by(|a, b| a.1.total_cmp(b.1))
1444 .map(|(i, _)| i);
1445 let focused = render(&graph, 2000, focus_idx);
1446 let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1447
1448 eprintln!("\n=== Repo Map Performance ===");
1449 eprintln!(
1450 "Files: {}, Edges: {}, Defs: {}",
1451 graph.files.len(),
1452 graph.edges.len(),
1453 graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1454 );
1455 eprintln!("build_graph: {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1456 eprintln!(
1457 "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1458 rendered.len(),
1459 rendered.len() / 4
1460 );
1461 eprintln!(
1462 "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1463 focused.len(),
1464 focused.len() / 4
1465 );
1466
1467 eprintln!("\nTop 5 by PageRank:");
1468 let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1469 ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1470 for (i, rank) in ranked.iter().take(5) {
1471 eprintln!(" {:.4} {}", rank, graph.files[*i].path);
1472 }
1473
1474 eprintln!("\n=== Default Render ===\n{rendered}");
1475 eprintln!(
1476 "\n=== Focused Render (on {}) ===\n{focused}",
1477 focus_idx
1478 .map(|i| graph.files[i].path.as_str())
1479 .unwrap_or("none")
1480 );
1481 }
1482}