ripvec_core/
repo_map.rs

1//! `PageRank`-weighted structural overview of a codebase.
2//!
3//! Builds a dependency graph from tree-sitter definition and import extraction,
4//! ranks files by importance using `PageRank` (standard or topic-sensitive), and
5//! renders a budget-constrained overview with tiered detail levels.
6
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rayon::prelude::*;
12use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
13use streaming_iterator::StreamingIterator;
14use tree_sitter::{Parser, Query, QueryCursor};
15
16use crate::languages;
17use crate::walk;
18
19// ── Data Structures ──────────────────────────────────────────────────
20
21/// Persisted dependency graph with `PageRank` scores.
22#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
23pub struct RepoGraph {
24    /// Files in the repository with definitions, imports, and calls.
25    pub files: Vec<FileNode>,
26    /// File-level edges (derived from def-level call edges).
27    pub edges: Vec<(u32, u32, u32)>,
28    /// File-level `PageRank` scores (aggregated from def-level).
29    pub base_ranks: Vec<f32>,
30    /// File-level callers (indices into `files`).
31    pub callers: Vec<Vec<u32>>,
32    /// File-level callees (indices into `files`).
33    pub callees: Vec<Vec<u32>>,
34    /// Definition-level call edges: `(caller_def, callee_def, weight)`.
35    pub def_edges: Vec<(DefId, DefId, u32)>,
36    /// Definition-level `PageRank` scores (flattened: `offsets[file_idx] + def_idx`).
37    pub def_ranks: Vec<f32>,
38    /// Definition-level callers (flattened, parallel to `def_ranks`).
39    pub def_callers: Vec<Vec<DefId>>,
40    /// Definition-level callees (flattened, parallel to `def_ranks`).
41    pub def_callees: Vec<Vec<DefId>>,
42    /// Prefix-sum offsets for flattening `DefId` to linear index.
43    pub def_offsets: Vec<usize>,
44    /// Auto-tuned alpha for search boost.
45    pub alpha: f32,
46}
47
48/// A file in the repository with its definitions and imports.
49#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
50pub struct FileNode {
51    /// Relative path from the repository root.
52    pub path: String,
53    /// Definitions (functions, structs, classes, etc.) extracted from this file.
54    pub defs: Vec<Definition>,
55    /// Import references extracted from this file.
56    pub imports: Vec<ImportRef>,
57}
58
59/// A definition extracted from a source file.
60#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
61pub struct Definition {
62    /// Name of the definition (e.g., function name, class name).
63    pub name: String,
64    /// Kind of syntax node (e.g., `function_item`, `class_definition`).
65    pub kind: String,
66    /// 1-based start line number.
67    pub start_line: u32,
68    /// 1-based end line number.
69    pub end_line: u32,
70    /// Scope chain (e.g., `"impl_item Foo > fn bar"`).
71    pub scope: String,
72    /// Function/method signature, if available.
73    pub signature: Option<String>,
74    /// Byte offset of this definition's start in the source file.
75    pub start_byte: u32,
76    /// Byte offset of this definition's end in the source file.
77    pub end_byte: u32,
78    /// Call sites within this definition's body.
79    pub calls: Vec<CallRef>,
80}
81
82/// An import reference extracted from a source file.
83#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
84pub struct ImportRef {
85    /// Raw import path as written in source (e.g., `crate::foo::bar`).
86    pub raw_path: String,
87    /// Resolved file index in [`RepoGraph::files`], if resolution succeeded.
88    pub resolved_idx: Option<u32>,
89}
90
91/// Unique identifier for a definition: (file index, definition index within file).
92pub type DefId = (u32, u16);
93
94/// A call site extracted from a definition body.
95#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
96pub struct CallRef {
97    /// Callee function/method name.
98    pub name: String,
99    /// Byte offset of the call in the source file (for scoping to definitions).
100    pub byte_offset: u32,
101    /// Resolved target definition, if resolution succeeded.
102    pub resolved: Option<DefId>,
103}
104
105// ── Constants ────────────────────────────────────────────────────────
106
107/// `PageRank` damping factor.
108const DAMPING: f32 = 0.85;
109
110/// `PageRank` convergence threshold.
111const EPSILON: f32 = 1e-6;
112
113/// Maximum `PageRank` iterations.
114const MAX_ITERATIONS: usize = 100;
115
116/// Maximum callers/callees stored per file.
117const MAX_NEIGHBORS: usize = 5;
118
119/// Approximate characters per token for budget estimation.
120const CHARS_PER_TOKEN: usize = 4;
121
122// ── Import Queries ───────────────────────────────────────────────────
123
124/// Compile a tree-sitter import query for the given extension.
125///
126/// Returns `None` for unsupported extensions.
127fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
128    let (lang, query_str): (tree_sitter::Language, &str) = match ext {
129        "rs" => (
130            tree_sitter_rust::LANGUAGE.into(),
131            "(use_declaration) @import",
132        ),
133        "py" | "pyi" => (
134            tree_sitter_python::LANGUAGE.into(),
135            concat!(
136                "(import_statement) @import\n",
137                "(import_from_statement) @import",
138            ),
139        ),
140        "js" | "jsx" => (
141            tree_sitter_javascript::LANGUAGE.into(),
142            "(import_statement source: (string) @import_path) @import",
143        ),
144        "ts" => (
145            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
146            "(import_statement source: (string) @import_path) @import",
147        ),
148        "tsx" => (
149            tree_sitter_typescript::LANGUAGE_TSX.into(),
150            "(import_statement source: (string) @import_path) @import",
151        ),
152        "go" => (
153            tree_sitter_go::LANGUAGE.into(),
154            "(import_spec path: (interpreted_string_literal) @import_path) @import",
155        ),
156        // Ruby: require statements.
157        "rb" => (
158            tree_sitter_ruby::LANGUAGE.into(),
159            "(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
160        ),
161        _ => return None,
162    };
163    let query = match Query::new(&lang, query_str) {
164        Ok(q) => q,
165        Err(e) => {
166            tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
167            return None;
168        }
169    };
170    Some((lang, query))
171}
172
173/// Extract import paths from source using tree-sitter.
174fn extract_imports(
175    source: &str,
176    lang: &tree_sitter::Language,
177    import_query: &Query,
178) -> Vec<String> {
179    let mut parser = Parser::new();
180    if parser.set_language(lang).is_err() {
181        return vec![];
182    }
183    let Some(tree) = parser.parse(source, None) else {
184        return vec![];
185    };
186
187    let mut cursor = QueryCursor::new();
188    let mut imports = Vec::new();
189    let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
190
191    while let Some(m) = matches.next() {
192        // Prefer @import_path capture (JS/TS/Go), fall back to full @import text
193        let mut import_path_text = None;
194        let mut import_text = None;
195
196        for cap in m.captures {
197            let cap_name = &import_query.capture_names()[cap.index as usize];
198            let text = &source[cap.node.start_byte()..cap.node.end_byte()];
199            if *cap_name == "import_path" {
200                import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
201            } else if *cap_name == "import" {
202                import_text = Some(text);
203            }
204        }
205
206        if let Some(path) = import_path_text {
207            imports.push(path.to_string());
208        } else if let Some(text) = import_text {
209            imports.push(text.to_string());
210        }
211    }
212
213    imports
214}
215
216// ── Import Resolution ────────────────────────────────────────────────
217
218/// Resolve a Rust `use` path to a file index in the file map.
219///
220/// Handles `crate::`, `self::`, and `super::` prefixes. External crate
221/// imports are dropped (returns `None`).
222fn resolve_rust_import(
223    raw: &str,
224    file_path: &Path,
225    root: &Path,
226    file_index: &HashMap<PathBuf, usize>,
227) -> Option<usize> {
228    // Extract the module path from `use crate::foo::bar;` or `use crate::foo::bar::Baz;`
229    let trimmed = raw
230        .trim()
231        .trim_start_matches("use ")
232        .trim_end_matches(';')
233        .trim();
234
235    let segments: Vec<&str> = trimmed.split("::").collect();
236    if segments.is_empty() {
237        return None;
238    }
239
240    // Determine the base directory and skip prefix segments
241    let (base, skip) = match segments[0] {
242        "crate" => {
243            // Find the nearest Cargo.toml ancestor to determine the crate root.
244            // In a workspace, `crate::foo` resolves relative to the crate's src/,
245            // not the workspace root.
246            let mut dir = file_path.parent();
247            let crate_root = loop {
248                match dir {
249                    Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
250                    Some(d) => dir = d.parent(),
251                    None => break root.join("src"), // fallback
252                }
253            };
254            (crate_root, 1)
255        }
256        "self" => {
257            let dir = file_path.parent()?;
258            (dir.to_path_buf(), 1)
259        }
260        "super" => {
261            let dir = file_path.parent()?.parent()?;
262            (dir.to_path_buf(), 1)
263        }
264        // External crate — drop
265        _ => return None,
266    };
267
268    // Build candidate paths from the remaining segments.
269    // Try progressively shorter prefixes since the last segments
270    // may be items (struct, fn) rather than modules.
271    let path_segments = &segments[skip..];
272    for end in (1..=path_segments.len()).rev() {
273        let mut candidate = base.clone();
274        for seg in &path_segments[..end] {
275            // Strip glob patterns like `{Foo, Bar}`
276            let clean = seg.split('{').next().unwrap_or(seg).trim();
277            if !clean.is_empty() {
278                candidate.push(clean);
279            }
280        }
281
282        // Try file.rs
283        let as_file = candidate.with_extension("rs");
284        if let Some(&idx) = file_index.get(&as_file) {
285            return Some(idx);
286        }
287
288        // Try dir/mod.rs
289        let as_mod = candidate.join("mod.rs");
290        if let Some(&idx) = file_index.get(&as_mod) {
291            return Some(idx);
292        }
293    }
294
295    None
296}
297
298/// Resolve an import path to a file index based on file extension.
299fn resolve_import(
300    raw: &str,
301    ext: &str,
302    file_path: &Path,
303    root: &Path,
304    file_index: &HashMap<PathBuf, usize>,
305) -> Option<usize> {
306    match ext {
307        "rs" => resolve_rust_import(raw, file_path, root, file_index),
308        "py" | "pyi" => resolve_python_import(raw, root, file_index),
309        "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
310        // Go imports use full package paths — skip local resolution
311        _ => None,
312    }
313}
314
315/// Resolve a Python import to a file index.
316///
317/// Handles `import foo.bar` and `from foo.bar import baz` patterns.
318fn resolve_python_import(
319    raw: &str,
320    root: &Path,
321    file_index: &HashMap<PathBuf, usize>,
322) -> Option<usize> {
323    let module_path = if let Some(rest) = raw.strip_prefix("from ") {
324        rest.split_whitespace().next()?
325    } else if let Some(rest) = raw.strip_prefix("import ") {
326        rest.split_whitespace().next()?
327    } else {
328        return None;
329    };
330
331    let rel_path: PathBuf = module_path.split('.').collect();
332    for ext in ["py", "pyi"] {
333        let as_file = root.join(&rel_path).with_extension(ext);
334        if let Some(&idx) = file_index.get(&as_file) {
335            return Some(idx);
336        }
337    }
338
339    for init_name in ["__init__.py", "__init__.pyi"] {
340        let as_init = root.join(&rel_path).join(init_name);
341        if let Some(&idx) = file_index.get(&as_init) {
342            return Some(idx);
343        }
344    }
345
346    None
347}
348
349/// Resolve a JS/TS import to a file index.
350///
351/// Handles relative paths like `./foo` or `../bar`.
352fn resolve_js_import(
353    raw: &str,
354    file_path: &Path,
355    file_index: &HashMap<PathBuf, usize>,
356) -> Option<usize> {
357    if !raw.starts_with('.') {
358        return None;
359    }
360
361    let dir = file_path.parent()?;
362    let candidate = dir.join(raw);
363
364    for ext in &["js", "jsx", "ts", "tsx"] {
365        let with_ext = candidate.with_extension(ext);
366        if let Some(&idx) = file_index.get(&with_ext) {
367            return Some(idx);
368        }
369    }
370
371    for ext in &["js", "jsx", "ts", "tsx"] {
372        let index_file = candidate.join("index").with_extension(ext);
373        if let Some(&idx) = file_index.get(&index_file) {
374            return Some(idx);
375        }
376    }
377
378    None
379}
380
381// ── Extraction ───────────────────────────────────────────────────────
382
383/// Extract definitions from a source file using tree-sitter.
384fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
385    let mut parser = Parser::new();
386    if parser.set_language(&config.language).is_err() {
387        return vec![];
388    }
389    let Some(tree) = parser.parse(source, None) else {
390        return vec![];
391    };
392
393    let mut cursor = QueryCursor::new();
394    let mut defs = Vec::new();
395    let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
396
397    while let Some(m) = matches.next() {
398        let mut name = String::new();
399        let mut def_node = None;
400
401        for cap in m.captures {
402            let cap_name = &config.query.capture_names()[cap.index as usize];
403            if *cap_name == "name" {
404                name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
405            } else if *cap_name == "def" {
406                def_node = Some(cap.node);
407            }
408        }
409
410        if let Some(node) = def_node {
411            let scope = crate::chunk::build_scope_chain(node, source);
412            let signature = crate::chunk::extract_signature(node, source);
413            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
414            let start_line = node.start_position().row as u32 + 1;
415            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
416            let end_line = node.end_position().row as u32 + 1;
417            #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
418            let start_byte = node.start_byte() as u32;
419            #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
420            let end_byte = node.end_byte() as u32;
421            defs.push(Definition {
422                name,
423                kind: node.kind().to_string(),
424                start_line,
425                end_line,
426                scope,
427                signature,
428                start_byte,
429                end_byte,
430                calls: vec![],
431            });
432        }
433    }
434
435    defs
436}
437
438// ── Call Extraction & Resolution ────────────────────────────────────
439
440/// Extract call sites from a source file and assign them to definitions.
441///
442/// Uses the language's call query to find all call expressions, then
443/// assigns each call to the definition whose byte range contains it.
444/// Calls outside any definition body (module-level) are ignored.
445fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
446    let mut parser = Parser::new();
447    if parser.set_language(&call_config.language).is_err() {
448        return;
449    }
450    let Some(tree) = parser.parse(source, None) else {
451        return;
452    };
453
454    let mut cursor = QueryCursor::new();
455    let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
456
457    while let Some(m) = matches.next() {
458        let mut callee_name = None;
459        let mut call_byte = 0u32;
460
461        for cap in m.captures {
462            let cap_name = &call_config.query.capture_names()[cap.index as usize];
463            if *cap_name == "callee" {
464                callee_name = Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
465                #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
466                {
467                    call_byte = cap.node.start_byte() as u32;
468                }
469            }
470        }
471
472        if let Some(name) = callee_name {
473            // Assign to the enclosing definition by byte range
474            if let Some(def) = defs
475                .iter_mut()
476                .find(|d| d.start_byte <= call_byte && call_byte < d.end_byte)
477            {
478                // Skip self-recursive calls
479                if def.name != name {
480                    def.calls.push(CallRef {
481                        name,
482                        byte_offset: call_byte,
483                        resolved: None,
484                    });
485                }
486            }
487            // Calls outside any definition are ignored (module-level init)
488        }
489    }
490}
491
492/// Build an index from definition name to list of `DefId`s.
493fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
494    let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
495    for (file_idx, file) in files.iter().enumerate() {
496        for (def_idx, def) in file.defs.iter().enumerate() {
497            #[expect(clippy::cast_possible_truncation)]
498            let did: DefId = (file_idx as u32, def_idx as u16);
499            index.entry(def.name.clone()).or_default().push(did);
500        }
501    }
502    index
503}
504
505/// Resolve call references to target definitions.
506///
507/// Strategy:
508/// 1. Scoped calls (`mod_a::foo`): look up candidates by bare name, then prefer
509///    the unique candidate whose file path or scope contains the qualifier. If
510///    multiple candidates still match after qualifier filtering, the call is
511///    ambiguous — leave `resolved` as `None` (no silent first-wins).
512/// 2. Unqualified calls — same-file: prefer definitions in the caller's own file.
513/// 3. Unqualified calls — imported-file: check definitions in files this file imports.
514/// 4. Unqualified calls — ambiguous (multiple candidates, none same-file or imported):
515///    leave `resolved` as `None`.
516/// 5. Unresolved: leave `resolved` as `None`.
517fn resolve_calls(files: &mut [FileNode], def_index: &HashMap<String, Vec<DefId>>) {
518    // Pre-compute imported file sets for each file
519    let imported_files: Vec<std::collections::HashSet<u32>> = files
520        .iter()
521        .map(|f| {
522            f.imports
523                .iter()
524                .filter_map(|imp| imp.resolved_idx)
525                .collect()
526        })
527        .collect();
528
529    for file_idx in 0..files.len() {
530        for def_idx in 0..files[file_idx].defs.len() {
531            for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
532                let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
533
534                // Determine whether this is a scoped call (contains `::`)
535                let (lookup_key, qualifier) = if let Some(pos) = call_name.rfind("::") {
536                    // Scoped call: bare name is the trailing component; qualifier is everything before
537                    let bare = &call_name[pos + 2..];
538                    let qual = &call_name[..pos];
539                    (bare.to_string(), Some(qual.to_string()))
540                } else {
541                    (call_name.clone(), None)
542                };
543
544                let Some(candidates) = def_index.get(&lookup_key) else {
545                    continue;
546                };
547
548                if let Some(qual) = qualifier {
549                    // Scoped call: filter candidates by qualifier match.
550                    // A candidate matches if the file path or scope chain contains the
551                    // qualifier as a path component (e.g., file "mod_a.rs" for qualifier "mod_a",
552                    // or file "mod_a/lib.rs", or scope containing "mod_a").
553                    let qual_segments: Vec<&str> = qual.split("::").collect();
554                    let matching: Vec<DefId> = candidates
555                        .iter()
556                        .copied()
557                        .filter(|&(f_idx, _)| {
558                            let file_path = &files[f_idx as usize].path;
559                            // Check if the qualifier's last segment appears in the file path stem
560                            // or if the full qualifier appears as path components in the file path.
561                            let last_segment = qual_segments.last().copied().unwrap_or("");
562                            // Normalise: replace path separators with `::` for comparison
563                            let path_as_module =
564                                file_path.trim_end_matches(".rs").replace(['/', '\\'], "::");
565                            path_as_module.contains(last_segment)
566                                || file_path.contains(last_segment)
567                        })
568                        .collect();
569
570                    // Unique match → resolve; ambiguous or no match → leave None
571                    if matching.len() == 1 {
572                        files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(matching[0]);
573                    }
574                    // Multiple matches or no matches → resolved stays None (ambiguous)
575                    continue;
576                }
577
578                // Unqualified call resolution:
579
580                // Priority 1: same file
581                #[expect(clippy::cast_possible_truncation)]
582                let file_idx_u32 = file_idx as u32;
583                if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
584                    files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
585                    continue;
586                }
587
588                // Priority 2: imported file — resolve only when unambiguous
589                let imported_candidates: Vec<DefId> = candidates
590                    .iter()
591                    .copied()
592                    .filter(|(f, _)| imported_files[file_idx].contains(f))
593                    .collect();
594                if imported_candidates.len() == 1 {
595                    files[file_idx].defs[def_idx].calls[call_idx].resolved =
596                        Some(imported_candidates[0]);
597                }
598                // Priority 3: ambiguous or unresolved — leave as None
599            }
600        }
601    }
602}
603
604/// Compute a prefix-sum offset table for flattening `DefId`s to linear indices.
605fn def_offsets(files: &[FileNode]) -> Vec<usize> {
606    let mut offsets = Vec::with_capacity(files.len() + 1);
607    offsets.push(0);
608    for file in files {
609        offsets.push(offsets.last().unwrap() + file.defs.len());
610    }
611    offsets
612}
613
614/// Flatten a `DefId` to a linear index using the offset table.
615fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
616    offsets[did.0 as usize] + did.1 as usize
617}
618
619/// Build top-N caller and callee lists for each definition (flattened).
620fn build_def_neighbor_lists(
621    n: usize,
622    edges: &[(u32, u32, u32)],
623    offsets: &[usize],
624) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
625    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
626    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
627
628    for &(src, dst, w) in edges {
629        let (s, d) = (src as usize, dst as usize);
630        if s < n && d < n {
631            incoming[d].push((src, w));
632            outgoing[s].push((dst, w));
633        }
634    }
635
636    // Convert flat index back to DefId
637    let to_def_id = |flat: u32| -> DefId {
638        let flat_usize = flat as usize;
639        let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
640        let def_idx = flat_usize - offsets[file_idx];
641        #[expect(clippy::cast_possible_truncation)]
642        (file_idx as u32, def_idx as u16)
643    };
644
645    let callers = incoming
646        .into_iter()
647        .map(|mut v| {
648            v.sort_by_key(|b| std::cmp::Reverse(b.1));
649            v.truncate(MAX_NEIGHBORS);
650            v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
651        })
652        .collect();
653
654    let callees = outgoing
655        .into_iter()
656        .map(|mut v| {
657            v.sort_by_key(|b| std::cmp::Reverse(b.1));
658            v.truncate(MAX_NEIGHBORS);
659            v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
660        })
661        .collect();
662
663    (callers, callees)
664}
665
666// ── PageRank ─────────────────────────────────────────────────────────
667
668/// Compute `PageRank` scores for a graph.
669///
670/// If `focus` is `Some(idx)`, computes topic-sensitive `PageRank` biased
671/// toward file `idx`. Otherwise computes standard (uniform) `PageRank`.
672///
673/// Returns one score per node, summing to 1.0.
674#[expect(
675    clippy::cast_precision_loss,
676    reason = "node count fits comfortably in f32"
677)]
678fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
679    if n == 0 {
680        return vec![];
681    }
682
683    // Build adjacency: out_edges[src] = [(dst, weight)]
684    let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
685    let mut out_weight: Vec<f32> = vec![0.0; n];
686
687    for &(src, dst, w) in edges {
688        let (s, d) = (src as usize, dst as usize);
689        if s < n && d < n {
690            #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
691            let wf = f64::from(w) as f32;
692            out_edges[s].push((d, wf));
693            out_weight[s] += wf;
694        }
695    }
696
697    // Personalization vector: for topic-sensitive PageRank, blend
698    // 70% focus on the target file with 30% uniform. Pure focus
699    // (100%) starves unreachable nodes to rank=0 in sparse graphs.
700    let bias: Vec<f32> = if let Some(idx) = focus {
701        let uniform = 1.0 / n as f32;
702        let mut b = vec![0.3 * uniform; n];
703        if idx < n {
704            b[idx] += 0.7;
705        }
706        // Normalize to sum=1
707        let sum: f32 = b.iter().sum();
708        for v in &mut b {
709            *v /= sum;
710        }
711        b
712    } else {
713        vec![1.0 / n as f32; n]
714    };
715
716    let mut rank = vec![1.0 / n as f32; n];
717    let mut next_rank = vec![0.0_f32; n];
718
719    for _ in 0..MAX_ITERATIONS {
720        // Collect dangling mass (nodes with no outgoing edges)
721        let dangling: f32 = rank
722            .iter()
723            .enumerate()
724            .filter(|&(i, _)| out_edges[i].is_empty())
725            .map(|(_, &r)| r)
726            .sum();
727
728        // Distribute rank
729        for (i, nr) in next_rank.iter_mut().enumerate() {
730            *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
731        }
732
733        for (src, edges_list) in out_edges.iter().enumerate() {
734            if edges_list.is_empty() {
735                continue;
736            }
737            let src_rank = rank[src];
738            let total_w = out_weight[src];
739            for &(dst, w) in edges_list {
740                next_rank[dst] += DAMPING * src_rank * (w / total_w);
741            }
742        }
743
744        // Check convergence
745        let diff: f32 = rank
746            .iter()
747            .zip(next_rank.iter())
748            .map(|(a, b)| (a - b).abs())
749            .sum();
750
751        std::mem::swap(&mut rank, &mut next_rank);
752
753        if diff < EPSILON {
754            break;
755        }
756    }
757
758    rank
759}
760
761// ── Graph Building ───────────────────────────────────────────────────
762
763/// Intermediate result from definition-level graph computation.
764struct DefGraphData {
765    def_edges: Vec<(DefId, DefId, u32)>,
766    def_ranks: Vec<f32>,
767    def_callers: Vec<Vec<DefId>>,
768    def_callees: Vec<Vec<DefId>>,
769    offsets: Vec<usize>,
770    base_ranks: Vec<f32>,
771    file_edges: Vec<(u32, u32, u32)>,
772}
773
774/// Build definition-level edges, compute `PageRank`, and derive file-level data.
775fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
776    // Build definition-level edge list from resolved calls
777    let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
778    for (file_idx, file) in files.iter().enumerate() {
779        for (def_idx, def) in file.defs.iter().enumerate() {
780            #[expect(clippy::cast_possible_truncation)]
781            let caller_id: DefId = (file_idx as u32, def_idx as u16);
782            for call in &def.calls {
783                if let Some(callee_id) = call.resolved {
784                    *def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
785                }
786            }
787        }
788    }
789    let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
790        .into_iter()
791        .map(|((src, dst), w)| (src, dst, w))
792        .collect();
793
794    // Compute def-level PageRank
795    let offsets = def_offsets(files);
796    let n_defs = *offsets.last().unwrap_or(&0);
797
798    let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
799        .iter()
800        .map(|(src, dst, w)| {
801            #[expect(clippy::cast_possible_truncation)]
802            (
803                flatten_def_id(&offsets, *src) as u32,
804                flatten_def_id(&offsets, *dst) as u32,
805                *w,
806            )
807        })
808        .collect();
809
810    let def_ranks = pagerank(n_defs, &flat_def_edges, None);
811
812    // Aggregate def ranks to file level
813    let base_ranks: Vec<f32> = files
814        .iter()
815        .enumerate()
816        .map(|(i, file)| {
817            let start = offsets[i];
818            let end = start + file.defs.len();
819            def_ranks[start..end].iter().sum()
820        })
821        .collect();
822
823    // Derive file-level edges from def-level call edges
824    let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
825    for &(src, dst, w) in &def_edges {
826        let src_file = src.0;
827        let dst_file = dst.0;
828        if src_file != dst_file {
829            *file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
830        }
831    }
832    let file_edges: Vec<(u32, u32, u32)> = file_edge_map
833        .into_iter()
834        .map(|((src, dst), w)| (src, dst, w))
835        .collect();
836
837    // Build def-level caller/callee lists
838    let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
839
840    DefGraphData {
841        def_edges,
842        def_ranks,
843        def_callers,
844        def_callees,
845        offsets,
846        base_ranks,
847        file_edges,
848    }
849}
850
851/// Build a dependency graph from a repository root.
852///
853/// Walks the directory tree, parses each supported file with tree-sitter,
854/// extracts definitions and imports, resolves import paths to files, runs
855/// `PageRank`, and builds caller/callee lists.
856///
857/// # Errors
858///
859/// Returns an error if file walking or reading fails.
860pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
861    let root = root.canonicalize().map_err(|e| crate::Error::Io {
862        path: root.display().to_string(),
863        source: e,
864    })?;
865
866    let mut walk_options = walk::WalkOptions::default();
867    if let Some((_, config)) = crate::cache::config::find_config(&root) {
868        walk_options.ignore_patterns = config.ignore.patterns;
869    }
870    let all_files = walk::collect_files_with_options(&root, &walk_options);
871
872    // Phase 1: parallel filter + read. For each candidate path with a
873    // supported extension, read its source from disk and emit a tuple
874    // alongside its relative path. rayon spreads the I/O cost across
875    // worker threads; on a 1M-file corpus this was ~20s sequential and
876    // now sits in the 2-3s range bounded by disk + filter throughput.
877    let raw_inputs: Vec<(PathBuf, String, String, String)> = all_files
878        .par_iter()
879        .filter_map(|path| {
880            let ext = path
881                .extension()
882                .and_then(|e| e.to_str())
883                .unwrap_or_default()
884                .to_string();
885            if languages::config_for_extension(&ext).is_none()
886                && import_query_for_extension(&ext).is_none()
887            {
888                return None;
889            }
890            let source = std::fs::read_to_string(path).ok()?;
891            let rel_path = path
892                .strip_prefix(&root)
893                .unwrap_or(path)
894                .display()
895                .to_string();
896            Some((path.clone(), rel_path, ext, source))
897        })
898        .collect();
899
900    // Build the contiguous `files` Vec and the absolute-path -> idx
901    // lookup. Sequential because both want stable indices that match
902    // `raw_sources`'s order; the per-file work this gates is trivial.
903    let mut file_index: HashMap<PathBuf, usize> = HashMap::with_capacity(raw_inputs.len());
904    let mut files: Vec<FileNode> = Vec::with_capacity(raw_inputs.len());
905    let mut raw_sources: Vec<(usize, String, String)> = Vec::with_capacity(raw_inputs.len());
906    for (idx, (abs_path, rel_path, ext, source)) in raw_inputs.into_iter().enumerate() {
907        file_index.insert(abs_path, idx);
908        files.push(FileNode {
909            path: rel_path,
910            defs: vec![],
911            imports: vec![],
912        });
913        raw_sources.push((idx, ext, source));
914    }
915
916    // Phase 2: parallel per-file definition + import extraction. Each
917    // file's tree-sitter parse + def/import queries are independent;
918    // par_iter_mut over files.iter_mut().zip(raw_sources.par_iter())
919    // lets every rayon worker grind its own slice. The closures here
920    // borrow `&root` and `&file_index` immutably (both Sync) and write
921    // disjoint `FileNode` slots via the &mut iterator.
922    files
923        .par_iter_mut()
924        .zip(raw_sources.par_iter())
925        .for_each(|(file, (_, ext, source))| {
926            if let Some(config) = languages::config_for_extension(ext) {
927                file.defs = extract_definitions(source, &config);
928            }
929            if let Some((lang, import_query)) = import_query_for_extension(ext) {
930                let raw_imports = extract_imports(source, &lang, &import_query);
931                let file_path = root.join(&file.path);
932                file.imports = raw_imports
933                    .into_iter()
934                    .map(|raw| {
935                        let resolved_idx =
936                            resolve_import(&raw, ext, &file_path, &root, &file_index)
937                                .and_then(|i| u32::try_from(i).ok());
938                        ImportRef {
939                            raw_path: raw,
940                            resolved_idx,
941                        }
942                    })
943                    .collect();
944            }
945        });
946
947    // Phase 3: parallel per-file call extraction. Mutates each
948    // FileNode's `defs[*].calls` independently. Aligned with
949    // raw_sources by index via the zip.
950    files
951        .par_iter_mut()
952        .zip(raw_sources.par_iter())
953        .for_each(|(file, (_, ext, source))| {
954            if let Some(call_config) = languages::call_query_for_extension(ext) {
955                extract_calls(source, &call_config, &mut file.defs);
956            }
957        });
958
959    // Resolve call references to target definitions
960    let def_index = build_def_index(&files);
961    resolve_calls(&mut files, &def_index);
962
963    // Build def-level graph, compute PageRank, and derive file-level data
964    let graph_data = compute_def_graph(&files);
965
966    // Build file-level caller/callee lists
967    let n = files.len();
968    let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
969
970    // Auto-tune alpha based on graph density
971    #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
972    let density = if n > 1 {
973        graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
974    } else {
975        0.0
976    };
977    let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
978
979    Ok(RepoGraph {
980        files,
981        edges: graph_data.file_edges,
982        base_ranks: graph_data.base_ranks,
983        callers,
984        callees,
985        def_edges: graph_data.def_edges,
986        def_ranks: graph_data.def_ranks,
987        def_callers: graph_data.def_callers,
988        def_callees: graph_data.def_callees,
989        def_offsets: graph_data.offsets,
990        alpha,
991    })
992}
993
994impl RepoGraph {
995    /// Get the `PageRank` score for a specific definition.
996    #[must_use]
997    pub fn def_rank(&self, did: DefId) -> f32 {
998        let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
999        self.def_ranks.get(flat).copied().unwrap_or(0.0)
1000    }
1001
1002    /// Look up a definition by file path and name. Returns the first match.
1003    #[must_use]
1004    pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
1005        for (file_idx, file) in self.files.iter().enumerate() {
1006            if file.path == file_path {
1007                for (def_idx, def) in file.defs.iter().enumerate() {
1008                    if def.name == def_name {
1009                        #[expect(clippy::cast_possible_truncation)]
1010                        return Some((file_idx as u32, def_idx as u16));
1011                    }
1012                }
1013            }
1014        }
1015        None
1016    }
1017}
1018
1019/// Build top-N caller and callee lists for each file.
1020fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
1021    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
1022    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
1023
1024    for &(src, dst, w) in edges {
1025        let (s, d) = (src as usize, dst as usize);
1026        if s < n && d < n {
1027            incoming[d].push((src, w));
1028            outgoing[s].push((dst, w));
1029        }
1030    }
1031
1032    // Sort by weight descending, keep top N
1033    let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
1034        lists
1035            .iter_mut()
1036            .map(|list| {
1037                list.sort_by_key(|b| std::cmp::Reverse(b.1));
1038                list.iter()
1039                    .take(MAX_NEIGHBORS)
1040                    .map(|(idx, _)| *idx)
1041                    .collect()
1042            })
1043            .collect()
1044    };
1045
1046    (trim(&mut incoming), trim(&mut outgoing))
1047}
1048
1049// ── Rendering ────────────────────────────────────────────────────────
1050
1051/// Render a budget-constrained overview of the repository.
1052///
1053/// Files are sorted by `PageRank` (or topic-sensitive rank if `focus` is
1054/// `Some`). Output uses four tiers of decreasing detail:
1055///
1056/// - **Tier 0** (top 10%): full path, rank, callers/callees, signatures with scopes
1057/// - **Tier 1** (next 20%): full path, rank, signatures
1058/// - **Tier 2** (next 40%): full path, rank, definition names and kinds
1059/// - **Tier 3** (bottom 30%): file path only
1060///
1061/// Stops accumulating output when the estimated token count exceeds
1062/// `max_tokens`.
1063#[must_use]
1064pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
1065    let n = graph.files.len();
1066    if n == 0 {
1067        return String::new();
1068    }
1069
1070    // Compute ranks (recompute topic-sensitive if focus is given)
1071    let ranks = if focus.is_some() {
1072        pagerank(n, &graph.edges, focus)
1073    } else {
1074        graph.base_ranks.clone()
1075    };
1076
1077    // Sort file indices by rank descending
1078    let mut sorted: Vec<usize> = (0..n).collect();
1079    sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
1080
1081    let mut output = String::new();
1082    let mut used_tokens = 0;
1083    let max_chars = max_tokens * CHARS_PER_TOKEN;
1084
1085    for (rank_pos, &file_idx) in sorted.iter().enumerate() {
1086        if used_tokens >= max_tokens {
1087            break;
1088        }
1089
1090        let file = &graph.files[file_idx];
1091        let score = ranks[file_idx];
1092        #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
1093        let percentile = (rank_pos as f32) / (n as f32);
1094
1095        let section = if percentile < 0.1 {
1096            render_tier0(graph, file_idx, file, score)
1097        } else if percentile < 0.3 {
1098            render_tier1(file, score)
1099        } else if percentile < 0.7 {
1100            render_tier2(file, score)
1101        } else {
1102            render_tier3(file)
1103        };
1104
1105        let section_chars = section.len();
1106        if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
1107            // Would exceed budget — try to fit at least the path
1108            let path_line = format!("{}\n", file.path);
1109            let path_tokens = path_line.len() / CHARS_PER_TOKEN;
1110            if used_tokens + path_tokens <= max_tokens {
1111                output.push_str(&path_line);
1112            }
1113            break;
1114        }
1115
1116        output.push_str(&section);
1117        used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
1118    }
1119
1120    output
1121}
1122
1123/// Render tier 0: full detail with callers, callees, and signatures.
1124fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
1125    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1126
1127    // Callers
1128    if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
1129        let _ = write!(out, "  called by: ");
1130        let names: Vec<&str> = graph.callers[file_idx]
1131            .iter()
1132            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1133            .collect();
1134        let _ = writeln!(out, "{}", names.join(", "));
1135    }
1136
1137    // Callees
1138    if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
1139        let _ = write!(out, "  calls: ");
1140        let names: Vec<&str> = graph.callees[file_idx]
1141            .iter()
1142            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1143            .collect();
1144        let _ = writeln!(out, "{}", names.join(", "));
1145    }
1146
1147    // Definitions with scope and signature
1148    for def in &file.defs {
1149        let scope_prefix = if def.scope.is_empty() {
1150            String::new()
1151        } else {
1152            format!("{} > ", def.scope)
1153        };
1154        if let Some(sig) = &def.signature {
1155            let _ = writeln!(out, "  {scope_prefix}{} {sig}", def.kind);
1156        } else {
1157            let _ = writeln!(out, "  {scope_prefix}{} {}", def.kind, def.name);
1158        }
1159    }
1160    let _ = writeln!(out);
1161    out
1162}
1163
1164/// Render tier 1: file path, rank, and signatures.
1165fn render_tier1(file: &FileNode, score: f32) -> String {
1166    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1167    for def in &file.defs {
1168        if let Some(sig) = &def.signature {
1169            let _ = writeln!(out, "  {sig}");
1170        } else {
1171            let _ = writeln!(out, "  {} {}", def.kind, def.name);
1172        }
1173    }
1174    let _ = writeln!(out);
1175    out
1176}
1177
1178/// Render tier 2: file path, rank, and definition names/kinds.
1179fn render_tier2(file: &FileNode, score: f32) -> String {
1180    let mut out = format!("{} (rank: {score:.4})", file.path);
1181    if !file.defs.is_empty() {
1182        let names: Vec<String> = file
1183            .defs
1184            .iter()
1185            .map(|d| format!("{}:{}", d.kind, d.name))
1186            .collect();
1187        let _ = write!(out, " -- {}", names.join(", "));
1188    }
1189    let _ = writeln!(out);
1190    out
1191}
1192
1193/// Render tier 3: file path only.
1194fn render_tier3(file: &FileNode) -> String {
1195    format!("{}\n", file.path)
1196}
1197
1198// ── Tests ────────────────────────────────────────────────────────────
1199
1200#[cfg(test)]
1201mod tests {
1202    use super::*;
1203
1204    #[test]
1205    fn test_pagerank_simple() {
1206        // 3-node graph: 0 -> 1 -> 2, 2 -> 0 (cycle)
1207        let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
1208        let ranks = pagerank(3, &edges, None);
1209
1210        // All nodes in a symmetric cycle should have equal rank
1211        assert_eq!(ranks.len(), 3);
1212        let sum: f32 = ranks.iter().sum();
1213        assert!(
1214            (sum - 1.0).abs() < 0.01,
1215            "ranks should sum to ~1.0, got {sum}"
1216        );
1217
1218        // In a perfect cycle, all ranks should be approximately equal
1219        let expected = 1.0 / 3.0;
1220        for (i, &r) in ranks.iter().enumerate() {
1221            assert!(
1222                (r - expected).abs() < 0.05,
1223                "rank[{i}] = {r}, expected ~{expected}"
1224            );
1225        }
1226    }
1227
1228    #[test]
1229    fn test_pagerank_star() {
1230        // Star graph: 0,1,2 all point to 3
1231        let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
1232        let ranks = pagerank(4, &edges, None);
1233
1234        assert_eq!(ranks.len(), 4);
1235        // Node 3 should have the highest rank
1236        let max_idx = ranks
1237            .iter()
1238            .enumerate()
1239            .max_by(|a, b| a.1.total_cmp(b.1))
1240            .unwrap()
1241            .0;
1242        assert_eq!(max_idx, 3, "node 3 should have highest rank");
1243        assert!(
1244            ranks[3] > ranks[0],
1245            "rank[3]={} should be > rank[0]={}",
1246            ranks[3],
1247            ranks[0]
1248        );
1249    }
1250
1251    #[test]
1252    fn test_pagerank_topic_sensitive() {
1253        // 3-node chain: 0 -> 1 -> 2
1254        let edges = vec![(0, 1, 1), (1, 2, 1)];
1255        let uniform_ranks = pagerank(3, &edges, None);
1256        let biased_ranks = pagerank(3, &edges, Some(0));
1257
1258        // With focus on node 0, it should get a higher rank than uniform
1259        assert!(
1260            biased_ranks[0] > uniform_ranks[0],
1261            "focused rank[0]={} should be > uniform rank[0]={}",
1262            biased_ranks[0],
1263            uniform_ranks[0]
1264        );
1265    }
1266
1267    #[test]
1268    fn test_pagerank_empty() {
1269        let ranks = pagerank(0, &[], None);
1270        assert!(ranks.is_empty());
1271    }
1272
1273    #[test]
1274    fn test_render_tiers() {
1275        // Build a small graph with 10 files to exercise all tiers
1276        let files: Vec<FileNode> = (0..10)
1277            .map(|i| FileNode {
1278                path: format!("src/file_{i}.rs"),
1279                defs: vec![Definition {
1280                    name: format!("func_{i}"),
1281                    kind: "function_item".to_string(),
1282                    start_line: 1,
1283                    end_line: 5,
1284                    scope: String::new(),
1285                    signature: Some(format!("func_{i}(x: i32) -> i32")),
1286                    start_byte: 0,
1287                    end_byte: 0,
1288                    calls: vec![],
1289                }],
1290                imports: vec![],
1291            })
1292            .collect();
1293
1294        // Create a star graph: files 1-9 all import from file 0
1295        let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
1296        let base_ranks = pagerank(10, &edges, None);
1297        let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
1298
1299        let graph = RepoGraph {
1300            files,
1301            edges,
1302            base_ranks,
1303            callers: top_callers,
1304            callees: top_callees,
1305            def_edges: vec![],
1306            def_ranks: vec![],
1307            def_callers: vec![],
1308            def_callees: vec![],
1309            def_offsets: vec![0],
1310            alpha: 0.5,
1311        };
1312
1313        // Large budget: should include all files
1314        let full = render(&graph, 10_000, None);
1315        assert!(
1316            full.contains("file_0"),
1317            "output should contain the top-ranked file"
1318        );
1319        // file_0 should appear as tier 0 (highest rank)
1320        assert!(
1321            full.contains("## src/file_0.rs"),
1322            "top file should have tier 0 heading"
1323        );
1324
1325        // Tiny budget: should only fit a few files
1326        let small = render(&graph, 10, None);
1327        assert!(
1328            !small.is_empty(),
1329            "even tiny budget should produce some output"
1330        );
1331        // Should have fewer entries than full render
1332        let full_lines = full.lines().count();
1333        let small_lines = small.lines().count();
1334        assert!(
1335            small_lines < full_lines,
1336            "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
1337        );
1338    }
1339
1340    #[test]
1341    fn test_render_empty_graph() {
1342        let graph = RepoGraph {
1343            files: vec![],
1344            edges: vec![],
1345            base_ranks: vec![],
1346            callers: vec![],
1347            callees: vec![],
1348            def_edges: vec![],
1349            def_ranks: vec![],
1350            def_callers: vec![],
1351            def_callees: vec![],
1352            def_offsets: vec![0],
1353            alpha: 0.5,
1354        };
1355        let output = render(&graph, 1000, None);
1356        assert!(output.is_empty(), "empty graph should render empty string");
1357    }
1358
1359    #[test]
1360    fn test_build_graph_on_fixtures() {
1361        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
1362            .parent()
1363            .unwrap()
1364            .parent()
1365            .unwrap()
1366            .join("tests")
1367            .join("fixtures");
1368
1369        let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
1370
1371        // Should find at least the 3 fixture files
1372        assert!(
1373            !graph.files.is_empty(),
1374            "graph should contain files from fixtures"
1375        );
1376
1377        // Should find definitions in the Rust fixture
1378        let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
1379        assert!(rs_file.is_some(), "should find sample.rs");
1380        let rs_file = rs_file.unwrap();
1381        assert!(
1382            !rs_file.defs.is_empty(),
1383            "sample.rs should have definitions"
1384        );
1385        assert!(
1386            rs_file.defs.iter().any(|d| d.name == "hello"),
1387            "should find 'hello' function in sample.rs"
1388        );
1389
1390        // Should find definitions in the Python fixture
1391        let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
1392        assert!(py_file.is_some(), "should find sample.py");
1393        let py_file = py_file.unwrap();
1394        assert!(
1395            !py_file.defs.is_empty(),
1396            "sample.py should have definitions"
1397        );
1398        assert!(
1399            py_file.defs.iter().any(|d| d.name == "greet"),
1400            "should find 'greet' function in sample.py"
1401        );
1402
1403        // PageRank scores should be computed
1404        assert_eq!(graph.base_ranks.len(), graph.files.len());
1405        let sum: f32 = graph.base_ranks.iter().sum();
1406        assert!(
1407            (sum - 1.0).abs() < 0.01,
1408            "PageRank scores should sum to ~1.0, got {sum}"
1409        );
1410    }
1411
1412    #[test]
1413    fn test_extract_imports_rust() {
1414        let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
1415        let (lang, query) = import_query_for_extension("rs").unwrap();
1416        let imports = extract_imports(source, &lang, &query);
1417        assert_eq!(imports.len(), 2);
1418        assert!(imports[0].contains("crate::foo::bar"));
1419    }
1420
1421    #[test]
1422    fn test_extract_imports_python_stub() {
1423        let source = "from typing import Protocol\nimport pkg.types\n";
1424        let (lang, query) = import_query_for_extension("pyi").unwrap();
1425        let imports = extract_imports(source, &lang, &query);
1426        assert_eq!(imports.len(), 2);
1427        assert!(imports[0].contains("from typing import Protocol"));
1428        assert!(imports[1].contains("import pkg.types"));
1429    }
1430
1431    #[test]
1432    fn test_resolve_python_import_to_stub_file() {
1433        let root = PathBuf::from("/project");
1434        let mut file_index = HashMap::new();
1435        file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
1436
1437        let result = resolve_python_import("import pkg.types", &root, &file_index);
1438        assert_eq!(result, Some(1));
1439    }
1440
1441    #[test]
1442    fn test_resolve_rust_crate_import() {
1443        let root = PathBuf::from("/project");
1444        let file_path = PathBuf::from("/project/src/main.rs");
1445        let mut file_index = HashMap::new();
1446        file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1447        file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1448
1449        let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1450        assert_eq!(result, Some(1));
1451    }
1452
1453    #[test]
1454    fn test_resolve_rust_external_crate_dropped() {
1455        let root = PathBuf::from("/project");
1456        let file_path = PathBuf::from("/project/src/main.rs");
1457        let file_index = HashMap::new();
1458
1459        let result = resolve_rust_import(
1460            "use std::collections::HashMap;",
1461            &file_path,
1462            &root,
1463            &file_index,
1464        );
1465        assert_eq!(result, None, "external crate imports should be dropped");
1466    }
1467
1468    #[test]
1469    fn test_neighbor_lists() {
1470        // 0 -> 1, 0 -> 2, 1 -> 2
1471        let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1472        let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1473
1474        // Node 2 should be called by 0 and 1
1475        assert!(incoming[2].contains(&0));
1476        assert!(incoming[2].contains(&1));
1477
1478        // Node 0 should call 1 and 2
1479        assert!(outgoing[0].contains(&1));
1480        assert!(outgoing[0].contains(&2));
1481    }
1482
1483    /// RED test (R2.3 issue a): A scoped call `mod_a::foo()` must record the full path
1484    /// "mod_a::foo" in the CallRef, not just "foo".
1485    #[test]
1486    fn test_scoped_identifier_calls_preserve_path() {
1487        use crate::languages;
1488        use streaming_iterator::StreamingIterator as _;
1489
1490        let source = "
1491mod mod_a {
1492    pub fn foo() {}
1493}
1494mod mod_b {
1495    pub fn foo() {}
1496}
1497fn caller() {
1498    mod_a::foo();
1499    mod_b::foo();
1500}
1501";
1502        let call_config =
1503            languages::call_query_for_extension("rs").expect("Rust call config must exist");
1504        let lang_config =
1505            languages::config_for_extension("rs").expect("Rust lang config must exist");
1506
1507        let mut defs = {
1508            let mut parser = tree_sitter::Parser::new();
1509            parser.set_language(&lang_config.language).unwrap();
1510            let tree = parser.parse(source, None).unwrap();
1511            let mut cursor = tree_sitter::QueryCursor::new();
1512            let mut out = Vec::new();
1513            let mut matches =
1514                cursor.matches(&lang_config.query, tree.root_node(), source.as_bytes());
1515            while let Some(m) = matches.next() {
1516                let mut name = String::new();
1517                let mut def_node = None;
1518                for cap in m.captures {
1519                    let cname = &lang_config.query.capture_names()[cap.index as usize];
1520                    if *cname == "name" {
1521                        name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
1522                    } else if *cname == "def" {
1523                        def_node = Some(cap.node);
1524                    }
1525                }
1526                if let Some(node) = def_node {
1527                    #[expect(clippy::cast_possible_truncation)]
1528                    out.push(Definition {
1529                        name,
1530                        kind: node.kind().to_string(),
1531                        start_line: node.start_position().row as u32 + 1,
1532                        end_line: node.end_position().row as u32 + 1,
1533                        scope: String::new(),
1534                        signature: None,
1535                        start_byte: node.start_byte() as u32,
1536                        end_byte: node.end_byte() as u32,
1537                        calls: vec![],
1538                    });
1539                }
1540            }
1541            out
1542        };
1543
1544        extract_calls(source, &call_config, &mut defs);
1545
1546        // Find the `caller` function definition
1547        let caller_def = defs
1548            .iter()
1549            .find(|d| d.name == "caller")
1550            .expect("caller def");
1551
1552        // Calls must preserve qualified paths
1553        let call_names: Vec<&str> = caller_def.calls.iter().map(|c| c.name.as_str()).collect();
1554        assert!(
1555            call_names.contains(&"mod_a::foo"),
1556            "expected 'mod_a::foo' in calls, got: {call_names:?}"
1557        );
1558        assert!(
1559            call_names.contains(&"mod_b::foo"),
1560            "expected 'mod_b::foo' in calls, got: {call_names:?}"
1561        );
1562        // Bare 'foo' should NOT appear (the scoped paths replaced it)
1563        assert!(
1564            !call_names.contains(&"foo"),
1565            "bare 'foo' must not appear when scoped path is available. Got: {call_names:?}"
1566        );
1567    }
1568
1569    /// RED test (R2.3 issue b+c): Two defs named `Read` in different modules,
1570    /// an unqualified call to `Read`. Resolution must NOT silently pick the first.
1571    /// Either both are returned (ambiguous) or none.
1572    #[test]
1573    fn test_ambiguous_name_resolution_returns_all_or_none() {
1574        // Build two FileNodes each with a def named "Read", then a third with an
1575        // unqualified call to "Read".
1576        let file_a = FileNode {
1577            path: "mod_a.rs".to_string(),
1578            defs: vec![Definition {
1579                name: "Read".to_string(),
1580                kind: "trait_item".to_string(),
1581                start_line: 1,
1582                end_line: 3,
1583                scope: String::new(),
1584                signature: None,
1585                start_byte: 0,
1586                end_byte: 50,
1587                calls: vec![],
1588            }],
1589            imports: vec![],
1590        };
1591        let file_b = FileNode {
1592            path: "mod_b.rs".to_string(),
1593            defs: vec![Definition {
1594                name: "Read".to_string(),
1595                kind: "trait_item".to_string(),
1596                start_line: 1,
1597                end_line: 3,
1598                scope: String::new(),
1599                signature: None,
1600                start_byte: 0,
1601                end_byte: 50,
1602                calls: vec![],
1603            }],
1604            imports: vec![],
1605        };
1606        let file_c = FileNode {
1607            path: "caller.rs".to_string(),
1608            defs: vec![Definition {
1609                name: "do_thing".to_string(),
1610                kind: "function_item".to_string(),
1611                start_line: 1,
1612                end_line: 5,
1613                scope: String::new(),
1614                signature: None,
1615                start_byte: 0,
1616                end_byte: 100,
1617                calls: vec![CallRef {
1618                    name: "Read".to_string(),
1619                    byte_offset: 10,
1620                    resolved: None,
1621                }],
1622            }],
1623            imports: vec![],
1624        };
1625
1626        let mut files = vec![file_a, file_b, file_c];
1627        let def_index = build_def_index(&files);
1628        resolve_calls(&mut files, &def_index);
1629
1630        // The unqualified call to "Read" is ambiguous (two candidates, neither in same
1631        // file nor imported). Resolution must leave it as None — silent first-wins is wrong.
1632        let resolved = files[2].defs[0].calls[0].resolved;
1633        assert_eq!(
1634            resolved, None,
1635            "ambiguous unqualified call with no import context must resolve to None, not silently pick first"
1636        );
1637    }
1638
1639    #[test]
1640    #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1641    fn test_full_repo_map() {
1642        use std::time::Instant;
1643
1644        let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1645            .parent()
1646            .unwrap()
1647            .parent()
1648            .unwrap();
1649
1650        // Phase 1: build_graph (walk + parse + import resolve + PageRank)
1651        let t0 = Instant::now();
1652        let graph = build_graph(root).expect("build_graph on ripvec root");
1653        let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1654
1655        // Phase 2: render (default, no focus)
1656        let t1 = Instant::now();
1657        let rendered = render(&graph, 2000, None);
1658        let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1659
1660        // Phase 3: render (topic-sensitive, focused on highest-ranked file)
1661        let t2 = Instant::now();
1662        let focus_idx = graph
1663            .base_ranks
1664            .iter()
1665            .enumerate()
1666            .max_by(|a, b| a.1.total_cmp(b.1))
1667            .map(|(i, _)| i);
1668        let focused = render(&graph, 2000, focus_idx);
1669        let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1670
1671        eprintln!("\n=== Repo Map Performance ===");
1672        eprintln!(
1673            "Files: {}, Edges: {}, Defs: {}",
1674            graph.files.len(),
1675            graph.edges.len(),
1676            graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1677        );
1678        eprintln!("build_graph:     {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1679        eprintln!(
1680            "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1681            rendered.len(),
1682            rendered.len() / 4
1683        );
1684        eprintln!(
1685            "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1686            focused.len(),
1687            focused.len() / 4
1688        );
1689
1690        eprintln!("\nTop 5 by PageRank:");
1691        let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1692        ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1693        for (i, rank) in ranked.iter().take(5) {
1694            eprintln!("  {:.4} {}", rank, graph.files[*i].path);
1695        }
1696
1697        eprintln!("\n=== Default Render ===\n{rendered}");
1698        eprintln!(
1699            "\n=== Focused Render (on {}) ===\n{focused}",
1700            focus_idx
1701                .map(|i| graph.files[i].path.as_str())
1702                .unwrap_or("none")
1703        );
1704    }
1705}
ripvec_core/repo_map.rs

ripvec_core/
repo_map.rs