Skip to main content

ripvec_core/
repo_map.rs

1//! `PageRank`-weighted structural overview of a codebase.
2//!
3//! Builds a dependency graph from tree-sitter definition and import extraction,
4//! ranks files by importance using `PageRank` (standard or topic-sensitive), and
5//! renders a budget-constrained overview with tiered detail levels.
6
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rayon::prelude::*;
12use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
13use streaming_iterator::StreamingIterator;
14use tree_sitter::{Parser, Query, QueryCursor};
15
16use crate::languages;
17use crate::walk;
18
19// ── Data Structures ──────────────────────────────────────────────────
20
21/// Persisted dependency graph with `PageRank` scores.
22#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
23pub struct RepoGraph {
24    /// Files in the repository with definitions, imports, and calls.
25    pub files: Vec<FileNode>,
26    /// File-level edges (derived from def-level call edges).
27    pub edges: Vec<(u32, u32, u32)>,
28    /// File-level `PageRank` scores (aggregated from def-level).
29    pub base_ranks: Vec<f32>,
30    /// File-level callers (indices into `files`).
31    pub callers: Vec<Vec<u32>>,
32    /// File-level callees (indices into `files`).
33    pub callees: Vec<Vec<u32>>,
34    /// Definition-level call edges: `(caller_def, callee_def, weight)`.
35    pub def_edges: Vec<(DefId, DefId, u32)>,
36    /// Definition-level `PageRank` scores (flattened: `offsets[file_idx] + def_idx`).
37    pub def_ranks: Vec<f32>,
38    /// Definition-level callers (flattened, parallel to `def_ranks`).
39    pub def_callers: Vec<Vec<DefId>>,
40    /// Definition-level callees (flattened, parallel to `def_ranks`).
41    pub def_callees: Vec<Vec<DefId>>,
42    /// Prefix-sum offsets for flattening `DefId` to linear index.
43    pub def_offsets: Vec<usize>,
44    /// Auto-tuned alpha for search boost.
45    pub alpha: f32,
46}
47
48/// A file in the repository with its definitions and imports.
49#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
50pub struct FileNode {
51    /// Relative path from the repository root.
52    pub path: String,
53    /// Definitions (functions, structs, classes, etc.) extracted from this file.
54    pub defs: Vec<Definition>,
55    /// Import references extracted from this file.
56    pub imports: Vec<ImportRef>,
57}
58
59/// A definition extracted from a source file.
60#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
61pub struct Definition {
62    /// Name of the definition (e.g., function name, class name).
63    pub name: String,
64    /// Kind of syntax node (e.g., `function_item`, `class_definition`).
65    pub kind: String,
66    /// 1-based start line number.
67    pub start_line: u32,
68    /// 1-based end line number.
69    pub end_line: u32,
70    /// Scope chain (e.g., `"impl_item Foo > fn bar"`).
71    pub scope: String,
72    /// Function/method signature, if available.
73    pub signature: Option<String>,
74    /// Byte offset of this definition's start in the source file.
75    pub start_byte: u32,
76    /// Byte offset of this definition's end in the source file.
77    pub end_byte: u32,
78    /// Call sites within this definition's body.
79    pub calls: Vec<CallRef>,
80}
81
82/// An import reference extracted from a source file.
83#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
84pub struct ImportRef {
85    /// Raw import path as written in source (e.g., `crate::foo::bar`).
86    pub raw_path: String,
87    /// Resolved file index in [`RepoGraph::files`], if resolution succeeded.
88    pub resolved_idx: Option<u32>,
89}
90
91/// Unique identifier for a definition: (file index, definition index within file).
92pub type DefId = (u32, u16);
93
94/// A call site extracted from a definition body.
95#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
96pub struct CallRef {
97    /// Callee function/method name.
98    pub name: String,
99    /// Byte offset of the call in the source file (for scoping to definitions).
100    pub byte_offset: u32,
101    /// Resolved target definition, if resolution succeeded.
102    pub resolved: Option<DefId>,
103}
104
105// ── Constants ────────────────────────────────────────────────────────
106
107/// `PageRank` damping factor.
108const DAMPING: f32 = 0.85;
109
110/// `PageRank` convergence threshold.
111const EPSILON: f32 = 1e-6;
112
113/// Maximum `PageRank` iterations.
114const MAX_ITERATIONS: usize = 100;
115
116/// Maximum callers/callees stored per file.
117const MAX_NEIGHBORS: usize = 5;
118
119/// Approximate characters per token for budget estimation.
120const CHARS_PER_TOKEN: usize = 4;
121
122// ── Import Queries ───────────────────────────────────────────────────
123
124/// Compile a tree-sitter import query for the given extension.
125///
126/// Returns `None` for unsupported extensions.
127fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
128    let (lang, query_str): (tree_sitter::Language, &str) = match ext {
129        "rs" => (
130            tree_sitter_rust::LANGUAGE.into(),
131            "(use_declaration) @import",
132        ),
133        "py" | "pyi" => (
134            tree_sitter_python::LANGUAGE.into(),
135            concat!(
136                "(import_statement) @import\n",
137                "(import_from_statement) @import",
138            ),
139        ),
140        "js" | "jsx" => (
141            tree_sitter_javascript::LANGUAGE.into(),
142            "(import_statement source: (string) @import_path) @import",
143        ),
144        "ts" => (
145            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
146            "(import_statement source: (string) @import_path) @import",
147        ),
148        "tsx" => (
149            tree_sitter_typescript::LANGUAGE_TSX.into(),
150            "(import_statement source: (string) @import_path) @import",
151        ),
152        "go" => (
153            tree_sitter_go::LANGUAGE.into(),
154            "(import_spec path: (interpreted_string_literal) @import_path) @import",
155        ),
156        // Ruby: require statements.
157        "rb" => (
158            tree_sitter_ruby::LANGUAGE.into(),
159            "(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
160        ),
161        _ => return None,
162    };
163    let query = match Query::new(&lang, query_str) {
164        Ok(q) => q,
165        Err(e) => {
166            tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
167            return None;
168        }
169    };
170    Some((lang, query))
171}
172
173/// Extract import paths from source using tree-sitter.
174fn extract_imports(
175    source: &str,
176    lang: &tree_sitter::Language,
177    import_query: &Query,
178) -> Vec<String> {
179    let mut parser = Parser::new();
180    if parser.set_language(lang).is_err() {
181        return vec![];
182    }
183    let Some(tree) = parser.parse(source, None) else {
184        return vec![];
185    };
186
187    let mut cursor = QueryCursor::new();
188    let mut imports = Vec::new();
189    let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
190
191    while let Some(m) = matches.next() {
192        // Prefer @import_path capture (JS/TS/Go), fall back to full @import text
193        let mut import_path_text = None;
194        let mut import_text = None;
195
196        for cap in m.captures {
197            let cap_name = &import_query.capture_names()[cap.index as usize];
198            let text = &source[cap.node.start_byte()..cap.node.end_byte()];
199            if *cap_name == "import_path" {
200                import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
201            } else if *cap_name == "import" {
202                import_text = Some(text);
203            }
204        }
205
206        if let Some(path) = import_path_text {
207            imports.push(path.to_string());
208        } else if let Some(text) = import_text {
209            imports.push(text.to_string());
210        }
211    }
212
213    imports
214}
215
216// ── Import Resolution ────────────────────────────────────────────────
217
218/// Resolve a Rust `use` path to a file index in the file map.
219///
220/// Handles `crate::`, `self::`, and `super::` prefixes. External crate
221/// imports are dropped (returns `None`).
222fn resolve_rust_import(
223    raw: &str,
224    file_path: &Path,
225    root: &Path,
226    file_index: &HashMap<PathBuf, usize>,
227) -> Option<usize> {
228    // Extract the module path from `use crate::foo::bar;` or `use crate::foo::bar::Baz;`
229    let trimmed = raw
230        .trim()
231        .trim_start_matches("use ")
232        .trim_end_matches(';')
233        .trim();
234
235    let segments: Vec<&str> = trimmed.split("::").collect();
236    if segments.is_empty() {
237        return None;
238    }
239
240    // Determine the base directory and skip prefix segments
241    let (base, skip) = match segments[0] {
242        "crate" => {
243            // Find the nearest Cargo.toml ancestor to determine the crate root.
244            // In a workspace, `crate::foo` resolves relative to the crate's src/,
245            // not the workspace root.
246            let mut dir = file_path.parent();
247            let crate_root = loop {
248                match dir {
249                    Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
250                    Some(d) => dir = d.parent(),
251                    None => break root.join("src"), // fallback
252                }
253            };
254            (crate_root, 1)
255        }
256        "self" => {
257            let dir = file_path.parent()?;
258            (dir.to_path_buf(), 1)
259        }
260        "super" => {
261            let dir = file_path.parent()?.parent()?;
262            (dir.to_path_buf(), 1)
263        }
264        // External crate — drop
265        _ => return None,
266    };
267
268    // Build candidate paths from the remaining segments.
269    // Try progressively shorter prefixes since the last segments
270    // may be items (struct, fn) rather than modules.
271    let path_segments = &segments[skip..];
272    for end in (1..=path_segments.len()).rev() {
273        let mut candidate = base.clone();
274        for seg in &path_segments[..end] {
275            // Strip glob patterns like `{Foo, Bar}`
276            let clean = seg.split('{').next().unwrap_or(seg).trim();
277            if !clean.is_empty() {
278                candidate.push(clean);
279            }
280        }
281
282        // Try file.rs
283        let as_file = candidate.with_extension("rs");
284        if let Some(&idx) = file_index.get(&as_file) {
285            return Some(idx);
286        }
287
288        // Try dir/mod.rs
289        let as_mod = candidate.join("mod.rs");
290        if let Some(&idx) = file_index.get(&as_mod) {
291            return Some(idx);
292        }
293    }
294
295    None
296}
297
298/// Resolve an import path to a file index based on file extension.
299fn resolve_import(
300    raw: &str,
301    ext: &str,
302    file_path: &Path,
303    root: &Path,
304    file_index: &HashMap<PathBuf, usize>,
305) -> Option<usize> {
306    match ext {
307        "rs" => resolve_rust_import(raw, file_path, root, file_index),
308        "py" | "pyi" => resolve_python_import(raw, root, file_index),
309        "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
310        // Go imports use full package paths — skip local resolution
311        _ => None,
312    }
313}
314
315/// Resolve a Python import to a file index.
316///
317/// Handles `import foo.bar` and `from foo.bar import baz` patterns.
318fn resolve_python_import(
319    raw: &str,
320    root: &Path,
321    file_index: &HashMap<PathBuf, usize>,
322) -> Option<usize> {
323    let module_path = if let Some(rest) = raw.strip_prefix("from ") {
324        rest.split_whitespace().next()?
325    } else if let Some(rest) = raw.strip_prefix("import ") {
326        rest.split_whitespace().next()?
327    } else {
328        return None;
329    };
330
331    let rel_path: PathBuf = module_path.split('.').collect();
332    for ext in ["py", "pyi"] {
333        let as_file = root.join(&rel_path).with_extension(ext);
334        if let Some(&idx) = file_index.get(&as_file) {
335            return Some(idx);
336        }
337    }
338
339    for init_name in ["__init__.py", "__init__.pyi"] {
340        let as_init = root.join(&rel_path).join(init_name);
341        if let Some(&idx) = file_index.get(&as_init) {
342            return Some(idx);
343        }
344    }
345
346    None
347}
348
349/// Resolve a JS/TS import to a file index.
350///
351/// Handles relative paths like `./foo` or `../bar`.
352fn resolve_js_import(
353    raw: &str,
354    file_path: &Path,
355    file_index: &HashMap<PathBuf, usize>,
356) -> Option<usize> {
357    if !raw.starts_with('.') {
358        return None;
359    }
360
361    let dir = file_path.parent()?;
362    let candidate = dir.join(raw);
363
364    for ext in &["js", "jsx", "ts", "tsx"] {
365        let with_ext = candidate.with_extension(ext);
366        if let Some(&idx) = file_index.get(&with_ext) {
367            return Some(idx);
368        }
369    }
370
371    for ext in &["js", "jsx", "ts", "tsx"] {
372        let index_file = candidate.join("index").with_extension(ext);
373        if let Some(&idx) = file_index.get(&index_file) {
374            return Some(idx);
375        }
376    }
377
378    None
379}
380
381// ── Extraction ───────────────────────────────────────────────────────
382
383/// Extract definitions from a source file using tree-sitter.
384fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
385    let mut parser = Parser::new();
386    if parser.set_language(&config.language).is_err() {
387        return vec![];
388    }
389    let Some(tree) = parser.parse(source, None) else {
390        return vec![];
391    };
392
393    let mut cursor = QueryCursor::new();
394    let mut defs = Vec::new();
395    let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
396
397    while let Some(m) = matches.next() {
398        let mut name = String::new();
399        let mut def_node = None;
400
401        for cap in m.captures {
402            let cap_name = &config.query.capture_names()[cap.index as usize];
403            if *cap_name == "name" {
404                name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
405            } else if *cap_name == "def" {
406                def_node = Some(cap.node);
407            }
408        }
409
410        if let Some(node) = def_node {
411            let scope = crate::chunk::build_scope_chain(node, source);
412            let signature = crate::chunk::extract_signature(node, source);
413            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
414            let start_line = node.start_position().row as u32 + 1;
415            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
416            let end_line = node.end_position().row as u32 + 1;
417            #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
418            let start_byte = node.start_byte() as u32;
419            #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
420            let end_byte = node.end_byte() as u32;
421            defs.push(Definition {
422                name,
423                kind: node.kind().to_string(),
424                start_line,
425                end_line,
426                scope,
427                signature,
428                start_byte,
429                end_byte,
430                calls: vec![],
431            });
432        }
433    }
434
435    defs
436}
437
438// ── Call Extraction & Resolution ────────────────────────────────────
439
440/// Extract call sites from a source file and assign them to definitions.
441///
442/// Uses the language's call query to find all call expressions, then
443/// assigns each call to the definition whose byte range contains it.
444/// Calls outside any definition body (module-level) are ignored.
445fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
446    let mut parser = Parser::new();
447    if parser.set_language(&call_config.language).is_err() {
448        return;
449    }
450    let Some(tree) = parser.parse(source, None) else {
451        return;
452    };
453
454    let mut cursor = QueryCursor::new();
455    let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
456
457    while let Some(m) = matches.next() {
458        let mut callee_name = None;
459        let mut call_byte = 0u32;
460
461        for cap in m.captures {
462            let cap_name = &call_config.query.capture_names()[cap.index as usize];
463            if *cap_name == "callee" {
464                callee_name = Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
465                #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
466                {
467                    call_byte = cap.node.start_byte() as u32;
468                }
469            }
470        }
471
472        if let Some(name) = callee_name {
473            // Assign to the enclosing definition by byte range
474            if let Some(def) = defs
475                .iter_mut()
476                .find(|d| d.start_byte <= call_byte && call_byte < d.end_byte)
477            {
478                // Skip self-recursive calls
479                if def.name != name {
480                    def.calls.push(CallRef {
481                        name,
482                        byte_offset: call_byte,
483                        resolved: None,
484                    });
485                }
486            }
487            // Calls outside any definition are ignored (module-level init)
488        }
489    }
490}
491
492/// Build an index from definition name to list of `DefId`s.
493fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
494    let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
495    for (file_idx, file) in files.iter().enumerate() {
496        for (def_idx, def) in file.defs.iter().enumerate() {
497            #[expect(clippy::cast_possible_truncation)]
498            let did: DefId = (file_idx as u32, def_idx as u16);
499            index.entry(def.name.clone()).or_default().push(did);
500        }
501    }
502    index
503}
504
505/// Resolve call references to target definitions.
506///
507/// Strategy:
508/// 1. Same-file: prefer definitions in the caller's own file.
509/// 2. Imported-file: check definitions in files this file imports.
510/// 3. Unresolved: leave `resolved` as `None`.
511fn resolve_calls(files: &mut [FileNode], def_index: &HashMap<String, Vec<DefId>>) {
512    // Pre-compute imported file sets for each file
513    let imported_files: Vec<std::collections::HashSet<u32>> = files
514        .iter()
515        .map(|f| {
516            f.imports
517                .iter()
518                .filter_map(|imp| imp.resolved_idx)
519                .collect()
520        })
521        .collect();
522
523    for file_idx in 0..files.len() {
524        for def_idx in 0..files[file_idx].defs.len() {
525            for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
526                let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
527
528                let Some(candidates) = def_index.get(&call_name) else {
529                    continue;
530                };
531
532                // Priority 1: same file
533                #[expect(clippy::cast_possible_truncation)]
534                let file_idx_u32 = file_idx as u32;
535                if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
536                    files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
537                    continue;
538                }
539
540                // Priority 2: imported file
541                if let Some(&did) = candidates
542                    .iter()
543                    .find(|(f, _)| imported_files[file_idx].contains(f))
544                {
545                    files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
546                }
547                // Priority 3: unresolved — leave as None
548            }
549        }
550    }
551}
552
553/// Compute a prefix-sum offset table for flattening `DefId`s to linear indices.
554fn def_offsets(files: &[FileNode]) -> Vec<usize> {
555    let mut offsets = Vec::with_capacity(files.len() + 1);
556    offsets.push(0);
557    for file in files {
558        offsets.push(offsets.last().unwrap() + file.defs.len());
559    }
560    offsets
561}
562
563/// Flatten a `DefId` to a linear index using the offset table.
564fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
565    offsets[did.0 as usize] + did.1 as usize
566}
567
568/// Build top-N caller and callee lists for each definition (flattened).
569fn build_def_neighbor_lists(
570    n: usize,
571    edges: &[(u32, u32, u32)],
572    offsets: &[usize],
573) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
574    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
575    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
576
577    for &(src, dst, w) in edges {
578        let (s, d) = (src as usize, dst as usize);
579        if s < n && d < n {
580            incoming[d].push((src, w));
581            outgoing[s].push((dst, w));
582        }
583    }
584
585    // Convert flat index back to DefId
586    let to_def_id = |flat: u32| -> DefId {
587        let flat_usize = flat as usize;
588        let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
589        let def_idx = flat_usize - offsets[file_idx];
590        #[expect(clippy::cast_possible_truncation)]
591        (file_idx as u32, def_idx as u16)
592    };
593
594    let callers = incoming
595        .into_iter()
596        .map(|mut v| {
597            v.sort_by_key(|b| std::cmp::Reverse(b.1));
598            v.truncate(MAX_NEIGHBORS);
599            v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
600        })
601        .collect();
602
603    let callees = outgoing
604        .into_iter()
605        .map(|mut v| {
606            v.sort_by_key(|b| std::cmp::Reverse(b.1));
607            v.truncate(MAX_NEIGHBORS);
608            v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
609        })
610        .collect();
611
612    (callers, callees)
613}
614
615// ── PageRank ─────────────────────────────────────────────────────────
616
617/// Compute `PageRank` scores for a graph.
618///
619/// If `focus` is `Some(idx)`, computes topic-sensitive `PageRank` biased
620/// toward file `idx`. Otherwise computes standard (uniform) `PageRank`.
621///
622/// Returns one score per node, summing to 1.0.
623#[expect(
624    clippy::cast_precision_loss,
625    reason = "node count fits comfortably in f32"
626)]
627fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
628    if n == 0 {
629        return vec![];
630    }
631
632    // Build adjacency: out_edges[src] = [(dst, weight)]
633    let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
634    let mut out_weight: Vec<f32> = vec![0.0; n];
635
636    for &(src, dst, w) in edges {
637        let (s, d) = (src as usize, dst as usize);
638        if s < n && d < n {
639            #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
640            let wf = f64::from(w) as f32;
641            out_edges[s].push((d, wf));
642            out_weight[s] += wf;
643        }
644    }
645
646    // Personalization vector: for topic-sensitive PageRank, blend
647    // 70% focus on the target file with 30% uniform. Pure focus
648    // (100%) starves unreachable nodes to rank=0 in sparse graphs.
649    let bias: Vec<f32> = if let Some(idx) = focus {
650        let uniform = 1.0 / n as f32;
651        let mut b = vec![0.3 * uniform; n];
652        if idx < n {
653            b[idx] += 0.7;
654        }
655        // Normalize to sum=1
656        let sum: f32 = b.iter().sum();
657        for v in &mut b {
658            *v /= sum;
659        }
660        b
661    } else {
662        vec![1.0 / n as f32; n]
663    };
664
665    let mut rank = vec![1.0 / n as f32; n];
666    let mut next_rank = vec![0.0_f32; n];
667
668    for _ in 0..MAX_ITERATIONS {
669        // Collect dangling mass (nodes with no outgoing edges)
670        let dangling: f32 = rank
671            .iter()
672            .enumerate()
673            .filter(|&(i, _)| out_edges[i].is_empty())
674            .map(|(_, &r)| r)
675            .sum();
676
677        // Distribute rank
678        for (i, nr) in next_rank.iter_mut().enumerate() {
679            *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
680        }
681
682        for (src, edges_list) in out_edges.iter().enumerate() {
683            if edges_list.is_empty() {
684                continue;
685            }
686            let src_rank = rank[src];
687            let total_w = out_weight[src];
688            for &(dst, w) in edges_list {
689                next_rank[dst] += DAMPING * src_rank * (w / total_w);
690            }
691        }
692
693        // Check convergence
694        let diff: f32 = rank
695            .iter()
696            .zip(next_rank.iter())
697            .map(|(a, b)| (a - b).abs())
698            .sum();
699
700        std::mem::swap(&mut rank, &mut next_rank);
701
702        if diff < EPSILON {
703            break;
704        }
705    }
706
707    rank
708}
709
710// ── Graph Building ───────────────────────────────────────────────────
711
712/// Intermediate result from definition-level graph computation.
713struct DefGraphData {
714    def_edges: Vec<(DefId, DefId, u32)>,
715    def_ranks: Vec<f32>,
716    def_callers: Vec<Vec<DefId>>,
717    def_callees: Vec<Vec<DefId>>,
718    offsets: Vec<usize>,
719    base_ranks: Vec<f32>,
720    file_edges: Vec<(u32, u32, u32)>,
721}
722
723/// Build definition-level edges, compute `PageRank`, and derive file-level data.
724fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
725    // Build definition-level edge list from resolved calls
726    let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
727    for (file_idx, file) in files.iter().enumerate() {
728        for (def_idx, def) in file.defs.iter().enumerate() {
729            #[expect(clippy::cast_possible_truncation)]
730            let caller_id: DefId = (file_idx as u32, def_idx as u16);
731            for call in &def.calls {
732                if let Some(callee_id) = call.resolved {
733                    *def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
734                }
735            }
736        }
737    }
738    let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
739        .into_iter()
740        .map(|((src, dst), w)| (src, dst, w))
741        .collect();
742
743    // Compute def-level PageRank
744    let offsets = def_offsets(files);
745    let n_defs = *offsets.last().unwrap_or(&0);
746
747    let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
748        .iter()
749        .map(|(src, dst, w)| {
750            #[expect(clippy::cast_possible_truncation)]
751            (
752                flatten_def_id(&offsets, *src) as u32,
753                flatten_def_id(&offsets, *dst) as u32,
754                *w,
755            )
756        })
757        .collect();
758
759    let def_ranks = pagerank(n_defs, &flat_def_edges, None);
760
761    // Aggregate def ranks to file level
762    let base_ranks: Vec<f32> = files
763        .iter()
764        .enumerate()
765        .map(|(i, file)| {
766            let start = offsets[i];
767            let end = start + file.defs.len();
768            def_ranks[start..end].iter().sum()
769        })
770        .collect();
771
772    // Derive file-level edges from def-level call edges
773    let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
774    for &(src, dst, w) in &def_edges {
775        let src_file = src.0;
776        let dst_file = dst.0;
777        if src_file != dst_file {
778            *file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
779        }
780    }
781    let file_edges: Vec<(u32, u32, u32)> = file_edge_map
782        .into_iter()
783        .map(|((src, dst), w)| (src, dst, w))
784        .collect();
785
786    // Build def-level caller/callee lists
787    let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
788
789    DefGraphData {
790        def_edges,
791        def_ranks,
792        def_callers,
793        def_callees,
794        offsets,
795        base_ranks,
796        file_edges,
797    }
798}
799
800/// Build a dependency graph from a repository root.
801///
802/// Walks the directory tree, parses each supported file with tree-sitter,
803/// extracts definitions and imports, resolves import paths to files, runs
804/// `PageRank`, and builds caller/callee lists.
805///
806/// # Errors
807///
808/// Returns an error if file walking or reading fails.
809pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
810    let root = root.canonicalize().map_err(|e| crate::Error::Io {
811        path: root.display().to_string(),
812        source: e,
813    })?;
814
815    let mut walk_options = walk::WalkOptions::default();
816    if let Some((_, config)) = crate::cache::config::find_config(&root) {
817        walk_options.ignore_patterns = config.ignore.patterns;
818    }
819    let all_files = walk::collect_files_with_options(&root, &walk_options);
820
821    // Phase 1: parallel filter + read. For each candidate path with a
822    // supported extension, read its source from disk and emit a tuple
823    // alongside its relative path. rayon spreads the I/O cost across
824    // worker threads; on a 1M-file corpus this was ~20s sequential and
825    // now sits in the 2-3s range bounded by disk + filter throughput.
826    let raw_inputs: Vec<(PathBuf, String, String, String)> = all_files
827        .par_iter()
828        .filter_map(|path| {
829            let ext = path
830                .extension()
831                .and_then(|e| e.to_str())
832                .unwrap_or_default()
833                .to_string();
834            if languages::config_for_extension(&ext).is_none()
835                && import_query_for_extension(&ext).is_none()
836            {
837                return None;
838            }
839            let source = std::fs::read_to_string(path).ok()?;
840            let rel_path = path
841                .strip_prefix(&root)
842                .unwrap_or(path)
843                .display()
844                .to_string();
845            Some((path.clone(), rel_path, ext, source))
846        })
847        .collect();
848
849    // Build the contiguous `files` Vec and the absolute-path -> idx
850    // lookup. Sequential because both want stable indices that match
851    // `raw_sources`'s order; the per-file work this gates is trivial.
852    let mut file_index: HashMap<PathBuf, usize> = HashMap::with_capacity(raw_inputs.len());
853    let mut files: Vec<FileNode> = Vec::with_capacity(raw_inputs.len());
854    let mut raw_sources: Vec<(usize, String, String)> = Vec::with_capacity(raw_inputs.len());
855    for (idx, (abs_path, rel_path, ext, source)) in raw_inputs.into_iter().enumerate() {
856        file_index.insert(abs_path, idx);
857        files.push(FileNode {
858            path: rel_path,
859            defs: vec![],
860            imports: vec![],
861        });
862        raw_sources.push((idx, ext, source));
863    }
864
865    // Phase 2: parallel per-file definition + import extraction. Each
866    // file's tree-sitter parse + def/import queries are independent;
867    // par_iter_mut over files.iter_mut().zip(raw_sources.par_iter())
868    // lets every rayon worker grind its own slice. The closures here
869    // borrow `&root` and `&file_index` immutably (both Sync) and write
870    // disjoint `FileNode` slots via the &mut iterator.
871    files
872        .par_iter_mut()
873        .zip(raw_sources.par_iter())
874        .for_each(|(file, (_, ext, source))| {
875            if let Some(config) = languages::config_for_extension(ext) {
876                file.defs = extract_definitions(source, &config);
877            }
878            if let Some((lang, import_query)) = import_query_for_extension(ext) {
879                let raw_imports = extract_imports(source, &lang, &import_query);
880                let file_path = root.join(&file.path);
881                file.imports = raw_imports
882                    .into_iter()
883                    .map(|raw| {
884                        let resolved_idx =
885                            resolve_import(&raw, ext, &file_path, &root, &file_index)
886                                .and_then(|i| u32::try_from(i).ok());
887                        ImportRef {
888                            raw_path: raw,
889                            resolved_idx,
890                        }
891                    })
892                    .collect();
893            }
894        });
895
896    // Phase 3: parallel per-file call extraction. Mutates each
897    // FileNode's `defs[*].calls` independently. Aligned with
898    // raw_sources by index via the zip.
899    files
900        .par_iter_mut()
901        .zip(raw_sources.par_iter())
902        .for_each(|(file, (_, ext, source))| {
903            if let Some(call_config) = languages::call_query_for_extension(ext) {
904                extract_calls(source, &call_config, &mut file.defs);
905            }
906        });
907
908    // Resolve call references to target definitions
909    let def_index = build_def_index(&files);
910    resolve_calls(&mut files, &def_index);
911
912    // Build def-level graph, compute PageRank, and derive file-level data
913    let graph_data = compute_def_graph(&files);
914
915    // Build file-level caller/callee lists
916    let n = files.len();
917    let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
918
919    // Auto-tune alpha based on graph density
920    #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
921    let density = if n > 1 {
922        graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
923    } else {
924        0.0
925    };
926    let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
927
928    Ok(RepoGraph {
929        files,
930        edges: graph_data.file_edges,
931        base_ranks: graph_data.base_ranks,
932        callers,
933        callees,
934        def_edges: graph_data.def_edges,
935        def_ranks: graph_data.def_ranks,
936        def_callers: graph_data.def_callers,
937        def_callees: graph_data.def_callees,
938        def_offsets: graph_data.offsets,
939        alpha,
940    })
941}
942
943impl RepoGraph {
944    /// Get the `PageRank` score for a specific definition.
945    #[must_use]
946    pub fn def_rank(&self, did: DefId) -> f32 {
947        let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
948        self.def_ranks.get(flat).copied().unwrap_or(0.0)
949    }
950
951    /// Look up a definition by file path and name. Returns the first match.
952    #[must_use]
953    pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
954        for (file_idx, file) in self.files.iter().enumerate() {
955            if file.path == file_path {
956                for (def_idx, def) in file.defs.iter().enumerate() {
957                    if def.name == def_name {
958                        #[expect(clippy::cast_possible_truncation)]
959                        return Some((file_idx as u32, def_idx as u16));
960                    }
961                }
962            }
963        }
964        None
965    }
966}
967
968/// Build top-N caller and callee lists for each file.
969fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
970    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
971    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
972
973    for &(src, dst, w) in edges {
974        let (s, d) = (src as usize, dst as usize);
975        if s < n && d < n {
976            incoming[d].push((src, w));
977            outgoing[s].push((dst, w));
978        }
979    }
980
981    // Sort by weight descending, keep top N
982    let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
983        lists
984            .iter_mut()
985            .map(|list| {
986                list.sort_by_key(|b| std::cmp::Reverse(b.1));
987                list.iter()
988                    .take(MAX_NEIGHBORS)
989                    .map(|(idx, _)| *idx)
990                    .collect()
991            })
992            .collect()
993    };
994
995    (trim(&mut incoming), trim(&mut outgoing))
996}
997
998// ── Rendering ────────────────────────────────────────────────────────
999
1000/// Render a budget-constrained overview of the repository.
1001///
1002/// Files are sorted by `PageRank` (or topic-sensitive rank if `focus` is
1003/// `Some`). Output uses four tiers of decreasing detail:
1004///
1005/// - **Tier 0** (top 10%): full path, rank, callers/callees, signatures with scopes
1006/// - **Tier 1** (next 20%): full path, rank, signatures
1007/// - **Tier 2** (next 40%): full path, rank, definition names and kinds
1008/// - **Tier 3** (bottom 30%): file path only
1009///
1010/// Stops accumulating output when the estimated token count exceeds
1011/// `max_tokens`.
1012#[must_use]
1013pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
1014    let n = graph.files.len();
1015    if n == 0 {
1016        return String::new();
1017    }
1018
1019    // Compute ranks (recompute topic-sensitive if focus is given)
1020    let ranks = if focus.is_some() {
1021        pagerank(n, &graph.edges, focus)
1022    } else {
1023        graph.base_ranks.clone()
1024    };
1025
1026    // Sort file indices by rank descending
1027    let mut sorted: Vec<usize> = (0..n).collect();
1028    sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
1029
1030    let mut output = String::new();
1031    let mut used_tokens = 0;
1032    let max_chars = max_tokens * CHARS_PER_TOKEN;
1033
1034    for (rank_pos, &file_idx) in sorted.iter().enumerate() {
1035        if used_tokens >= max_tokens {
1036            break;
1037        }
1038
1039        let file = &graph.files[file_idx];
1040        let score = ranks[file_idx];
1041        #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
1042        let percentile = (rank_pos as f32) / (n as f32);
1043
1044        let section = if percentile < 0.1 {
1045            render_tier0(graph, file_idx, file, score)
1046        } else if percentile < 0.3 {
1047            render_tier1(file, score)
1048        } else if percentile < 0.7 {
1049            render_tier2(file, score)
1050        } else {
1051            render_tier3(file)
1052        };
1053
1054        let section_chars = section.len();
1055        if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
1056            // Would exceed budget — try to fit at least the path
1057            let path_line = format!("{}\n", file.path);
1058            let path_tokens = path_line.len() / CHARS_PER_TOKEN;
1059            if used_tokens + path_tokens <= max_tokens {
1060                output.push_str(&path_line);
1061            }
1062            break;
1063        }
1064
1065        output.push_str(&section);
1066        used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
1067    }
1068
1069    output
1070}
1071
1072/// Render tier 0: full detail with callers, callees, and signatures.
1073fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
1074    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1075
1076    // Callers
1077    if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
1078        let _ = write!(out, "  called by: ");
1079        let names: Vec<&str> = graph.callers[file_idx]
1080            .iter()
1081            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1082            .collect();
1083        let _ = writeln!(out, "{}", names.join(", "));
1084    }
1085
1086    // Callees
1087    if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
1088        let _ = write!(out, "  calls: ");
1089        let names: Vec<&str> = graph.callees[file_idx]
1090            .iter()
1091            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1092            .collect();
1093        let _ = writeln!(out, "{}", names.join(", "));
1094    }
1095
1096    // Definitions with scope and signature
1097    for def in &file.defs {
1098        let scope_prefix = if def.scope.is_empty() {
1099            String::new()
1100        } else {
1101            format!("{} > ", def.scope)
1102        };
1103        if let Some(sig) = &def.signature {
1104            let _ = writeln!(out, "  {scope_prefix}{} {sig}", def.kind);
1105        } else {
1106            let _ = writeln!(out, "  {scope_prefix}{} {}", def.kind, def.name);
1107        }
1108    }
1109    let _ = writeln!(out);
1110    out
1111}
1112
1113/// Render tier 1: file path, rank, and signatures.
1114fn render_tier1(file: &FileNode, score: f32) -> String {
1115    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1116    for def in &file.defs {
1117        if let Some(sig) = &def.signature {
1118            let _ = writeln!(out, "  {sig}");
1119        } else {
1120            let _ = writeln!(out, "  {} {}", def.kind, def.name);
1121        }
1122    }
1123    let _ = writeln!(out);
1124    out
1125}
1126
1127/// Render tier 2: file path, rank, and definition names/kinds.
1128fn render_tier2(file: &FileNode, score: f32) -> String {
1129    let mut out = format!("{} (rank: {score:.4})", file.path);
1130    if !file.defs.is_empty() {
1131        let names: Vec<String> = file
1132            .defs
1133            .iter()
1134            .map(|d| format!("{}:{}", d.kind, d.name))
1135            .collect();
1136        let _ = write!(out, " -- {}", names.join(", "));
1137    }
1138    let _ = writeln!(out);
1139    out
1140}
1141
1142/// Render tier 3: file path only.
1143fn render_tier3(file: &FileNode) -> String {
1144    format!("{}\n", file.path)
1145}
1146
1147// ── Tests ────────────────────────────────────────────────────────────
1148
1149#[cfg(test)]
1150mod tests {
1151    use super::*;
1152
1153    #[test]
1154    fn test_pagerank_simple() {
1155        // 3-node graph: 0 -> 1 -> 2, 2 -> 0 (cycle)
1156        let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
1157        let ranks = pagerank(3, &edges, None);
1158
1159        // All nodes in a symmetric cycle should have equal rank
1160        assert_eq!(ranks.len(), 3);
1161        let sum: f32 = ranks.iter().sum();
1162        assert!(
1163            (sum - 1.0).abs() < 0.01,
1164            "ranks should sum to ~1.0, got {sum}"
1165        );
1166
1167        // In a perfect cycle, all ranks should be approximately equal
1168        let expected = 1.0 / 3.0;
1169        for (i, &r) in ranks.iter().enumerate() {
1170            assert!(
1171                (r - expected).abs() < 0.05,
1172                "rank[{i}] = {r}, expected ~{expected}"
1173            );
1174        }
1175    }
1176
1177    #[test]
1178    fn test_pagerank_star() {
1179        // Star graph: 0,1,2 all point to 3
1180        let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
1181        let ranks = pagerank(4, &edges, None);
1182
1183        assert_eq!(ranks.len(), 4);
1184        // Node 3 should have the highest rank
1185        let max_idx = ranks
1186            .iter()
1187            .enumerate()
1188            .max_by(|a, b| a.1.total_cmp(b.1))
1189            .unwrap()
1190            .0;
1191        assert_eq!(max_idx, 3, "node 3 should have highest rank");
1192        assert!(
1193            ranks[3] > ranks[0],
1194            "rank[3]={} should be > rank[0]={}",
1195            ranks[3],
1196            ranks[0]
1197        );
1198    }
1199
1200    #[test]
1201    fn test_pagerank_topic_sensitive() {
1202        // 3-node chain: 0 -> 1 -> 2
1203        let edges = vec![(0, 1, 1), (1, 2, 1)];
1204        let uniform_ranks = pagerank(3, &edges, None);
1205        let biased_ranks = pagerank(3, &edges, Some(0));
1206
1207        // With focus on node 0, it should get a higher rank than uniform
1208        assert!(
1209            biased_ranks[0] > uniform_ranks[0],
1210            "focused rank[0]={} should be > uniform rank[0]={}",
1211            biased_ranks[0],
1212            uniform_ranks[0]
1213        );
1214    }
1215
1216    #[test]
1217    fn test_pagerank_empty() {
1218        let ranks = pagerank(0, &[], None);
1219        assert!(ranks.is_empty());
1220    }
1221
1222    #[test]
1223    fn test_render_tiers() {
1224        // Build a small graph with 10 files to exercise all tiers
1225        let files: Vec<FileNode> = (0..10)
1226            .map(|i| FileNode {
1227                path: format!("src/file_{i}.rs"),
1228                defs: vec![Definition {
1229                    name: format!("func_{i}"),
1230                    kind: "function_item".to_string(),
1231                    start_line: 1,
1232                    end_line: 5,
1233                    scope: String::new(),
1234                    signature: Some(format!("func_{i}(x: i32) -> i32")),
1235                    start_byte: 0,
1236                    end_byte: 0,
1237                    calls: vec![],
1238                }],
1239                imports: vec![],
1240            })
1241            .collect();
1242
1243        // Create a star graph: files 1-9 all import from file 0
1244        let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
1245        let base_ranks = pagerank(10, &edges, None);
1246        let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
1247
1248        let graph = RepoGraph {
1249            files,
1250            edges,
1251            base_ranks,
1252            callers: top_callers,
1253            callees: top_callees,
1254            def_edges: vec![],
1255            def_ranks: vec![],
1256            def_callers: vec![],
1257            def_callees: vec![],
1258            def_offsets: vec![0],
1259            alpha: 0.5,
1260        };
1261
1262        // Large budget: should include all files
1263        let full = render(&graph, 10_000, None);
1264        assert!(
1265            full.contains("file_0"),
1266            "output should contain the top-ranked file"
1267        );
1268        // file_0 should appear as tier 0 (highest rank)
1269        assert!(
1270            full.contains("## src/file_0.rs"),
1271            "top file should have tier 0 heading"
1272        );
1273
1274        // Tiny budget: should only fit a few files
1275        let small = render(&graph, 10, None);
1276        assert!(
1277            !small.is_empty(),
1278            "even tiny budget should produce some output"
1279        );
1280        // Should have fewer entries than full render
1281        let full_lines = full.lines().count();
1282        let small_lines = small.lines().count();
1283        assert!(
1284            small_lines < full_lines,
1285            "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
1286        );
1287    }
1288
1289    #[test]
1290    fn test_render_empty_graph() {
1291        let graph = RepoGraph {
1292            files: vec![],
1293            edges: vec![],
1294            base_ranks: vec![],
1295            callers: vec![],
1296            callees: vec![],
1297            def_edges: vec![],
1298            def_ranks: vec![],
1299            def_callers: vec![],
1300            def_callees: vec![],
1301            def_offsets: vec![0],
1302            alpha: 0.5,
1303        };
1304        let output = render(&graph, 1000, None);
1305        assert!(output.is_empty(), "empty graph should render empty string");
1306    }
1307
1308    #[test]
1309    fn test_build_graph_on_fixtures() {
1310        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
1311            .parent()
1312            .unwrap()
1313            .parent()
1314            .unwrap()
1315            .join("tests")
1316            .join("fixtures");
1317
1318        let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
1319
1320        // Should find at least the 3 fixture files
1321        assert!(
1322            !graph.files.is_empty(),
1323            "graph should contain files from fixtures"
1324        );
1325
1326        // Should find definitions in the Rust fixture
1327        let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
1328        assert!(rs_file.is_some(), "should find sample.rs");
1329        let rs_file = rs_file.unwrap();
1330        assert!(
1331            !rs_file.defs.is_empty(),
1332            "sample.rs should have definitions"
1333        );
1334        assert!(
1335            rs_file.defs.iter().any(|d| d.name == "hello"),
1336            "should find 'hello' function in sample.rs"
1337        );
1338
1339        // Should find definitions in the Python fixture
1340        let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
1341        assert!(py_file.is_some(), "should find sample.py");
1342        let py_file = py_file.unwrap();
1343        assert!(
1344            !py_file.defs.is_empty(),
1345            "sample.py should have definitions"
1346        );
1347        assert!(
1348            py_file.defs.iter().any(|d| d.name == "greet"),
1349            "should find 'greet' function in sample.py"
1350        );
1351
1352        // PageRank scores should be computed
1353        assert_eq!(graph.base_ranks.len(), graph.files.len());
1354        let sum: f32 = graph.base_ranks.iter().sum();
1355        assert!(
1356            (sum - 1.0).abs() < 0.01,
1357            "PageRank scores should sum to ~1.0, got {sum}"
1358        );
1359    }
1360
1361    #[test]
1362    fn test_extract_imports_rust() {
1363        let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
1364        let (lang, query) = import_query_for_extension("rs").unwrap();
1365        let imports = extract_imports(source, &lang, &query);
1366        assert_eq!(imports.len(), 2);
1367        assert!(imports[0].contains("crate::foo::bar"));
1368    }
1369
1370    #[test]
1371    fn test_extract_imports_python_stub() {
1372        let source = "from typing import Protocol\nimport pkg.types\n";
1373        let (lang, query) = import_query_for_extension("pyi").unwrap();
1374        let imports = extract_imports(source, &lang, &query);
1375        assert_eq!(imports.len(), 2);
1376        assert!(imports[0].contains("from typing import Protocol"));
1377        assert!(imports[1].contains("import pkg.types"));
1378    }
1379
1380    #[test]
1381    fn test_resolve_python_import_to_stub_file() {
1382        let root = PathBuf::from("/project");
1383        let mut file_index = HashMap::new();
1384        file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
1385
1386        let result = resolve_python_import("import pkg.types", &root, &file_index);
1387        assert_eq!(result, Some(1));
1388    }
1389
1390    #[test]
1391    fn test_resolve_rust_crate_import() {
1392        let root = PathBuf::from("/project");
1393        let file_path = PathBuf::from("/project/src/main.rs");
1394        let mut file_index = HashMap::new();
1395        file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1396        file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1397
1398        let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1399        assert_eq!(result, Some(1));
1400    }
1401
1402    #[test]
1403    fn test_resolve_rust_external_crate_dropped() {
1404        let root = PathBuf::from("/project");
1405        let file_path = PathBuf::from("/project/src/main.rs");
1406        let file_index = HashMap::new();
1407
1408        let result = resolve_rust_import(
1409            "use std::collections::HashMap;",
1410            &file_path,
1411            &root,
1412            &file_index,
1413        );
1414        assert_eq!(result, None, "external crate imports should be dropped");
1415    }
1416
1417    #[test]
1418    fn test_neighbor_lists() {
1419        // 0 -> 1, 0 -> 2, 1 -> 2
1420        let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1421        let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1422
1423        // Node 2 should be called by 0 and 1
1424        assert!(incoming[2].contains(&0));
1425        assert!(incoming[2].contains(&1));
1426
1427        // Node 0 should call 1 and 2
1428        assert!(outgoing[0].contains(&1));
1429        assert!(outgoing[0].contains(&2));
1430    }
1431
1432    #[test]
1433    #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1434    fn test_full_repo_map() {
1435        use std::time::Instant;
1436
1437        let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1438            .parent()
1439            .unwrap()
1440            .parent()
1441            .unwrap();
1442
1443        // Phase 1: build_graph (walk + parse + import resolve + PageRank)
1444        let t0 = Instant::now();
1445        let graph = build_graph(root).expect("build_graph on ripvec root");
1446        let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1447
1448        // Phase 2: render (default, no focus)
1449        let t1 = Instant::now();
1450        let rendered = render(&graph, 2000, None);
1451        let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1452
1453        // Phase 3: render (topic-sensitive, focused on highest-ranked file)
1454        let t2 = Instant::now();
1455        let focus_idx = graph
1456            .base_ranks
1457            .iter()
1458            .enumerate()
1459            .max_by(|a, b| a.1.total_cmp(b.1))
1460            .map(|(i, _)| i);
1461        let focused = render(&graph, 2000, focus_idx);
1462        let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1463
1464        eprintln!("\n=== Repo Map Performance ===");
1465        eprintln!(
1466            "Files: {}, Edges: {}, Defs: {}",
1467            graph.files.len(),
1468            graph.edges.len(),
1469            graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1470        );
1471        eprintln!("build_graph:     {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1472        eprintln!(
1473            "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1474            rendered.len(),
1475            rendered.len() / 4
1476        );
1477        eprintln!(
1478            "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1479            focused.len(),
1480            focused.len() / 4
1481        );
1482
1483        eprintln!("\nTop 5 by PageRank:");
1484        let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1485        ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1486        for (i, rank) in ranked.iter().take(5) {
1487            eprintln!("  {:.4} {}", rank, graph.files[*i].path);
1488        }
1489
1490        eprintln!("\n=== Default Render ===\n{rendered}");
1491        eprintln!(
1492            "\n=== Focused Render (on {}) ===\n{focused}",
1493            focus_idx
1494                .map(|i| graph.files[i].path.as_str())
1495                .unwrap_or("none")
1496        );
1497    }
1498}