Skip to main content

ripvec_core/
repo_map.rs

1//! `PageRank`-weighted structural overview of a codebase.
2//!
3//! Builds a dependency graph from tree-sitter definition and import extraction,
4//! ranks files by importance using `PageRank` (standard or topic-sensitive), and
5//! renders a budget-constrained overview with tiered detail levels.
6
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
12use streaming_iterator::StreamingIterator;
13use tree_sitter::{Parser, Query, QueryCursor};
14
15use crate::languages;
16use crate::walk;
17
18// ── Data Structures ──────────────────────────────────────────────────
19
20/// Persisted dependency graph with `PageRank` scores.
21#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
22pub struct RepoGraph {
23    /// Files in the repository with definitions, imports, and calls.
24    pub files: Vec<FileNode>,
25    /// File-level edges (derived from def-level call edges).
26    pub edges: Vec<(u32, u32, u32)>,
27    /// File-level `PageRank` scores (aggregated from def-level).
28    pub base_ranks: Vec<f32>,
29    /// File-level callers (indices into `files`).
30    pub callers: Vec<Vec<u32>>,
31    /// File-level callees (indices into `files`).
32    pub callees: Vec<Vec<u32>>,
33    /// Definition-level call edges: `(caller_def, callee_def, weight)`.
34    pub def_edges: Vec<(DefId, DefId, u32)>,
35    /// Definition-level `PageRank` scores (flattened: `offsets[file_idx] + def_idx`).
36    pub def_ranks: Vec<f32>,
37    /// Definition-level callers (flattened, parallel to `def_ranks`).
38    pub def_callers: Vec<Vec<DefId>>,
39    /// Definition-level callees (flattened, parallel to `def_ranks`).
40    pub def_callees: Vec<Vec<DefId>>,
41    /// Prefix-sum offsets for flattening `DefId` to linear index.
42    pub def_offsets: Vec<usize>,
43    /// Auto-tuned alpha for search boost.
44    pub alpha: f32,
45}
46
47/// A file in the repository with its definitions and imports.
48#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
49pub struct FileNode {
50    /// Relative path from the repository root.
51    pub path: String,
52    /// Definitions (functions, structs, classes, etc.) extracted from this file.
53    pub defs: Vec<Definition>,
54    /// Import references extracted from this file.
55    pub imports: Vec<ImportRef>,
56}
57
58/// A definition extracted from a source file.
59#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
60pub struct Definition {
61    /// Name of the definition (e.g., function name, class name).
62    pub name: String,
63    /// Kind of syntax node (e.g., `function_item`, `class_definition`).
64    pub kind: String,
65    /// 1-based start line number.
66    pub start_line: u32,
67    /// 1-based end line number.
68    pub end_line: u32,
69    /// Scope chain (e.g., `"impl_item Foo > fn bar"`).
70    pub scope: String,
71    /// Function/method signature, if available.
72    pub signature: Option<String>,
73    /// Byte offset of this definition's start in the source file.
74    pub start_byte: u32,
75    /// Byte offset of this definition's end in the source file.
76    pub end_byte: u32,
77    /// Call sites within this definition's body.
78    pub calls: Vec<CallRef>,
79}
80
81/// An import reference extracted from a source file.
82#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
83pub struct ImportRef {
84    /// Raw import path as written in source (e.g., `crate::foo::bar`).
85    pub raw_path: String,
86    /// Resolved file index in [`RepoGraph::files`], if resolution succeeded.
87    pub resolved_idx: Option<u32>,
88}
89
90/// Unique identifier for a definition: (file index, definition index within file).
91pub type DefId = (u32, u16);
92
93/// A call site extracted from a definition body.
94#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
95pub struct CallRef {
96    /// Callee function/method name.
97    pub name: String,
98    /// Byte offset of the call in the source file (for scoping to definitions).
99    pub byte_offset: u32,
100    /// Resolved target definition, if resolution succeeded.
101    pub resolved: Option<DefId>,
102}
103
104// ── Constants ────────────────────────────────────────────────────────
105
106/// `PageRank` damping factor.
107const DAMPING: f32 = 0.85;
108
109/// `PageRank` convergence threshold.
110const EPSILON: f32 = 1e-6;
111
112/// Maximum `PageRank` iterations.
113const MAX_ITERATIONS: usize = 100;
114
115/// Maximum callers/callees stored per file.
116const MAX_NEIGHBORS: usize = 5;
117
118/// Approximate characters per token for budget estimation.
119const CHARS_PER_TOKEN: usize = 4;
120
121// ── Import Queries ───────────────────────────────────────────────────
122
123/// Compile a tree-sitter import query for the given extension.
124///
125/// Returns `None` for unsupported extensions.
126fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
127    let (lang, query_str): (tree_sitter::Language, &str) = match ext {
128        "rs" => (
129            tree_sitter_rust::LANGUAGE.into(),
130            "(use_declaration) @import",
131        ),
132        "py" | "pyi" => (
133            tree_sitter_python::LANGUAGE.into(),
134            concat!(
135                "(import_statement) @import\n",
136                "(import_from_statement) @import",
137            ),
138        ),
139        "js" | "jsx" => (
140            tree_sitter_javascript::LANGUAGE.into(),
141            "(import_statement source: (string) @import_path) @import",
142        ),
143        "ts" => (
144            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
145            "(import_statement source: (string) @import_path) @import",
146        ),
147        "tsx" => (
148            tree_sitter_typescript::LANGUAGE_TSX.into(),
149            "(import_statement source: (string) @import_path) @import",
150        ),
151        "go" => (
152            tree_sitter_go::LANGUAGE.into(),
153            "(import_spec path: (interpreted_string_literal) @import_path) @import",
154        ),
155        // Ruby: require statements.
156        "rb" => (
157            tree_sitter_ruby::LANGUAGE.into(),
158            "(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
159        ),
160        _ => return None,
161    };
162    let query = match Query::new(&lang, query_str) {
163        Ok(q) => q,
164        Err(e) => {
165            tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
166            return None;
167        }
168    };
169    Some((lang, query))
170}
171
172/// Extract import paths from source using tree-sitter.
173fn extract_imports(
174    source: &str,
175    lang: &tree_sitter::Language,
176    import_query: &Query,
177) -> Vec<String> {
178    let mut parser = Parser::new();
179    if parser.set_language(lang).is_err() {
180        return vec![];
181    }
182    let Some(tree) = parser.parse(source, None) else {
183        return vec![];
184    };
185
186    let mut cursor = QueryCursor::new();
187    let mut imports = Vec::new();
188    let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
189
190    while let Some(m) = matches.next() {
191        // Prefer @import_path capture (JS/TS/Go), fall back to full @import text
192        let mut import_path_text = None;
193        let mut import_text = None;
194
195        for cap in m.captures {
196            let cap_name = &import_query.capture_names()[cap.index as usize];
197            let text = &source[cap.node.start_byte()..cap.node.end_byte()];
198            if *cap_name == "import_path" {
199                import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
200            } else if *cap_name == "import" {
201                import_text = Some(text);
202            }
203        }
204
205        if let Some(path) = import_path_text {
206            imports.push(path.to_string());
207        } else if let Some(text) = import_text {
208            imports.push(text.to_string());
209        }
210    }
211
212    imports
213}
214
215// ── Import Resolution ────────────────────────────────────────────────
216
217/// Resolve a Rust `use` path to a file index in the file map.
218///
219/// Handles `crate::`, `self::`, and `super::` prefixes. External crate
220/// imports are dropped (returns `None`).
221fn resolve_rust_import(
222    raw: &str,
223    file_path: &Path,
224    root: &Path,
225    file_index: &HashMap<PathBuf, usize>,
226) -> Option<usize> {
227    // Extract the module path from `use crate::foo::bar;` or `use crate::foo::bar::Baz;`
228    let trimmed = raw
229        .trim()
230        .trim_start_matches("use ")
231        .trim_end_matches(';')
232        .trim();
233
234    let segments: Vec<&str> = trimmed.split("::").collect();
235    if segments.is_empty() {
236        return None;
237    }
238
239    // Determine the base directory and skip prefix segments
240    let (base, skip) = match segments[0] {
241        "crate" => {
242            // Find the nearest Cargo.toml ancestor to determine the crate root.
243            // In a workspace, `crate::foo` resolves relative to the crate's src/,
244            // not the workspace root.
245            let mut dir = file_path.parent();
246            let crate_root = loop {
247                match dir {
248                    Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
249                    Some(d) => dir = d.parent(),
250                    None => break root.join("src"), // fallback
251                }
252            };
253            (crate_root, 1)
254        }
255        "self" => {
256            let dir = file_path.parent()?;
257            (dir.to_path_buf(), 1)
258        }
259        "super" => {
260            let dir = file_path.parent()?.parent()?;
261            (dir.to_path_buf(), 1)
262        }
263        // External crate — drop
264        _ => return None,
265    };
266
267    // Build candidate paths from the remaining segments.
268    // Try progressively shorter prefixes since the last segments
269    // may be items (struct, fn) rather than modules.
270    let path_segments = &segments[skip..];
271    for end in (1..=path_segments.len()).rev() {
272        let mut candidate = base.clone();
273        for seg in &path_segments[..end] {
274            // Strip glob patterns like `{Foo, Bar}`
275            let clean = seg.split('{').next().unwrap_or(seg).trim();
276            if !clean.is_empty() {
277                candidate.push(clean);
278            }
279        }
280
281        // Try file.rs
282        let as_file = candidate.with_extension("rs");
283        if let Some(&idx) = file_index.get(&as_file) {
284            return Some(idx);
285        }
286
287        // Try dir/mod.rs
288        let as_mod = candidate.join("mod.rs");
289        if let Some(&idx) = file_index.get(&as_mod) {
290            return Some(idx);
291        }
292    }
293
294    None
295}
296
297/// Resolve an import path to a file index based on file extension.
298fn resolve_import(
299    raw: &str,
300    ext: &str,
301    file_path: &Path,
302    root: &Path,
303    file_index: &HashMap<PathBuf, usize>,
304) -> Option<usize> {
305    match ext {
306        "rs" => resolve_rust_import(raw, file_path, root, file_index),
307        "py" | "pyi" => resolve_python_import(raw, root, file_index),
308        "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
309        // Go imports use full package paths — skip local resolution
310        _ => None,
311    }
312}
313
314/// Resolve a Python import to a file index.
315///
316/// Handles `import foo.bar` and `from foo.bar import baz` patterns.
317fn resolve_python_import(
318    raw: &str,
319    root: &Path,
320    file_index: &HashMap<PathBuf, usize>,
321) -> Option<usize> {
322    let module_path = if let Some(rest) = raw.strip_prefix("from ") {
323        rest.split_whitespace().next()?
324    } else if let Some(rest) = raw.strip_prefix("import ") {
325        rest.split_whitespace().next()?
326    } else {
327        return None;
328    };
329
330    let rel_path: PathBuf = module_path.split('.').collect();
331    for ext in ["py", "pyi"] {
332        let as_file = root.join(&rel_path).with_extension(ext);
333        if let Some(&idx) = file_index.get(&as_file) {
334            return Some(idx);
335        }
336    }
337
338    for init_name in ["__init__.py", "__init__.pyi"] {
339        let as_init = root.join(&rel_path).join(init_name);
340        if let Some(&idx) = file_index.get(&as_init) {
341            return Some(idx);
342        }
343    }
344
345    None
346}
347
348/// Resolve a JS/TS import to a file index.
349///
350/// Handles relative paths like `./foo` or `../bar`.
351fn resolve_js_import(
352    raw: &str,
353    file_path: &Path,
354    file_index: &HashMap<PathBuf, usize>,
355) -> Option<usize> {
356    if !raw.starts_with('.') {
357        return None;
358    }
359
360    let dir = file_path.parent()?;
361    let candidate = dir.join(raw);
362
363    for ext in &["js", "jsx", "ts", "tsx"] {
364        let with_ext = candidate.with_extension(ext);
365        if let Some(&idx) = file_index.get(&with_ext) {
366            return Some(idx);
367        }
368    }
369
370    for ext in &["js", "jsx", "ts", "tsx"] {
371        let index_file = candidate.join("index").with_extension(ext);
372        if let Some(&idx) = file_index.get(&index_file) {
373            return Some(idx);
374        }
375    }
376
377    None
378}
379
380// ── Extraction ───────────────────────────────────────────────────────
381
382/// Extract definitions from a source file using tree-sitter.
383fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
384    let mut parser = Parser::new();
385    if parser.set_language(&config.language).is_err() {
386        return vec![];
387    }
388    let Some(tree) = parser.parse(source, None) else {
389        return vec![];
390    };
391
392    let mut cursor = QueryCursor::new();
393    let mut defs = Vec::new();
394    let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
395
396    while let Some(m) = matches.next() {
397        let mut name = String::new();
398        let mut def_node = None;
399
400        for cap in m.captures {
401            let cap_name = &config.query.capture_names()[cap.index as usize];
402            if *cap_name == "name" {
403                name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
404            } else if *cap_name == "def" {
405                def_node = Some(cap.node);
406            }
407        }
408
409        if let Some(node) = def_node {
410            let scope = crate::chunk::build_scope_chain(node, source);
411            let signature = crate::chunk::extract_signature(node, source);
412            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
413            let start_line = node.start_position().row as u32 + 1;
414            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
415            let end_line = node.end_position().row as u32 + 1;
416            #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
417            let start_byte = node.start_byte() as u32;
418            #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
419            let end_byte = node.end_byte() as u32;
420            defs.push(Definition {
421                name,
422                kind: node.kind().to_string(),
423                start_line,
424                end_line,
425                scope,
426                signature,
427                start_byte,
428                end_byte,
429                calls: vec![],
430            });
431        }
432    }
433
434    defs
435}
436
437// ── Call Extraction & Resolution ────────────────────────────────────
438
439/// Extract call sites from a source file and assign them to definitions.
440///
441/// Uses the language's call query to find all call expressions, then
442/// assigns each call to the definition whose byte range contains it.
443/// Calls outside any definition body (module-level) are ignored.
444fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
445    let mut parser = Parser::new();
446    if parser.set_language(&call_config.language).is_err() {
447        return;
448    }
449    let Some(tree) = parser.parse(source, None) else {
450        return;
451    };
452
453    let mut cursor = QueryCursor::new();
454    let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
455
456    while let Some(m) = matches.next() {
457        let mut callee_name = None;
458        let mut call_byte = 0u32;
459
460        for cap in m.captures {
461            let cap_name = &call_config.query.capture_names()[cap.index as usize];
462            if *cap_name == "callee" {
463                callee_name = Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
464                #[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
465                {
466                    call_byte = cap.node.start_byte() as u32;
467                }
468            }
469        }
470
471        if let Some(name) = callee_name {
472            // Assign to the enclosing definition by byte range
473            if let Some(def) = defs
474                .iter_mut()
475                .find(|d| d.start_byte <= call_byte && call_byte < d.end_byte)
476            {
477                // Skip self-recursive calls
478                if def.name != name {
479                    def.calls.push(CallRef {
480                        name,
481                        byte_offset: call_byte,
482                        resolved: None,
483                    });
484                }
485            }
486            // Calls outside any definition are ignored (module-level init)
487        }
488    }
489}
490
491/// Build an index from definition name to list of `DefId`s.
492fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
493    let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
494    for (file_idx, file) in files.iter().enumerate() {
495        for (def_idx, def) in file.defs.iter().enumerate() {
496            #[expect(clippy::cast_possible_truncation)]
497            let did: DefId = (file_idx as u32, def_idx as u16);
498            index.entry(def.name.clone()).or_default().push(did);
499        }
500    }
501    index
502}
503
504/// Resolve call references to target definitions.
505///
506/// Strategy:
507/// 1. Same-file: prefer definitions in the caller's own file.
508/// 2. Imported-file: check definitions in files this file imports.
509/// 3. Unresolved: leave `resolved` as `None`.
510fn resolve_calls(files: &mut [FileNode], def_index: &HashMap<String, Vec<DefId>>) {
511    // Pre-compute imported file sets for each file
512    let imported_files: Vec<std::collections::HashSet<u32>> = files
513        .iter()
514        .map(|f| {
515            f.imports
516                .iter()
517                .filter_map(|imp| imp.resolved_idx)
518                .collect()
519        })
520        .collect();
521
522    for file_idx in 0..files.len() {
523        for def_idx in 0..files[file_idx].defs.len() {
524            for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
525                let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
526
527                let Some(candidates) = def_index.get(&call_name) else {
528                    continue;
529                };
530
531                // Priority 1: same file
532                #[expect(clippy::cast_possible_truncation)]
533                let file_idx_u32 = file_idx as u32;
534                if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
535                    files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
536                    continue;
537                }
538
539                // Priority 2: imported file
540                if let Some(&did) = candidates
541                    .iter()
542                    .find(|(f, _)| imported_files[file_idx].contains(f))
543                {
544                    files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
545                }
546                // Priority 3: unresolved — leave as None
547            }
548        }
549    }
550}
551
552/// Compute a prefix-sum offset table for flattening `DefId`s to linear indices.
553fn def_offsets(files: &[FileNode]) -> Vec<usize> {
554    let mut offsets = Vec::with_capacity(files.len() + 1);
555    offsets.push(0);
556    for file in files {
557        offsets.push(offsets.last().unwrap() + file.defs.len());
558    }
559    offsets
560}
561
562/// Flatten a `DefId` to a linear index using the offset table.
563fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
564    offsets[did.0 as usize] + did.1 as usize
565}
566
567/// Build top-N caller and callee lists for each definition (flattened).
568fn build_def_neighbor_lists(
569    n: usize,
570    edges: &[(u32, u32, u32)],
571    offsets: &[usize],
572) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
573    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
574    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
575
576    for &(src, dst, w) in edges {
577        let (s, d) = (src as usize, dst as usize);
578        if s < n && d < n {
579            incoming[d].push((src, w));
580            outgoing[s].push((dst, w));
581        }
582    }
583
584    // Convert flat index back to DefId
585    let to_def_id = |flat: u32| -> DefId {
586        let flat_usize = flat as usize;
587        let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
588        let def_idx = flat_usize - offsets[file_idx];
589        #[expect(clippy::cast_possible_truncation)]
590        (file_idx as u32, def_idx as u16)
591    };
592
593    let callers = incoming
594        .into_iter()
595        .map(|mut v| {
596            v.sort_by_key(|b| std::cmp::Reverse(b.1));
597            v.truncate(MAX_NEIGHBORS);
598            v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
599        })
600        .collect();
601
602    let callees = outgoing
603        .into_iter()
604        .map(|mut v| {
605            v.sort_by_key(|b| std::cmp::Reverse(b.1));
606            v.truncate(MAX_NEIGHBORS);
607            v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
608        })
609        .collect();
610
611    (callers, callees)
612}
613
614// ── PageRank ─────────────────────────────────────────────────────────
615
616/// Compute `PageRank` scores for a graph.
617///
618/// If `focus` is `Some(idx)`, computes topic-sensitive `PageRank` biased
619/// toward file `idx`. Otherwise computes standard (uniform) `PageRank`.
620///
621/// Returns one score per node, summing to 1.0.
622#[expect(
623    clippy::cast_precision_loss,
624    reason = "node count fits comfortably in f32"
625)]
626fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
627    if n == 0 {
628        return vec![];
629    }
630
631    // Build adjacency: out_edges[src] = [(dst, weight)]
632    let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
633    let mut out_weight: Vec<f32> = vec![0.0; n];
634
635    for &(src, dst, w) in edges {
636        let (s, d) = (src as usize, dst as usize);
637        if s < n && d < n {
638            #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
639            let wf = f64::from(w) as f32;
640            out_edges[s].push((d, wf));
641            out_weight[s] += wf;
642        }
643    }
644
645    // Personalization vector: for topic-sensitive PageRank, blend
646    // 70% focus on the target file with 30% uniform. Pure focus
647    // (100%) starves unreachable nodes to rank=0 in sparse graphs.
648    let bias: Vec<f32> = if let Some(idx) = focus {
649        let uniform = 1.0 / n as f32;
650        let mut b = vec![0.3 * uniform; n];
651        if idx < n {
652            b[idx] += 0.7;
653        }
654        // Normalize to sum=1
655        let sum: f32 = b.iter().sum();
656        for v in &mut b {
657            *v /= sum;
658        }
659        b
660    } else {
661        vec![1.0 / n as f32; n]
662    };
663
664    let mut rank = vec![1.0 / n as f32; n];
665    let mut next_rank = vec![0.0_f32; n];
666
667    for _ in 0..MAX_ITERATIONS {
668        // Collect dangling mass (nodes with no outgoing edges)
669        let dangling: f32 = rank
670            .iter()
671            .enumerate()
672            .filter(|&(i, _)| out_edges[i].is_empty())
673            .map(|(_, &r)| r)
674            .sum();
675
676        // Distribute rank
677        for (i, nr) in next_rank.iter_mut().enumerate() {
678            *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
679        }
680
681        for (src, edges_list) in out_edges.iter().enumerate() {
682            if edges_list.is_empty() {
683                continue;
684            }
685            let src_rank = rank[src];
686            let total_w = out_weight[src];
687            for &(dst, w) in edges_list {
688                next_rank[dst] += DAMPING * src_rank * (w / total_w);
689            }
690        }
691
692        // Check convergence
693        let diff: f32 = rank
694            .iter()
695            .zip(next_rank.iter())
696            .map(|(a, b)| (a - b).abs())
697            .sum();
698
699        std::mem::swap(&mut rank, &mut next_rank);
700
701        if diff < EPSILON {
702            break;
703        }
704    }
705
706    rank
707}
708
709// ── Graph Building ───────────────────────────────────────────────────
710
711/// Intermediate result from definition-level graph computation.
712struct DefGraphData {
713    def_edges: Vec<(DefId, DefId, u32)>,
714    def_ranks: Vec<f32>,
715    def_callers: Vec<Vec<DefId>>,
716    def_callees: Vec<Vec<DefId>>,
717    offsets: Vec<usize>,
718    base_ranks: Vec<f32>,
719    file_edges: Vec<(u32, u32, u32)>,
720}
721
722/// Build definition-level edges, compute `PageRank`, and derive file-level data.
723fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
724    // Build definition-level edge list from resolved calls
725    let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
726    for (file_idx, file) in files.iter().enumerate() {
727        for (def_idx, def) in file.defs.iter().enumerate() {
728            #[expect(clippy::cast_possible_truncation)]
729            let caller_id: DefId = (file_idx as u32, def_idx as u16);
730            for call in &def.calls {
731                if let Some(callee_id) = call.resolved {
732                    *def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
733                }
734            }
735        }
736    }
737    let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
738        .into_iter()
739        .map(|((src, dst), w)| (src, dst, w))
740        .collect();
741
742    // Compute def-level PageRank
743    let offsets = def_offsets(files);
744    let n_defs = *offsets.last().unwrap_or(&0);
745
746    let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
747        .iter()
748        .map(|(src, dst, w)| {
749            #[expect(clippy::cast_possible_truncation)]
750            (
751                flatten_def_id(&offsets, *src) as u32,
752                flatten_def_id(&offsets, *dst) as u32,
753                *w,
754            )
755        })
756        .collect();
757
758    let def_ranks = pagerank(n_defs, &flat_def_edges, None);
759
760    // Aggregate def ranks to file level
761    let base_ranks: Vec<f32> = files
762        .iter()
763        .enumerate()
764        .map(|(i, file)| {
765            let start = offsets[i];
766            let end = start + file.defs.len();
767            def_ranks[start..end].iter().sum()
768        })
769        .collect();
770
771    // Derive file-level edges from def-level call edges
772    let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
773    for &(src, dst, w) in &def_edges {
774        let src_file = src.0;
775        let dst_file = dst.0;
776        if src_file != dst_file {
777            *file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
778        }
779    }
780    let file_edges: Vec<(u32, u32, u32)> = file_edge_map
781        .into_iter()
782        .map(|((src, dst), w)| (src, dst, w))
783        .collect();
784
785    // Build def-level caller/callee lists
786    let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
787
788    DefGraphData {
789        def_edges,
790        def_ranks,
791        def_callers,
792        def_callees,
793        offsets,
794        base_ranks,
795        file_edges,
796    }
797}
798
799/// Build a dependency graph from a repository root.
800///
801/// Walks the directory tree, parses each supported file with tree-sitter,
802/// extracts definitions and imports, resolves import paths to files, runs
803/// `PageRank`, and builds caller/callee lists.
804///
805/// # Errors
806///
807/// Returns an error if file walking or reading fails.
808pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
809    let root = root.canonicalize().map_err(|e| crate::Error::Io {
810        path: root.display().to_string(),
811        source: e,
812    })?;
813
814    let mut walk_options = walk::WalkOptions::default();
815    if let Some((_, config)) = crate::cache::config::find_config(&root) {
816        walk_options.ignore_patterns = config.ignore.patterns;
817    }
818    let all_files = walk::collect_files_with_options(&root, &walk_options);
819
820    // Build file index mapping canonical paths to indices
821    let mut file_index: HashMap<PathBuf, usize> = HashMap::new();
822    let mut files: Vec<FileNode> = Vec::new();
823    let mut raw_sources: Vec<(usize, String, String)> = Vec::new(); // (idx, ext, source)
824
825    for path in &all_files {
826        let ext = path
827            .extension()
828            .and_then(|e| e.to_str())
829            .unwrap_or_default()
830            .to_string();
831
832        // Only process files with known language support
833        if languages::config_for_extension(&ext).is_none()
834            && import_query_for_extension(&ext).is_none()
835        {
836            continue;
837        }
838
839        let Ok(source) = std::fs::read_to_string(path) else {
840            continue; // Skip binary/unreadable files
841        };
842
843        let rel_path = path
844            .strip_prefix(&root)
845            .unwrap_or(path)
846            .display()
847            .to_string();
848
849        let idx = files.len();
850        file_index.insert(path.clone(), idx);
851        files.push(FileNode {
852            path: rel_path,
853            defs: vec![],
854            imports: vec![],
855        });
856        raw_sources.push((idx, ext, source));
857    }
858
859    // Extract definitions and imports
860    for (idx, ext, source) in &raw_sources {
861        // Extract definitions
862        if let Some(config) = languages::config_for_extension(ext) {
863            files[*idx].defs = extract_definitions(source, &config);
864        }
865
866        // Extract imports
867        if let Some((lang, import_query)) = import_query_for_extension(ext) {
868            let raw_imports = extract_imports(source, &lang, &import_query);
869            let file_path = root.join(&files[*idx].path);
870
871            files[*idx].imports = raw_imports
872                .into_iter()
873                .map(|raw| {
874                    let resolved_idx = resolve_import(&raw, ext, &file_path, &root, &file_index)
875                        .and_then(|i| u32::try_from(i).ok());
876                    ImportRef {
877                        raw_path: raw,
878                        resolved_idx,
879                    }
880                })
881                .collect();
882        }
883    }
884
885    // Extract calls within definitions
886    for (idx, ext, source) in &raw_sources {
887        if let Some(call_config) = languages::call_query_for_extension(ext) {
888            extract_calls(source, &call_config, &mut files[*idx].defs);
889        }
890    }
891
892    // Resolve call references to target definitions
893    let def_index = build_def_index(&files);
894    resolve_calls(&mut files, &def_index);
895
896    // Build def-level graph, compute PageRank, and derive file-level data
897    let graph_data = compute_def_graph(&files);
898
899    // Build file-level caller/callee lists
900    let n = files.len();
901    let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
902
903    // Auto-tune alpha based on graph density
904    #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
905    let density = if n > 1 {
906        graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
907    } else {
908        0.0
909    };
910    let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
911
912    Ok(RepoGraph {
913        files,
914        edges: graph_data.file_edges,
915        base_ranks: graph_data.base_ranks,
916        callers,
917        callees,
918        def_edges: graph_data.def_edges,
919        def_ranks: graph_data.def_ranks,
920        def_callers: graph_data.def_callers,
921        def_callees: graph_data.def_callees,
922        def_offsets: graph_data.offsets,
923        alpha,
924    })
925}
926
927impl RepoGraph {
928    /// Get the `PageRank` score for a specific definition.
929    #[must_use]
930    pub fn def_rank(&self, did: DefId) -> f32 {
931        let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
932        self.def_ranks.get(flat).copied().unwrap_or(0.0)
933    }
934
935    /// Look up a definition by file path and name. Returns the first match.
936    #[must_use]
937    pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
938        for (file_idx, file) in self.files.iter().enumerate() {
939            if file.path == file_path {
940                for (def_idx, def) in file.defs.iter().enumerate() {
941                    if def.name == def_name {
942                        #[expect(clippy::cast_possible_truncation)]
943                        return Some((file_idx as u32, def_idx as u16));
944                    }
945                }
946            }
947        }
948        None
949    }
950}
951
952/// Build top-N caller and callee lists for each file.
953fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
954    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
955    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
956
957    for &(src, dst, w) in edges {
958        let (s, d) = (src as usize, dst as usize);
959        if s < n && d < n {
960            incoming[d].push((src, w));
961            outgoing[s].push((dst, w));
962        }
963    }
964
965    // Sort by weight descending, keep top N
966    let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
967        lists
968            .iter_mut()
969            .map(|list| {
970                list.sort_by_key(|b| std::cmp::Reverse(b.1));
971                list.iter()
972                    .take(MAX_NEIGHBORS)
973                    .map(|(idx, _)| *idx)
974                    .collect()
975            })
976            .collect()
977    };
978
979    (trim(&mut incoming), trim(&mut outgoing))
980}
981
982// ── Rendering ────────────────────────────────────────────────────────
983
984/// Render a budget-constrained overview of the repository.
985///
986/// Files are sorted by `PageRank` (or topic-sensitive rank if `focus` is
987/// `Some`). Output uses four tiers of decreasing detail:
988///
989/// - **Tier 0** (top 10%): full path, rank, callers/callees, signatures with scopes
990/// - **Tier 1** (next 20%): full path, rank, signatures
991/// - **Tier 2** (next 40%): full path, rank, definition names and kinds
992/// - **Tier 3** (bottom 30%): file path only
993///
994/// Stops accumulating output when the estimated token count exceeds
995/// `max_tokens`.
996#[must_use]
997pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
998    let n = graph.files.len();
999    if n == 0 {
1000        return String::new();
1001    }
1002
1003    // Compute ranks (recompute topic-sensitive if focus is given)
1004    let ranks = if focus.is_some() {
1005        pagerank(n, &graph.edges, focus)
1006    } else {
1007        graph.base_ranks.clone()
1008    };
1009
1010    // Sort file indices by rank descending
1011    let mut sorted: Vec<usize> = (0..n).collect();
1012    sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
1013
1014    let mut output = String::new();
1015    let mut used_tokens = 0;
1016    let max_chars = max_tokens * CHARS_PER_TOKEN;
1017
1018    for (rank_pos, &file_idx) in sorted.iter().enumerate() {
1019        if used_tokens >= max_tokens {
1020            break;
1021        }
1022
1023        let file = &graph.files[file_idx];
1024        let score = ranks[file_idx];
1025        #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
1026        let percentile = (rank_pos as f32) / (n as f32);
1027
1028        let section = if percentile < 0.1 {
1029            render_tier0(graph, file_idx, file, score)
1030        } else if percentile < 0.3 {
1031            render_tier1(file, score)
1032        } else if percentile < 0.7 {
1033            render_tier2(file, score)
1034        } else {
1035            render_tier3(file)
1036        };
1037
1038        let section_chars = section.len();
1039        if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
1040            // Would exceed budget — try to fit at least the path
1041            let path_line = format!("{}\n", file.path);
1042            let path_tokens = path_line.len() / CHARS_PER_TOKEN;
1043            if used_tokens + path_tokens <= max_tokens {
1044                output.push_str(&path_line);
1045            }
1046            break;
1047        }
1048
1049        output.push_str(&section);
1050        used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
1051    }
1052
1053    output
1054}
1055
1056/// Render tier 0: full detail with callers, callees, and signatures.
1057fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
1058    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1059
1060    // Callers
1061    if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
1062        let _ = write!(out, "  called by: ");
1063        let names: Vec<&str> = graph.callers[file_idx]
1064            .iter()
1065            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1066            .collect();
1067        let _ = writeln!(out, "{}", names.join(", "));
1068    }
1069
1070    // Callees
1071    if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
1072        let _ = write!(out, "  calls: ");
1073        let names: Vec<&str> = graph.callees[file_idx]
1074            .iter()
1075            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
1076            .collect();
1077        let _ = writeln!(out, "{}", names.join(", "));
1078    }
1079
1080    // Definitions with scope and signature
1081    for def in &file.defs {
1082        let scope_prefix = if def.scope.is_empty() {
1083            String::new()
1084        } else {
1085            format!("{} > ", def.scope)
1086        };
1087        if let Some(sig) = &def.signature {
1088            let _ = writeln!(out, "  {scope_prefix}{} {sig}", def.kind);
1089        } else {
1090            let _ = writeln!(out, "  {scope_prefix}{} {}", def.kind, def.name);
1091        }
1092    }
1093    let _ = writeln!(out);
1094    out
1095}
1096
1097/// Render tier 1: file path, rank, and signatures.
1098fn render_tier1(file: &FileNode, score: f32) -> String {
1099    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
1100    for def in &file.defs {
1101        if let Some(sig) = &def.signature {
1102            let _ = writeln!(out, "  {sig}");
1103        } else {
1104            let _ = writeln!(out, "  {} {}", def.kind, def.name);
1105        }
1106    }
1107    let _ = writeln!(out);
1108    out
1109}
1110
1111/// Render tier 2: file path, rank, and definition names/kinds.
1112fn render_tier2(file: &FileNode, score: f32) -> String {
1113    let mut out = format!("{} (rank: {score:.4})", file.path);
1114    if !file.defs.is_empty() {
1115        let names: Vec<String> = file
1116            .defs
1117            .iter()
1118            .map(|d| format!("{}:{}", d.kind, d.name))
1119            .collect();
1120        let _ = write!(out, " -- {}", names.join(", "));
1121    }
1122    let _ = writeln!(out);
1123    out
1124}
1125
1126/// Render tier 3: file path only.
1127fn render_tier3(file: &FileNode) -> String {
1128    format!("{}\n", file.path)
1129}
1130
1131// ── Tests ────────────────────────────────────────────────────────────
1132
1133#[cfg(test)]
1134mod tests {
1135    use super::*;
1136
1137    #[test]
1138    fn test_pagerank_simple() {
1139        // 3-node graph: 0 -> 1 -> 2, 2 -> 0 (cycle)
1140        let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
1141        let ranks = pagerank(3, &edges, None);
1142
1143        // All nodes in a symmetric cycle should have equal rank
1144        assert_eq!(ranks.len(), 3);
1145        let sum: f32 = ranks.iter().sum();
1146        assert!(
1147            (sum - 1.0).abs() < 0.01,
1148            "ranks should sum to ~1.0, got {sum}"
1149        );
1150
1151        // In a perfect cycle, all ranks should be approximately equal
1152        let expected = 1.0 / 3.0;
1153        for (i, &r) in ranks.iter().enumerate() {
1154            assert!(
1155                (r - expected).abs() < 0.05,
1156                "rank[{i}] = {r}, expected ~{expected}"
1157            );
1158        }
1159    }
1160
1161    #[test]
1162    fn test_pagerank_star() {
1163        // Star graph: 0,1,2 all point to 3
1164        let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
1165        let ranks = pagerank(4, &edges, None);
1166
1167        assert_eq!(ranks.len(), 4);
1168        // Node 3 should have the highest rank
1169        let max_idx = ranks
1170            .iter()
1171            .enumerate()
1172            .max_by(|a, b| a.1.total_cmp(b.1))
1173            .unwrap()
1174            .0;
1175        assert_eq!(max_idx, 3, "node 3 should have highest rank");
1176        assert!(
1177            ranks[3] > ranks[0],
1178            "rank[3]={} should be > rank[0]={}",
1179            ranks[3],
1180            ranks[0]
1181        );
1182    }
1183
1184    #[test]
1185    fn test_pagerank_topic_sensitive() {
1186        // 3-node chain: 0 -> 1 -> 2
1187        let edges = vec![(0, 1, 1), (1, 2, 1)];
1188        let uniform_ranks = pagerank(3, &edges, None);
1189        let biased_ranks = pagerank(3, &edges, Some(0));
1190
1191        // With focus on node 0, it should get a higher rank than uniform
1192        assert!(
1193            biased_ranks[0] > uniform_ranks[0],
1194            "focused rank[0]={} should be > uniform rank[0]={}",
1195            biased_ranks[0],
1196            uniform_ranks[0]
1197        );
1198    }
1199
1200    #[test]
1201    fn test_pagerank_empty() {
1202        let ranks = pagerank(0, &[], None);
1203        assert!(ranks.is_empty());
1204    }
1205
1206    #[test]
1207    fn test_render_tiers() {
1208        // Build a small graph with 10 files to exercise all tiers
1209        let files: Vec<FileNode> = (0..10)
1210            .map(|i| FileNode {
1211                path: format!("src/file_{i}.rs"),
1212                defs: vec![Definition {
1213                    name: format!("func_{i}"),
1214                    kind: "function_item".to_string(),
1215                    start_line: 1,
1216                    end_line: 5,
1217                    scope: String::new(),
1218                    signature: Some(format!("func_{i}(x: i32) -> i32")),
1219                    start_byte: 0,
1220                    end_byte: 0,
1221                    calls: vec![],
1222                }],
1223                imports: vec![],
1224            })
1225            .collect();
1226
1227        // Create a star graph: files 1-9 all import from file 0
1228        let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
1229        let base_ranks = pagerank(10, &edges, None);
1230        let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
1231
1232        let graph = RepoGraph {
1233            files,
1234            edges,
1235            base_ranks,
1236            callers: top_callers,
1237            callees: top_callees,
1238            def_edges: vec![],
1239            def_ranks: vec![],
1240            def_callers: vec![],
1241            def_callees: vec![],
1242            def_offsets: vec![0],
1243            alpha: 0.5,
1244        };
1245
1246        // Large budget: should include all files
1247        let full = render(&graph, 10_000, None);
1248        assert!(
1249            full.contains("file_0"),
1250            "output should contain the top-ranked file"
1251        );
1252        // file_0 should appear as tier 0 (highest rank)
1253        assert!(
1254            full.contains("## src/file_0.rs"),
1255            "top file should have tier 0 heading"
1256        );
1257
1258        // Tiny budget: should only fit a few files
1259        let small = render(&graph, 10, None);
1260        assert!(
1261            !small.is_empty(),
1262            "even tiny budget should produce some output"
1263        );
1264        // Should have fewer entries than full render
1265        let full_lines = full.lines().count();
1266        let small_lines = small.lines().count();
1267        assert!(
1268            small_lines < full_lines,
1269            "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
1270        );
1271    }
1272
1273    #[test]
1274    fn test_render_empty_graph() {
1275        let graph = RepoGraph {
1276            files: vec![],
1277            edges: vec![],
1278            base_ranks: vec![],
1279            callers: vec![],
1280            callees: vec![],
1281            def_edges: vec![],
1282            def_ranks: vec![],
1283            def_callers: vec![],
1284            def_callees: vec![],
1285            def_offsets: vec![0],
1286            alpha: 0.5,
1287        };
1288        let output = render(&graph, 1000, None);
1289        assert!(output.is_empty(), "empty graph should render empty string");
1290    }
1291
1292    #[test]
1293    fn test_build_graph_on_fixtures() {
1294        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
1295            .parent()
1296            .unwrap()
1297            .parent()
1298            .unwrap()
1299            .join("tests")
1300            .join("fixtures");
1301
1302        let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
1303
1304        // Should find at least the 3 fixture files
1305        assert!(
1306            !graph.files.is_empty(),
1307            "graph should contain files from fixtures"
1308        );
1309
1310        // Should find definitions in the Rust fixture
1311        let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
1312        assert!(rs_file.is_some(), "should find sample.rs");
1313        let rs_file = rs_file.unwrap();
1314        assert!(
1315            !rs_file.defs.is_empty(),
1316            "sample.rs should have definitions"
1317        );
1318        assert!(
1319            rs_file.defs.iter().any(|d| d.name == "hello"),
1320            "should find 'hello' function in sample.rs"
1321        );
1322
1323        // Should find definitions in the Python fixture
1324        let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
1325        assert!(py_file.is_some(), "should find sample.py");
1326        let py_file = py_file.unwrap();
1327        assert!(
1328            !py_file.defs.is_empty(),
1329            "sample.py should have definitions"
1330        );
1331        assert!(
1332            py_file.defs.iter().any(|d| d.name == "greet"),
1333            "should find 'greet' function in sample.py"
1334        );
1335
1336        // PageRank scores should be computed
1337        assert_eq!(graph.base_ranks.len(), graph.files.len());
1338        let sum: f32 = graph.base_ranks.iter().sum();
1339        assert!(
1340            (sum - 1.0).abs() < 0.01,
1341            "PageRank scores should sum to ~1.0, got {sum}"
1342        );
1343    }
1344
1345    #[test]
1346    fn test_extract_imports_rust() {
1347        let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
1348        let (lang, query) = import_query_for_extension("rs").unwrap();
1349        let imports = extract_imports(source, &lang, &query);
1350        assert_eq!(imports.len(), 2);
1351        assert!(imports[0].contains("crate::foo::bar"));
1352    }
1353
1354    #[test]
1355    fn test_extract_imports_python_stub() {
1356        let source = "from typing import Protocol\nimport pkg.types\n";
1357        let (lang, query) = import_query_for_extension("pyi").unwrap();
1358        let imports = extract_imports(source, &lang, &query);
1359        assert_eq!(imports.len(), 2);
1360        assert!(imports[0].contains("from typing import Protocol"));
1361        assert!(imports[1].contains("import pkg.types"));
1362    }
1363
1364    #[test]
1365    fn test_resolve_python_import_to_stub_file() {
1366        let root = PathBuf::from("/project");
1367        let mut file_index = HashMap::new();
1368        file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
1369
1370        let result = resolve_python_import("import pkg.types", &root, &file_index);
1371        assert_eq!(result, Some(1));
1372    }
1373
1374    #[test]
1375    fn test_resolve_rust_crate_import() {
1376        let root = PathBuf::from("/project");
1377        let file_path = PathBuf::from("/project/src/main.rs");
1378        let mut file_index = HashMap::new();
1379        file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1380        file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1381
1382        let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1383        assert_eq!(result, Some(1));
1384    }
1385
1386    #[test]
1387    fn test_resolve_rust_external_crate_dropped() {
1388        let root = PathBuf::from("/project");
1389        let file_path = PathBuf::from("/project/src/main.rs");
1390        let file_index = HashMap::new();
1391
1392        let result = resolve_rust_import(
1393            "use std::collections::HashMap;",
1394            &file_path,
1395            &root,
1396            &file_index,
1397        );
1398        assert_eq!(result, None, "external crate imports should be dropped");
1399    }
1400
1401    #[test]
1402    fn test_neighbor_lists() {
1403        // 0 -> 1, 0 -> 2, 1 -> 2
1404        let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1405        let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1406
1407        // Node 2 should be called by 0 and 1
1408        assert!(incoming[2].contains(&0));
1409        assert!(incoming[2].contains(&1));
1410
1411        // Node 0 should call 1 and 2
1412        assert!(outgoing[0].contains(&1));
1413        assert!(outgoing[0].contains(&2));
1414    }
1415
1416    #[test]
1417    #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1418    fn test_full_repo_map() {
1419        use std::time::Instant;
1420
1421        let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1422            .parent()
1423            .unwrap()
1424            .parent()
1425            .unwrap();
1426
1427        // Phase 1: build_graph (walk + parse + import resolve + PageRank)
1428        let t0 = Instant::now();
1429        let graph = build_graph(root).expect("build_graph on ripvec root");
1430        let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1431
1432        // Phase 2: render (default, no focus)
1433        let t1 = Instant::now();
1434        let rendered = render(&graph, 2000, None);
1435        let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1436
1437        // Phase 3: render (topic-sensitive, focused on highest-ranked file)
1438        let t2 = Instant::now();
1439        let focus_idx = graph
1440            .base_ranks
1441            .iter()
1442            .enumerate()
1443            .max_by(|a, b| a.1.total_cmp(b.1))
1444            .map(|(i, _)| i);
1445        let focused = render(&graph, 2000, focus_idx);
1446        let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1447
1448        eprintln!("\n=== Repo Map Performance ===");
1449        eprintln!(
1450            "Files: {}, Edges: {}, Defs: {}",
1451            graph.files.len(),
1452            graph.edges.len(),
1453            graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1454        );
1455        eprintln!("build_graph:     {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1456        eprintln!(
1457            "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1458            rendered.len(),
1459            rendered.len() / 4
1460        );
1461        eprintln!(
1462            "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1463            focused.len(),
1464            focused.len() / 4
1465        );
1466
1467        eprintln!("\nTop 5 by PageRank:");
1468        let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1469        ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1470        for (i, rank) in ranked.iter().take(5) {
1471            eprintln!("  {:.4} {}", rank, graph.files[*i].path);
1472        }
1473
1474        eprintln!("\n=== Default Render ===\n{rendered}");
1475        eprintln!(
1476            "\n=== Focused Render (on {}) ===\n{focused}",
1477            focus_idx
1478                .map(|i| graph.files[i].path.as_str())
1479                .unwrap_or("none")
1480        );
1481    }
1482}