Skip to main content

ripvec_core/
repo_map.rs

1//! `PageRank`-weighted structural overview of a codebase.
2//!
3//! Builds a dependency graph from tree-sitter definition and import extraction,
4//! ranks files by importance using `PageRank` (standard or topic-sensitive), and
5//! renders a budget-constrained overview with tiered detail levels.
6
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
12use streaming_iterator::StreamingIterator;
13use tree_sitter::{Parser, Query, QueryCursor};
14
15use crate::languages;
16use crate::walk;
17
18// ── Data Structures ──────────────────────────────────────────────────
19
/// Persisted dependency graph with `PageRank` scores.
///
/// Built by [`build_graph`] and consumed by [`render`]; serialized with
/// `rkyv`, so field order is part of the on-disk layout.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct RepoGraph {
    /// Files in the repository with definitions and imports.
    pub files: Vec<FileNode>,
    /// Dependency edges: `(importer_idx, definer_idx, weight)`, where the
    /// weight counts how many imports in the importer resolved to the definer.
    pub edges: Vec<(u32, u32, u32)>,
    /// Standard `PageRank` scores (one per file, parallel to `files`).
    pub base_ranks: Vec<f32>,
    /// Top callers per file (indices into `files`), heaviest edges first.
    pub callers: Vec<Vec<u32>>,
    /// Top callees per file (indices into `files`), heaviest edges first.
    pub callees: Vec<Vec<u32>>,
    /// Auto-tuned alpha for search boost, derived from graph density in
    /// [`build_graph`] (0.5 for sparse graphs up to 0.8 when fully dense).
    pub alpha: f32,
}
36
/// A file in the repository with its definitions and imports.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct FileNode {
    /// Relative path from the repository root.
    pub path: String,
    /// Definitions (functions, structs, classes, etc.) extracted from this
    /// file by the language's tree-sitter definition query.
    pub defs: Vec<Definition>,
    /// Import references extracted from this file. Unresolved imports
    /// (external packages, unknown paths) are kept with `resolved_idx = None`.
    pub imports: Vec<ImportRef>,
}
47
/// A definition extracted from a source file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct Definition {
    /// Name of the definition (e.g., function name, class name).
    pub name: String,
    /// Kind of syntax node (e.g., `function_item`, `class_definition`),
    /// as reported by tree-sitter.
    pub kind: String,
    /// 1-based start line number.
    pub start_line: u32,
    /// 1-based end line number.
    pub end_line: u32,
    /// Scope chain (e.g., `"impl_item Foo > fn bar"`); may be empty for
    /// top-level items.
    pub scope: String,
    /// Function/method signature, if one could be extracted.
    pub signature: Option<String>,
}
64
/// An import reference extracted from a source file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ImportRef {
    /// Raw import path as written in source (e.g., `crate::foo::bar`).
    pub raw_path: String,
    /// Resolved file index in [`RepoGraph::files`], if resolution succeeded;
    /// `None` for external packages or paths that match no indexed file.
    pub resolved_idx: Option<u32>,
}
73
74// ── Constants ────────────────────────────────────────────────────────
75
/// `PageRank` damping factor (probability of following an edge rather than
/// teleporting); 0.85 is the classic value from the original paper.
const DAMPING: f32 = 0.85;

/// `PageRank` convergence threshold: iteration stops once the L1 change
/// between successive rank vectors drops below this.
const EPSILON: f32 = 1e-6;

/// Maximum `PageRank` iterations (safety cap if convergence is slow).
const MAX_ITERATIONS: usize = 100;

/// Maximum callers/callees stored per file.
const MAX_NEIGHBORS: usize = 5;

/// Approximate characters per token for budget estimation in [`render`].
const CHARS_PER_TOKEN: usize = 4;
90
91// ── Import Queries ───────────────────────────────────────────────────
92
93/// Compile a tree-sitter import query for the given extension.
94///
95/// Returns `None` for unsupported extensions.
96fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
97    let (lang, query_str): (tree_sitter::Language, &str) = match ext {
98        "rs" => (
99            tree_sitter_rust::LANGUAGE.into(),
100            "(use_declaration) @import",
101        ),
102        "py" => (
103            tree_sitter_python::LANGUAGE.into(),
104            concat!(
105                "(import_statement) @import\n",
106                "(import_from_statement) @import",
107            ),
108        ),
109        "js" | "jsx" => (
110            tree_sitter_javascript::LANGUAGE.into(),
111            "(import_statement source: (string) @import_path) @import",
112        ),
113        "ts" => (
114            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
115            "(import_statement source: (string) @import_path) @import",
116        ),
117        "tsx" => (
118            tree_sitter_typescript::LANGUAGE_TSX.into(),
119            "(import_statement source: (string) @import_path) @import",
120        ),
121        "go" => (
122            tree_sitter_go::LANGUAGE.into(),
123            "(import_spec path: (interpreted_string_literal) @import_path) @import",
124        ),
125        _ => return None,
126    };
127    let query = Query::new(&lang, query_str).ok()?;
128    Some((lang, query))
129}
130
131/// Extract import paths from source using tree-sitter.
132fn extract_imports(
133    source: &str,
134    lang: &tree_sitter::Language,
135    import_query: &Query,
136) -> Vec<String> {
137    let mut parser = Parser::new();
138    if parser.set_language(lang).is_err() {
139        return vec![];
140    }
141    let Some(tree) = parser.parse(source, None) else {
142        return vec![];
143    };
144
145    let mut cursor = QueryCursor::new();
146    let mut imports = Vec::new();
147    let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
148
149    while let Some(m) = matches.next() {
150        // Prefer @import_path capture (JS/TS/Go), fall back to full @import text
151        let mut import_path_text = None;
152        let mut import_text = None;
153
154        for cap in m.captures {
155            let cap_name = &import_query.capture_names()[cap.index as usize];
156            let text = &source[cap.node.start_byte()..cap.node.end_byte()];
157            if *cap_name == "import_path" {
158                import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
159            } else if *cap_name == "import" {
160                import_text = Some(text);
161            }
162        }
163
164        if let Some(path) = import_path_text {
165            imports.push(path.to_string());
166        } else if let Some(text) = import_text {
167            imports.push(text.to_string());
168        }
169    }
170
171    imports
172}
173
174// ── Import Resolution ────────────────────────────────────────────────
175
/// Resolve a Rust `use` path to a file index in the file map.
///
/// Handles `crate::`, `self::`, and `super::` prefixes (including chained
/// `super::super::`). External crate imports are dropped (returns `None`).
fn resolve_rust_import(
    raw: &str,
    file_path: &Path,
    root: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Extract the module path from `use crate::foo::bar;` or `use crate::foo::bar::Baz;`
    let trimmed = raw
        .trim()
        .trim_start_matches("use ")
        .trim_end_matches(';')
        .trim();

    let segments: Vec<&str> = trimmed.split("::").collect();
    if segments.is_empty() {
        return None;
    }

    // Directory owned by the module that `file_path` declares.
    // `mod.rs`/`lib.rs`/`main.rs` own their containing directory, while a
    // regular `foo.rs` owns the sibling directory `foo/`. This distinction
    // makes `self::`/`super::` resolve correctly for both layouts (the old
    // code used the parent directory unconditionally, which is wrong for
    // non-`mod.rs` files).
    let module_dir = || -> Option<PathBuf> {
        let parent = file_path.parent()?;
        let stem = file_path.file_stem().and_then(|s| s.to_str())?;
        if matches!(stem, "mod" | "lib" | "main") {
            Some(parent.to_path_buf())
        } else {
            Some(parent.join(stem))
        }
    };

    // Determine the base directory and how many prefix segments to skip
    let (base, skip) = match segments[0] {
        "crate" => {
            // Find the nearest Cargo.toml ancestor to determine the crate root.
            // In a workspace, `crate::foo` resolves relative to the crate's src/,
            // not the workspace root.
            let mut dir = file_path.parent();
            let crate_root = loop {
                match dir {
                    Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
                    Some(d) => dir = d.parent(),
                    None => break root.join("src"), // fallback
                }
            };
            (crate_root, 1)
        }
        "self" => (module_dir()?, 1),
        "super" => {
            // Each leading `super` climbs one module level.
            let mut dir = module_dir()?;
            let mut skip = 0;
            while segments.get(skip) == Some(&"super") {
                dir = dir.parent()?.to_path_buf();
                skip += 1;
            }
            (dir, skip)
        }
        // External crate — drop
        _ => return None,
    };

    // Build candidate paths from the remaining segments.
    // Try progressively shorter prefixes since the last segments
    // may be items (struct, fn) rather than modules.
    let path_segments = &segments[skip..];
    for end in (1..=path_segments.len()).rev() {
        let mut candidate = base.clone();
        for seg in &path_segments[..end] {
            // Strip glob patterns like `{Foo, Bar}`
            let clean = seg.split('{').next().unwrap_or(seg).trim();
            if !clean.is_empty() {
                candidate.push(clean);
            }
        }

        // Try file.rs
        let as_file = candidate.with_extension("rs");
        if let Some(&idx) = file_index.get(&as_file) {
            return Some(idx);
        }

        // Try dir/mod.rs
        let as_mod = candidate.join("mod.rs");
        if let Some(&idx) = file_index.get(&as_mod) {
            return Some(idx);
        }
    }

    None
}
255
256/// Resolve an import path to a file index based on file extension.
257fn resolve_import(
258    raw: &str,
259    ext: &str,
260    file_path: &Path,
261    root: &Path,
262    file_index: &HashMap<PathBuf, usize>,
263) -> Option<usize> {
264    match ext {
265        "rs" => resolve_rust_import(raw, file_path, root, file_index),
266        "py" => resolve_python_import(raw, root, file_index),
267        "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
268        // Go imports use full package paths — skip local resolution
269        _ => None,
270    }
271}
272
/// Resolve a Python import to a file index.
///
/// Supports both `import foo.bar` and `from foo.bar import baz` forms.
/// The dotted module path is mapped onto the tree rooted at `root`,
/// trying `foo/bar.py` first and the package form `foo/bar/__init__.py`
/// second.
fn resolve_python_import(
    raw: &str,
    root: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Pull the dotted module path out of either import form.
    let module = raw
        .strip_prefix("from ")
        .or_else(|| raw.strip_prefix("import "))
        .and_then(|rest| rest.split_whitespace().next())?;

    let relative: PathBuf = module.split('.').collect();

    // A module is either a plain file or a package directory.
    let module_file = root.join(&relative).with_extension("py");
    let package_init = root.join(&relative).join("__init__.py");

    file_index
        .get(&module_file)
        .or_else(|| file_index.get(&package_init))
        .copied()
}
298
/// Resolve a JS/TS import to a file index.
///
/// Handles relative paths like `./foo` or `../bar`; bare package names
/// are skipped. The joined path is lexically normalized (folding `.` and
/// `..` components) because `file_index` keys are canonical paths and
/// `Path` equality/hashing does not fold `..` — without this, every
/// `../` import silently failed to resolve. Tries `<path>.{js,jsx,ts,tsx}`
/// first, then `<path>/index.{js,jsx,ts,tsx}`.
fn resolve_js_import(
    raw: &str,
    file_path: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Lexically fold `.` and `..` so candidates match canonical index keys.
    fn lexically_normalize(path: &Path) -> PathBuf {
        let mut out = PathBuf::new();
        for comp in path.components() {
            match comp {
                std::path::Component::CurDir => {}
                std::path::Component::ParentDir => {
                    out.pop();
                }
                other => out.push(other.as_os_str()),
            }
        }
        out
    }

    // Only relative specifiers can point at files inside the repo.
    if !raw.starts_with('.') {
        return None;
    }

    let dir = file_path.parent()?;
    let candidate = lexically_normalize(&dir.join(raw));

    // `./foo` → `foo.js` / `foo.jsx` / `foo.ts` / `foo.tsx`
    for ext in &["js", "jsx", "ts", "tsx"] {
        let with_ext = candidate.with_extension(ext);
        if let Some(&idx) = file_index.get(&with_ext) {
            return Some(idx);
        }
    }

    // `./foo` → `foo/index.js` / … (directory import)
    for ext in &["js", "jsx", "ts", "tsx"] {
        let index_file = candidate.join("index").with_extension(ext);
        if let Some(&idx) = file_index.get(&index_file) {
            return Some(idx);
        }
    }

    None
}
330
331// ── Extraction ───────────────────────────────────────────────────────
332
333/// Extract definitions from a source file using tree-sitter.
334fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
335    let mut parser = Parser::new();
336    if parser.set_language(&config.language).is_err() {
337        return vec![];
338    }
339    let Some(tree) = parser.parse(source, None) else {
340        return vec![];
341    };
342
343    let mut cursor = QueryCursor::new();
344    let mut defs = Vec::new();
345    let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
346
347    while let Some(m) = matches.next() {
348        let mut name = String::new();
349        let mut def_node = None;
350
351        for cap in m.captures {
352            let cap_name = &config.query.capture_names()[cap.index as usize];
353            if *cap_name == "name" {
354                name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
355            } else if *cap_name == "def" {
356                def_node = Some(cap.node);
357            }
358        }
359
360        if let Some(node) = def_node {
361            let scope = crate::chunk::build_scope_chain(node, source);
362            let signature = crate::chunk::extract_signature(node, source);
363            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
364            let start_line = node.start_position().row as u32 + 1;
365            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
366            let end_line = node.end_position().row as u32 + 1;
367            defs.push(Definition {
368                name,
369                kind: node.kind().to_string(),
370                start_line,
371                end_line,
372                scope,
373                signature,
374            });
375        }
376    }
377
378    defs
379}
380
381// ── PageRank ─────────────────────────────────────────────────────────
382
383/// Compute `PageRank` scores for a graph.
384///
385/// If `focus` is `Some(idx)`, computes topic-sensitive `PageRank` biased
386/// toward file `idx`. Otherwise computes standard (uniform) `PageRank`.
387///
388/// Returns one score per node, summing to 1.0.
389#[expect(
390    clippy::cast_precision_loss,
391    reason = "node count fits comfortably in f32"
392)]
393fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
394    if n == 0 {
395        return vec![];
396    }
397
398    // Build adjacency: out_edges[src] = [(dst, weight)]
399    let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
400    let mut out_weight: Vec<f32> = vec![0.0; n];
401
402    for &(src, dst, w) in edges {
403        let (s, d) = (src as usize, dst as usize);
404        if s < n && d < n {
405            #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
406            let wf = f64::from(w) as f32;
407            out_edges[s].push((d, wf));
408            out_weight[s] += wf;
409        }
410    }
411
412    // Personalization vector: for topic-sensitive PageRank, blend
413    // 70% focus on the target file with 30% uniform. Pure focus
414    // (100%) starves unreachable nodes to rank=0 in sparse graphs.
415    let bias: Vec<f32> = if let Some(idx) = focus {
416        let uniform = 1.0 / n as f32;
417        let mut b = vec![0.3 * uniform; n];
418        if idx < n {
419            b[idx] += 0.7;
420        }
421        // Normalize to sum=1
422        let sum: f32 = b.iter().sum();
423        for v in &mut b {
424            *v /= sum;
425        }
426        b
427    } else {
428        vec![1.0 / n as f32; n]
429    };
430
431    let mut rank = vec![1.0 / n as f32; n];
432    let mut next_rank = vec![0.0_f32; n];
433
434    for _ in 0..MAX_ITERATIONS {
435        // Collect dangling mass (nodes with no outgoing edges)
436        let dangling: f32 = rank
437            .iter()
438            .enumerate()
439            .filter(|&(i, _)| out_edges[i].is_empty())
440            .map(|(_, &r)| r)
441            .sum();
442
443        // Distribute rank
444        for (i, nr) in next_rank.iter_mut().enumerate() {
445            *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
446        }
447
448        for (src, edges_list) in out_edges.iter().enumerate() {
449            if edges_list.is_empty() {
450                continue;
451            }
452            let src_rank = rank[src];
453            let total_w = out_weight[src];
454            for &(dst, w) in edges_list {
455                next_rank[dst] += DAMPING * src_rank * (w / total_w);
456            }
457        }
458
459        // Check convergence
460        let diff: f32 = rank
461            .iter()
462            .zip(next_rank.iter())
463            .map(|(a, b)| (a - b).abs())
464            .sum();
465
466        std::mem::swap(&mut rank, &mut next_rank);
467
468        if diff < EPSILON {
469            break;
470        }
471    }
472
473    rank
474}
475
476// ── Graph Building ───────────────────────────────────────────────────
477
/// Build a dependency graph from a repository root.
///
/// Walks the directory tree, parses each supported file with tree-sitter,
/// extracts definitions and imports, resolves import paths to files, runs
/// `PageRank`, and builds caller/callee lists.
///
/// # Errors
///
/// Returns an error if file walking or reading fails.
pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
    // Canonicalize up front so walked paths and resolver candidates
    // (joined back onto `root`) compare equal as `HashMap` keys.
    let root = root.canonicalize().map_err(|e| crate::Error::Io {
        path: root.display().to_string(),
        source: e,
    })?;

    let all_files = walk::collect_files(&root, None);

    // Build file index mapping canonical paths to indices
    let mut file_index: HashMap<PathBuf, usize> = HashMap::new();
    let mut files: Vec<FileNode> = Vec::new();
    let mut raw_sources: Vec<(usize, String, String)> = Vec::new(); // (idx, ext, source)

    // Pass 1: register every supported, readable file. The index must be
    // complete before import resolution so imports can reference any file,
    // including ones encountered later in the walk.
    for path in &all_files {
        let ext = path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or_default()
            .to_string();

        // Only process files with known language support
        if languages::config_for_extension(&ext).is_none()
            && import_query_for_extension(&ext).is_none()
        {
            continue;
        }

        let Ok(source) = std::fs::read_to_string(path) else {
            continue; // Skip binary/unreadable files
        };

        let rel_path = path
            .strip_prefix(&root)
            .unwrap_or(path)
            .display()
            .to_string();

        let idx = files.len();
        file_index.insert(path.clone(), idx);
        files.push(FileNode {
            path: rel_path,
            defs: vec![],
            imports: vec![],
        });
        raw_sources.push((idx, ext, source));
    }

    // Pass 2: extract definitions and imports, resolving imports against
    // the now-complete file index.
    for (idx, ext, source) in &raw_sources {
        // Extract definitions
        if let Some(config) = languages::config_for_extension(ext) {
            files[*idx].defs = extract_definitions(source, &config);
        }

        // Extract imports
        if let Some((lang, import_query)) = import_query_for_extension(ext) {
            let raw_imports = extract_imports(source, &lang, &import_query);
            // Reconstruct the absolute path; resolvers expect absolute paths
            // that match the canonical keys in `file_index`.
            let file_path = root.join(&files[*idx].path);

            files[*idx].imports = raw_imports
                .into_iter()
                .map(|raw| {
                    let resolved_idx = resolve_import(&raw, ext, &file_path, &root, &file_index)
                        .and_then(|i| u32::try_from(i).ok());
                    ImportRef {
                        raw_path: raw,
                        resolved_idx,
                    }
                })
                .collect();
        }
    }

    // Build edge list from resolved imports; the weight counts how many
    // imports from the same importer resolved to the same definer.
    let mut edge_map: HashMap<(u32, u32), u32> = HashMap::new();
    for (importer_idx, file) in files.iter().enumerate() {
        for import in &file.imports {
            if let Some(definer_idx) = import.resolved_idx
                && let Ok(src) = u32::try_from(importer_idx)
            {
                *edge_map.entry((src, definer_idx)).or_insert(0) += 1;
            }
        }
    }
    let edges: Vec<(u32, u32, u32)> = edge_map
        .into_iter()
        .map(|((src, dst), w)| (src, dst, w))
        .collect();

    // Compute PageRank
    let n = files.len();
    let base_ranks = pagerank(n, &edges, None);

    // Build caller/callee lists
    let (inbound, outbound) = build_neighbor_lists(n, &edges);

    // Auto-tune alpha based on graph density: density is edge count over
    // the maximum possible directed edges n*(n-1), so alpha spans 0.5..=0.8.
    #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
    let density = if n > 1 {
        edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
    } else {
        0.0
    };
    let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);

    Ok(RepoGraph {
        files,
        edges,
        base_ranks,
        callers: inbound,
        callees: outbound,
        alpha,
    })
}
601
602/// Build top-N caller and callee lists for each file.
603fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
604    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
605    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
606
607    for &(src, dst, w) in edges {
608        let (s, d) = (src as usize, dst as usize);
609        if s < n && d < n {
610            incoming[d].push((src, w));
611            outgoing[s].push((dst, w));
612        }
613    }
614
615    // Sort by weight descending, keep top N
616    let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
617        lists
618            .iter_mut()
619            .map(|list| {
620                list.sort_by(|a, b| b.1.cmp(&a.1));
621                list.iter()
622                    .take(MAX_NEIGHBORS)
623                    .map(|(idx, _)| *idx)
624                    .collect()
625            })
626            .collect()
627    };
628
629    (trim(&mut incoming), trim(&mut outgoing))
630}
631
632// ── Rendering ────────────────────────────────────────────────────────
633
/// Render a budget-constrained overview of the repository.
///
/// Files are sorted by `PageRank` (or topic-sensitive rank if `focus` is
/// `Some`). Output uses four tiers of decreasing detail:
///
/// - **Tier 0** (top 10%): full path, rank, callers/callees, signatures with scopes
/// - **Tier 1** (next 20%): full path, rank, signatures
/// - **Tier 2** (next 40%): full path, rank, definition names and kinds
/// - **Tier 3** (bottom 30%): file path only
///
/// Stops accumulating output when the estimated token count exceeds
/// `max_tokens`.
#[must_use]
pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
    let n = graph.files.len();
    if n == 0 {
        return String::new();
    }

    // Compute ranks (recompute topic-sensitive if focus is given)
    let ranks = if focus.is_some() {
        pagerank(n, &graph.edges, focus)
    } else {
        graph.base_ranks.clone()
    };

    // Sort file indices by rank descending
    let mut sorted: Vec<usize> = (0..n).collect();
    sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));

    let mut output = String::new();
    let mut used_tokens = 0;
    let max_chars = max_tokens * CHARS_PER_TOKEN;

    for (rank_pos, &file_idx) in sorted.iter().enumerate() {
        if used_tokens >= max_tokens {
            break;
        }

        let file = &graph.files[file_idx];
        let score = ranks[file_idx];
        #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
        let percentile = (rank_pos as f32) / (n as f32);

        // Tier by rank percentile: <10% / <30% / <70% / rest.
        let section = if percentile < 0.1 {
            render_tier0(graph, file_idx, file, score)
        } else if percentile < 0.3 {
            render_tier1(file, score)
        } else if percentile < 0.7 {
            render_tier2(file, score)
        } else {
            render_tier3(file)
        };

        let section_chars = section.len();
        // The first section is always emitted (`used_tokens == 0` skips this
        // guard), so even a tiny budget yields some output. Afterwards, a
        // section that would blow the budget is replaced by its bare path
        // line when that still fits, and rendering stops.
        if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
            // Would exceed budget — try to fit at least the path
            let path_line = format!("{}\n", file.path);
            let path_tokens = path_line.len() / CHARS_PER_TOKEN;
            if used_tokens + path_tokens <= max_tokens {
                output.push_str(&path_line);
            }
            break;
        }

        output.push_str(&section);
        // Cap at max_chars so an oversized first section makes `used_tokens`
        // reach exactly `max_tokens` and the loop's budget check terminates.
        used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
    }

    output
}
705
706/// Render tier 0: full detail with callers, callees, and signatures.
707fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
708    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
709
710    // Callers
711    if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
712        let _ = write!(out, "  called by: ");
713        let names: Vec<&str> = graph.callers[file_idx]
714            .iter()
715            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
716            .collect();
717        let _ = writeln!(out, "{}", names.join(", "));
718    }
719
720    // Callees
721    if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
722        let _ = write!(out, "  calls: ");
723        let names: Vec<&str> = graph.callees[file_idx]
724            .iter()
725            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
726            .collect();
727        let _ = writeln!(out, "{}", names.join(", "));
728    }
729
730    // Definitions with scope and signature
731    for def in &file.defs {
732        let scope_prefix = if def.scope.is_empty() {
733            String::new()
734        } else {
735            format!("{} > ", def.scope)
736        };
737        if let Some(sig) = &def.signature {
738            let _ = writeln!(out, "  {scope_prefix}{} {sig}", def.kind);
739        } else {
740            let _ = writeln!(out, "  {scope_prefix}{} {}", def.kind, def.name);
741        }
742    }
743    let _ = writeln!(out);
744    out
745}
746
747/// Render tier 1: file path, rank, and signatures.
748fn render_tier1(file: &FileNode, score: f32) -> String {
749    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
750    for def in &file.defs {
751        if let Some(sig) = &def.signature {
752            let _ = writeln!(out, "  {sig}");
753        } else {
754            let _ = writeln!(out, "  {} {}", def.kind, def.name);
755        }
756    }
757    let _ = writeln!(out);
758    out
759}
760
761/// Render tier 2: file path, rank, and definition names/kinds.
762fn render_tier2(file: &FileNode, score: f32) -> String {
763    let mut out = format!("{} (rank: {score:.4})", file.path);
764    if !file.defs.is_empty() {
765        let names: Vec<String> = file
766            .defs
767            .iter()
768            .map(|d| format!("{}:{}", d.kind, d.name))
769            .collect();
770        let _ = write!(out, " -- {}", names.join(", "));
771    }
772    let _ = writeln!(out);
773    out
774}
775
776/// Render tier 3: file path only.
777fn render_tier3(file: &FileNode) -> String {
778    format!("{}\n", file.path)
779}
780
781// ── Tests ────────────────────────────────────────────────────────────
782
783#[cfg(test)]
784mod tests {
785    use super::*;
786
    #[test]
    fn test_pagerank_simple() {
        // 3-node graph: 0 -> 1 -> 2, 2 -> 0 (cycle)
        let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
        let ranks = pagerank(3, &edges, None);

        // All nodes in a symmetric cycle should have equal rank,
        // and the rank vector is a probability distribution (sums to 1).
        assert_eq!(ranks.len(), 3);
        let sum: f32 = ranks.iter().sum();
        assert!(
            (sum - 1.0).abs() < 0.01,
            "ranks should sum to ~1.0, got {sum}"
        );

        // In a perfect cycle, all ranks should be approximately equal
        let expected = 1.0 / 3.0;
        for (i, &r) in ranks.iter().enumerate() {
            assert!(
                (r - expected).abs() < 0.05,
                "rank[{i}] = {r}, expected ~{expected}"
            );
        }
    }
810
    #[test]
    fn test_pagerank_star() {
        // Star graph: 0,1,2 all point to 3
        let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
        let ranks = pagerank(4, &edges, None);

        assert_eq!(ranks.len(), 4);
        // Node 3 should have the highest rank: it receives all link mass,
        // while nodes 0-2 only get teleport mass.
        let max_idx = ranks
            .iter()
            .enumerate()
            .max_by(|a, b| a.1.total_cmp(b.1))
            .unwrap()
            .0;
        assert_eq!(max_idx, 3, "node 3 should have highest rank");
        assert!(
            ranks[3] > ranks[0],
            "rank[3]={} should be > rank[0]={}",
            ranks[3],
            ranks[0]
        );
    }
833
    #[test]
    fn test_pagerank_topic_sensitive() {
        // 3-node chain: 0 -> 1 -> 2
        let edges = vec![(0, 1, 1), (1, 2, 1)];
        let uniform_ranks = pagerank(3, &edges, None);
        let biased_ranks = pagerank(3, &edges, Some(0));

        // With focus on node 0, it should get a higher rank than uniform:
        // the personalization vector concentrates restart mass on node 0.
        assert!(
            biased_ranks[0] > uniform_ranks[0],
            "focused rank[0]={} should be > uniform rank[0]={}",
            biased_ranks[0],
            uniform_ranks[0]
        );
    }
849
    #[test]
    fn test_pagerank_empty() {
        // Degenerate case: an empty graph must return an empty vector
        // rather than panicking or dividing by zero.
        let ranks = pagerank(0, &[], None);
        assert!(ranks.is_empty());
    }
855
    #[test]
    fn test_render_tiers() {
        // Build a small graph with 10 files to exercise all tiers
        let files: Vec<FileNode> = (0..10)
            .map(|i| FileNode {
                path: format!("src/file_{i}.rs"),
                defs: vec![Definition {
                    name: format!("func_{i}"),
                    kind: "function_item".to_string(),
                    start_line: 1,
                    end_line: 5,
                    scope: String::new(),
                    signature: Some(format!("func_{i}(x: i32) -> i32")),
                }],
                imports: vec![],
            })
            .collect();

        // Create a star graph: files 1-9 all import from file 0,
        // guaranteeing file 0 ends up with the top PageRank score.
        let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
        let base_ranks = pagerank(10, &edges, None);
        let (top_callers, top_callees) = build_neighbor_lists(10, &edges);

        let graph = RepoGraph {
            files,
            edges,
            base_ranks,
            callers: top_callers,
            callees: top_callees,
            alpha: 0.5,
        };

        // Large budget: should include all files
        let full = render(&graph, 10_000, None);
        assert!(
            full.contains("file_0"),
            "output should contain the top-ranked file"
        );
        // file_0 should appear as tier 0 (highest rank); tier 0/1 sections
        // are the only ones that start with a "## " heading.
        assert!(
            full.contains("## src/file_0.rs"),
            "top file should have tier 0 heading"
        );

        // Tiny budget: should only fit a few files (render always emits
        // at least the first section regardless of budget).
        let small = render(&graph, 10, None);
        assert!(
            !small.is_empty(),
            "even tiny budget should produce some output"
        );
        // Should have fewer entries than full render
        let full_lines = full.lines().count();
        let small_lines = small.lines().count();
        assert!(
            small_lines < full_lines,
            "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
        );
    }
914
915    #[test]
916    fn test_render_empty_graph() {
917        let graph = RepoGraph {
918            files: vec![],
919            edges: vec![],
920            base_ranks: vec![],
921            callers: vec![],
922            callees: vec![],
923            alpha: 0.5,
924        };
925        let output = render(&graph, 1000, None);
926        assert!(output.is_empty(), "empty graph should render empty string");
927    }
928
929    #[test]
930    fn test_build_graph_on_fixtures() {
931        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
932            .parent()
933            .unwrap()
934            .parent()
935            .unwrap()
936            .join("tests")
937            .join("fixtures");
938
939        let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
940
941        // Should find at least the 3 fixture files
942        assert!(
943            !graph.files.is_empty(),
944            "graph should contain files from fixtures"
945        );
946
947        // Should find definitions in the Rust fixture
948        let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
949        assert!(rs_file.is_some(), "should find sample.rs");
950        let rs_file = rs_file.unwrap();
951        assert!(
952            !rs_file.defs.is_empty(),
953            "sample.rs should have definitions"
954        );
955        assert!(
956            rs_file.defs.iter().any(|d| d.name == "hello"),
957            "should find 'hello' function in sample.rs"
958        );
959
960        // Should find definitions in the Python fixture
961        let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
962        assert!(py_file.is_some(), "should find sample.py");
963        let py_file = py_file.unwrap();
964        assert!(
965            !py_file.defs.is_empty(),
966            "sample.py should have definitions"
967        );
968        assert!(
969            py_file.defs.iter().any(|d| d.name == "greet"),
970            "should find 'greet' function in sample.py"
971        );
972
973        // PageRank scores should be computed
974        assert_eq!(graph.base_ranks.len(), graph.files.len());
975        let sum: f32 = graph.base_ranks.iter().sum();
976        assert!(
977            (sum - 1.0).abs() < 0.01,
978            "PageRank scores should sum to ~1.0, got {sum}"
979        );
980    }
981
982    #[test]
983    fn test_extract_imports_rust() {
984        let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
985        let (lang, query) = import_query_for_extension("rs").unwrap();
986        let imports = extract_imports(source, &lang, &query);
987        assert_eq!(imports.len(), 2);
988        assert!(imports[0].contains("crate::foo::bar"));
989    }
990
991    #[test]
992    fn test_resolve_rust_crate_import() {
993        let root = PathBuf::from("/project");
994        let file_path = PathBuf::from("/project/src/main.rs");
995        let mut file_index = HashMap::new();
996        file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
997        file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
998
999        let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1000        assert_eq!(result, Some(1));
1001    }
1002
1003    #[test]
1004    fn test_resolve_rust_external_crate_dropped() {
1005        let root = PathBuf::from("/project");
1006        let file_path = PathBuf::from("/project/src/main.rs");
1007        let file_index = HashMap::new();
1008
1009        let result = resolve_rust_import(
1010            "use std::collections::HashMap;",
1011            &file_path,
1012            &root,
1013            &file_index,
1014        );
1015        assert_eq!(result, None, "external crate imports should be dropped");
1016    }
1017
1018    #[test]
1019    fn test_neighbor_lists() {
1020        // 0 -> 1, 0 -> 2, 1 -> 2
1021        let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1022        let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1023
1024        // Node 2 should be called by 0 and 1
1025        assert!(incoming[2].contains(&0));
1026        assert!(incoming[2].contains(&1));
1027
1028        // Node 0 should call 1 and 2
1029        assert!(outgoing[0].contains(&1));
1030        assert!(outgoing[0].contains(&2));
1031    }
1032
1033    #[test]
1034    #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1035    fn test_full_repo_map() {
1036        use std::time::Instant;
1037
1038        let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1039            .parent()
1040            .unwrap()
1041            .parent()
1042            .unwrap();
1043
1044        // Phase 1: build_graph (walk + parse + import resolve + PageRank)
1045        let t0 = Instant::now();
1046        let graph = build_graph(root).expect("build_graph on ripvec root");
1047        let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1048
1049        // Phase 2: render (default, no focus)
1050        let t1 = Instant::now();
1051        let rendered = render(&graph, 2000, None);
1052        let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1053
1054        // Phase 3: render (topic-sensitive, focused on highest-ranked file)
1055        let t2 = Instant::now();
1056        let focus_idx = graph
1057            .base_ranks
1058            .iter()
1059            .enumerate()
1060            .max_by(|a, b| a.1.total_cmp(b.1))
1061            .map(|(i, _)| i);
1062        let focused = render(&graph, 2000, focus_idx);
1063        let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1064
1065        eprintln!("\n=== Repo Map Performance ===");
1066        eprintln!(
1067            "Files: {}, Edges: {}, Defs: {}",
1068            graph.files.len(),
1069            graph.edges.len(),
1070            graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1071        );
1072        eprintln!("build_graph:     {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1073        eprintln!(
1074            "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1075            rendered.len(),
1076            rendered.len() / 4
1077        );
1078        eprintln!(
1079            "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1080            focused.len(),
1081            focused.len() / 4
1082        );
1083
1084        eprintln!("\nTop 5 by PageRank:");
1085        let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1086        ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1087        for (i, rank) in ranked.iter().take(5) {
1088            eprintln!("  {:.4} {}", rank, graph.files[*i].path);
1089        }
1090
1091        eprintln!("\n=== Default Render ===\n{rendered}");
1092        eprintln!(
1093            "\n=== Focused Render (on {}) ===\n{focused}",
1094            focus_idx
1095                .map(|i| graph.files[i].path.as_str())
1096                .unwrap_or("none")
1097        );
1098    }
1099}