Skip to main content

ripvec_core/
repo_map.rs

1//! `PageRank`-weighted structural overview of a codebase.
2//!
3//! Builds a dependency graph from tree-sitter definition and import extraction,
4//! ranks files by importance using `PageRank` (standard or topic-sensitive), and
5//! renders a budget-constrained overview with tiered detail levels.
6
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::path::{Path, PathBuf};
10
11use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
12use streaming_iterator::StreamingIterator;
13use tree_sitter::{Parser, Query, QueryCursor};
14
15use crate::languages;
16use crate::walk;
17
18// ── Data Structures ──────────────────────────────────────────────────
19
/// Persisted dependency graph with `PageRank` scores.
///
/// `files`, `base_ranks`, `callers`, and `callees` are parallel vectors:
/// index `i` in each refers to the same file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct RepoGraph {
    /// Files in the repository with definitions and imports.
    pub files: Vec<FileNode>,
    /// Dependency edges: `(importer_idx, definer_idx, weight)`. The weight
    /// counts how many imports in the importer resolved to the definer.
    pub edges: Vec<(u32, u32, u32)>,
    /// Standard `PageRank` scores (one per file, summing to ~1.0).
    pub base_ranks: Vec<f32>,
    /// Top callers per file (indices into `files`), heaviest edges first,
    /// capped at `MAX_NEIGHBORS`.
    pub callers: Vec<Vec<u32>>,
    /// Top callees per file (indices into `files`), heaviest edges first,
    /// capped at `MAX_NEIGHBORS`.
    pub callees: Vec<Vec<u32>>,
    /// Auto-tuned alpha for search boost; derived from edge density and
    /// clamped to the range [0.5, 0.8] by `build_graph`.
    pub alpha: f32,
}
36
/// A file in the repository with its definitions and imports.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct FileNode {
    /// Relative path from the repository root.
    pub path: String,
    /// Definitions (functions, structs, classes, etc.) extracted from this file.
    pub defs: Vec<Definition>,
    /// Import references extracted from this file (raw text plus the
    /// resolved file index, when resolution succeeded).
    pub imports: Vec<ImportRef>,
}
47
/// A definition extracted from a source file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct Definition {
    /// Name of the definition (e.g., function name, class name).
    pub name: String,
    /// Kind of syntax node (e.g., `function_item`, `class_definition`).
    pub kind: String,
    /// 1-based start line number of the definition node.
    pub start_line: u32,
    /// 1-based end line number of the definition node.
    pub end_line: u32,
    /// Scope chain (e.g., `"impl_item Foo > fn bar"`); may be empty for
    /// top-level definitions.
    pub scope: String,
    /// Function/method signature, if available.
    pub signature: Option<String>,
}
64
/// An import reference extracted from a source file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ImportRef {
    /// Raw import path as written in source (e.g., `crate::foo::bar`).
    pub raw_path: String,
    /// Resolved file index in [`RepoGraph::files`], if resolution succeeded.
    /// `None` for external packages and unresolvable paths.
    pub resolved_idx: Option<u32>,
}
73
74// ── Constants ────────────────────────────────────────────────────────
75
/// `PageRank` damping factor.
///
/// The conventional 0.85: each iteration keeps 85% of rank mass flowing
/// along edges and teleports the remaining 15% via the bias vector.
const DAMPING: f32 = 0.85;

/// `PageRank` convergence threshold (L1 distance between iterations).
const EPSILON: f32 = 1e-6;

/// Maximum `PageRank` iterations if convergence is not reached first.
const MAX_ITERATIONS: usize = 100;

/// Maximum callers/callees stored per file.
const MAX_NEIGHBORS: usize = 5;

/// Approximate characters per token for budget estimation.
const CHARS_PER_TOKEN: usize = 4;
90
91// ── Import Queries ───────────────────────────────────────────────────
92
93/// Compile a tree-sitter import query for the given extension.
94///
95/// Returns `None` for unsupported extensions.
96fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
97    let (lang, query_str): (tree_sitter::Language, &str) = match ext {
98        "rs" => (
99            tree_sitter_rust::LANGUAGE.into(),
100            "(use_declaration) @import",
101        ),
102        "py" => (
103            tree_sitter_python::LANGUAGE.into(),
104            concat!(
105                "(import_statement) @import\n",
106                "(import_from_statement) @import",
107            ),
108        ),
109        "js" | "jsx" => (
110            tree_sitter_javascript::LANGUAGE.into(),
111            "(import_statement source: (string) @import_path) @import",
112        ),
113        "ts" => (
114            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
115            "(import_statement source: (string) @import_path) @import",
116        ),
117        "tsx" => (
118            tree_sitter_typescript::LANGUAGE_TSX.into(),
119            "(import_statement source: (string) @import_path) @import",
120        ),
121        "go" => (
122            tree_sitter_go::LANGUAGE.into(),
123            "(import_spec path: (interpreted_string_literal) @import_path) @import",
124        ),
125        _ => return None,
126    };
127    let query = match Query::new(&lang, query_str) {
128        Ok(q) => q,
129        Err(e) => {
130            tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
131            return None;
132        }
133    };
134    Some((lang, query))
135}
136
137/// Extract import paths from source using tree-sitter.
138fn extract_imports(
139    source: &str,
140    lang: &tree_sitter::Language,
141    import_query: &Query,
142) -> Vec<String> {
143    let mut parser = Parser::new();
144    if parser.set_language(lang).is_err() {
145        return vec![];
146    }
147    let Some(tree) = parser.parse(source, None) else {
148        return vec![];
149    };
150
151    let mut cursor = QueryCursor::new();
152    let mut imports = Vec::new();
153    let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
154
155    while let Some(m) = matches.next() {
156        // Prefer @import_path capture (JS/TS/Go), fall back to full @import text
157        let mut import_path_text = None;
158        let mut import_text = None;
159
160        for cap in m.captures {
161            let cap_name = &import_query.capture_names()[cap.index as usize];
162            let text = &source[cap.node.start_byte()..cap.node.end_byte()];
163            if *cap_name == "import_path" {
164                import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
165            } else if *cap_name == "import" {
166                import_text = Some(text);
167            }
168        }
169
170        if let Some(path) = import_path_text {
171            imports.push(path.to_string());
172        } else if let Some(text) = import_text {
173            imports.push(text.to_string());
174        }
175    }
176
177    imports
178}
179
180// ── Import Resolution ────────────────────────────────────────────────
181
/// Resolve a Rust `use` path to a file index in the file map.
///
/// Handles `crate::`, `self::`, and `super::` prefixes, including
/// `pub use` re-exports and `as` aliases. External crate imports are
/// dropped (returns `None`).
fn resolve_rust_import(
    raw: &str,
    file_path: &Path,
    root: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Strip any leading visibility (`pub`, `pub(crate)`, …) along with the
    // `use` keyword, keeping only the module path. Without this, `pub use`
    // re-exports would be misclassified as external crates and dropped.
    let trimmed = raw.trim();
    let trimmed = match trimmed.find("use ") {
        Some(pos) => &trimmed[pos + 4..],
        None => trimmed,
    };
    let trimmed = trimmed.trim().trim_end_matches(';').trim();

    let segments: Vec<&str> = trimmed.split("::").collect();
    if segments.is_empty() {
        return None;
    }

    // For `mod.rs` / `lib.rs` / `main.rs` the module's own directory is the
    // containing directory; for `foo.rs` the module's child items live in
    // the sibling `foo/` directory and its parent module is the containing
    // directory itself.
    let parent = file_path.parent();
    let is_dir_module = matches!(
        file_path.file_name().and_then(|n| n.to_str()),
        Some("mod.rs" | "lib.rs" | "main.rs")
    );

    // Determine the base directory and how many prefix segments to skip.
    let (base, skip) = match segments[0] {
        "crate" => {
            // Find the nearest Cargo.toml ancestor to determine the crate root.
            // In a workspace, `crate::foo` resolves relative to the crate's src/,
            // not the workspace root.
            let mut dir = parent;
            let crate_root = loop {
                match dir {
                    Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
                    Some(d) => dir = d.parent(),
                    None => break root.join("src"), // fallback
                }
            };
            (crate_root, 1)
        }
        "self" => {
            let dir = parent?;
            if is_dir_module {
                // `self::x` from mod.rs resolves inside the directory itself.
                (dir.to_path_buf(), 1)
            } else {
                // `self::x` from foo.rs resolves inside the foo/ directory.
                let stem = file_path.file_stem()?;
                (dir.join(stem), 1)
            }
        }
        "super" => {
            if is_dir_module {
                // The parent module of a mod.rs lives one directory up.
                (parent?.parent()?.to_path_buf(), 1)
            } else {
                // The parent module of foo.rs is the containing directory.
                (parent?.to_path_buf(), 1)
            }
        }
        // External crate — drop
        _ => return None,
    };

    // Build candidate paths from the remaining segments.
    // Try progressively shorter prefixes since the last segments
    // may be items (struct, fn) rather than modules.
    let path_segments = &segments[skip..];
    for end in (1..=path_segments.len()).rev() {
        let mut candidate = base.clone();
        for seg in &path_segments[..end] {
            // Strip brace groups like `{Foo, Bar}` and `as` aliases.
            let clean = seg.split('{').next().unwrap_or(seg);
            let clean = clean.split(" as ").next().unwrap_or(clean).trim();
            if !clean.is_empty() {
                candidate.push(clean);
            }
        }

        // Try file.rs
        let as_file = candidate.with_extension("rs");
        if let Some(&idx) = file_index.get(&as_file) {
            return Some(idx);
        }

        // Try dir/mod.rs
        let as_mod = candidate.join("mod.rs");
        if let Some(&idx) = file_index.get(&as_mod) {
            return Some(idx);
        }
    }

    None
}
261
262/// Resolve an import path to a file index based on file extension.
263fn resolve_import(
264    raw: &str,
265    ext: &str,
266    file_path: &Path,
267    root: &Path,
268    file_index: &HashMap<PathBuf, usize>,
269) -> Option<usize> {
270    match ext {
271        "rs" => resolve_rust_import(raw, file_path, root, file_index),
272        "py" => resolve_python_import(raw, root, file_index),
273        "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
274        // Go imports use full package paths — skip local resolution
275        _ => None,
276    }
277}
278
/// Resolve a Python import to a file index.
///
/// Supports `import foo.bar` and `from foo.bar import baz`: the dotted
/// module path is mapped onto `root`, trying `foo/bar.py` first and then
/// the package form `foo/bar/__init__.py`.
fn resolve_python_import(
    raw: &str,
    root: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Grab the dotted module path that follows `from`/`import`.
    let rest = raw
        .strip_prefix("from ")
        .or_else(|| raw.strip_prefix("import "))?;
    let module_path = rest.split_whitespace().next()?;

    // Dots in the module path become directory separators.
    let rel_path: PathBuf = module_path.split('.').collect();

    let module_file = root.join(&rel_path).with_extension("py");
    file_index.get(&module_file).copied().or_else(|| {
        let package_init = root.join(&rel_path).join("__init__.py");
        file_index.get(&package_init).copied()
    })
}
304
/// Resolve a JS/TS relative import to a file index.
///
/// Handles relative specifiers like `./foo` or `../bar`, trying the path
/// itself, then `<path>.{js,jsx,ts,tsx}`, then `<path>/index.{js,jsx,ts,tsx}`.
/// Bare specifiers (packages) return `None`.
fn resolve_js_import(
    raw: &str,
    file_path: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    if !raw.starts_with('.') {
        return None;
    }

    let dir = file_path.parent()?;

    // Normalize `.` and `..` components manually: `Path` equality/hashing is
    // component-based and does NOT collapse `..`, so an un-normalized
    // candidate (e.g. `src/lib/../main`) would never match the normalized
    // keys produced by the directory walk.
    let mut candidate = dir.to_path_buf();
    for comp in Path::new(raw).components() {
        match comp {
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                candidate.pop();
            }
            other => candidate.push(other.as_os_str()),
        }
    }

    // Exact match first, for specifiers that already include an extension.
    if let Some(&idx) = file_index.get(&candidate) {
        return Some(idx);
    }

    const EXTENSIONS: [&str; 4] = ["js", "jsx", "ts", "tsx"];

    // Try <candidate>.<ext>
    for ext in EXTENSIONS {
        if let Some(&idx) = file_index.get(&candidate.with_extension(ext)) {
            return Some(idx);
        }
    }

    // Try <candidate>/index.<ext>
    for ext in EXTENSIONS {
        if let Some(&idx) = file_index.get(&candidate.join("index").with_extension(ext)) {
            return Some(idx);
        }
    }

    None
}
336
337// ── Extraction ───────────────────────────────────────────────────────
338
339/// Extract definitions from a source file using tree-sitter.
340fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
341    let mut parser = Parser::new();
342    if parser.set_language(&config.language).is_err() {
343        return vec![];
344    }
345    let Some(tree) = parser.parse(source, None) else {
346        return vec![];
347    };
348
349    let mut cursor = QueryCursor::new();
350    let mut defs = Vec::new();
351    let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
352
353    while let Some(m) = matches.next() {
354        let mut name = String::new();
355        let mut def_node = None;
356
357        for cap in m.captures {
358            let cap_name = &config.query.capture_names()[cap.index as usize];
359            if *cap_name == "name" {
360                name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
361            } else if *cap_name == "def" {
362                def_node = Some(cap.node);
363            }
364        }
365
366        if let Some(node) = def_node {
367            let scope = crate::chunk::build_scope_chain(node, source);
368            let signature = crate::chunk::extract_signature(node, source);
369            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
370            let start_line = node.start_position().row as u32 + 1;
371            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
372            let end_line = node.end_position().row as u32 + 1;
373            defs.push(Definition {
374                name,
375                kind: node.kind().to_string(),
376                start_line,
377                end_line,
378                scope,
379                signature,
380            });
381        }
382    }
383
384    defs
385}
386
387// ── PageRank ─────────────────────────────────────────────────────────
388
389/// Compute `PageRank` scores for a graph.
390///
391/// If `focus` is `Some(idx)`, computes topic-sensitive `PageRank` biased
392/// toward file `idx`. Otherwise computes standard (uniform) `PageRank`.
393///
394/// Returns one score per node, summing to 1.0.
395#[expect(
396    clippy::cast_precision_loss,
397    reason = "node count fits comfortably in f32"
398)]
399fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
400    if n == 0 {
401        return vec![];
402    }
403
404    // Build adjacency: out_edges[src] = [(dst, weight)]
405    let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
406    let mut out_weight: Vec<f32> = vec![0.0; n];
407
408    for &(src, dst, w) in edges {
409        let (s, d) = (src as usize, dst as usize);
410        if s < n && d < n {
411            #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
412            let wf = f64::from(w) as f32;
413            out_edges[s].push((d, wf));
414            out_weight[s] += wf;
415        }
416    }
417
418    // Personalization vector: for topic-sensitive PageRank, blend
419    // 70% focus on the target file with 30% uniform. Pure focus
420    // (100%) starves unreachable nodes to rank=0 in sparse graphs.
421    let bias: Vec<f32> = if let Some(idx) = focus {
422        let uniform = 1.0 / n as f32;
423        let mut b = vec![0.3 * uniform; n];
424        if idx < n {
425            b[idx] += 0.7;
426        }
427        // Normalize to sum=1
428        let sum: f32 = b.iter().sum();
429        for v in &mut b {
430            *v /= sum;
431        }
432        b
433    } else {
434        vec![1.0 / n as f32; n]
435    };
436
437    let mut rank = vec![1.0 / n as f32; n];
438    let mut next_rank = vec![0.0_f32; n];
439
440    for _ in 0..MAX_ITERATIONS {
441        // Collect dangling mass (nodes with no outgoing edges)
442        let dangling: f32 = rank
443            .iter()
444            .enumerate()
445            .filter(|&(i, _)| out_edges[i].is_empty())
446            .map(|(_, &r)| r)
447            .sum();
448
449        // Distribute rank
450        for (i, nr) in next_rank.iter_mut().enumerate() {
451            *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
452        }
453
454        for (src, edges_list) in out_edges.iter().enumerate() {
455            if edges_list.is_empty() {
456                continue;
457            }
458            let src_rank = rank[src];
459            let total_w = out_weight[src];
460            for &(dst, w) in edges_list {
461                next_rank[dst] += DAMPING * src_rank * (w / total_w);
462            }
463        }
464
465        // Check convergence
466        let diff: f32 = rank
467            .iter()
468            .zip(next_rank.iter())
469            .map(|(a, b)| (a - b).abs())
470            .sum();
471
472        std::mem::swap(&mut rank, &mut next_rank);
473
474        if diff < EPSILON {
475            break;
476        }
477    }
478
479    rank
480}
481
482// ── Graph Building ───────────────────────────────────────────────────
483
/// Build a dependency graph from a repository root.
///
/// Walks the directory tree, parses each supported file with tree-sitter,
/// extracts definitions and imports, resolves import paths to files, runs
/// `PageRank`, and builds caller/callee lists.
///
/// # Errors
///
/// Returns an error if file walking or reading fails.
pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
    // Canonicalize so `file_index` keys match the absolute paths produced
    // by the walker and used during import resolution.
    let root = root.canonicalize().map_err(|e| crate::Error::Io {
        path: root.display().to_string(),
        source: e,
    })?;

    let all_files = walk::collect_files(&root, None);

    // Pass 1: register every supported file so `file_index` is complete
    // before any import resolution happens (imports may point "forward"
    // to files visited later in the walk).
    let mut file_index: HashMap<PathBuf, usize> = HashMap::new();
    let mut files: Vec<FileNode> = Vec::new();
    let mut raw_sources: Vec<(usize, String, String)> = Vec::new(); // (idx, ext, source)

    for path in &all_files {
        let ext = path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or_default()
            .to_string();

        // Only process files with known language support (either a
        // definition query or an import query for the extension).
        if languages::config_for_extension(&ext).is_none()
            && import_query_for_extension(&ext).is_none()
        {
            continue;
        }

        let Ok(source) = std::fs::read_to_string(path) else {
            continue; // Skip binary/unreadable files
        };

        // Store paths relative to the root; fall back to the absolute
        // path if the file is somehow outside the root.
        let rel_path = path
            .strip_prefix(&root)
            .unwrap_or(path)
            .display()
            .to_string();

        let idx = files.len();
        file_index.insert(path.clone(), idx);
        files.push(FileNode {
            path: rel_path,
            defs: vec![],
            imports: vec![],
        });
        raw_sources.push((idx, ext, source));
    }

    // Pass 2: extract definitions and imports now that every file has an
    // index, so imports can resolve to any file regardless of walk order.
    for (idx, ext, source) in &raw_sources {
        // Extract definitions
        if let Some(config) = languages::config_for_extension(ext) {
            files[*idx].defs = extract_definitions(source, &config);
        }

        // Extract imports
        if let Some((lang, import_query)) = import_query_for_extension(ext) {
            let raw_imports = extract_imports(source, &lang, &import_query);
            // Reconstruct the absolute path: relative-import resolution
            // needs the importing file's own directory as context.
            let file_path = root.join(&files[*idx].path);

            files[*idx].imports = raw_imports
                .into_iter()
                .map(|raw| {
                    let resolved_idx = resolve_import(&raw, ext, &file_path, &root, &file_index)
                        .and_then(|i| u32::try_from(i).ok());
                    ImportRef {
                        raw_path: raw,
                        resolved_idx,
                    }
                })
                .collect();
        }
    }

    // Build the edge list from resolved imports; the map deduplicates
    // repeated (importer, definer) pairs, counting multiplicity as weight.
    let mut edge_map: HashMap<(u32, u32), u32> = HashMap::new();
    for (importer_idx, file) in files.iter().enumerate() {
        for import in &file.imports {
            if let Some(definer_idx) = import.resolved_idx
                && let Ok(src) = u32::try_from(importer_idx)
            {
                *edge_map.entry((src, definer_idx)).or_insert(0) += 1;
            }
        }
    }
    let edges: Vec<(u32, u32, u32)> = edge_map
        .into_iter()
        .map(|((src, dst), w)| (src, dst, w))
        .collect();

    // Compute standard (uniform) PageRank over the dependency graph.
    let n = files.len();
    let base_ranks = pagerank(n, &edges, None);

    // Build top-N caller/callee lists per file.
    let (inbound, outbound) = build_neighbor_lists(n, &edges);

    // Auto-tune alpha based on graph density (edges / possible edges).
    #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
    let density = if n > 1 {
        edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
    } else {
        0.0
    };
    // alpha = 0.5 + 0.3 * min(density, 1.0), i.e. clamped to [0.5, 0.8].
    let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);

    Ok(RepoGraph {
        files,
        edges,
        base_ranks,
        callers: inbound,
        callees: outbound,
        alpha,
    })
}
607
608/// Build top-N caller and callee lists for each file.
609fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
610    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
611    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
612
613    for &(src, dst, w) in edges {
614        let (s, d) = (src as usize, dst as usize);
615        if s < n && d < n {
616            incoming[d].push((src, w));
617            outgoing[s].push((dst, w));
618        }
619    }
620
621    // Sort by weight descending, keep top N
622    let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
623        lists
624            .iter_mut()
625            .map(|list| {
626                list.sort_by(|a, b| b.1.cmp(&a.1));
627                list.iter()
628                    .take(MAX_NEIGHBORS)
629                    .map(|(idx, _)| *idx)
630                    .collect()
631            })
632            .collect()
633    };
634
635    (trim(&mut incoming), trim(&mut outgoing))
636}
637
638// ── Rendering ────────────────────────────────────────────────────────
639
640/// Render a budget-constrained overview of the repository.
641///
642/// Files are sorted by `PageRank` (or topic-sensitive rank if `focus` is
643/// `Some`). Output uses four tiers of decreasing detail:
644///
645/// - **Tier 0** (top 10%): full path, rank, callers/callees, signatures with scopes
646/// - **Tier 1** (next 20%): full path, rank, signatures
647/// - **Tier 2** (next 40%): full path, rank, definition names and kinds
648/// - **Tier 3** (bottom 30%): file path only
649///
650/// Stops accumulating output when the estimated token count exceeds
651/// `max_tokens`.
652#[must_use]
653pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
654    let n = graph.files.len();
655    if n == 0 {
656        return String::new();
657    }
658
659    // Compute ranks (recompute topic-sensitive if focus is given)
660    let ranks = if focus.is_some() {
661        pagerank(n, &graph.edges, focus)
662    } else {
663        graph.base_ranks.clone()
664    };
665
666    // Sort file indices by rank descending
667    let mut sorted: Vec<usize> = (0..n).collect();
668    sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
669
670    let mut output = String::new();
671    let mut used_tokens = 0;
672    let max_chars = max_tokens * CHARS_PER_TOKEN;
673
674    for (rank_pos, &file_idx) in sorted.iter().enumerate() {
675        if used_tokens >= max_tokens {
676            break;
677        }
678
679        let file = &graph.files[file_idx];
680        let score = ranks[file_idx];
681        #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
682        let percentile = (rank_pos as f32) / (n as f32);
683
684        let section = if percentile < 0.1 {
685            render_tier0(graph, file_idx, file, score)
686        } else if percentile < 0.3 {
687            render_tier1(file, score)
688        } else if percentile < 0.7 {
689            render_tier2(file, score)
690        } else {
691            render_tier3(file)
692        };
693
694        let section_chars = section.len();
695        if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
696            // Would exceed budget — try to fit at least the path
697            let path_line = format!("{}\n", file.path);
698            let path_tokens = path_line.len() / CHARS_PER_TOKEN;
699            if used_tokens + path_tokens <= max_tokens {
700                output.push_str(&path_line);
701            }
702            break;
703        }
704
705        output.push_str(&section);
706        used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
707    }
708
709    output
710}
711
712/// Render tier 0: full detail with callers, callees, and signatures.
713fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
714    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
715
716    // Callers
717    if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
718        let _ = write!(out, "  called by: ");
719        let names: Vec<&str> = graph.callers[file_idx]
720            .iter()
721            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
722            .collect();
723        let _ = writeln!(out, "{}", names.join(", "));
724    }
725
726    // Callees
727    if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
728        let _ = write!(out, "  calls: ");
729        let names: Vec<&str> = graph.callees[file_idx]
730            .iter()
731            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
732            .collect();
733        let _ = writeln!(out, "{}", names.join(", "));
734    }
735
736    // Definitions with scope and signature
737    for def in &file.defs {
738        let scope_prefix = if def.scope.is_empty() {
739            String::new()
740        } else {
741            format!("{} > ", def.scope)
742        };
743        if let Some(sig) = &def.signature {
744            let _ = writeln!(out, "  {scope_prefix}{} {sig}", def.kind);
745        } else {
746            let _ = writeln!(out, "  {scope_prefix}{} {}", def.kind, def.name);
747        }
748    }
749    let _ = writeln!(out);
750    out
751}
752
753/// Render tier 1: file path, rank, and signatures.
754fn render_tier1(file: &FileNode, score: f32) -> String {
755    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
756    for def in &file.defs {
757        if let Some(sig) = &def.signature {
758            let _ = writeln!(out, "  {sig}");
759        } else {
760            let _ = writeln!(out, "  {} {}", def.kind, def.name);
761        }
762    }
763    let _ = writeln!(out);
764    out
765}
766
767/// Render tier 2: file path, rank, and definition names/kinds.
768fn render_tier2(file: &FileNode, score: f32) -> String {
769    let mut out = format!("{} (rank: {score:.4})", file.path);
770    if !file.defs.is_empty() {
771        let names: Vec<String> = file
772            .defs
773            .iter()
774            .map(|d| format!("{}:{}", d.kind, d.name))
775            .collect();
776        let _ = write!(out, " -- {}", names.join(", "));
777    }
778    let _ = writeln!(out);
779    out
780}
781
782/// Render tier 3: file path only.
783fn render_tier3(file: &FileNode) -> String {
784    format!("{}\n", file.path)
785}
786
787// ── Tests ────────────────────────────────────────────────────────────
788
789#[cfg(test)]
790mod tests {
791    use super::*;
792
793    #[test]
794    fn test_pagerank_simple() {
795        // 3-node graph: 0 -> 1 -> 2, 2 -> 0 (cycle)
796        let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
797        let ranks = pagerank(3, &edges, None);
798
799        // All nodes in a symmetric cycle should have equal rank
800        assert_eq!(ranks.len(), 3);
801        let sum: f32 = ranks.iter().sum();
802        assert!(
803            (sum - 1.0).abs() < 0.01,
804            "ranks should sum to ~1.0, got {sum}"
805        );
806
807        // In a perfect cycle, all ranks should be approximately equal
808        let expected = 1.0 / 3.0;
809        for (i, &r) in ranks.iter().enumerate() {
810            assert!(
811                (r - expected).abs() < 0.05,
812                "rank[{i}] = {r}, expected ~{expected}"
813            );
814        }
815    }
816
817    #[test]
818    fn test_pagerank_star() {
819        // Star graph: 0,1,2 all point to 3
820        let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
821        let ranks = pagerank(4, &edges, None);
822
823        assert_eq!(ranks.len(), 4);
824        // Node 3 should have the highest rank
825        let max_idx = ranks
826            .iter()
827            .enumerate()
828            .max_by(|a, b| a.1.total_cmp(b.1))
829            .unwrap()
830            .0;
831        assert_eq!(max_idx, 3, "node 3 should have highest rank");
832        assert!(
833            ranks[3] > ranks[0],
834            "rank[3]={} should be > rank[0]={}",
835            ranks[3],
836            ranks[0]
837        );
838    }
839
840    #[test]
841    fn test_pagerank_topic_sensitive() {
842        // 3-node chain: 0 -> 1 -> 2
843        let edges = vec![(0, 1, 1), (1, 2, 1)];
844        let uniform_ranks = pagerank(3, &edges, None);
845        let biased_ranks = pagerank(3, &edges, Some(0));
846
847        // With focus on node 0, it should get a higher rank than uniform
848        assert!(
849            biased_ranks[0] > uniform_ranks[0],
850            "focused rank[0]={} should be > uniform rank[0]={}",
851            biased_ranks[0],
852            uniform_ranks[0]
853        );
854    }
855
856    #[test]
857    fn test_pagerank_empty() {
858        let ranks = pagerank(0, &[], None);
859        assert!(ranks.is_empty());
860    }
861
862    #[test]
863    fn test_render_tiers() {
864        // Build a small graph with 10 files to exercise all tiers
865        let files: Vec<FileNode> = (0..10)
866            .map(|i| FileNode {
867                path: format!("src/file_{i}.rs"),
868                defs: vec![Definition {
869                    name: format!("func_{i}"),
870                    kind: "function_item".to_string(),
871                    start_line: 1,
872                    end_line: 5,
873                    scope: String::new(),
874                    signature: Some(format!("func_{i}(x: i32) -> i32")),
875                }],
876                imports: vec![],
877            })
878            .collect();
879
880        // Create a star graph: files 1-9 all import from file 0
881        let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
882        let base_ranks = pagerank(10, &edges, None);
883        let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
884
885        let graph = RepoGraph {
886            files,
887            edges,
888            base_ranks,
889            callers: top_callers,
890            callees: top_callees,
891            alpha: 0.5,
892        };
893
894        // Large budget: should include all files
895        let full = render(&graph, 10_000, None);
896        assert!(
897            full.contains("file_0"),
898            "output should contain the top-ranked file"
899        );
900        // file_0 should appear as tier 0 (highest rank)
901        assert!(
902            full.contains("## src/file_0.rs"),
903            "top file should have tier 0 heading"
904        );
905
906        // Tiny budget: should only fit a few files
907        let small = render(&graph, 10, None);
908        assert!(
909            !small.is_empty(),
910            "even tiny budget should produce some output"
911        );
912        // Should have fewer entries than full render
913        let full_lines = full.lines().count();
914        let small_lines = small.lines().count();
915        assert!(
916            small_lines < full_lines,
917            "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
918        );
919    }
920
921    #[test]
922    fn test_render_empty_graph() {
923        let graph = RepoGraph {
924            files: vec![],
925            edges: vec![],
926            base_ranks: vec![],
927            callers: vec![],
928            callees: vec![],
929            alpha: 0.5,
930        };
931        let output = render(&graph, 1000, None);
932        assert!(output.is_empty(), "empty graph should render empty string");
933    }
934
935    #[test]
936    fn test_build_graph_on_fixtures() {
937        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
938            .parent()
939            .unwrap()
940            .parent()
941            .unwrap()
942            .join("tests")
943            .join("fixtures");
944
945        let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
946
947        // Should find at least the 3 fixture files
948        assert!(
949            !graph.files.is_empty(),
950            "graph should contain files from fixtures"
951        );
952
953        // Should find definitions in the Rust fixture
954        let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
955        assert!(rs_file.is_some(), "should find sample.rs");
956        let rs_file = rs_file.unwrap();
957        assert!(
958            !rs_file.defs.is_empty(),
959            "sample.rs should have definitions"
960        );
961        assert!(
962            rs_file.defs.iter().any(|d| d.name == "hello"),
963            "should find 'hello' function in sample.rs"
964        );
965
966        // Should find definitions in the Python fixture
967        let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
968        assert!(py_file.is_some(), "should find sample.py");
969        let py_file = py_file.unwrap();
970        assert!(
971            !py_file.defs.is_empty(),
972            "sample.py should have definitions"
973        );
974        assert!(
975            py_file.defs.iter().any(|d| d.name == "greet"),
976            "should find 'greet' function in sample.py"
977        );
978
979        // PageRank scores should be computed
980        assert_eq!(graph.base_ranks.len(), graph.files.len());
981        let sum: f32 = graph.base_ranks.iter().sum();
982        assert!(
983            (sum - 1.0).abs() < 0.01,
984            "PageRank scores should sum to ~1.0, got {sum}"
985        );
986    }
987
988    #[test]
989    fn test_extract_imports_rust() {
990        let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
991        let (lang, query) = import_query_for_extension("rs").unwrap();
992        let imports = extract_imports(source, &lang, &query);
993        assert_eq!(imports.len(), 2);
994        assert!(imports[0].contains("crate::foo::bar"));
995    }
996
997    #[test]
998    fn test_resolve_rust_crate_import() {
999        let root = PathBuf::from("/project");
1000        let file_path = PathBuf::from("/project/src/main.rs");
1001        let mut file_index = HashMap::new();
1002        file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1003        file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1004
1005        let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1006        assert_eq!(result, Some(1));
1007    }
1008
1009    #[test]
1010    fn test_resolve_rust_external_crate_dropped() {
1011        let root = PathBuf::from("/project");
1012        let file_path = PathBuf::from("/project/src/main.rs");
1013        let file_index = HashMap::new();
1014
1015        let result = resolve_rust_import(
1016            "use std::collections::HashMap;",
1017            &file_path,
1018            &root,
1019            &file_index,
1020        );
1021        assert_eq!(result, None, "external crate imports should be dropped");
1022    }
1023
1024    #[test]
1025    fn test_neighbor_lists() {
1026        // 0 -> 1, 0 -> 2, 1 -> 2
1027        let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1028        let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1029
1030        // Node 2 should be called by 0 and 1
1031        assert!(incoming[2].contains(&0));
1032        assert!(incoming[2].contains(&1));
1033
1034        // Node 0 should call 1 and 2
1035        assert!(outgoing[0].contains(&1));
1036        assert!(outgoing[0].contains(&2));
1037    }
1038
1039    #[test]
1040    #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1041    fn test_full_repo_map() {
1042        use std::time::Instant;
1043
1044        let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1045            .parent()
1046            .unwrap()
1047            .parent()
1048            .unwrap();
1049
1050        // Phase 1: build_graph (walk + parse + import resolve + PageRank)
1051        let t0 = Instant::now();
1052        let graph = build_graph(root).expect("build_graph on ripvec root");
1053        let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1054
1055        // Phase 2: render (default, no focus)
1056        let t1 = Instant::now();
1057        let rendered = render(&graph, 2000, None);
1058        let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1059
1060        // Phase 3: render (topic-sensitive, focused on highest-ranked file)
1061        let t2 = Instant::now();
1062        let focus_idx = graph
1063            .base_ranks
1064            .iter()
1065            .enumerate()
1066            .max_by(|a, b| a.1.total_cmp(b.1))
1067            .map(|(i, _)| i);
1068        let focused = render(&graph, 2000, focus_idx);
1069        let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1070
1071        eprintln!("\n=== Repo Map Performance ===");
1072        eprintln!(
1073            "Files: {}, Edges: {}, Defs: {}",
1074            graph.files.len(),
1075            graph.edges.len(),
1076            graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1077        );
1078        eprintln!("build_graph:     {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1079        eprintln!(
1080            "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1081            rendered.len(),
1082            rendered.len() / 4
1083        );
1084        eprintln!(
1085            "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1086            focused.len(),
1087            focused.len() / 4
1088        );
1089
1090        eprintln!("\nTop 5 by PageRank:");
1091        let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1092        ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1093        for (i, rank) in ranked.iter().take(5) {
1094            eprintln!("  {:.4} {}", rank, graph.files[*i].path);
1095        }
1096
1097        eprintln!("\n=== Default Render ===\n{rendered}");
1098        eprintln!(
1099            "\n=== Focused Render (on {}) ===\n{focused}",
1100            focus_idx
1101                .map(|i| graph.files[i].path.as_str())
1102                .unwrap_or("none")
1103        );
1104    }
1105}