// ripvec_core/repo_map.rs
//! `PageRank`-weighted structural overview of a codebase.
//!
//! Builds a dependency graph from tree-sitter definition and import extraction,
//! ranks files by importance using `PageRank` (standard or topic-sensitive), and
//! renders a budget-constrained overview with tiered detail levels.

use std::collections::HashMap;
use std::fmt::Write as _;
use std::path::{Component, Path, PathBuf};

use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor};

use crate::languages;
use crate::walk;

// ── Data Structures ──────────────────────────────────────────────────

/// Persisted dependency graph with `PageRank` scores.
///
/// Archived with `rkyv` (see derives); all cross-file references are stored
/// as `u32` indices into [`RepoGraph::files`].
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct RepoGraph {
    /// Files in the repository with definitions and imports.
    pub files: Vec<FileNode>,
    /// Dependency edges: `(importer_idx, definer_idx, weight)`, where
    /// `weight` counts how many imports in the importer resolved to the definer.
    pub edges: Vec<(u32, u32, u32)>,
    /// Standard `PageRank` scores (one per file, same order as `files`).
    pub base_ranks: Vec<f32>,
    /// Top callers per file (indices into `files`), heaviest edges first.
    pub callers: Vec<Vec<u32>>,
    /// Top callees per file (indices into `files`), heaviest edges first.
    pub callees: Vec<Vec<u32>>,
    /// Auto-tuned alpha for search boost; computed from graph density in
    /// [`build_graph`] as `0.5 + 0.3 * min(density, 1.0)`.
    pub alpha: f32,
}

/// A file in the repository with its definitions and imports.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct FileNode {
    /// Relative path from the repository root (rendered via `Path::display`,
    /// so it uses platform-native separators).
    pub path: String,
    /// Definitions (functions, structs, classes, etc.) extracted from this file.
    pub defs: Vec<Definition>,
    /// Import references extracted from this file.
    pub imports: Vec<ImportRef>,
}

/// A definition extracted from a source file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct Definition {
    /// Name of the definition (e.g., function name, class name).
    pub name: String,
    /// Kind of syntax node (e.g., `function_item`, `class_definition`).
    pub kind: String,
    /// 1-based start line number.
    pub start_line: u32,
    /// 1-based end line number.
    pub end_line: u32,
    /// Scope chain (e.g., `"impl_item Foo > fn bar"`); empty for top-level items.
    pub scope: String,
    /// Function/method signature, if available.
    pub signature: Option<String>,
}

/// An import reference extracted from a source file.
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ImportRef {
    /// Raw import path as written in source (e.g., `crate::foo::bar`).
    pub raw_path: String,
    /// Resolved file index in [`RepoGraph::files`], if resolution succeeded.
    /// `None` for external packages and unresolvable paths.
    pub resolved_idx: Option<u32>,
}

// ── Constants ────────────────────────────────────────────────────────

/// `PageRank` damping factor (0.85 is the conventional value from the
/// original PageRank formulation).
const DAMPING: f32 = 0.85;

/// `PageRank` convergence threshold (L1 distance between iterations).
const EPSILON: f32 = 1e-6;

/// Maximum `PageRank` iterations (bounds runtime if convergence stalls).
const MAX_ITERATIONS: usize = 100;

/// Maximum callers/callees stored per file.
const MAX_NEIGHBORS: usize = 5;

/// Approximate characters per token for budget estimation (rough
/// heuristic; no tokenizer is consulted).
const CHARS_PER_TOKEN: usize = 4;

// ── Import Queries ───────────────────────────────────────────────────

93/// Compile a tree-sitter import query for the given extension.
94///
95/// Returns `None` for unsupported extensions.
96fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
97    let (lang, query_str): (tree_sitter::Language, &str) = match ext {
98        "rs" => (
99            tree_sitter_rust::LANGUAGE.into(),
100            "(use_declaration) @import",
101        ),
102        "py" => (
103            tree_sitter_python::LANGUAGE.into(),
104            concat!(
105                "(import_statement) @import\n",
106                "(import_from_statement) @import",
107            ),
108        ),
109        "js" | "jsx" => (
110            tree_sitter_javascript::LANGUAGE.into(),
111            "(import_statement source: (string) @import_path) @import",
112        ),
113        "ts" => (
114            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
115            "(import_statement source: (string) @import_path) @import",
116        ),
117        "tsx" => (
118            tree_sitter_typescript::LANGUAGE_TSX.into(),
119            "(import_statement source: (string) @import_path) @import",
120        ),
121        "go" => (
122            tree_sitter_go::LANGUAGE.into(),
123            "(import_spec path: (interpreted_string_literal) @import_path) @import",
124        ),
125        // Ruby: require statements.
126        "rb" => (
127            tree_sitter_ruby::LANGUAGE.into(),
128            "(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
129        ),
130        _ => return None,
131    };
132    let query = match Query::new(&lang, query_str) {
133        Ok(q) => q,
134        Err(e) => {
135            tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
136            return None;
137        }
138    };
139    Some((lang, query))
140}
141
142/// Extract import paths from source using tree-sitter.
143fn extract_imports(
144    source: &str,
145    lang: &tree_sitter::Language,
146    import_query: &Query,
147) -> Vec<String> {
148    let mut parser = Parser::new();
149    if parser.set_language(lang).is_err() {
150        return vec![];
151    }
152    let Some(tree) = parser.parse(source, None) else {
153        return vec![];
154    };
155
156    let mut cursor = QueryCursor::new();
157    let mut imports = Vec::new();
158    let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
159
160    while let Some(m) = matches.next() {
161        // Prefer @import_path capture (JS/TS/Go), fall back to full @import text
162        let mut import_path_text = None;
163        let mut import_text = None;
164
165        for cap in m.captures {
166            let cap_name = &import_query.capture_names()[cap.index as usize];
167            let text = &source[cap.node.start_byte()..cap.node.end_byte()];
168            if *cap_name == "import_path" {
169                import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
170            } else if *cap_name == "import" {
171                import_text = Some(text);
172            }
173        }
174
175        if let Some(path) = import_path_text {
176            imports.push(path.to_string());
177        } else if let Some(text) = import_text {
178            imports.push(text.to_string());
179        }
180    }
181
182    imports
183}
184
// ── Import Resolution ────────────────────────────────────────────────

/// Resolve a Rust `use` path to a file index in the file map.
///
/// Handles `crate::`, `self::`, and `super::` prefixes. External crate
/// imports are dropped (returns `None`).
///
/// Limitations visible in the logic below:
/// - only a single leading `super` is stripped (`super::super::x` will not
///   resolve);
/// - for grouped imports (`use a::{b, c}`) only the common prefix before
///   `{` participates in resolution;
/// - the `crate::` arm probes the real filesystem for `Cargo.toml`, so
///   results depend on the on-disk layout, not just `file_index`.
fn resolve_rust_import(
    raw: &str,
    file_path: &Path,
    root: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Extract the module path from `use crate::foo::bar;` or `use crate::foo::bar::Baz;`
    let trimmed = raw
        .trim()
        .trim_start_matches("use ")
        .trim_end_matches(';')
        .trim();

    // `split("::")` always yields at least one element; the emptiness check
    // below is defensive.
    let segments: Vec<&str> = trimmed.split("::").collect();
    if segments.is_empty() {
        return None;
    }

    // Determine the base directory and skip prefix segments
    let (base, skip) = match segments[0] {
        "crate" => {
            // Find the nearest Cargo.toml ancestor to determine the crate root.
            // In a workspace, `crate::foo` resolves relative to the crate's src/,
            // not the workspace root.
            let mut dir = file_path.parent();
            let crate_root = loop {
                match dir {
                    Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
                    Some(d) => dir = d.parent(),
                    None => break root.join("src"), // fallback
                }
            };
            (crate_root, 1)
        }
        "self" => {
            // `self::x` — sibling module in the same directory.
            let dir = file_path.parent()?;
            (dir.to_path_buf(), 1)
        }
        "super" => {
            // `super::x` — parent module's directory (one level only).
            let dir = file_path.parent()?.parent()?;
            (dir.to_path_buf(), 1)
        }
        // External crate — drop
        _ => return None,
    };

    // Build candidate paths from the remaining segments.
    // Try progressively shorter prefixes since the last segments
    // may be items (struct, fn) rather than modules.
    let path_segments = &segments[skip..];
    for end in (1..=path_segments.len()).rev() {
        let mut candidate = base.clone();
        for seg in &path_segments[..end] {
            // Strip glob patterns like `{Foo, Bar}`
            let clean = seg.split('{').next().unwrap_or(seg).trim();
            if !clean.is_empty() {
                candidate.push(clean);
            }
        }

        // Try file.rs
        let as_file = candidate.with_extension("rs");
        if let Some(&idx) = file_index.get(&as_file) {
            return Some(idx);
        }

        // Try dir/mod.rs
        let as_mod = candidate.join("mod.rs");
        if let Some(&idx) = file_index.get(&as_mod) {
            return Some(idx);
        }
    }

    None
}

267/// Resolve an import path to a file index based on file extension.
268fn resolve_import(
269    raw: &str,
270    ext: &str,
271    file_path: &Path,
272    root: &Path,
273    file_index: &HashMap<PathBuf, usize>,
274) -> Option<usize> {
275    match ext {
276        "rs" => resolve_rust_import(raw, file_path, root, file_index),
277        "py" => resolve_python_import(raw, root, file_index),
278        "js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
279        // Go imports use full package paths — skip local resolution
280        _ => None,
281    }
282}
283
/// Resolve a Python import to a file index.
///
/// Handles `import foo.bar` and `from foo.bar import baz` patterns by
/// mapping the dotted module path onto `root/foo/bar.py`, falling back to
/// the package form `root/foo/bar/__init__.py`.
fn resolve_python_import(
    raw: &str,
    root: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    // Pull out the dotted module path; anything that is not an import
    // statement resolves to nothing.
    let module = raw
        .strip_prefix("from ")
        .or_else(|| raw.strip_prefix("import "))?
        .split_whitespace()
        .next()?;

    // `a.b.c` → `a/b/c`
    let rel: PathBuf = module.split('.').collect();

    file_index
        .get(&root.join(&rel).with_extension("py"))
        .or_else(|| file_index.get(&root.join(&rel).join("__init__.py")))
        .copied()
}

/// Resolve a JS/TS import to a file index.
///
/// Handles relative paths like `./foo` or `../bar`; bare (package)
/// specifiers return `None`. Tries `<candidate>.{js,jsx,ts,tsx}` first,
/// then `<candidate>/index.{js,jsx,ts,tsx}`.
fn resolve_js_import(
    raw: &str,
    file_path: &Path,
    file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
    if !raw.starts_with('.') {
        return None;
    }

    let dir = file_path.parent()?;

    // Normalize `.` and `..` components by hand. `Path` equality/hashing
    // treats `a/./b` as `a/b` but keeps `a/x/../b` distinct from `a/b`, so
    // a plain `dir.join(raw)` would never match the keys in `file_index`
    // for `../` imports.
    let mut candidate = dir.to_path_buf();
    for comp in Path::new(raw).components() {
        match comp {
            Component::CurDir => {}
            Component::ParentDir => {
                candidate.pop();
            }
            Component::Normal(seg) => candidate.push(seg),
            // Relative specifiers never carry a root or drive prefix.
            Component::RootDir | Component::Prefix(_) => {}
        }
    }

    // `./foo` style: try foo.js / foo.jsx / foo.ts / foo.tsx.
    // NOTE: `with_extension` replaces an existing final extension
    // (`./foo.test` probes `foo.js`) — matches the original lookup behavior.
    for ext in &["js", "jsx", "ts", "tsx"] {
        let with_ext = candidate.with_extension(ext);
        if let Some(&idx) = file_index.get(&with_ext) {
            return Some(idx);
        }
    }

    // `./dir` style: try dir/index.js etc.
    for ext in &["js", "jsx", "ts", "tsx"] {
        let index_file = candidate.join("index").with_extension(ext);
        if let Some(&idx) = file_index.get(&index_file) {
            return Some(idx);
        }
    }

    None
}

// ── Extraction ───────────────────────────────────────────────────────

344/// Extract definitions from a source file using tree-sitter.
345fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
346    let mut parser = Parser::new();
347    if parser.set_language(&config.language).is_err() {
348        return vec![];
349    }
350    let Some(tree) = parser.parse(source, None) else {
351        return vec![];
352    };
353
354    let mut cursor = QueryCursor::new();
355    let mut defs = Vec::new();
356    let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
357
358    while let Some(m) = matches.next() {
359        let mut name = String::new();
360        let mut def_node = None;
361
362        for cap in m.captures {
363            let cap_name = &config.query.capture_names()[cap.index as usize];
364            if *cap_name == "name" {
365                name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
366            } else if *cap_name == "def" {
367                def_node = Some(cap.node);
368            }
369        }
370
371        if let Some(node) = def_node {
372            let scope = crate::chunk::build_scope_chain(node, source);
373            let signature = crate::chunk::extract_signature(node, source);
374            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
375            let start_line = node.start_position().row as u32 + 1;
376            #[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
377            let end_line = node.end_position().row as u32 + 1;
378            defs.push(Definition {
379                name,
380                kind: node.kind().to_string(),
381                start_line,
382                end_line,
383                scope,
384                signature,
385            });
386        }
387    }
388
389    defs
390}
391
// ── PageRank ─────────────────────────────────────────────────────────

/// Compute `PageRank` scores for a graph.
///
/// If `focus` is `Some(idx)`, computes topic-sensitive `PageRank` biased
/// toward file `idx`. Otherwise computes standard (uniform) `PageRank`.
///
/// `edges` entries are `(src, dst, weight)`; entries whose endpoints fall
/// outside `0..n` are ignored. Iterates at most [`MAX_ITERATIONS`] times or
/// until the L1 change between iterations drops below [`EPSILON`].
///
/// Returns one score per node, summing to 1.0.
#[expect(
    clippy::cast_precision_loss,
    reason = "node count fits comfortably in f32"
)]
fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
    if n == 0 {
        return vec![];
    }

    // Build adjacency: out_edges[src] = [(dst, weight)]
    let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
    let mut out_weight: Vec<f32> = vec![0.0; n];

    for &(src, dst, w) in edges {
        let (s, d) = (src as usize, dst as usize);
        if s < n && d < n {
            #[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
            let wf = f64::from(w) as f32;
            out_edges[s].push((d, wf));
            out_weight[s] += wf;
        }
    }

    // Personalization vector: for topic-sensitive PageRank, blend
    // 70% focus on the target file with 30% uniform. Pure focus
    // (100%) starves unreachable nodes to rank=0 in sparse graphs.
    let bias: Vec<f32> = if let Some(idx) = focus {
        let uniform = 1.0 / n as f32;
        let mut b = vec![0.3 * uniform; n];
        if idx < n {
            b[idx] += 0.7;
        }
        // Normalize to sum=1 (also handles an out-of-range focus index,
        // which degrades to a uniform vector).
        let sum: f32 = b.iter().sum();
        for v in &mut b {
            *v /= sum;
        }
        b
    } else {
        vec![1.0 / n as f32; n]
    };

    // Start from the uniform distribution.
    let mut rank = vec![1.0 / n as f32; n];
    let mut next_rank = vec![0.0_f32; n];

    for _ in 0..MAX_ITERATIONS {
        // Collect dangling mass (nodes with no outgoing edges)
        let dangling: f32 = rank
            .iter()
            .enumerate()
            .filter(|&(i, _)| out_edges[i].is_empty())
            .map(|(_, &r)| r)
            .sum();

        // Distribute rank: teleport term plus the dangling mass, both
        // allocated according to the personalization vector.
        for (i, nr) in next_rank.iter_mut().enumerate() {
            *nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
        }

        // Push each node's rank along its out-edges, proportionally to
        // edge weight.
        for (src, edges_list) in out_edges.iter().enumerate() {
            if edges_list.is_empty() {
                continue;
            }
            let src_rank = rank[src];
            let total_w = out_weight[src];
            for &(dst, w) in edges_list {
                next_rank[dst] += DAMPING * src_rank * (w / total_w);
            }
        }

        // Check convergence (L1 distance between successive vectors).
        let diff: f32 = rank
            .iter()
            .zip(next_rank.iter())
            .map(|(a, b)| (a - b).abs())
            .sum();

        // Swap before the break so `rank` always holds the newest vector.
        std::mem::swap(&mut rank, &mut next_rank);

        if diff < EPSILON {
            break;
        }
    }

    rank
}

// ── Graph Building ───────────────────────────────────────────────────

/// Build a dependency graph from a repository root.
///
/// Walks the directory tree, parses each supported file with tree-sitter,
/// extracts definitions and imports, resolves import paths to files, runs
/// `PageRank`, and builds caller/callee lists.
///
/// # Errors
///
/// Returns an error if canonicalizing `root` fails. Unreadable files are
/// skipped rather than treated as errors.
pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
    // Canonicalize so the walked paths and `file_index` keys agree exactly.
    let root = root.canonicalize().map_err(|e| crate::Error::Io {
        path: root.display().to_string(),
        source: e,
    })?;

    let all_files = walk::collect_files(&root, None);

    // Build file index mapping canonical paths to indices
    let mut file_index: HashMap<PathBuf, usize> = HashMap::new();
    let mut files: Vec<FileNode> = Vec::new();
    let mut raw_sources: Vec<(usize, String, String)> = Vec::new(); // (idx, ext, source)

    // Pass 1: register every supported, readable file so that import
    // resolution in pass 2 can consult the complete index.
    for path in &all_files {
        let ext = path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or_default()
            .to_string();

        // Only process files with known language support
        if languages::config_for_extension(&ext).is_none()
            && import_query_for_extension(&ext).is_none()
        {
            continue;
        }

        let Ok(source) = std::fs::read_to_string(path) else {
            continue; // Skip binary/unreadable files
        };

        let rel_path = path
            .strip_prefix(&root)
            .unwrap_or(path)
            .display()
            .to_string();

        let idx = files.len();
        file_index.insert(path.clone(), idx);
        files.push(FileNode {
            path: rel_path,
            defs: vec![],
            imports: vec![],
        });
        raw_sources.push((idx, ext, source));
    }

    // Pass 2: extract definitions and imports
    for (idx, ext, source) in &raw_sources {
        // Extract definitions
        if let Some(config) = languages::config_for_extension(ext) {
            files[*idx].defs = extract_definitions(source, &config);
        }

        // Extract imports
        if let Some((lang, import_query)) = import_query_for_extension(ext) {
            let raw_imports = extract_imports(source, &lang, &import_query);
            // Reconstruct the absolute path for relative-import resolution.
            let file_path = root.join(&files[*idx].path);

            files[*idx].imports = raw_imports
                .into_iter()
                .map(|raw| {
                    let resolved_idx = resolve_import(&raw, ext, &file_path, &root, &file_index)
                        .and_then(|i| u32::try_from(i).ok());
                    ImportRef {
                        raw_path: raw,
                        resolved_idx,
                    }
                })
                .collect();
        }
    }

    // Build edge list from resolved imports; weight = number of imports
    // from the same importer resolving to the same definer.
    let mut edge_map: HashMap<(u32, u32), u32> = HashMap::new();
    for (importer_idx, file) in files.iter().enumerate() {
        for import in &file.imports {
            if let Some(definer_idx) = import.resolved_idx
                && let Ok(src) = u32::try_from(importer_idx)
            {
                *edge_map.entry((src, definer_idx)).or_insert(0) += 1;
            }
        }
    }
    let edges: Vec<(u32, u32, u32)> = edge_map
        .into_iter()
        .map(|((src, dst), w)| (src, dst, w))
        .collect();

    // Compute PageRank
    let n = files.len();
    let base_ranks = pagerank(n, &edges, None);

    // Build caller/callee lists
    let (inbound, outbound) = build_neighbor_lists(n, &edges);

    // Auto-tune alpha based on graph density:
    // alpha = 0.5 + 0.3 * min(density, 1.0), density = |E| / (n * (n - 1)).
    #[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
    let density = if n > 1 {
        edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
    } else {
        0.0
    };
    let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);

    Ok(RepoGraph {
        files,
        edges,
        base_ranks,
        callers: inbound,
        callees: outbound,
        alpha,
    })
}

613/// Build top-N caller and callee lists for each file.
614fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
615    let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
616    let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
617
618    for &(src, dst, w) in edges {
619        let (s, d) = (src as usize, dst as usize);
620        if s < n && d < n {
621            incoming[d].push((src, w));
622            outgoing[s].push((dst, w));
623        }
624    }
625
626    // Sort by weight descending, keep top N
627    let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
628        lists
629            .iter_mut()
630            .map(|list| {
631                list.sort_by(|a, b| b.1.cmp(&a.1));
632                list.iter()
633                    .take(MAX_NEIGHBORS)
634                    .map(|(idx, _)| *idx)
635                    .collect()
636            })
637            .collect()
638    };
639
640    (trim(&mut incoming), trim(&mut outgoing))
641}
642
// ── Rendering ────────────────────────────────────────────────────────

/// Render a budget-constrained overview of the repository.
///
/// Files are sorted by `PageRank` (or topic-sensitive rank if `focus` is
/// `Some`). Output uses four tiers of decreasing detail:
///
/// - **Tier 0** (top 10%): full path, rank, callers/callees, signatures with scopes
/// - **Tier 1** (next 20%): full path, rank, signatures
/// - **Tier 2** (next 40%): full path, rank, definition names and kinds
/// - **Tier 3** (bottom 30%): file path only
///
/// Stops accumulating output when the estimated token count exceeds
/// `max_tokens`. Tokens are estimated as `chars / CHARS_PER_TOKEN`; no real
/// tokenizer is consulted.
#[must_use]
pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
    let n = graph.files.len();
    if n == 0 {
        return String::new();
    }

    // Compute ranks (recompute topic-sensitive if focus is given)
    let ranks = if focus.is_some() {
        pagerank(n, &graph.edges, focus)
    } else {
        graph.base_ranks.clone()
    };

    // Sort file indices by rank descending
    let mut sorted: Vec<usize> = (0..n).collect();
    sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));

    let mut output = String::new();
    let mut used_tokens = 0;
    let max_chars = max_tokens * CHARS_PER_TOKEN;

    for (rank_pos, &file_idx) in sorted.iter().enumerate() {
        if used_tokens >= max_tokens {
            break;
        }

        let file = &graph.files[file_idx];
        let score = ranks[file_idx];
        // Tier is chosen by rank percentile, not by absolute score.
        #[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
        let percentile = (rank_pos as f32) / (n as f32);

        let section = if percentile < 0.1 {
            render_tier0(graph, file_idx, file, score)
        } else if percentile < 0.3 {
            render_tier1(file, score)
        } else if percentile < 0.7 {
            render_tier2(file, score)
        } else {
            render_tier3(file)
        };

        // The `used_tokens > 0` guard deliberately exempts the first file:
        // even a tiny budget emits at least one full section.
        let section_chars = section.len();
        if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
            // Would exceed budget — try to fit at least the path
            let path_line = format!("{}\n", file.path);
            let path_tokens = path_line.len() / CHARS_PER_TOKEN;
            if used_tokens + path_tokens <= max_tokens {
                output.push_str(&path_line);
            }
            break;
        }

        output.push_str(&section);
        // Re-estimate from total output length, capped at the budget so the
        // `>=` check at the top of the loop terminates cleanly.
        used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
    }

    output
}

717/// Render tier 0: full detail with callers, callees, and signatures.
718fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
719    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
720
721    // Callers
722    if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
723        let _ = write!(out, "  called by: ");
724        let names: Vec<&str> = graph.callers[file_idx]
725            .iter()
726            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
727            .collect();
728        let _ = writeln!(out, "{}", names.join(", "));
729    }
730
731    // Callees
732    if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
733        let _ = write!(out, "  calls: ");
734        let names: Vec<&str> = graph.callees[file_idx]
735            .iter()
736            .filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
737            .collect();
738        let _ = writeln!(out, "{}", names.join(", "));
739    }
740
741    // Definitions with scope and signature
742    for def in &file.defs {
743        let scope_prefix = if def.scope.is_empty() {
744            String::new()
745        } else {
746            format!("{} > ", def.scope)
747        };
748        if let Some(sig) = &def.signature {
749            let _ = writeln!(out, "  {scope_prefix}{} {sig}", def.kind);
750        } else {
751            let _ = writeln!(out, "  {scope_prefix}{} {}", def.kind, def.name);
752        }
753    }
754    let _ = writeln!(out);
755    out
756}
757
758/// Render tier 1: file path, rank, and signatures.
759fn render_tier1(file: &FileNode, score: f32) -> String {
760    let mut out = format!("## {} (rank: {score:.4})\n", file.path);
761    for def in &file.defs {
762        if let Some(sig) = &def.signature {
763            let _ = writeln!(out, "  {sig}");
764        } else {
765            let _ = writeln!(out, "  {} {}", def.kind, def.name);
766        }
767    }
768    let _ = writeln!(out);
769    out
770}
771
772/// Render tier 2: file path, rank, and definition names/kinds.
773fn render_tier2(file: &FileNode, score: f32) -> String {
774    let mut out = format!("{} (rank: {score:.4})", file.path);
775    if !file.defs.is_empty() {
776        let names: Vec<String> = file
777            .defs
778            .iter()
779            .map(|d| format!("{}:{}", d.kind, d.name))
780            .collect();
781        let _ = write!(out, " -- {}", names.join(", "));
782    }
783    let _ = writeln!(out);
784    out
785}
786
787/// Render tier 3: file path only.
788fn render_tier3(file: &FileNode) -> String {
789    format!("{}\n", file.path)
790}
791
// ── Tests ────────────────────────────────────────────────────────────

794#[cfg(test)]
795mod tests {
796    use super::*;
797
798    #[test]
799    fn test_pagerank_simple() {
800        // 3-node graph: 0 -> 1 -> 2, 2 -> 0 (cycle)
801        let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
802        let ranks = pagerank(3, &edges, None);
803
804        // All nodes in a symmetric cycle should have equal rank
805        assert_eq!(ranks.len(), 3);
806        let sum: f32 = ranks.iter().sum();
807        assert!(
808            (sum - 1.0).abs() < 0.01,
809            "ranks should sum to ~1.0, got {sum}"
810        );
811
812        // In a perfect cycle, all ranks should be approximately equal
813        let expected = 1.0 / 3.0;
814        for (i, &r) in ranks.iter().enumerate() {
815            assert!(
816                (r - expected).abs() < 0.05,
817                "rank[{i}] = {r}, expected ~{expected}"
818            );
819        }
820    }
821
822    #[test]
823    fn test_pagerank_star() {
824        // Star graph: 0,1,2 all point to 3
825        let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
826        let ranks = pagerank(4, &edges, None);
827
828        assert_eq!(ranks.len(), 4);
829        // Node 3 should have the highest rank
830        let max_idx = ranks
831            .iter()
832            .enumerate()
833            .max_by(|a, b| a.1.total_cmp(b.1))
834            .unwrap()
835            .0;
836        assert_eq!(max_idx, 3, "node 3 should have highest rank");
837        assert!(
838            ranks[3] > ranks[0],
839            "rank[3]={} should be > rank[0]={}",
840            ranks[3],
841            ranks[0]
842        );
843    }
844
845    #[test]
846    fn test_pagerank_topic_sensitive() {
847        // 3-node chain: 0 -> 1 -> 2
848        let edges = vec![(0, 1, 1), (1, 2, 1)];
849        let uniform_ranks = pagerank(3, &edges, None);
850        let biased_ranks = pagerank(3, &edges, Some(0));
851
852        // With focus on node 0, it should get a higher rank than uniform
853        assert!(
854            biased_ranks[0] > uniform_ranks[0],
855            "focused rank[0]={} should be > uniform rank[0]={}",
856            biased_ranks[0],
857            uniform_ranks[0]
858        );
859    }
860
861    #[test]
862    fn test_pagerank_empty() {
863        let ranks = pagerank(0, &[], None);
864        assert!(ranks.is_empty());
865    }
866
867    #[test]
868    fn test_render_tiers() {
869        // Build a small graph with 10 files to exercise all tiers
870        let files: Vec<FileNode> = (0..10)
871            .map(|i| FileNode {
872                path: format!("src/file_{i}.rs"),
873                defs: vec![Definition {
874                    name: format!("func_{i}"),
875                    kind: "function_item".to_string(),
876                    start_line: 1,
877                    end_line: 5,
878                    scope: String::new(),
879                    signature: Some(format!("func_{i}(x: i32) -> i32")),
880                }],
881                imports: vec![],
882            })
883            .collect();
884
885        // Create a star graph: files 1-9 all import from file 0
886        let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
887        let base_ranks = pagerank(10, &edges, None);
888        let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
889
890        let graph = RepoGraph {
891            files,
892            edges,
893            base_ranks,
894            callers: top_callers,
895            callees: top_callees,
896            alpha: 0.5,
897        };
898
899        // Large budget: should include all files
900        let full = render(&graph, 10_000, None);
901        assert!(
902            full.contains("file_0"),
903            "output should contain the top-ranked file"
904        );
905        // file_0 should appear as tier 0 (highest rank)
906        assert!(
907            full.contains("## src/file_0.rs"),
908            "top file should have tier 0 heading"
909        );
910
911        // Tiny budget: should only fit a few files
912        let small = render(&graph, 10, None);
913        assert!(
914            !small.is_empty(),
915            "even tiny budget should produce some output"
916        );
917        // Should have fewer entries than full render
918        let full_lines = full.lines().count();
919        let small_lines = small.lines().count();
920        assert!(
921            small_lines < full_lines,
922            "small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
923        );
924    }
925
926    #[test]
927    fn test_render_empty_graph() {
928        let graph = RepoGraph {
929            files: vec![],
930            edges: vec![],
931            base_ranks: vec![],
932            callers: vec![],
933            callees: vec![],
934            alpha: 0.5,
935        };
936        let output = render(&graph, 1000, None);
937        assert!(output.is_empty(), "empty graph should render empty string");
938    }
939
940    #[test]
941    fn test_build_graph_on_fixtures() {
942        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
943            .parent()
944            .unwrap()
945            .parent()
946            .unwrap()
947            .join("tests")
948            .join("fixtures");
949
950        let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
951
952        // Should find at least the 3 fixture files
953        assert!(
954            !graph.files.is_empty(),
955            "graph should contain files from fixtures"
956        );
957
958        // Should find definitions in the Rust fixture
959        let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
960        assert!(rs_file.is_some(), "should find sample.rs");
961        let rs_file = rs_file.unwrap();
962        assert!(
963            !rs_file.defs.is_empty(),
964            "sample.rs should have definitions"
965        );
966        assert!(
967            rs_file.defs.iter().any(|d| d.name == "hello"),
968            "should find 'hello' function in sample.rs"
969        );
970
971        // Should find definitions in the Python fixture
972        let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
973        assert!(py_file.is_some(), "should find sample.py");
974        let py_file = py_file.unwrap();
975        assert!(
976            !py_file.defs.is_empty(),
977            "sample.py should have definitions"
978        );
979        assert!(
980            py_file.defs.iter().any(|d| d.name == "greet"),
981            "should find 'greet' function in sample.py"
982        );
983
984        // PageRank scores should be computed
985        assert_eq!(graph.base_ranks.len(), graph.files.len());
986        let sum: f32 = graph.base_ranks.iter().sum();
987        assert!(
988            (sum - 1.0).abs() < 0.01,
989            "PageRank scores should sum to ~1.0, got {sum}"
990        );
991    }
992
993    #[test]
994    fn test_extract_imports_rust() {
995        let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
996        let (lang, query) = import_query_for_extension("rs").unwrap();
997        let imports = extract_imports(source, &lang, &query);
998        assert_eq!(imports.len(), 2);
999        assert!(imports[0].contains("crate::foo::bar"));
1000    }
1001
1002    #[test]
1003    fn test_resolve_rust_crate_import() {
1004        let root = PathBuf::from("/project");
1005        let file_path = PathBuf::from("/project/src/main.rs");
1006        let mut file_index = HashMap::new();
1007        file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
1008        file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
1009
1010        let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
1011        assert_eq!(result, Some(1));
1012    }
1013
1014    #[test]
1015    fn test_resolve_rust_external_crate_dropped() {
1016        let root = PathBuf::from("/project");
1017        let file_path = PathBuf::from("/project/src/main.rs");
1018        let file_index = HashMap::new();
1019
1020        let result = resolve_rust_import(
1021            "use std::collections::HashMap;",
1022            &file_path,
1023            &root,
1024            &file_index,
1025        );
1026        assert_eq!(result, None, "external crate imports should be dropped");
1027    }
1028
1029    #[test]
1030    fn test_neighbor_lists() {
1031        // 0 -> 1, 0 -> 2, 1 -> 2
1032        let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
1033        let (incoming, outgoing) = build_neighbor_lists(3, &edges);
1034
1035        // Node 2 should be called by 0 and 1
1036        assert!(incoming[2].contains(&0));
1037        assert!(incoming[2].contains(&1));
1038
1039        // Node 0 should call 1 and 2
1040        assert!(outgoing[0].contains(&1));
1041        assert!(outgoing[0].contains(&2));
1042    }
1043
1044    #[test]
1045    #[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
1046    fn test_full_repo_map() {
1047        use std::time::Instant;
1048
1049        let root = Path::new(env!("CARGO_MANIFEST_DIR"))
1050            .parent()
1051            .unwrap()
1052            .parent()
1053            .unwrap();
1054
1055        // Phase 1: build_graph (walk + parse + import resolve + PageRank)
1056        let t0 = Instant::now();
1057        let graph = build_graph(root).expect("build_graph on ripvec root");
1058        let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
1059
1060        // Phase 2: render (default, no focus)
1061        let t1 = Instant::now();
1062        let rendered = render(&graph, 2000, None);
1063        let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
1064
1065        // Phase 3: render (topic-sensitive, focused on highest-ranked file)
1066        let t2 = Instant::now();
1067        let focus_idx = graph
1068            .base_ranks
1069            .iter()
1070            .enumerate()
1071            .max_by(|a, b| a.1.total_cmp(b.1))
1072            .map(|(i, _)| i);
1073        let focused = render(&graph, 2000, focus_idx);
1074        let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
1075
1076        eprintln!("\n=== Repo Map Performance ===");
1077        eprintln!(
1078            "Files: {}, Edges: {}, Defs: {}",
1079            graph.files.len(),
1080            graph.edges.len(),
1081            graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
1082        );
1083        eprintln!("build_graph:     {build_ms:.1}ms (walk + parse + resolve + PageRank)");
1084        eprintln!(
1085            "render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
1086            rendered.len(),
1087            rendered.len() / 4
1088        );
1089        eprintln!(
1090            "render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
1091            focused.len(),
1092            focused.len() / 4
1093        );
1094
1095        eprintln!("\nTop 5 by PageRank:");
1096        let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
1097        ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
1098        for (i, rank) in ranked.iter().take(5) {
1099            eprintln!("  {:.4} {}", rank, graph.files[*i].path);
1100        }
1101
1102        eprintln!("\n=== Default Render ===\n{rendered}");
1103        eprintln!(
1104            "\n=== Focused Render (on {}) ===\n{focused}",
1105            focus_idx
1106                .map(|i| graph.files[i].path.as_str())
1107                .unwrap_or("none")
1108        );
1109    }
1110}