use std::collections::HashMap;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor};
use crate::languages;
use crate::walk;
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct RepoGraph {
pub files: Vec<FileNode>,
pub edges: Vec<(u32, u32, u32)>,
pub base_ranks: Vec<f32>,
pub callers: Vec<Vec<u32>>,
pub callees: Vec<Vec<u32>>,
pub def_edges: Vec<(DefId, DefId, u32)>,
pub def_ranks: Vec<f32>,
pub def_callers: Vec<Vec<DefId>>,
pub def_callees: Vec<Vec<DefId>>,
pub def_offsets: Vec<usize>,
pub alpha: f32,
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct FileNode {
pub path: String,
pub defs: Vec<Definition>,
pub imports: Vec<ImportRef>,
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct Definition {
pub name: String,
pub kind: String,
pub start_line: u32,
pub end_line: u32,
pub scope: String,
pub signature: Option<String>,
pub start_byte: u32,
pub end_byte: u32,
pub calls: Vec<CallRef>,
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ImportRef {
pub raw_path: String,
pub resolved_idx: Option<u32>,
}
pub type DefId = (u32, u16);
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct CallRef {
pub name: String,
pub byte_offset: u32,
pub resolved: Option<DefId>,
}
const DAMPING: f32 = 0.85;
const EPSILON: f32 = 1e-6;
const MAX_ITERATIONS: usize = 100;
const MAX_NEIGHBORS: usize = 5;
const CHARS_PER_TOKEN: usize = 4;
fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
let (lang, query_str): (tree_sitter::Language, &str) = match ext {
"rs" => (
tree_sitter_rust::LANGUAGE.into(),
"(use_declaration) @import",
),
"py" | "pyi" => (
tree_sitter_python::LANGUAGE.into(),
concat!(
"(import_statement) @import\n",
"(import_from_statement) @import",
),
),
"js" | "jsx" => (
tree_sitter_javascript::LANGUAGE.into(),
"(import_statement source: (string) @import_path) @import",
),
"ts" => (
tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
"(import_statement source: (string) @import_path) @import",
),
"tsx" => (
tree_sitter_typescript::LANGUAGE_TSX.into(),
"(import_statement source: (string) @import_path) @import",
),
"go" => (
tree_sitter_go::LANGUAGE.into(),
"(import_spec path: (interpreted_string_literal) @import_path) @import",
),
"rb" => (
tree_sitter_ruby::LANGUAGE.into(),
"(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
),
_ => return None,
};
let query = match Query::new(&lang, query_str) {
Ok(q) => q,
Err(e) => {
tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
return None;
}
};
Some((lang, query))
}
fn extract_imports(
source: &str,
lang: &tree_sitter::Language,
import_query: &Query,
) -> Vec<String> {
let mut parser = Parser::new();
if parser.set_language(lang).is_err() {
return vec![];
}
let Some(tree) = parser.parse(source, None) else {
return vec![];
};
let mut cursor = QueryCursor::new();
let mut imports = Vec::new();
let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut import_path_text = None;
let mut import_text = None;
for cap in m.captures {
let cap_name = &import_query.capture_names()[cap.index as usize];
let text = &source[cap.node.start_byte()..cap.node.end_byte()];
if *cap_name == "import_path" {
import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
} else if *cap_name == "import" {
import_text = Some(text);
}
}
if let Some(path) = import_path_text {
imports.push(path.to_string());
} else if let Some(text) = import_text {
imports.push(text.to_string());
}
}
imports
}
fn resolve_rust_import(
raw: &str,
file_path: &Path,
root: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
let trimmed = raw
.trim()
.trim_start_matches("use ")
.trim_end_matches(';')
.trim();
let segments: Vec<&str> = trimmed.split("::").collect();
if segments.is_empty() {
return None;
}
let (base, skip) = match segments[0] {
"crate" => {
let mut dir = file_path.parent();
let crate_root = loop {
match dir {
Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
Some(d) => dir = d.parent(),
None => break root.join("src"), }
};
(crate_root, 1)
}
"self" => {
let dir = file_path.parent()?;
(dir.to_path_buf(), 1)
}
"super" => {
let dir = file_path.parent()?.parent()?;
(dir.to_path_buf(), 1)
}
_ => return None,
};
let path_segments = &segments[skip..];
for end in (1..=path_segments.len()).rev() {
let mut candidate = base.clone();
for seg in &path_segments[..end] {
let clean = seg.split('{').next().unwrap_or(seg).trim();
if !clean.is_empty() {
candidate.push(clean);
}
}
let as_file = candidate.with_extension("rs");
if let Some(&idx) = file_index.get(&as_file) {
return Some(idx);
}
let as_mod = candidate.join("mod.rs");
if let Some(&idx) = file_index.get(&as_mod) {
return Some(idx);
}
}
None
}
fn resolve_import(
raw: &str,
ext: &str,
file_path: &Path,
root: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
match ext {
"rs" => resolve_rust_import(raw, file_path, root, file_index),
"py" | "pyi" => resolve_python_import(raw, root, file_index),
"js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
_ => None,
}
}
fn resolve_python_import(
raw: &str,
root: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
let module_path = if let Some(rest) = raw.strip_prefix("from ") {
rest.split_whitespace().next()?
} else if let Some(rest) = raw.strip_prefix("import ") {
rest.split_whitespace().next()?
} else {
return None;
};
let rel_path: PathBuf = module_path.split('.').collect();
for ext in ["py", "pyi"] {
let as_file = root.join(&rel_path).with_extension(ext);
if let Some(&idx) = file_index.get(&as_file) {
return Some(idx);
}
}
for init_name in ["__init__.py", "__init__.pyi"] {
let as_init = root.join(&rel_path).join(init_name);
if let Some(&idx) = file_index.get(&as_init) {
return Some(idx);
}
}
None
}
fn resolve_js_import(
raw: &str,
file_path: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
if !raw.starts_with('.') {
return None;
}
let dir = file_path.parent()?;
let candidate = dir.join(raw);
for ext in &["js", "jsx", "ts", "tsx"] {
let with_ext = candidate.with_extension(ext);
if let Some(&idx) = file_index.get(&with_ext) {
return Some(idx);
}
}
for ext in &["js", "jsx", "ts", "tsx"] {
let index_file = candidate.join("index").with_extension(ext);
if let Some(&idx) = file_index.get(&index_file) {
return Some(idx);
}
}
None
}
fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
let mut parser = Parser::new();
if parser.set_language(&config.language).is_err() {
return vec![];
}
let Some(tree) = parser.parse(source, None) else {
return vec![];
};
let mut cursor = QueryCursor::new();
let mut defs = Vec::new();
let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut name = String::new();
let mut def_node = None;
for cap in m.captures {
let cap_name = &config.query.capture_names()[cap.index as usize];
if *cap_name == "name" {
name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
} else if *cap_name == "def" {
def_node = Some(cap.node);
}
}
if let Some(node) = def_node {
let scope = crate::chunk::build_scope_chain(node, source);
let signature = crate::chunk::extract_signature(node, source);
#[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
let start_line = node.start_position().row as u32 + 1;
#[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
let end_line = node.end_position().row as u32 + 1;
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let start_byte = node.start_byte() as u32;
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let end_byte = node.end_byte() as u32;
defs.push(Definition {
name,
kind: node.kind().to_string(),
start_line,
end_line,
scope,
signature,
start_byte,
end_byte,
calls: vec![],
});
}
}
defs
}
fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
let mut parser = Parser::new();
if parser.set_language(&call_config.language).is_err() {
return;
}
let Some(tree) = parser.parse(source, None) else {
return;
};
let mut cursor = QueryCursor::new();
let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut callee_name = None;
let mut call_byte = 0u32;
for cap in m.captures {
let cap_name = &call_config.query.capture_names()[cap.index as usize];
if *cap_name == "callee" {
callee_name = Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
{
call_byte = cap.node.start_byte() as u32;
}
}
}
if let Some(name) = callee_name {
if let Some(def) = defs
.iter_mut()
.find(|d| d.start_byte <= call_byte && call_byte < d.end_byte)
{
if def.name != name {
def.calls.push(CallRef {
name,
byte_offset: call_byte,
resolved: None,
});
}
}
}
}
}
fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
for (file_idx, file) in files.iter().enumerate() {
for (def_idx, def) in file.defs.iter().enumerate() {
#[expect(clippy::cast_possible_truncation)]
let did: DefId = (file_idx as u32, def_idx as u16);
index.entry(def.name.clone()).or_default().push(did);
}
}
index
}
fn resolve_calls(files: &mut [FileNode], def_index: &HashMap<String, Vec<DefId>>) {
let imported_files: Vec<std::collections::HashSet<u32>> = files
.iter()
.map(|f| {
f.imports
.iter()
.filter_map(|imp| imp.resolved_idx)
.collect()
})
.collect();
for file_idx in 0..files.len() {
for def_idx in 0..files[file_idx].defs.len() {
for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
let Some(candidates) = def_index.get(&call_name) else {
continue;
};
#[expect(clippy::cast_possible_truncation)]
let file_idx_u32 = file_idx as u32;
if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
continue;
}
if let Some(&did) = candidates
.iter()
.find(|(f, _)| imported_files[file_idx].contains(f))
{
files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
}
}
}
}
}
fn def_offsets(files: &[FileNode]) -> Vec<usize> {
let mut offsets = Vec::with_capacity(files.len() + 1);
offsets.push(0);
for file in files {
offsets.push(offsets.last().unwrap() + file.defs.len());
}
offsets
}
fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
offsets[did.0 as usize] + did.1 as usize
}
fn build_def_neighbor_lists(
n: usize,
edges: &[(u32, u32, u32)],
offsets: &[usize],
) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
for &(src, dst, w) in edges {
let (s, d) = (src as usize, dst as usize);
if s < n && d < n {
incoming[d].push((src, w));
outgoing[s].push((dst, w));
}
}
let to_def_id = |flat: u32| -> DefId {
let flat_usize = flat as usize;
let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
let def_idx = flat_usize - offsets[file_idx];
#[expect(clippy::cast_possible_truncation)]
(file_idx as u32, def_idx as u16)
};
let callers = incoming
.into_iter()
.map(|mut v| {
v.sort_by_key(|b| std::cmp::Reverse(b.1));
v.truncate(MAX_NEIGHBORS);
v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
})
.collect();
let callees = outgoing
.into_iter()
.map(|mut v| {
v.sort_by_key(|b| std::cmp::Reverse(b.1));
v.truncate(MAX_NEIGHBORS);
v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
})
.collect();
(callers, callees)
}
#[expect(
clippy::cast_precision_loss,
reason = "node count fits comfortably in f32"
)]
fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
if n == 0 {
return vec![];
}
let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
let mut out_weight: Vec<f32> = vec![0.0; n];
for &(src, dst, w) in edges {
let (s, d) = (src as usize, dst as usize);
if s < n && d < n {
#[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
let wf = f64::from(w) as f32;
out_edges[s].push((d, wf));
out_weight[s] += wf;
}
}
let bias: Vec<f32> = if let Some(idx) = focus {
let uniform = 1.0 / n as f32;
let mut b = vec![0.3 * uniform; n];
if idx < n {
b[idx] += 0.7;
}
let sum: f32 = b.iter().sum();
for v in &mut b {
*v /= sum;
}
b
} else {
vec![1.0 / n as f32; n]
};
let mut rank = vec![1.0 / n as f32; n];
let mut next_rank = vec![0.0_f32; n];
for _ in 0..MAX_ITERATIONS {
let dangling: f32 = rank
.iter()
.enumerate()
.filter(|&(i, _)| out_edges[i].is_empty())
.map(|(_, &r)| r)
.sum();
for (i, nr) in next_rank.iter_mut().enumerate() {
*nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
}
for (src, edges_list) in out_edges.iter().enumerate() {
if edges_list.is_empty() {
continue;
}
let src_rank = rank[src];
let total_w = out_weight[src];
for &(dst, w) in edges_list {
next_rank[dst] += DAMPING * src_rank * (w / total_w);
}
}
let diff: f32 = rank
.iter()
.zip(next_rank.iter())
.map(|(a, b)| (a - b).abs())
.sum();
std::mem::swap(&mut rank, &mut next_rank);
if diff < EPSILON {
break;
}
}
rank
}
struct DefGraphData {
def_edges: Vec<(DefId, DefId, u32)>,
def_ranks: Vec<f32>,
def_callers: Vec<Vec<DefId>>,
def_callees: Vec<Vec<DefId>>,
offsets: Vec<usize>,
base_ranks: Vec<f32>,
file_edges: Vec<(u32, u32, u32)>,
}
fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
for (file_idx, file) in files.iter().enumerate() {
for (def_idx, def) in file.defs.iter().enumerate() {
#[expect(clippy::cast_possible_truncation)]
let caller_id: DefId = (file_idx as u32, def_idx as u16);
for call in &def.calls {
if let Some(callee_id) = call.resolved {
*def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
}
}
}
}
let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
.into_iter()
.map(|((src, dst), w)| (src, dst, w))
.collect();
let offsets = def_offsets(files);
let n_defs = *offsets.last().unwrap_or(&0);
let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
.iter()
.map(|(src, dst, w)| {
#[expect(clippy::cast_possible_truncation)]
(
flatten_def_id(&offsets, *src) as u32,
flatten_def_id(&offsets, *dst) as u32,
*w,
)
})
.collect();
let def_ranks = pagerank(n_defs, &flat_def_edges, None);
let base_ranks: Vec<f32> = files
.iter()
.enumerate()
.map(|(i, file)| {
let start = offsets[i];
let end = start + file.defs.len();
def_ranks[start..end].iter().sum()
})
.collect();
let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
for &(src, dst, w) in &def_edges {
let src_file = src.0;
let dst_file = dst.0;
if src_file != dst_file {
*file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
}
}
let file_edges: Vec<(u32, u32, u32)> = file_edge_map
.into_iter()
.map(|((src, dst), w)| (src, dst, w))
.collect();
let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
DefGraphData {
def_edges,
def_ranks,
def_callers,
def_callees,
offsets,
base_ranks,
file_edges,
}
}
pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
let root = root.canonicalize().map_err(|e| crate::Error::Io {
path: root.display().to_string(),
source: e,
})?;
let all_files = walk::collect_files(&root, None);
let mut file_index: HashMap<PathBuf, usize> = HashMap::new();
let mut files: Vec<FileNode> = Vec::new();
let mut raw_sources: Vec<(usize, String, String)> = Vec::new();
for path in &all_files {
let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or_default()
.to_string();
if languages::config_for_extension(&ext).is_none()
&& import_query_for_extension(&ext).is_none()
{
continue;
}
let Ok(source) = std::fs::read_to_string(path) else {
continue; };
let rel_path = path
.strip_prefix(&root)
.unwrap_or(path)
.display()
.to_string();
let idx = files.len();
file_index.insert(path.clone(), idx);
files.push(FileNode {
path: rel_path,
defs: vec![],
imports: vec![],
});
raw_sources.push((idx, ext, source));
}
for (idx, ext, source) in &raw_sources {
if let Some(config) = languages::config_for_extension(ext) {
files[*idx].defs = extract_definitions(source, &config);
}
if let Some((lang, import_query)) = import_query_for_extension(ext) {
let raw_imports = extract_imports(source, &lang, &import_query);
let file_path = root.join(&files[*idx].path);
files[*idx].imports = raw_imports
.into_iter()
.map(|raw| {
let resolved_idx = resolve_import(&raw, ext, &file_path, &root, &file_index)
.and_then(|i| u32::try_from(i).ok());
ImportRef {
raw_path: raw,
resolved_idx,
}
})
.collect();
}
}
for (idx, ext, source) in &raw_sources {
if let Some(call_config) = languages::call_query_for_extension(ext) {
extract_calls(source, &call_config, &mut files[*idx].defs);
}
}
let def_index = build_def_index(&files);
resolve_calls(&mut files, &def_index);
let graph_data = compute_def_graph(&files);
let n = files.len();
let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
#[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
let density = if n > 1 {
graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
} else {
0.0
};
let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
Ok(RepoGraph {
files,
edges: graph_data.file_edges,
base_ranks: graph_data.base_ranks,
callers,
callees,
def_edges: graph_data.def_edges,
def_ranks: graph_data.def_ranks,
def_callers: graph_data.def_callers,
def_callees: graph_data.def_callees,
def_offsets: graph_data.offsets,
alpha,
})
}
impl RepoGraph {
#[must_use]
pub fn def_rank(&self, did: DefId) -> f32 {
let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
self.def_ranks.get(flat).copied().unwrap_or(0.0)
}
#[must_use]
pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
for (file_idx, file) in self.files.iter().enumerate() {
if file.path == file_path {
for (def_idx, def) in file.defs.iter().enumerate() {
if def.name == def_name {
#[expect(clippy::cast_possible_truncation)]
return Some((file_idx as u32, def_idx as u16));
}
}
}
}
None
}
}
fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
for &(src, dst, w) in edges {
let (s, d) = (src as usize, dst as usize);
if s < n && d < n {
incoming[d].push((src, w));
outgoing[s].push((dst, w));
}
}
let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
lists
.iter_mut()
.map(|list| {
list.sort_by_key(|b| std::cmp::Reverse(b.1));
list.iter()
.take(MAX_NEIGHBORS)
.map(|(idx, _)| *idx)
.collect()
})
.collect()
};
(trim(&mut incoming), trim(&mut outgoing))
}
#[must_use]
pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
let n = graph.files.len();
if n == 0 {
return String::new();
}
let ranks = if focus.is_some() {
pagerank(n, &graph.edges, focus)
} else {
graph.base_ranks.clone()
};
let mut sorted: Vec<usize> = (0..n).collect();
sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
let mut output = String::new();
let mut used_tokens = 0;
let max_chars = max_tokens * CHARS_PER_TOKEN;
for (rank_pos, &file_idx) in sorted.iter().enumerate() {
if used_tokens >= max_tokens {
break;
}
let file = &graph.files[file_idx];
let score = ranks[file_idx];
#[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
let percentile = (rank_pos as f32) / (n as f32);
let section = if percentile < 0.1 {
render_tier0(graph, file_idx, file, score)
} else if percentile < 0.3 {
render_tier1(file, score)
} else if percentile < 0.7 {
render_tier2(file, score)
} else {
render_tier3(file)
};
let section_chars = section.len();
if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
let path_line = format!("{}\n", file.path);
let path_tokens = path_line.len() / CHARS_PER_TOKEN;
if used_tokens + path_tokens <= max_tokens {
output.push_str(&path_line);
}
break;
}
output.push_str(§ion);
used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
}
output
}
fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
let mut out = format!("## {} (rank: {score:.4})\n", file.path);
if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
let _ = write!(out, " called by: ");
let names: Vec<&str> = graph.callers[file_idx]
.iter()
.filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
.collect();
let _ = writeln!(out, "{}", names.join(", "));
}
if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
let _ = write!(out, " calls: ");
let names: Vec<&str> = graph.callees[file_idx]
.iter()
.filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
.collect();
let _ = writeln!(out, "{}", names.join(", "));
}
for def in &file.defs {
let scope_prefix = if def.scope.is_empty() {
String::new()
} else {
format!("{} > ", def.scope)
};
if let Some(sig) = &def.signature {
let _ = writeln!(out, " {scope_prefix}{} {sig}", def.kind);
} else {
let _ = writeln!(out, " {scope_prefix}{} {}", def.kind, def.name);
}
}
let _ = writeln!(out);
out
}
fn render_tier1(file: &FileNode, score: f32) -> String {
let mut out = format!("## {} (rank: {score:.4})\n", file.path);
for def in &file.defs {
if let Some(sig) = &def.signature {
let _ = writeln!(out, " {sig}");
} else {
let _ = writeln!(out, " {} {}", def.kind, def.name);
}
}
let _ = writeln!(out);
out
}
fn render_tier2(file: &FileNode, score: f32) -> String {
let mut out = format!("{} (rank: {score:.4})", file.path);
if !file.defs.is_empty() {
let names: Vec<String> = file
.defs
.iter()
.map(|d| format!("{}:{}", d.kind, d.name))
.collect();
let _ = write!(out, " -- {}", names.join(", "));
}
let _ = writeln!(out);
out
}
fn render_tier3(file: &FileNode) -> String {
format!("{}\n", file.path)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_pagerank_simple() {
let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
let ranks = pagerank(3, &edges, None);
assert_eq!(ranks.len(), 3);
let sum: f32 = ranks.iter().sum();
assert!(
(sum - 1.0).abs() < 0.01,
"ranks should sum to ~1.0, got {sum}"
);
let expected = 1.0 / 3.0;
for (i, &r) in ranks.iter().enumerate() {
assert!(
(r - expected).abs() < 0.05,
"rank[{i}] = {r}, expected ~{expected}"
);
}
}
#[test]
fn test_pagerank_star() {
let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
let ranks = pagerank(4, &edges, None);
assert_eq!(ranks.len(), 4);
let max_idx = ranks
.iter()
.enumerate()
.max_by(|a, b| a.1.total_cmp(b.1))
.unwrap()
.0;
assert_eq!(max_idx, 3, "node 3 should have highest rank");
assert!(
ranks[3] > ranks[0],
"rank[3]={} should be > rank[0]={}",
ranks[3],
ranks[0]
);
}
#[test]
fn test_pagerank_topic_sensitive() {
let edges = vec![(0, 1, 1), (1, 2, 1)];
let uniform_ranks = pagerank(3, &edges, None);
let biased_ranks = pagerank(3, &edges, Some(0));
assert!(
biased_ranks[0] > uniform_ranks[0],
"focused rank[0]={} should be > uniform rank[0]={}",
biased_ranks[0],
uniform_ranks[0]
);
}
#[test]
fn test_pagerank_empty() {
let ranks = pagerank(0, &[], None);
assert!(ranks.is_empty());
}
#[test]
fn test_render_tiers() {
let files: Vec<FileNode> = (0..10)
.map(|i| FileNode {
path: format!("src/file_{i}.rs"),
defs: vec![Definition {
name: format!("func_{i}"),
kind: "function_item".to_string(),
start_line: 1,
end_line: 5,
scope: String::new(),
signature: Some(format!("func_{i}(x: i32) -> i32")),
start_byte: 0,
end_byte: 0,
calls: vec![],
}],
imports: vec![],
})
.collect();
let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
let base_ranks = pagerank(10, &edges, None);
let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
let graph = RepoGraph {
files,
edges,
base_ranks,
callers: top_callers,
callees: top_callees,
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0],
alpha: 0.5,
};
let full = render(&graph, 10_000, None);
assert!(
full.contains("file_0"),
"output should contain the top-ranked file"
);
assert!(
full.contains("## src/file_0.rs"),
"top file should have tier 0 heading"
);
let small = render(&graph, 10, None);
assert!(
!small.is_empty(),
"even tiny budget should produce some output"
);
let full_lines = full.lines().count();
let small_lines = small.lines().count();
assert!(
small_lines < full_lines,
"small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
);
}
#[test]
fn test_render_empty_graph() {
let graph = RepoGraph {
files: vec![],
edges: vec![],
base_ranks: vec![],
callers: vec![],
callees: vec![],
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0],
alpha: 0.5,
};
let output = render(&graph, 1000, None);
assert!(output.is_empty(), "empty graph should render empty string");
}
#[test]
fn test_build_graph_on_fixtures() {
let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap()
.join("tests")
.join("fixtures");
let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
assert!(
!graph.files.is_empty(),
"graph should contain files from fixtures"
);
let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
assert!(rs_file.is_some(), "should find sample.rs");
let rs_file = rs_file.unwrap();
assert!(
!rs_file.defs.is_empty(),
"sample.rs should have definitions"
);
assert!(
rs_file.defs.iter().any(|d| d.name == "hello"),
"should find 'hello' function in sample.rs"
);
let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
assert!(py_file.is_some(), "should find sample.py");
let py_file = py_file.unwrap();
assert!(
!py_file.defs.is_empty(),
"sample.py should have definitions"
);
assert!(
py_file.defs.iter().any(|d| d.name == "greet"),
"should find 'greet' function in sample.py"
);
assert_eq!(graph.base_ranks.len(), graph.files.len());
let sum: f32 = graph.base_ranks.iter().sum();
assert!(
(sum - 1.0).abs() < 0.01,
"PageRank scores should sum to ~1.0, got {sum}"
);
}
#[test]
fn test_extract_imports_rust() {
let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
let (lang, query) = import_query_for_extension("rs").unwrap();
let imports = extract_imports(source, &lang, &query);
assert_eq!(imports.len(), 2);
assert!(imports[0].contains("crate::foo::bar"));
}
#[test]
fn test_extract_imports_python_stub() {
let source = "from typing import Protocol\nimport pkg.types\n";
let (lang, query) = import_query_for_extension("pyi").unwrap();
let imports = extract_imports(source, &lang, &query);
assert_eq!(imports.len(), 2);
assert!(imports[0].contains("from typing import Protocol"));
assert!(imports[1].contains("import pkg.types"));
}
#[test]
fn test_resolve_python_import_to_stub_file() {
let root = PathBuf::from("/project");
let mut file_index = HashMap::new();
file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
let result = resolve_python_import("import pkg.types", &root, &file_index);
assert_eq!(result, Some(1));
}
#[test]
fn test_resolve_rust_crate_import() {
let root = PathBuf::from("/project");
let file_path = PathBuf::from("/project/src/main.rs");
let mut file_index = HashMap::new();
file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
assert_eq!(result, Some(1));
}
#[test]
fn test_resolve_rust_external_crate_dropped() {
let root = PathBuf::from("/project");
let file_path = PathBuf::from("/project/src/main.rs");
let file_index = HashMap::new();
let result = resolve_rust_import(
"use std::collections::HashMap;",
&file_path,
&root,
&file_index,
);
assert_eq!(result, None, "external crate imports should be dropped");
}
#[test]
fn test_neighbor_lists() {
let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
let (incoming, outgoing) = build_neighbor_lists(3, &edges);
assert!(incoming[2].contains(&0));
assert!(incoming[2].contains(&1));
assert!(outgoing[0].contains(&1));
assert!(outgoing[0].contains(&2));
}
#[test]
#[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
fn test_full_repo_map() {
use std::time::Instant;
let root = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap();
let t0 = Instant::now();
let graph = build_graph(root).expect("build_graph on ripvec root");
let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
let t1 = Instant::now();
let rendered = render(&graph, 2000, None);
let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
let t2 = Instant::now();
let focus_idx = graph
.base_ranks
.iter()
.enumerate()
.max_by(|a, b| a.1.total_cmp(b.1))
.map(|(i, _)| i);
let focused = render(&graph, 2000, focus_idx);
let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
eprintln!("\n=== Repo Map Performance ===");
eprintln!(
"Files: {}, Edges: {}, Defs: {}",
graph.files.len(),
graph.edges.len(),
graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
);
eprintln!("build_graph: {build_ms:.1}ms (walk + parse + resolve + PageRank)");
eprintln!(
"render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
rendered.len(),
rendered.len() / 4
);
eprintln!(
"render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
focused.len(),
focused.len() / 4
);
eprintln!("\nTop 5 by PageRank:");
let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
for (i, rank) in ranked.iter().take(5) {
eprintln!(" {:.4} {}", rank, graph.files[*i].path);
}
eprintln!("\n=== Default Render ===\n{rendered}");
eprintln!(
"\n=== Focused Render (on {}) ===\n{focused}",
focus_idx
.map(|i| graph.files[i].path.as_str())
.unwrap_or("none")
);
}
}