use std::collections::{HashMap, HashSet};
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor};
use serde::Serialize;
use crate::chunk::ContentKind;
use crate::languages;
use crate::walk;
fn content_kind_tag(ck: ContentKind) -> &'static str {
match ck {
ContentKind::Code => "code",
ContentKind::Docs => "docs",
ContentKind::Meta => "meta",
}
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct RepoGraph {
pub files: Vec<FileNode>,
pub edges: Vec<(u32, u32, u32)>,
pub base_ranks: Vec<f32>,
pub callers: Vec<Vec<u32>>,
pub callees: Vec<Vec<u32>>,
pub def_edges: Vec<(DefId, DefId, u32)>,
pub def_ranks: Vec<f32>,
pub def_callers: Vec<Vec<DefId>>,
pub def_callees: Vec<Vec<DefId>>,
pub def_offsets: Vec<usize>,
pub alpha: f32,
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct FileNode {
pub path: String,
pub defs: Vec<Definition>,
pub imports: Vec<ImportRef>,
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct Definition {
pub name: String,
pub kind: String,
pub start_line: u32,
pub end_line: u32,
pub scope: String,
pub signature: Option<String>,
pub start_byte: u32,
pub end_byte: u32,
pub calls: Vec<CallRef>,
}
#[derive(Debug, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ImportRef {
pub raw_path: String,
pub resolved_idx: Option<u32>,
}
pub type DefId = (u32, u16);
#[derive(Debug, Clone, Default, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct CallRef {
pub name: String,
pub qualified_path: Option<String>,
pub receiver_type: Option<String>,
pub byte_offset: u32,
pub resolved: Option<DefId>,
}
#[derive(Debug, Clone, Serialize)]
pub struct RepoMapLspLocation {
pub file_path: String,
pub start_line: usize,
pub start_character: usize,
pub end_line: usize,
pub end_character: usize,
}
#[derive(Debug, Clone, Serialize)]
pub struct RepoMapSymbol {
pub name: String,
pub kind: u32,
pub lsp_location: RepoMapLspLocation,
pub rank: f32,
}
#[derive(Debug, Clone, Serialize)]
pub struct RepoMapCall {
pub lsp_location: RepoMapLspLocation,
pub rank: f32,
}
#[derive(Debug, Clone, Serialize)]
pub struct RepoMapFile {
pub lsp_location: RepoMapLspLocation,
pub rank: f32,
pub content_kind: &'static str,
pub calls: Vec<RepoMapCall>,
pub symbols: Vec<RepoMapSymbol>,
pub truncated_symbols: usize,
pub truncated_calls: usize,
}
#[derive(Debug, Clone, Serialize)]
pub struct GetRepoMapResponse {
pub files: Vec<RepoMapFile>,
pub total_files: usize,
pub estimated_bytes: usize,
pub budget_bytes: usize,
pub budget_exhausted: bool,
pub capped: bool,
}
const DAMPING: f32 = 0.85;
const EPSILON: f32 = 1e-6;
const MAX_ITERATIONS: usize = 100;
const MAX_NEIGHBORS: usize = 5;
const CHARS_PER_TOKEN: usize = 4;
const PERSONALIZATION_ALPHA: f32 = 0.15;
fn import_query_for_extension(ext: &str) -> Option<(tree_sitter::Language, Query)> {
let (lang, query_str): (tree_sitter::Language, &str) = match ext {
"rs" => (
tree_sitter_rust::LANGUAGE.into(),
"(use_declaration) @import",
),
"py" | "pyi" => (
tree_sitter_python::LANGUAGE.into(),
concat!(
"(import_statement) @import\n",
"(import_from_statement) @import",
),
),
"js" | "jsx" => (
tree_sitter_javascript::LANGUAGE.into(),
"(import_statement source: (string) @import_path) @import",
),
"ts" => (
tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
"(import_statement source: (string) @import_path) @import",
),
"tsx" => (
tree_sitter_typescript::LANGUAGE_TSX.into(),
"(import_statement source: (string) @import_path) @import",
),
"go" => (
tree_sitter_go::LANGUAGE.into(),
"(import_spec path: (interpreted_string_literal) @import_path) @import",
),
"rb" => (
tree_sitter_ruby::LANGUAGE.into(),
"(call method: (identifier) @_method arguments: (argument_list (string (string_content) @import_path)) (#eq? @_method \"require\")) @import",
),
_ => return None,
};
let query = match Query::new(&lang, query_str) {
Ok(q) => q,
Err(e) => {
tracing::warn!(ext, %e, "import query compilation failed — language may be ABI-incompatible");
return None;
}
};
Some((lang, query))
}
fn extract_imports(
source: &str,
lang: &tree_sitter::Language,
import_query: &Query,
) -> Vec<String> {
let mut parser = Parser::new();
if parser.set_language(lang).is_err() {
return vec![];
}
let Some(tree) = parser.parse(source, None) else {
return vec![];
};
let mut cursor = QueryCursor::new();
let mut imports = Vec::new();
let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut import_path_text = None;
let mut import_text = None;
for cap in m.captures {
let cap_name = &import_query.capture_names()[cap.index as usize];
let text = &source[cap.node.start_byte()..cap.node.end_byte()];
if *cap_name == "import_path" {
import_path_text = Some(text.trim_matches(|c| c == '"' || c == '\''));
} else if *cap_name == "import" {
import_text = Some(text);
}
}
if let Some(path) = import_path_text {
imports.push(path.to_string());
} else if let Some(text) = import_text {
imports.push(text.to_string());
}
}
imports
}
fn resolve_rust_import(
raw: &str,
file_path: &Path,
root: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
let trimmed = raw
.trim()
.trim_start_matches("use ")
.trim_end_matches(';')
.trim();
let segments: Vec<&str> = trimmed.split("::").collect();
if segments.is_empty() {
return None;
}
let (base, skip) = match segments[0] {
"crate" => {
let mut dir = file_path.parent();
let crate_root = loop {
match dir {
Some(d) if d.join("Cargo.toml").exists() => break d.join("src"),
Some(d) => dir = d.parent(),
None => break root.join("src"), }
};
(crate_root, 1)
}
"self" => {
let dir = file_path.parent()?;
(dir.to_path_buf(), 1)
}
"super" => {
let dir = file_path.parent()?.parent()?;
(dir.to_path_buf(), 1)
}
_ => return None,
};
let path_segments = &segments[skip..];
for end in (1..=path_segments.len()).rev() {
let mut candidate = base.clone();
for seg in &path_segments[..end] {
let clean = seg.split('{').next().unwrap_or(seg).trim();
if !clean.is_empty() {
candidate.push(clean);
}
}
let as_file = candidate.with_extension("rs");
if let Some(&idx) = file_index.get(&as_file) {
return Some(idx);
}
let as_mod = candidate.join("mod.rs");
if let Some(&idx) = file_index.get(&as_mod) {
return Some(idx);
}
}
None
}
fn resolve_import(
raw: &str,
ext: &str,
file_path: &Path,
root: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
match ext {
"rs" => resolve_rust_import(raw, file_path, root, file_index),
"py" | "pyi" => resolve_python_import(raw, root, file_index),
"js" | "jsx" | "ts" | "tsx" => resolve_js_import(raw, file_path, file_index),
_ => None,
}
}
fn resolve_python_import(
raw: &str,
root: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
let module_path = if let Some(rest) = raw.strip_prefix("from ") {
rest.split_whitespace().next()?
} else if let Some(rest) = raw.strip_prefix("import ") {
rest.split_whitespace().next()?
} else {
return None;
};
let rel_path: PathBuf = module_path.split('.').collect();
for ext in ["py", "pyi"] {
let as_file = root.join(&rel_path).with_extension(ext);
if let Some(&idx) = file_index.get(&as_file) {
return Some(idx);
}
}
for init_name in ["__init__.py", "__init__.pyi"] {
let as_init = root.join(&rel_path).join(init_name);
if let Some(&idx) = file_index.get(&as_init) {
return Some(idx);
}
}
None
}
fn resolve_js_import(
raw: &str,
file_path: &Path,
file_index: &HashMap<PathBuf, usize>,
) -> Option<usize> {
if !raw.starts_with('.') {
return None;
}
let dir = file_path.parent()?;
let candidate = dir.join(raw);
for ext in &["js", "jsx", "ts", "tsx"] {
let with_ext = candidate.with_extension(ext);
if let Some(&idx) = file_index.get(&with_ext) {
return Some(idx);
}
}
for ext in &["js", "jsx", "ts", "tsx"] {
let index_file = candidate.join("index").with_extension(ext);
if let Some(&idx) = file_index.get(&index_file) {
return Some(idx);
}
}
None
}
fn extract_definitions(source: &str, config: &languages::LangConfig) -> Vec<Definition> {
let mut parser = Parser::new();
if parser.set_language(&config.language).is_err() {
return vec![];
}
let Some(tree) = parser.parse(source, None) else {
return vec![];
};
let mut cursor = QueryCursor::new();
let mut defs = Vec::new();
let mut matches = cursor.matches(&config.query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut name = String::new();
let mut def_node = None;
for cap in m.captures {
let cap_name = &config.query.capture_names()[cap.index as usize];
if *cap_name == "name" {
name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
} else if *cap_name == "def" {
def_node = Some(cap.node);
}
}
if let Some(node) = def_node {
let scope = crate::chunk::build_scope_chain(node, source);
let signature = crate::chunk::extract_signature(node, source);
#[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
let start_line = node.start_position().row as u32 + 1;
#[expect(clippy::cast_possible_truncation, reason = "line numbers fit in u32")]
let end_line = node.end_position().row as u32 + 1;
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let start_byte = node.start_byte() as u32;
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let end_byte = node.end_byte() as u32;
defs.push(Definition {
name,
kind: node.kind().to_string(),
start_line,
end_line,
scope,
signature,
start_byte,
end_byte,
calls: vec![],
});
}
}
defs
}
fn is_callable_def_priority(kind: &str) -> u8 {
match kind {
"function_item"
| "function_definition"
| "function_declaration"
| "function_signature_item"
| "method_definition"
| "method_declaration"
| "method" => 0,
_ => 1,
}
}
fn extract_calls(source: &str, call_config: &languages::CallConfig, defs: &mut [Definition]) {
let mut parser = Parser::new();
if parser.set_language(&call_config.language).is_err() {
return;
}
let Some(tree) = parser.parse(source, None) else {
return;
};
let receiver_map = infer_receiver_types(source, &tree, &call_config.language);
if languages::is_hcl_language(&call_config.language) {
extract_hcl_call_edges(source, tree.root_node(), defs);
}
let mut cursor = QueryCursor::new();
let mut matches = cursor.matches(&call_config.query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut full_callee_text = None;
let mut call_byte = 0u32;
for cap in m.captures {
let cap_name = &call_config.query.capture_names()[cap.index as usize];
if *cap_name == "callee" {
full_callee_text =
Some(source[cap.node.start_byte()..cap.node.end_byte()].to_string());
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
{
call_byte = cap.node.start_byte() as u32;
}
}
}
if let Some(full_text) = full_callee_text {
let (name, qualified_path) = if full_text.contains("::") {
let bare = full_text
.rsplit("::")
.next()
.unwrap_or(&full_text)
.to_string();
(bare, Some(full_text))
} else {
(full_text, None)
};
let receiver_type = receiver_map.get(&call_byte).cloned();
let enclosing_idx = defs
.iter()
.enumerate()
.filter(|(_, d)| d.start_byte <= call_byte && call_byte < d.end_byte)
.min_by_key(|(_, d)| (d.end_byte - d.start_byte, is_callable_def_priority(&d.kind)))
.map(|(i, _)| i);
if let Some(idx) = enclosing_idx {
if defs[idx].name != name {
defs[idx].calls.push(CallRef {
name,
qualified_path,
receiver_type,
byte_offset: call_byte,
resolved: None,
});
}
}
}
}
}
fn extract_hcl_call_edges(source: &str, root: tree_sitter::Node<'_>, defs: &mut [Definition]) {
let mut stack: Vec<tree_sitter::Node<'_>> = vec![root];
while let Some(node) = stack.pop() {
match node.kind() {
"expression" => hcl_visit_expression(source, node, defs),
"block" => hcl_visit_block(source, node, defs),
_ => {}
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.is_named() {
stack.push(child);
}
}
}
}
fn hcl_visit_expression(source: &str, node: tree_sitter::Node<'_>, defs: &mut [Definition]) {
let mut cursor = node.walk();
let mut child_iter = node.children(&mut cursor);
let Some(first) = child_iter.next() else {
return;
};
if first.kind() != "variable_expr" {
return;
}
let Some(first_id) = first.child_by_field_name("name").or_else(|| {
let mut c = first.walk();
first.children(&mut c).find(|n| n.kind() == "identifier")
}) else {
return;
};
if &source[first_id.start_byte()..first_id.end_byte()] != "data" {
return;
}
let mut chain: Vec<String> = Vec::new();
for child in child_iter {
if child.kind() != "get_attr" {
return; }
let mut gc = child.walk();
let id = child.children(&mut gc).find(|n| n.kind() == "identifier");
let Some(id_node) = id else { return };
chain.push(source[id_node.start_byte()..id_node.end_byte()].to_string());
}
if chain.len() < 2 || chain[0] != "terraform_remote_state" {
return;
}
let name = chain[1].clone();
let qualified_path = format!("terraform_remote_state.{name}");
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let call_byte = node.start_byte() as u32;
attach_hcl_call(defs, call_byte, name, Some(qualified_path));
}
fn hcl_visit_block(source: &str, node: tree_sitter::Node<'_>, defs: &mut [Definition]) {
let mut cursor = node.walk();
let children: Vec<tree_sitter::Node<'_>> = node.children(&mut cursor).collect();
let Some(first) = children.first() else {
return;
};
if first.kind() != "identifier" || &source[first.start_byte()..first.end_byte()] != "module" {
return;
}
let label_node = children.iter().find(|c| c.kind() == "string_lit");
let Some(label_node) = label_node else {
return;
};
let mut lc = label_node.walk();
let template = label_node
.children(&mut lc)
.find(|n| n.kind() == "template_literal");
let Some(template) = template else {
return;
};
let label = source[template.start_byte()..template.end_byte()].to_string();
let qualified_path = format!("module.{label}");
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let call_byte = node.start_byte() as u32;
attach_hcl_call(defs, call_byte, label, Some(qualified_path));
}
fn attach_hcl_call(
defs: &mut [Definition],
call_byte: u32,
name: String,
qualified_path: Option<String>,
) {
let enclosing_idx = defs
.iter()
.enumerate()
.filter(|(_, d)| d.start_byte <= call_byte && call_byte < d.end_byte)
.min_by_key(|(_, d)| (d.end_byte - d.start_byte, is_callable_def_priority(&d.kind)))
.map(|(i, _)| i);
if let Some(idx) = enclosing_idx {
if defs[idx].name != name {
defs[idx].calls.push(CallRef {
name,
qualified_path,
receiver_type: None,
byte_offset: call_byte,
resolved: None,
});
}
}
}
fn infer_receiver_types(
source: &str,
tree: &tree_sitter::Tree,
language: &tree_sitter::Language,
) -> HashMap<u32, String> {
let mut map: HashMap<u32, String> = HashMap::new();
if languages::is_rust_language(language) {
collect_rust_receiver_types(source, tree.root_node(), &mut map);
} else if languages::is_python_language(language) {
collect_python_receiver_types(source, tree.root_node(), &mut map);
} else if languages::is_go_language(language) {
collect_go_receiver_types(source, tree.root_node(), &mut map);
}
map
}
fn collect_rust_receiver_types(
source: &str,
node: tree_sitter::Node<'_>,
map: &mut HashMap<u32, String>,
) {
let mut stack: Vec<(tree_sitter::Node<'_>, Option<String>)> = vec![(node, None)];
while let Some((n, impl_ctx)) = stack.pop() {
match n.kind() {
"impl_item" => {
let impl_type = extract_impl_self_type(source, n);
let new_ctx = impl_type.or_else(|| impl_ctx.clone());
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, new_ctx.clone()));
}
}
"function_item" => {
let param_types = extract_param_types(source, n);
let let_types = extract_let_binding_types(source, n);
annotate_method_calls(
source,
n,
impl_ctx.as_deref(),
¶m_types,
&let_types,
map,
);
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, impl_ctx.clone()));
}
}
_ => {
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, impl_ctx.clone()));
}
}
}
}
}
fn extract_impl_self_type(source: &str, impl_node: tree_sitter::Node<'_>) -> Option<String> {
let type_node = impl_node.child_by_field_name("type")?;
Some(source[type_node.start_byte()..type_node.end_byte()].to_string())
}
fn extract_param_types(source: &str, fn_node: tree_sitter::Node<'_>) -> HashMap<String, String> {
let mut params: HashMap<String, String> = HashMap::new();
let Some(params_node) = fn_node.child_by_field_name("parameters") else {
return params;
};
let mut cursor = params_node.walk();
for param in params_node.children(&mut cursor) {
if param.kind() == "parameter" {
let mut param_name = None;
let mut param_type = None;
let mut pc = param.walk();
for child in param.children(&mut pc) {
match child.kind() {
"identifier" | "mutable_specifier" if param_name.is_none() => {
let text = source[child.start_byte()..child.end_byte()].to_string();
if text != "mut" {
param_name = Some(text);
}
}
"type_identifier"
| "generic_type"
| "reference_type"
| "scoped_type_identifier"
if param_type.is_none() =>
{
param_type = Some(extract_base_type(source, child));
}
_ => {}
}
}
if let (Some(name), Some(ty)) = (param_name, param_type)
&& !ty.is_empty()
{
params.insert(name, ty);
}
}
if param.kind() == "typed_pattern" {
let mut name_part = None;
let mut type_part = None;
let mut pc = param.walk();
for child in param.children(&mut pc) {
if child.kind() == "identifier" && name_part.is_none() {
name_part = Some(source[child.start_byte()..child.end_byte()].to_string());
} else if matches!(
child.kind(),
"type_identifier"
| "generic_type"
| "reference_type"
| "scoped_type_identifier"
) && type_part.is_none()
{
type_part = Some(extract_base_type(source, child));
}
}
if let (Some(name), Some(ty)) = (name_part, type_part)
&& !ty.is_empty()
{
params.insert(name, ty);
}
}
}
params
}
fn extract_base_type(source: &str, node: tree_sitter::Node<'_>) -> String {
match node.kind() {
"type_identifier" => source[node.start_byte()..node.end_byte()].to_string(),
"generic_type" | "reference_type" | "mutable_specifier" | "scoped_type_identifier" => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
let t = extract_base_type(source, child);
if !t.is_empty() {
return t;
}
}
String::new()
}
_ => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "type_identifier" {
return source[child.start_byte()..child.end_byte()].to_string();
}
}
String::new()
}
}
}
fn extract_let_binding_types(
source: &str,
fn_node: tree_sitter::Node<'_>,
) -> HashMap<String, String> {
let mut bindings: HashMap<String, String> = HashMap::new();
let Some(body) = fn_node.child_by_field_name("body") else {
return bindings;
};
let mut stack = vec![body];
while let Some(n) = stack.pop() {
if n.kind() == "let_declaration" {
let mut binding_name = None;
let mut constructor_type = None;
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
match child.kind() {
"identifier" if binding_name.is_none() => {
binding_name =
Some(source[child.start_byte()..child.end_byte()].to_string());
}
"call_expression" => {
if let Some(func) = child.child_by_field_name("function")
&& func.kind() == "scoped_identifier"
{
let full = source[func.start_byte()..func.end_byte()].to_string();
let head = full.split("::").next().unwrap_or("").to_string();
if !head.is_empty()
&& head.chars().next().is_some_and(char::is_uppercase)
{
constructor_type = Some(head);
}
}
}
_ => {}
}
}
if let (Some(name), Some(ty)) = (binding_name, constructor_type) {
bindings.insert(name, ty);
}
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push(child);
}
}
bindings
}
fn annotate_method_calls(
source: &str,
fn_node: tree_sitter::Node<'_>,
impl_ctx: Option<&str>,
param_types: &HashMap<String, String>,
let_types: &HashMap<String, String>,
map: &mut HashMap<u32, String>,
) {
let mut stack = vec![fn_node];
while let Some(n) = stack.pop() {
if n.kind() == "call_expression"
&& let Some(func) = n.child_by_field_name("function")
&& func.kind() == "field_expression"
{
if let (Some(recv), Some(field)) = (
func.child_by_field_name("value"),
func.child_by_field_name("field"),
) {
let recv_text = source[recv.start_byte()..recv.end_byte()].to_string();
let receiver_type = if recv_text == "self" || recv_text == "*self" {
impl_ctx.map(str::to_owned)
} else {
let base = recv_text
.trim_start_matches('*')
.trim_start_matches('&')
.trim();
param_types
.get(base)
.or_else(|| let_types.get(base))
.cloned()
};
if let Some(ty) = receiver_type {
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let field_byte = field.start_byte() as u32;
map.insert(field_byte, ty);
}
}
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push(child);
}
}
}
fn collect_python_receiver_types(
source: &str,
root: tree_sitter::Node<'_>,
map: &mut HashMap<u32, String>,
) {
let mut stack: Vec<(tree_sitter::Node<'_>, Option<String>)> = vec![(root, None)];
while let Some((n, class_ctx)) = stack.pop() {
match n.kind() {
"class_definition" => {
let class_name = n
.child_by_field_name("name")
.map(|c| source[c.start_byte()..c.end_byte()].to_string());
let new_ctx = class_name.or_else(|| class_ctx.clone());
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, new_ctx.clone()));
}
}
"function_definition" => {
let param_types = extract_python_param_types(source, n);
let let_types = extract_python_assignment_types(source, n);
annotate_python_method_calls(
source,
n,
class_ctx.as_deref(),
¶m_types,
&let_types,
map,
);
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, class_ctx.clone()));
}
}
_ => {
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, class_ctx.clone()));
}
}
}
}
}
fn extract_python_param_types(
source: &str,
fn_node: tree_sitter::Node<'_>,
) -> HashMap<String, String> {
let mut params: HashMap<String, String> = HashMap::new();
let Some(params_node) = fn_node.child_by_field_name("parameters") else {
return params;
};
let mut cursor = params_node.walk();
for param in params_node.children(&mut cursor) {
match param.kind() {
"typed_parameter" => {
let mut name_text = None;
let mut type_text = None;
let mut pc = param.walk();
for child in param.children(&mut pc) {
match child.kind() {
"identifier" if name_text.is_none() => {
let t = source[child.start_byte()..child.end_byte()].to_string();
if t != "self" && t != "cls" {
name_text = Some(t);
}
}
"type" | "identifier" | "attribute"
if type_text.is_none() && name_text.is_some() =>
{
type_text = Some(extract_python_base_type(source, child));
}
_ => {}
}
}
if let (Some(name), Some(ty)) = (name_text, type_text)
&& !ty.is_empty()
&& !ty.eq("self")
&& !ty.eq("cls")
{
params.insert(name, ty);
}
}
"typed_default_parameter" => {
let name_node = param.child_by_field_name("name");
let type_node = param.child_by_field_name("type");
if let (Some(nn), Some(tn)) = (name_node, type_node) {
let name = source[nn.start_byte()..nn.end_byte()].to_string();
if name != "self" && name != "cls" {
let ty = extract_python_base_type(source, tn);
if !ty.is_empty() {
params.insert(name, ty);
}
}
}
}
_ => {}
}
}
params
}
fn extract_python_base_type(source: &str, node: tree_sitter::Node<'_>) -> String {
match node.kind() {
"identifier" => source[node.start_byte()..node.end_byte()].to_string(),
"type" => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
let t = extract_python_base_type(source, child);
if !t.is_empty() {
return t;
}
}
String::new()
}
"subscript" => {
if let Some(sub) = node.child_by_field_name("subscript") {
return extract_python_base_type(source, sub);
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "identifier" {
return source[child.start_byte()..child.end_byte()].to_string();
}
}
String::new()
}
"attribute" => {
if let Some(attr) = node.child_by_field_name("attribute") {
return source[attr.start_byte()..attr.end_byte()].to_string();
}
String::new()
}
_ => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "identifier" {
return source[child.start_byte()..child.end_byte()].to_string();
}
}
String::new()
}
}
}
fn extract_python_assignment_types(
source: &str,
fn_node: tree_sitter::Node<'_>,
) -> HashMap<String, String> {
let mut bindings: HashMap<String, String> = HashMap::new();
let Some(body) = fn_node.child_by_field_name("body") else {
return bindings;
};
let mut stack = vec![body];
while let Some(n) = stack.pop() {
if n.kind() == "assignment" {
let left = n.child_by_field_name("left");
let right = n.child_by_field_name("right");
if let (Some(lhs), Some(rhs)) = (left, right)
&& lhs.kind() == "identifier"
&& rhs.kind() == "call"
&& let Some(func) = rhs.child_by_field_name("function")
{
let var_name = source[lhs.start_byte()..lhs.end_byte()].to_string();
let constructor_type = match func.kind() {
"identifier" => {
let t = source[func.start_byte()..func.end_byte()].to_string();
if t.chars().next().is_some_and(char::is_uppercase) {
Some(t)
} else {
None
}
}
"attribute" => {
func.child_by_field_name("attribute")
.map(|a| source[a.start_byte()..a.end_byte()].to_string())
}
_ => None,
};
if let Some(ty) = constructor_type {
bindings.insert(var_name, ty);
}
}
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push(child);
}
}
bindings
}
fn annotate_python_method_calls(
source: &str,
fn_node: tree_sitter::Node<'_>,
class_ctx: Option<&str>,
param_types: &HashMap<String, String>,
let_types: &HashMap<String, String>,
map: &mut HashMap<u32, String>,
) {
let mut stack = vec![fn_node];
while let Some(n) = stack.pop() {
if n.kind() == "call"
&& let Some(func) = n.child_by_field_name("function")
&& func.kind() == "attribute"
&& let (Some(recv_node), Some(attr_node)) = (
func.child_by_field_name("object"),
func.child_by_field_name("attribute"),
)
{
let recv_text = source[recv_node.start_byte()..recv_node.end_byte()].to_string();
let receiver_type = if recv_text == "self" || recv_text == "cls" {
class_ctx.map(str::to_owned)
} else if recv_node.kind() == "identifier" {
param_types
.get(&recv_text)
.or_else(|| let_types.get(&recv_text))
.cloned()
} else {
None
};
if let Some(ty) = receiver_type {
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let attr_byte = attr_node.start_byte() as u32;
map.insert(attr_byte, ty);
}
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push(child);
}
}
}
fn extract_python_class_hierarchy_node(
source: &str,
root: tree_sitter::Node<'_>,
out: &mut HashMap<String, Vec<String>>,
) {
let mut stack = vec![root];
while let Some(n) = stack.pop() {
if n.kind() == "class_definition"
&& let Some(name_node) = n.child_by_field_name("name")
{
let class_name = source[name_node.start_byte()..name_node.end_byte()].to_string();
let mut parents: Vec<String> = Vec::new();
if let Some(superclasses) = n.child_by_field_name("superclasses") {
let mut sc = superclasses.walk();
for child in superclasses.children(&mut sc) {
match child.kind() {
"identifier" => {
let t = source[child.start_byte()..child.end_byte()].to_string();
parents.push(t);
}
"attribute" => {
if let Some(attr) = child.child_by_field_name("attribute") {
parents
.push(source[attr.start_byte()..attr.end_byte()].to_string());
}
}
_ => {}
}
}
}
out.insert(class_name, parents);
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push(child);
}
}
}
#[must_use]
pub fn extract_python_class_hierarchy(source: &str) -> HashMap<String, Vec<String>> {
let mut parser = Parser::new();
let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
if parser.set_language(&lang).is_err() {
return HashMap::new();
}
let Some(tree) = parser.parse(source, None) else {
return HashMap::new();
};
let mut out: HashMap<String, Vec<String>> = HashMap::new();
extract_python_class_hierarchy_node(source, tree.root_node(), &mut out);
out
}
fn compute_python_mro<H: std::hash::BuildHasher>(
start: &str,
hierarchy: &HashMap<String, Vec<String>, H>,
) -> Vec<String> {
use std::collections::HashSet;
let mut order: Vec<String> = Vec::new();
let mut visited: HashSet<String> = HashSet::new();
let Some(start_parents) = hierarchy.get(start) else {
return order;
};
let mut stack: Vec<String> = start_parents.iter().rev().cloned().collect();
while let Some(cls) = stack.pop() {
if !visited.insert(cls.clone()) {
continue;
}
order.push(cls.clone());
if let Some(parents) = hierarchy.get(&cls) {
for p in parents.iter().rev() {
if !visited.contains(p) {
stack.push(p.clone());
}
}
}
}
order
}
fn collect_go_receiver_types(
source: &str,
root: tree_sitter::Node<'_>,
map: &mut HashMap<u32, String>,
) {
let mut stack: Vec<(tree_sitter::Node<'_>, Option<(String, String)>)> = vec![(root, None)];
while let Some((n, recv_binding)) = stack.pop() {
if n.kind() == "method_declaration" {
let binding = extract_go_receiver_binding(source, n);
let new_binding = binding.or_else(|| recv_binding.clone());
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, new_binding.clone()));
}
} else {
if n.kind() == "call_expression"
&& let Some(func) = n.child_by_field_name("function")
&& func.kind() == "selector_expression"
&& let (Some(operand), Some(field)) = (
func.child_by_field_name("operand"),
func.child_by_field_name("field"),
)
{
let recv_text = source[operand.start_byte()..operand.end_byte()].to_string();
let receiver_type = recv_binding.as_ref().and_then(|(recv_name, recv_ty)| {
if recv_text == *recv_name {
Some(recv_ty.clone())
} else {
None
}
});
if let Some(ty) = receiver_type {
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let field_byte = field.start_byte() as u32;
map.insert(field_byte, ty);
}
}
let mut cursor = n.walk();
for child in n.children(&mut cursor) {
stack.push((child, recv_binding.clone()));
}
}
}
}
fn extract_go_receiver_binding(
source: &str,
method_node: tree_sitter::Node<'_>,
) -> Option<(String, String)> {
let receiver_list = method_node.child_by_field_name("receiver")?;
let mut cursor = receiver_list.walk();
for param in receiver_list.children(&mut cursor) {
if param.kind() == "parameter_declaration" {
let name_node = param.child_by_field_name("name");
let type_node = param.child_by_field_name("type");
if let (Some(nn), Some(tn)) = (name_node, type_node) {
let name = source[nn.start_byte()..nn.end_byte()].to_string();
if name == "_" || name.is_empty() {
return None;
}
let ty = extract_go_base_type(source, tn);
if !ty.is_empty() {
return Some((name, ty));
}
}
}
}
None
}
fn extract_go_base_type(source: &str, node: tree_sitter::Node<'_>) -> String {
match node.kind() {
"type_identifier" => source[node.start_byte()..node.end_byte()].to_string(),
"pointer_type" => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "type_identifier" {
return source[child.start_byte()..child.end_byte()].to_string();
}
let t = extract_go_base_type(source, child);
if !t.is_empty() {
return t;
}
}
String::new()
}
_ => {
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if child.kind() == "type_identifier" {
return source[child.start_byte()..child.end_byte()].to_string();
}
}
String::new()
}
}
}
fn enrich_go_method_def_scopes(source: &str, defs: &mut [Definition]) {
let go_lang: tree_sitter::Language = tree_sitter_go::LANGUAGE.into();
let mut parser = Parser::new();
if parser.set_language(&go_lang).is_err() {
return;
}
let Some(tree) = parser.parse(source, None) else {
return;
};
let root = tree.root_node();
let mut method_cursor = root.walk();
for child in root.children(&mut method_cursor) {
if child.kind() != "method_declaration" {
continue;
}
let Some((_, recv_type)) = extract_go_receiver_binding(source, child) else {
continue;
};
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let method_start_byte = child.start_byte() as u32;
for def in defs.iter_mut() {
if def.kind == "method_declaration" && def.start_byte == method_start_byte {
def.scope = format!("method_declaration {recv_type}");
break;
}
}
}
}
pub fn enrich_go_method_def_scopes_pub(source: &str, defs: &mut [Definition]) {
enrich_go_method_def_scopes(source, defs);
}
pub(crate) fn enrich_sql_file_def(filename: &str, source: &str, defs: &mut Vec<Definition>) {
if defs
.iter()
.any(|d| d.kind == "sql_file" && d.start_byte == 0)
{
return;
}
let stem = std::path::Path::new(filename)
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or_default();
if stem.is_empty() {
return;
}
let end_line_zero_based = source.bytes().filter(|&b| b == b'\n').count();
#[expect(clippy::cast_possible_truncation, reason = "line counts fit in u32")]
let end_line = (end_line_zero_based as u32) + 1;
#[expect(clippy::cast_possible_truncation, reason = "byte offsets fit in u32")]
let end_byte = source.len() as u32;
let file_def = Definition {
name: stem.to_string(),
kind: "sql_file".to_string(),
start_line: 1,
end_line,
scope: String::new(),
signature: None,
start_byte: 0,
end_byte,
calls: vec![],
};
defs.insert(0, file_def);
}
pub fn enrich_sql_file_def_pub(filename: &str, source: &str, defs: &mut Vec<Definition>) {
enrich_sql_file_def(filename, source, defs);
}
pub fn extract_calls_pub(
source: &str,
call_config: &languages::CallConfig,
defs: &mut [Definition],
) {
extract_calls(source, call_config, defs);
}
#[must_use]
pub fn build_def_index_pub(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
build_def_index(files)
}
fn build_def_index(files: &[FileNode]) -> HashMap<String, Vec<DefId>> {
let mut index: HashMap<String, Vec<DefId>> = HashMap::new();
for (file_idx, file) in files.iter().enumerate() {
for (def_idx, def) in file.defs.iter().enumerate() {
#[expect(clippy::cast_possible_truncation)]
let did: DefId = (file_idx as u32, def_idx as u16);
index.entry(def.name.clone()).or_default().push(did);
}
}
index
}
pub fn resolve_calls_pub<S: std::hash::BuildHasher>(
files: &mut [FileNode],
def_index: &HashMap<String, Vec<DefId>, S>,
) {
let empty: HashMap<String, Vec<String>> = HashMap::new();
resolve_calls_inner(files, def_index, &empty);
}
pub fn resolve_calls_with_python_mro_pub<S, H>(
files: &mut [FileNode],
def_index: &HashMap<String, Vec<DefId>, S>,
python_class_hierarchy: &HashMap<String, Vec<String>, H>,
) where
S: std::hash::BuildHasher,
H: std::hash::BuildHasher,
{
resolve_calls_inner(files, def_index, python_class_hierarchy);
}
fn resolve_calls<S, H>(
files: &mut [FileNode],
def_index: &HashMap<String, Vec<DefId>, S>,
python_class_hierarchy: &HashMap<String, Vec<String>, H>,
) where
S: std::hash::BuildHasher,
H: std::hash::BuildHasher,
{
resolve_calls_inner(files, def_index, python_class_hierarchy);
}
#[expect(
clippy::too_many_lines,
reason = "7-priority resolution cascade (qualified path, receiver type, MRO walk, same-file, \
SQL suffix-match, imported-file, ambiguous); each priority is a distinct decision \
branch and extracting helpers would require passing large shared state across boundaries"
)]
fn resolve_calls_inner<S, H>(
files: &mut [FileNode],
def_index: &HashMap<String, Vec<DefId>, S>,
python_class_hierarchy: &HashMap<String, Vec<String>, H>,
) where
S: std::hash::BuildHasher,
H: std::hash::BuildHasher,
{
let imported_files: Vec<std::collections::HashSet<u32>> = files
.iter()
.map(|f| {
f.imports
.iter()
.filter_map(|imp| imp.resolved_idx)
.collect()
})
.collect();
for file_idx in 0..files.len() {
for def_idx in 0..files[file_idx].defs.len() {
for call_idx in 0..files[file_idx].defs[def_idx].calls.len() {
let call_name = files[file_idx].defs[def_idx].calls[call_idx].name.clone();
let qualified_path = files[file_idx].defs[def_idx].calls[call_idx]
.qualified_path
.clone();
let receiver_type = files[file_idx].defs[def_idx].calls[call_idx]
.receiver_type
.clone();
if let Some(ref qpath) = qualified_path
&& (qpath.starts_with("terraform_remote_state.")
|| qpath.starts_with("module."))
{
let target = &call_name; let segment_match = format!("/{target}/");
let alt_segment_prefix = format!("{target}/"); let candidate = files.iter().enumerate().find_map(|(idx, f)| {
if f.path.contains(&segment_match)
|| f.path.starts_with(&alt_segment_prefix)
{
if !f.defs.is_empty() {
#[expect(
clippy::cast_possible_truncation,
reason = "file index fits in u32"
)]
{
return Some((idx as u32, 0u16));
}
}
}
None
});
if let Some(did) = candidate {
files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
}
continue;
}
if let Some(ref qpath) = qualified_path {
let qualifier = if let Some(pos) = qpath.rfind("::") {
&qpath[..pos]
} else {
qpath.as_str()
};
let qual_segments: Vec<&str> = qualifier.split("::").collect();
let Some(candidates) = def_index.get(&call_name) else {
continue;
};
let matching: Vec<DefId> = candidates
.iter()
.copied()
.filter(|&(f_idx, _)| {
let file_path = &files[f_idx as usize].path;
let last_segment = qual_segments.last().copied().unwrap_or("");
let path_as_module =
file_path.trim_end_matches(".rs").replace(['/', '\\'], "::");
path_as_module.contains(last_segment)
|| file_path.contains(last_segment)
})
.collect();
if matching.len() == 1 {
files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(matching[0]);
}
continue;
}
if !def_index.contains_key(&call_name)
&& files[file_idx].defs[def_idx].kind == "sql_file"
&& !call_name.is_empty()
{
let suffix = format!("_{call_name}");
let suffix_str = suffix.as_str();
#[expect(clippy::cast_possible_truncation, reason = "file index fits in u32")]
let caller_did: DefId = (file_idx as u32, def_idx as u16);
let suffix_matches: Vec<DefId> = files
.iter()
.enumerate()
.flat_map(|(f_idx, f)| {
f.defs.iter().enumerate().filter_map(move |(d_idx, d)| {
#[expect(
clippy::cast_possible_truncation,
reason = "file and def indices fit in u32/u16"
)]
let did: DefId = (f_idx as u32, d_idx as u16);
if d.kind == "sql_file"
&& d.name.ends_with(suffix_str)
&& did != caller_did
{
Some(did)
} else {
None
}
})
})
.collect();
if suffix_matches.len() == 1 {
files[file_idx].defs[def_idx].calls[call_idx].resolved =
Some(suffix_matches[0]);
}
continue;
}
let Some(candidates) = def_index.get(&call_name) else {
continue;
};
if let Some(ref rtype) = receiver_type {
let receiver_matching: Vec<DefId> = candidates
.iter()
.copied()
.filter(|&(f_idx, d_idx)| {
let scope = &files[f_idx as usize].defs[d_idx as usize].scope;
scope.contains(rtype.as_str())
})
.collect();
if receiver_matching.len() == 1 {
files[file_idx].defs[def_idx].calls[call_idx].resolved =
Some(receiver_matching[0]);
continue;
}
if receiver_matching.len() > 1 {
let imported_receiver_matching: Vec<DefId> = receiver_matching
.iter()
.copied()
.filter(|(f, _)| imported_files[file_idx].contains(f))
.collect();
if imported_receiver_matching.len() == 1 {
files[file_idx].defs[def_idx].calls[call_idx].resolved =
Some(imported_receiver_matching[0]);
}
continue;
}
let mro = compute_python_mro(rtype, python_class_hierarchy);
let mut resolved_via_mro: Option<DefId> = None;
for ancestor in &mro {
let ancestor_matching: Vec<DefId> = candidates
.iter()
.copied()
.filter(|&(f_idx, d_idx)| {
let scope = &files[f_idx as usize].defs[d_idx as usize].scope;
scope.contains(ancestor.as_str())
})
.collect();
if ancestor_matching.len() == 1 {
resolved_via_mro = Some(ancestor_matching[0]);
break;
}
if ancestor_matching.len() > 1 {
let imported_ancestor: Vec<DefId> = ancestor_matching
.iter()
.copied()
.filter(|(f, _)| imported_files[file_idx].contains(f))
.collect();
if imported_ancestor.len() == 1 {
resolved_via_mro = Some(imported_ancestor[0]);
break;
}
resolved_via_mro = Some(ancestor_matching[0]);
break;
}
}
if let Some(did) = resolved_via_mro {
files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
continue;
}
}
#[expect(clippy::cast_possible_truncation)]
let file_idx_u32 = file_idx as u32;
if let Some(&did) = candidates.iter().find(|(f, _)| *f == file_idx_u32) {
files[file_idx].defs[def_idx].calls[call_idx].resolved = Some(did);
continue;
}
let imported_candidates: Vec<DefId> = candidates
.iter()
.copied()
.filter(|(f, _)| imported_files[file_idx].contains(f))
.collect();
if imported_candidates.len() == 1 {
files[file_idx].defs[def_idx].calls[call_idx].resolved =
Some(imported_candidates[0]);
}
}
}
}
}
fn def_offsets(files: &[FileNode]) -> Vec<usize> {
let mut offsets = Vec::with_capacity(files.len() + 1);
offsets.push(0);
for file in files {
offsets.push(offsets.last().unwrap() + file.defs.len());
}
offsets
}
fn flatten_def_id(offsets: &[usize], did: DefId) -> usize {
offsets[did.0 as usize] + did.1 as usize
}
fn build_def_neighbor_lists(
n: usize,
edges: &[(u32, u32, u32)],
offsets: &[usize],
) -> (Vec<Vec<DefId>>, Vec<Vec<DefId>>) {
let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
for &(src, dst, w) in edges {
let (s, d) = (src as usize, dst as usize);
if s < n && d < n {
incoming[d].push((src, w));
outgoing[s].push((dst, w));
}
}
let to_def_id = |flat: u32| -> DefId {
let flat_usize = flat as usize;
let file_idx = offsets.partition_point(|&o| o <= flat_usize) - 1;
let def_idx = flat_usize - offsets[file_idx];
#[expect(clippy::cast_possible_truncation)]
(file_idx as u32, def_idx as u16)
};
let callers = incoming
.into_iter()
.map(|mut v| {
v.sort_by_key(|b| std::cmp::Reverse(b.1));
v.truncate(MAX_NEIGHBORS);
v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
})
.collect();
let callees = outgoing
.into_iter()
.map(|mut v| {
v.sort_by_key(|b| std::cmp::Reverse(b.1));
v.truncate(MAX_NEIGHBORS);
v.into_iter().map(|(idx, _)| to_def_id(idx)).collect()
})
.collect();
(callers, callees)
}
#[expect(
clippy::cast_precision_loss,
reason = "node count fits comfortably in f32"
)]
fn pagerank(n: usize, edges: &[(u32, u32, u32)], focus: Option<usize>) -> Vec<f32> {
if n == 0 {
return vec![];
}
let mut out_edges: Vec<Vec<(usize, f32)>> = vec![vec![]; n];
let mut out_weight: Vec<f32> = vec![0.0; n];
for &(src, dst, w) in edges {
let (s, d) = (src as usize, dst as usize);
if s < n && d < n {
#[expect(clippy::cast_possible_truncation, reason = "edge weights are small")]
let wf = f64::from(w) as f32;
out_edges[s].push((d, wf));
out_weight[s] += wf;
}
}
let bias: Vec<f32> = if let Some(idx) = focus {
if n == 1 {
vec![1.0_f32]
} else {
let other_mass = (1.0_f32 - PERSONALIZATION_ALPHA) / (n as f32 - 1.0);
let mut b = vec![other_mass; n];
if idx < n {
b[idx] = PERSONALIZATION_ALPHA;
}
let sum: f32 = b.iter().sum();
for v in &mut b {
*v /= sum;
}
b
}
} else {
vec![1.0 / n as f32; n]
};
let mut rank = vec![1.0 / n as f32; n];
let mut next_rank = vec![0.0_f32; n];
for _ in 0..MAX_ITERATIONS {
let dangling: f32 = rank
.iter()
.enumerate()
.filter(|&(i, _)| out_edges[i].is_empty())
.map(|(_, &r)| r)
.sum();
for (i, nr) in next_rank.iter_mut().enumerate() {
*nr = (1.0 - DAMPING).mul_add(bias[i], DAMPING * dangling * bias[i]);
}
for (src, edges_list) in out_edges.iter().enumerate() {
if edges_list.is_empty() {
continue;
}
let src_rank = rank[src];
let total_w = out_weight[src];
for &(dst, w) in edges_list {
next_rank[dst] += DAMPING * src_rank * (w / total_w);
}
}
let diff: f32 = rank
.iter()
.zip(next_rank.iter())
.map(|(a, b)| (a - b).abs())
.sum();
std::mem::swap(&mut rank, &mut next_rank);
if diff < EPSILON {
break;
}
}
rank
}
struct DefGraphData {
def_edges: Vec<(DefId, DefId, u32)>,
def_ranks: Vec<f32>,
def_callers: Vec<Vec<DefId>>,
def_callees: Vec<Vec<DefId>>,
offsets: Vec<usize>,
base_ranks: Vec<f32>,
file_edges: Vec<(u32, u32, u32)>,
}
#[must_use]
pub fn build_trait_impl_edges_pub(files: &[FileNode]) -> Vec<(DefId, DefId, u32)> {
build_trait_impl_edges(files)
}
fn build_trait_impl_edges(files: &[FileNode]) -> Vec<(DefId, DefId, u32)> {
let mut trait_methods: HashMap<String, Vec<DefId>> = HashMap::new();
let mut impl_methods: HashMap<String, Vec<DefId>> = HashMap::new();
for (fi, file) in files.iter().enumerate() {
for (di, def) in file.defs.iter().enumerate() {
#[expect(clippy::cast_possible_truncation)]
let did: DefId = (fi as u32, di as u16);
if def.kind == "function_signature_item"
|| (def.scope.starts_with("trait_item") && def.kind == "function_item")
{
trait_methods.entry(def.name.clone()).or_default().push(did);
} else if def.kind == "function_item" && def.scope.starts_with("impl_item") {
impl_methods.entry(def.name.clone()).or_default().push(did);
}
}
}
let imported_sets: Vec<std::collections::HashSet<u32>> = files
.iter()
.map(|f| {
f.imports
.iter()
.filter_map(|imp| imp.resolved_idx)
.collect()
})
.collect();
let mut edges: Vec<(DefId, DefId, u32)> = Vec::new();
for (name, trait_defs) in &trait_methods {
let Some(impl_defs) = impl_methods.get(name) else {
continue;
};
for &(tf, td) in trait_defs {
for &(imf, imd) in impl_defs {
let connected = tf == imf
|| imported_sets
.get(imf as usize)
.is_some_and(|s| s.contains(&tf));
if connected {
let trait_id: DefId = (tf, td);
let impl_id: DefId = (imf, imd);
edges.push((trait_id, impl_id, 1));
edges.push((impl_id, trait_id, 1));
}
}
}
}
edges
}
fn compute_def_graph(files: &[FileNode]) -> DefGraphData {
let mut def_edge_map: HashMap<(DefId, DefId), u32> = HashMap::new();
for (file_idx, file) in files.iter().enumerate() {
for (def_idx, def) in file.defs.iter().enumerate() {
#[expect(clippy::cast_possible_truncation)]
let caller_id: DefId = (file_idx as u32, def_idx as u16);
for call in &def.calls {
if let Some(callee_id) = call.resolved {
*def_edge_map.entry((caller_id, callee_id)).or_insert(0) += 1;
}
}
}
}
let trait_impl_edges = build_trait_impl_edges(files);
for (src, dst, w) in trait_impl_edges {
*def_edge_map.entry((src, dst)).or_insert(0) += w;
}
let def_edges: Vec<(DefId, DefId, u32)> = def_edge_map
.into_iter()
.map(|((src, dst), w)| (src, dst, w))
.collect();
let offsets = def_offsets(files);
let n_defs = *offsets.last().unwrap_or(&0);
let flat_def_edges: Vec<(u32, u32, u32)> = def_edges
.iter()
.map(|(src, dst, w)| {
#[expect(clippy::cast_possible_truncation)]
(
flatten_def_id(&offsets, *src) as u32,
flatten_def_id(&offsets, *dst) as u32,
*w,
)
})
.collect();
let def_ranks = pagerank(n_defs, &flat_def_edges, None);
let base_ranks: Vec<f32> = files
.iter()
.enumerate()
.map(|(i, file)| {
let start = offsets[i];
let end = start + file.defs.len();
def_ranks[start..end].iter().sum()
})
.collect();
let mut file_edge_map: HashMap<(u32, u32), u32> = HashMap::new();
for &(src, dst, w) in &def_edges {
let src_file = src.0;
let dst_file = dst.0;
if src_file != dst_file {
*file_edge_map.entry((src_file, dst_file)).or_insert(0) += w;
}
}
let file_edges: Vec<(u32, u32, u32)> = file_edge_map
.into_iter()
.map(|((src, dst), w)| (src, dst, w))
.collect();
let (def_callers, def_callees) = build_def_neighbor_lists(n_defs, &flat_def_edges, &offsets);
DefGraphData {
def_edges,
def_ranks,
def_callers,
def_callees,
offsets,
base_ranks,
file_edges,
}
}
#[expect(
clippy::too_many_lines,
reason = "three-phase parallel pipeline (walk+filter, def+import extraction, call extraction) \
plus resolve + graph build; phases share state (file_index, raw_sources) and \
cannot be meaningfully split without passing large mutable structures across \
boundaries with no clarity gain"
)]
pub fn build_graph(root: &Path) -> crate::Result<RepoGraph> {
let root = root.canonicalize().map_err(|e| crate::Error::Io {
path: root.display().to_string(),
source: e,
})?;
let mut walk_options = walk::WalkOptions::default();
if let Some((_, config)) = crate::cache::config::find_config(&root) {
walk_options.ignore_patterns = config.ignore.patterns;
}
let all_files = walk::collect_files_with_options(&root, &walk_options);
let raw_inputs: Vec<(PathBuf, String, String, String)> = all_files
.par_iter()
.filter_map(|path| {
let ext = path
.extension()
.and_then(|e| e.to_str())
.unwrap_or_default()
.to_string();
if languages::config_for_extension(&ext).is_none()
&& import_query_for_extension(&ext).is_none()
{
return None;
}
let source = std::fs::read_to_string(path).ok()?;
let rel_path = path
.strip_prefix(&root)
.unwrap_or(path)
.display()
.to_string();
Some((path.clone(), rel_path, ext, source))
})
.collect();
let mut file_index: HashMap<PathBuf, usize> = HashMap::with_capacity(raw_inputs.len());
let mut files: Vec<FileNode> = Vec::with_capacity(raw_inputs.len());
let mut raw_sources: Vec<(usize, String, String)> = Vec::with_capacity(raw_inputs.len());
for (idx, (abs_path, rel_path, ext, source)) in raw_inputs.into_iter().enumerate() {
file_index.insert(abs_path, idx);
files.push(FileNode {
path: rel_path,
defs: vec![],
imports: vec![],
});
raw_sources.push((idx, ext, source));
}
files
.par_iter_mut()
.zip(raw_sources.par_iter())
.for_each(|(file, (_, ext, source))| {
if let Some(config) = languages::config_for_extension(ext) {
file.defs = extract_definitions(source, &config);
if languages::is_go_language(&config.language) {
enrich_go_method_def_scopes(source, &mut file.defs);
}
if languages::is_sql_language(&config.language) {
enrich_sql_file_def(&file.path, source, &mut file.defs);
}
}
if let Some((lang, import_query)) = import_query_for_extension(ext) {
let raw_imports = extract_imports(source, &lang, &import_query);
let file_path = root.join(&file.path);
file.imports = raw_imports
.into_iter()
.map(|raw| {
let resolved_idx =
resolve_import(&raw, ext, &file_path, &root, &file_index)
.and_then(|i| u32::try_from(i).ok());
ImportRef {
raw_path: raw,
resolved_idx,
}
})
.collect();
}
});
files
.par_iter_mut()
.zip(raw_sources.par_iter())
.for_each(|(file, (_, ext, source))| {
if let Some(call_config) = languages::call_query_for_extension(ext) {
extract_calls(source, &call_config, &mut file.defs);
}
});
let python_hierarchies: Vec<HashMap<String, Vec<String>>> = raw_sources
.par_iter()
.map(|(_, ext, source)| {
if ext == "py" || ext == "pyi" {
extract_python_class_hierarchy(source)
} else {
HashMap::new()
}
})
.collect();
let mut python_class_hierarchy: HashMap<String, Vec<String>> = HashMap::new();
for local in python_hierarchies {
for (k, v) in local {
python_class_hierarchy.entry(k).or_insert(v);
}
}
let def_index = build_def_index(&files);
resolve_calls(&mut files, &def_index, &python_class_hierarchy);
let graph_data = compute_def_graph(&files);
let n = files.len();
let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
#[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
let density = if n > 1 {
graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
} else {
0.0
};
let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
Ok(RepoGraph {
files,
edges: graph_data.file_edges,
base_ranks: graph_data.base_ranks,
callers,
callees,
def_edges: graph_data.def_edges,
def_ranks: graph_data.def_ranks,
def_callers: graph_data.def_callers,
def_callees: graph_data.def_callees,
def_offsets: graph_data.offsets,
alpha,
})
}
#[must_use]
pub fn build_graph_from_files_pub(mut files: Vec<FileNode>) -> RepoGraph {
let def_index = build_def_index(&files);
let empty_hierarchy: HashMap<String, Vec<String>> = HashMap::new();
resolve_calls(&mut files, &def_index, &empty_hierarchy);
let graph_data = compute_def_graph(&files);
let n = files.len();
let (callers, callees) = build_neighbor_lists(n, &graph_data.file_edges);
#[expect(clippy::cast_precision_loss, reason = "graph sizes fit in f32")]
let density = if n > 1 {
graph_data.file_edges.len() as f32 / (n as f32 * (n as f32 - 1.0))
} else {
0.0
};
let alpha = 0.3f32.mul_add(density.min(1.0), 0.5);
RepoGraph {
files,
edges: graph_data.file_edges,
base_ranks: graph_data.base_ranks,
callers,
callees,
def_edges: graph_data.def_edges,
def_ranks: graph_data.def_ranks,
def_callers: graph_data.def_callers,
def_callees: graph_data.def_callees,
def_offsets: graph_data.offsets,
alpha,
}
}
pub type DefIndex = usize;
#[derive(Debug, Clone)]
pub struct DeadCluster {
pub root_def_idx: usize,
pub size: usize,
pub total_lines: usize,
pub member_def_indices: Vec<usize>,
}
#[derive(Debug, Clone)]
pub struct DeadCodeReport {
pub dead_clusters: Vec<DeadCluster>,
pub total_dead_defs: usize,
pub total_live_defs: usize,
pub dead_fraction: f32,
}
fn is_test_path(path: &str) -> bool {
let path_lc = path.to_lowercase();
if path_lc.contains("tests/") || path_lc.contains("/spec/") || path_lc.contains("/specs/") {
return true;
}
let file_name = path.rsplit('/').next().unwrap_or(path);
let stem = file_name.split('.').next().unwrap_or(file_name);
stem.starts_with("test_") || stem.ends_with("_test") || stem.contains("bench")
}
fn flat_to_file_idx(offsets: &[usize], flat: DefIndex) -> usize {
offsets.partition_point(|&o| o <= flat).saturating_sub(1)
}
fn uf_find(parent: &mut Vec<usize>, x: usize) -> usize {
if parent[x] != x {
parent[x] = uf_find(parent, parent[x]);
}
parent[x]
}
fn uf_union(parent: &mut Vec<usize>, x: usize, y: usize) {
let rx = uf_find(parent, x);
let ry = uf_find(parent, y);
if rx != ry {
parent[rx] = ry;
}
}
#[must_use]
#[expect(
clippy::too_many_lines,
reason = "six-step BFS+clustering pipeline; splitting into sub-functions \
would require passing many interdependent Vec borrows with no \
clarity gain"
)]
pub fn compute_dead_code<S: std::hash::BuildHasher>(
graph: &RepoGraph,
entry_def_indices: &HashSet<DefIndex, S>,
include_test_paths: bool,
) -> DeadCodeReport {
let n_defs = graph.def_ranks.len();
if n_defs == 0 {
return DeadCodeReport {
dead_clusters: vec![],
total_dead_defs: 0,
total_live_defs: 0,
dead_fraction: 0.0,
};
}
let seeds: Vec<DefIndex> = if include_test_paths {
entry_def_indices.iter().copied().collect()
} else {
entry_def_indices
.iter()
.copied()
.filter(|&flat| {
let file_idx = flat_to_file_idx(&graph.def_offsets, flat);
let path = graph
.files
.get(file_idx)
.map(|f| f.path.as_str())
.unwrap_or("");
!is_test_path(path)
})
.collect()
};
let mut reachable: Vec<bool> = vec![false; n_defs];
let mut queue: std::collections::VecDeque<DefIndex> = std::collections::VecDeque::new();
for seed in &seeds {
if *seed < n_defs && !reachable[*seed] {
reachable[*seed] = true;
queue.push_back(*seed);
}
}
while let Some(flat) = queue.pop_front() {
for callee_did in &graph.def_callees[flat] {
let callee_flat = graph.def_offsets[callee_did.0 as usize] + callee_did.1 as usize;
if callee_flat < n_defs && !reachable[callee_flat] {
reachable[callee_flat] = true;
queue.push_back(callee_flat);
}
}
}
let dead: Vec<DefIndex> = (0..n_defs).filter(|&i| !reachable[i]).collect();
let total_dead_defs = dead.len();
let total_live_defs = n_defs - total_dead_defs;
if dead.is_empty() {
return DeadCodeReport {
dead_clusters: vec![],
total_dead_defs: 0,
total_live_defs,
dead_fraction: 0.0,
};
}
let dead_set: HashSet<DefIndex> = dead.iter().copied().collect();
let dead_pos: HashMap<DefIndex, usize> = dead
.iter()
.copied()
.enumerate()
.map(|(pos, idx)| (idx, pos))
.collect();
let m = dead.len();
let mut parent: Vec<usize> = (0..m).collect();
for &flat in &dead {
let pos_flat = dead_pos[&flat];
for callee_did in &graph.def_callees[flat] {
let callee_flat = graph.def_offsets[callee_did.0 as usize] + callee_did.1 as usize;
if dead_set.contains(&callee_flat) {
let pos_callee = dead_pos[&callee_flat];
uf_union(&mut parent, pos_flat, pos_callee);
}
}
for caller_did in &graph.def_callers[flat] {
let caller_flat = graph.def_offsets[caller_did.0 as usize] + caller_did.1 as usize;
if dead_set.contains(&caller_flat) {
let pos_caller = dead_pos[&caller_flat];
uf_union(&mut parent, pos_flat, pos_caller);
}
}
}
for i in 0..m {
uf_find(&mut parent, i);
}
let mut components: HashMap<usize, Vec<DefIndex>> = HashMap::new();
for (pos, &flat) in dead.iter().enumerate() {
let root_pos = parent[pos];
components.entry(root_pos).or_default().push(flat);
}
let mut clusters: Vec<DeadCluster> = components
.into_values()
.map(|members| {
let root_flat = members
.iter()
.copied()
.max_by(|&a, &b| {
let ra = graph.def_ranks.get(a).copied().unwrap_or(0.0);
let rb = graph.def_ranks.get(b).copied().unwrap_or(0.0);
ra.total_cmp(&rb)
})
.unwrap_or(members[0]);
let total_lines: usize = members
.iter()
.copied()
.map(|flat| {
let file_idx = flat_to_file_idx(&graph.def_offsets, flat);
let def_idx = flat - graph.def_offsets[file_idx];
let def = graph.files.get(file_idx).and_then(|f| f.defs.get(def_idx));
def.map(|d| (d.end_line as usize).saturating_sub(d.start_line as usize))
.unwrap_or(0)
})
.sum();
let mut member_def_indices: Vec<usize> = std::iter::once(root_flat)
.chain(members.iter().copied().filter(|&m| m != root_flat))
.collect();
if member_def_indices.len() > 1 {
member_def_indices[1..].sort_unstable();
}
DeadCluster {
root_def_idx: root_flat,
size: member_def_indices.len(),
total_lines,
member_def_indices,
}
})
.collect();
clusters.sort_by(|a, b| {
b.size
.cmp(&a.size)
.then(b.root_def_idx.cmp(&a.root_def_idx))
});
#[expect(
clippy::cast_precision_loss,
reason = "def counts fit comfortably in f32"
)]
let dead_fraction = if n_defs > 0 {
total_dead_defs as f32 / n_defs as f32
} else {
0.0
};
DeadCodeReport {
dead_clusters: clusters,
total_dead_defs,
total_live_defs,
dead_fraction,
}
}
impl RepoGraph {
#[must_use]
pub fn def_rank(&self, did: DefId) -> f32 {
let flat = self.def_offsets[did.0 as usize] + did.1 as usize;
self.def_ranks.get(flat).copied().unwrap_or(0.0)
}
#[must_use]
pub fn find_def(&self, file_path: &str, def_name: &str) -> Option<DefId> {
for (file_idx, file) in self.files.iter().enumerate() {
if file.path == file_path {
for (def_idx, def) in file.defs.iter().enumerate() {
if def.name == def_name {
#[expect(clippy::cast_possible_truncation)]
return Some((file_idx as u32, def_idx as u16));
}
}
}
}
None
}
#[must_use]
pub fn resolve_focus_file(&self, focus: &str) -> FocusResolution {
let normalized = normalize_focus_path(focus);
let matches: Vec<usize> = self
.files
.iter()
.enumerate()
.filter_map(|(idx, f)| {
if focus_matches_path(&f.path, normalized) {
Some(idx)
} else {
None
}
})
.collect();
match matches.len() {
0 => FocusResolution::NotFound,
1 => FocusResolution::Found(matches[0]),
_ => FocusResolution::Ambiguous(
matches
.into_iter()
.map(|i| self.files[i].path.clone())
.collect(),
),
}
}
}
#[derive(Debug, Clone)]
pub enum FocusResolution {
Found(usize),
NotFound,
Ambiguous(Vec<String>),
}
fn normalize_focus_path(focus: &str) -> &str {
focus.strip_prefix("./").unwrap_or(focus)
}
fn focus_matches_path(stored_path: &str, focus: &str) -> bool {
if focus.is_empty() {
return false;
}
if stored_path == focus {
return true;
}
stored_path.len() > focus.len()
&& stored_path.ends_with(focus)
&& stored_path.as_bytes()[stored_path.len() - focus.len() - 1] == b'/'
}
#[must_use]
pub fn build_neighbor_lists(n: usize, edges: &[(u32, u32, u32)]) -> (Vec<Vec<u32>>, Vec<Vec<u32>>) {
let mut incoming: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
let mut outgoing: Vec<Vec<(u32, u32)>> = vec![vec![]; n];
for &(src, dst, w) in edges {
let (s, d) = (src as usize, dst as usize);
if s < n && d < n {
incoming[d].push((src, w));
outgoing[s].push((dst, w));
}
}
let trim = |lists: &mut [Vec<(u32, u32)>]| -> Vec<Vec<u32>> {
lists
.iter_mut()
.map(|list| {
list.sort_by_key(|b| std::cmp::Reverse(b.1));
list.iter()
.take(MAX_NEIGHBORS)
.map(|(idx, _)| *idx)
.collect()
})
.collect()
};
(trim(&mut incoming), trim(&mut outgoing))
}
#[must_use]
pub fn render(graph: &RepoGraph, max_tokens: usize, focus: Option<usize>) -> String {
let n = graph.files.len();
if n == 0 {
return String::new();
}
let ranks = if focus.is_some() {
pagerank(n, &graph.edges, focus)
} else {
graph.base_ranks.clone()
};
let mut sorted: Vec<usize> = (0..n).collect();
sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
let mut output = String::new();
let mut used_tokens = 0;
let max_chars = max_tokens * CHARS_PER_TOKEN;
for (rank_pos, &file_idx) in sorted.iter().enumerate() {
if used_tokens >= max_tokens {
break;
}
let file = &graph.files[file_idx];
let score = ranks[file_idx];
#[expect(clippy::cast_precision_loss, reason = "file counts fit in f32")]
let percentile = (rank_pos as f32) / (n as f32);
let section = if percentile < 0.1 {
render_tier0(graph, file_idx, file, score)
} else if percentile < 0.3 {
render_tier1(file, score)
} else if percentile < 0.7 {
render_tier2(file, score)
} else {
render_tier3(file)
};
let section_chars = section.len();
if used_tokens > 0 && used_tokens + section_chars / CHARS_PER_TOKEN > max_tokens {
let path_line = format!("{}\n", file.path);
let path_tokens = path_line.len() / CHARS_PER_TOKEN;
if used_tokens + path_tokens <= max_tokens {
output.push_str(&path_line);
}
break;
}
output.push_str(§ion);
used_tokens = output.len().min(max_chars) / CHARS_PER_TOKEN;
}
output
}
fn render_tier0(graph: &RepoGraph, file_idx: usize, file: &FileNode, score: f32) -> String {
let mut out = format!("## {} (rank: {score:.4})\n", file.path);
if file_idx < graph.callers.len() && !graph.callers[file_idx].is_empty() {
let _ = write!(out, " called by: ");
let names: Vec<&str> = graph.callers[file_idx]
.iter()
.filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
.collect();
let _ = writeln!(out, "{}", names.join(", "));
}
if file_idx < graph.callees.len() && !graph.callees[file_idx].is_empty() {
let _ = write!(out, " calls: ");
let names: Vec<&str> = graph.callees[file_idx]
.iter()
.filter_map(|&idx| graph.files.get(idx as usize).map(|f| f.path.as_str()))
.collect();
let _ = writeln!(out, "{}", names.join(", "));
}
for def in &file.defs {
let scope_prefix = if def.scope.is_empty() {
String::new()
} else {
format!("{} > ", def.scope)
};
if let Some(sig) = &def.signature {
let _ = writeln!(out, " {scope_prefix}{} {sig}", def.kind);
} else {
let _ = writeln!(out, " {scope_prefix}{} {}", def.kind, def.name);
}
}
let _ = writeln!(out);
out
}
fn render_tier1(file: &FileNode, score: f32) -> String {
let mut out = format!("## {} (rank: {score:.4})\n", file.path);
for def in &file.defs {
if let Some(sig) = &def.signature {
let _ = writeln!(out, " {sig}");
} else {
let _ = writeln!(out, " {} {}", def.kind, def.name);
}
}
let _ = writeln!(out);
out
}
fn render_tier2(file: &FileNode, score: f32) -> String {
let mut out = format!("{} (rank: {score:.4})", file.path);
if !file.defs.is_empty() {
let names: Vec<String> = file
.defs
.iter()
.map(|d| format!("{}:{}", d.kind, d.name))
.collect();
let _ = write!(out, " -- {}", names.join(", "));
}
let _ = writeln!(out);
out
}
fn render_tier3(file: &FileNode) -> String {
format!("{}\n", file.path)
}
fn file_lsp_location(path: &str) -> RepoMapLspLocation {
RepoMapLspLocation {
file_path: if path.starts_with("./") || path.starts_with('/') {
path.to_string()
} else {
format!("./{path}")
},
start_line: 0,
start_character: 0,
end_line: 0,
end_character: 0,
}
}
fn content_kind_for_path(path: &str) -> ContentKind {
let ext = std::path::Path::new(path)
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
ContentKind::from_extension(ext)
}
const FILE_ENVELOPE_MIN_BYTES: usize = 250;
const FILE_MIN_USEFUL_BYTES: usize = 600;
const CALLS_BUDGET_FRACTION: f64 = 0.30;
const MAX_FILE_SHARE: f64 = 0.40;
fn ast_kind_priority(kind: &str) -> u32 {
match kind {
"trait_item" | "interface" | "trait" => 30,
"struct_item" | "struct" | "class_definition" | "class" => 29,
"enum_item" | "enum" => 28,
"type_item" | "type_alias_declaration" | "type_alias" => 27,
"mod_item" | "module" | "namespace" => 26,
"function_item" | "function_definition" | "function" | "method_definition" => 20,
"impl_item" | "impl" => 19,
"const_item" | "const_declaration" | "const" => 10,
"static_item" | "static" => 9,
_ => 0,
}
}
fn effective_priority(kind: &str, def_rank: f32, promo_1: f32, promo_2: f32) -> u32 {
let base = ast_kind_priority(kind);
let promo_tiers: u32 = u32::from(def_rank > promo_1) + u32::from(def_rank > promo_2);
base + promo_tiers * 10
}
fn estimate_symbol_bytes(name: &str) -> usize {
165 + name.len()
}
fn estimate_call_bytes(target_path: &str) -> usize {
120 + target_path.len()
}
#[must_use]
#[expect(
clippy::cast_precision_loss,
reason = "rank sums and counts are small f32/f64; precision loss is acceptable"
)]
#[expect(
clippy::too_many_lines,
reason = "the three-step allocation algorithm (file-share → symbol-fill → calls-fill) \
is sequential and share state; splitting into helpers would require passing \
mutable slices across three boundaries with no clarity gain"
)]
pub fn render_json_budgeted(
graph: &RepoGraph,
token_budget: usize,
focus: Option<usize>,
include_metadata: bool,
) -> GetRepoMapResponse {
let n = graph.files.len();
if n == 0 {
let budget_bytes = token_budget * CHARS_PER_TOKEN;
return GetRepoMapResponse {
files: vec![],
total_files: 0,
estimated_bytes: 0,
budget_bytes,
budget_exhausted: false,
capped: false,
};
}
let budget_total_bytes = token_budget * CHARS_PER_TOKEN;
let ranks = if focus.is_some() {
pagerank(n, &graph.edges, focus)
} else {
graph.base_ranks.clone()
};
let mut sorted: Vec<usize> = (0..n).collect();
sorted.sort_by(|&a, &b| ranks[b].total_cmp(&ranks[a]));
let eligible: Vec<usize> = if include_metadata {
sorted
} else {
sorted
.into_iter()
.filter(|&idx| {
let kind = content_kind_for_path(&graph.files[idx].path);
kind != ContentKind::Meta
})
.collect()
};
let total_files = eligible.len();
let corpus_reference_rank: f32 = {
let mut nonzero: Vec<f32> = graph
.def_ranks
.iter()
.copied()
.filter(|r| *r > 0.0)
.collect();
if nonzero.is_empty() {
0.0
} else {
nonzero.sort_unstable_by(f32::total_cmp);
let n = nonzero.len();
let idx = (3 * (n - 1)) / 4;
nonzero[idx]
}
};
let promo_1_threshold = corpus_reference_rank * 4.0; let promo_2_threshold = corpus_reference_rank * 16.0;
let max_admissible = budget_total_bytes / FILE_MIN_USEFUL_BYTES;
let admit_count = eligible.len().min(max_admissible.max(1));
let budget_f64 = budget_total_bytes as f64;
let admitted_rank_sum: f64 = eligible
.iter()
.take(admit_count)
.map(|&idx| f64::from(ranks[idx]))
.sum();
let admitted_rank_sum = if admitted_rank_sum > 0.0 {
admitted_rank_sum
} else {
1.0
};
let mut included_indices: Vec<usize> = Vec::new(); let mut file_budgets: Vec<usize> = Vec::new();
let mut cumulative_budget: usize = 0;
for (i, &file_idx) in eligible.iter().take(admit_count).enumerate() {
let file_rank = f64::from(ranks[file_idx]);
let raw_share = budget_f64 * file_rank / admitted_rank_sum;
let capped = raw_share.min(budget_f64 * MAX_FILE_SHARE);
#[expect(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
reason = "capped is non-negative and bounded by budget_total_bytes (a usize)"
)]
let budget_i = (capped as usize).max(FILE_MIN_USEFUL_BYTES);
if cumulative_budget + budget_i > budget_total_bytes && !included_indices.is_empty() {
break;
}
cumulative_budget += budget_i;
included_indices.push(i);
file_budgets.push(budget_i);
}
let mut result_files: Vec<RepoMapFile> = Vec::with_capacity(included_indices.len());
let mut leftover: usize = 0;
for (slot, &eligible_i) in included_indices.iter().enumerate() {
let file_idx = eligible[eligible_i];
let file = &graph.files[file_idx];
let file_rank = ranks[file_idx];
let file_path_lsp = file_lsp_location(&file.path);
let budget_in = file_budgets[slot] + leftover;
let post_envelope = budget_in.saturating_sub(FILE_ENVELOPE_MIN_BYTES);
#[expect(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
reason = "post_envelope * fraction is bounded by post_envelope (a usize)"
)]
let calls_reserve = (post_envelope as f64 * CALLS_BUDGET_FRACTION) as usize;
let symbols_budget = FILE_ENVELOPE_MIN_BYTES + post_envelope.saturating_sub(calls_reserve);
let mut used: usize = FILE_ENVELOPE_MIN_BYTES;
let def_count = file.defs.len();
let def_offset = if file_idx < graph.def_offsets.len() {
graph.def_offsets[file_idx]
} else {
0
};
let mut def_rank_pairs: Vec<(usize, f32, u32, u32)> = (0..def_count)
.map(|di| {
let flat = def_offset + di;
let r = graph.def_ranks.get(flat).copied().unwrap_or(0.0);
let kind_prio = ast_kind_priority(&file.defs[di].kind);
let decl_order = file.defs[di].start_byte;
(di, r, kind_prio, decl_order)
})
.collect();
def_rank_pairs.sort_unstable_by(|a, b| {
let eff_a = effective_priority(
&file.defs[a.0].kind,
a.1,
promo_1_threshold,
promo_2_threshold,
);
let eff_b = effective_priority(
&file.defs[b.0].kind,
b.1,
promo_1_threshold,
promo_2_threshold,
);
eff_b
.cmp(&eff_a)
.then_with(|| b.1.total_cmp(&a.1))
.then_with(|| a.3.cmp(&b.3))
});
let top_def_rank = def_rank_pairs.first().map(|&(_, r, _, _)| r).unwrap_or(0.0);
let mut symbols: Vec<RepoMapSymbol> = Vec::new();
let mut truncated_symbols: usize = 0;
let mut tier_pos: usize = 0;
let mut current_tier: Option<u32> = None;
let mut tier_top_rank: f32 = top_def_rank;
for (pos, &(di, def_r, kind_prio, _)) in def_rank_pairs.iter().enumerate() {
if current_tier != Some(kind_prio) {
current_tier = Some(kind_prio);
tier_pos = 0;
tier_top_rank = def_r;
}
let cutoff = if tier_top_rank > 0.0 {
tier_top_rank / (1.0 + (tier_pos as f32 + 1.0).ln())
} else {
0.0
};
if def_r < cutoff {
truncated_symbols += 1;
tier_pos += 1;
continue;
}
let def = &file.defs[di];
let sym_bytes = estimate_symbol_bytes(&def.name);
if used + sym_bytes > symbols_budget {
truncated_symbols += def_rank_pairs.len() - pos;
break;
}
let kind = crate::languages::lsp_symbol_kind_for_node_kind(&def.kind);
let line_0 = def.start_line.saturating_sub(1) as usize;
symbols.push(RepoMapSymbol {
name: def.name.clone(),
kind,
lsp_location: RepoMapLspLocation {
file_path: file_path_lsp.file_path.clone(),
start_line: line_0,
start_character: 0,
end_line: line_0,
end_character: 0,
},
rank: def_r,
});
used += sym_bytes;
tier_pos += 1;
}
let callee_indices: Vec<usize> = if file_idx < graph.callees.len() {
let mut callees: Vec<(usize, f32)> = graph.callees[file_idx]
.iter()
.filter_map(|&ci| {
let ci = ci as usize;
graph.files.get(ci).map(|_| {
let r = graph.base_ranks.get(ci).copied().unwrap_or(0.0);
(ci, r)
})
})
.collect();
callees.sort_unstable_by(|a, b| b.1.total_cmp(&a.1));
callees.into_iter().map(|(ci, _)| ci).collect()
} else {
vec![]
};
let call_total = callee_indices.len();
let top_call_rank = callee_indices
.first()
.and_then(|&ci| graph.base_ranks.get(ci))
.copied()
.unwrap_or(0.0);
let mut calls: Vec<RepoMapCall> = Vec::new();
let mut truncated_calls: usize = 0;
for (pos, &ci) in callee_indices.iter().enumerate() {
let callee_rank = graph.base_ranks.get(ci).copied().unwrap_or(0.0);
let cutoff = if top_call_rank > 0.0 {
top_call_rank / (1.0 + (pos as f32 + 1.0).ln())
} else {
0.0
};
if callee_rank < cutoff {
truncated_calls += call_total - pos;
break;
}
let callee_path = &graph.files[ci].path;
let call_bytes = estimate_call_bytes(callee_path);
if used + call_bytes > budget_in {
truncated_calls += call_total - pos;
break;
}
calls.push(RepoMapCall {
lsp_location: file_lsp_location(callee_path),
rank: callee_rank,
});
used += call_bytes;
}
leftover = budget_in.saturating_sub(used);
result_files.push(RepoMapFile {
lsp_location: file_path_lsp,
rank: file_rank,
content_kind: content_kind_tag(content_kind_for_path(&file.path)),
calls,
symbols,
truncated_symbols,
truncated_calls,
});
}
let estimated_bytes = serde_json::to_string(&result_files)
.map(|s| s.len())
.unwrap_or(0);
let budget_exhausted = total_files > result_files.len();
GetRepoMapResponse {
files: result_files,
total_files,
estimated_bytes,
budget_bytes: budget_total_bytes,
budget_exhausted,
capped: budget_exhausted,
}
}
#[must_use]
pub fn render_json(
graph: &RepoGraph,
max_files: usize,
focus: Option<usize>,
include_metadata: bool,
) -> GetRepoMapResponse {
let token_budget = max_files.saturating_mul(2000);
render_json_budgeted(graph, token_budget, focus, include_metadata)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_pagerank_simple() {
let edges = vec![(0, 1, 1), (1, 2, 1), (2, 0, 1)];
let ranks = pagerank(3, &edges, None);
assert_eq!(ranks.len(), 3);
let sum: f32 = ranks.iter().sum();
assert!(
(sum - 1.0).abs() < 0.01,
"ranks should sum to ~1.0, got {sum}"
);
let expected = 1.0 / 3.0;
for (i, &r) in ranks.iter().enumerate() {
assert!(
(r - expected).abs() < 0.05,
"rank[{i}] = {r}, expected ~{expected}"
);
}
}
#[test]
fn test_pagerank_star() {
let edges = vec![(0, 3, 1), (1, 3, 1), (2, 3, 1)];
let ranks = pagerank(4, &edges, None);
assert_eq!(ranks.len(), 4);
let max_idx = ranks
.iter()
.enumerate()
.max_by(|a, b| a.1.total_cmp(b.1))
.unwrap()
.0;
assert_eq!(max_idx, 3, "node 3 should have highest rank");
assert!(
ranks[3] > ranks[0],
"rank[3]={} should be > rank[0]={}",
ranks[3],
ranks[0]
);
}
#[test]
fn test_pagerank_topic_sensitive() {
let n = 10_usize;
#[expect(clippy::cast_possible_truncation, reason = "test: n << u32::MAX")]
let edges: Vec<(u32, u32, u32)> = (0..(n - 1))
.map(|i| (i as u32, (i + 1) as u32, 1_u32))
.collect();
let uniform_ranks = pagerank(n, &edges, None);
let biased_ranks = pagerank(n, &edges, Some(0));
assert!(
biased_ranks[0] > uniform_ranks[0],
"focused rank[0]={} should be > uniform rank[0]={}",
biased_ranks[0],
uniform_ranks[0]
);
}
#[test]
fn test_focus_file_topic_pagerank_preserves_rank_dispersion() {
let n = 10_usize;
#[expect(clippy::cast_possible_truncation, reason = "test: n << u32::MAX")]
let edges: Vec<(u32, u32, u32)> = (1..n).map(|i| (i as u32, 0_u32, 1_u32)).collect();
let ranks_focused = pagerank(n, &edges, Some(1));
let focus_rank = ranks_focused[1];
let sum_non_focus: f32 = ranks_focused
.iter()
.enumerate()
.filter(|&(i, _)| i != 1)
.map(|(_, &r)| r)
.sum();
let n_non_focus = (n - 1) as f32;
let avg_non_focus = sum_non_focus / n_non_focus;
let dispersion_ratio = focus_rank / avg_non_focus;
eprintln!(
"J1 dispersion: focus_rank={focus_rank:.6}, avg_non_focus={avg_non_focus:.6}, \
ratio={dispersion_ratio:.2}× (must be <= 40×)"
);
assert!(
dispersion_ratio <= 40.0,
"focus rank is {dispersion_ratio:.1}× avg non-focus rank (must be ≤ 40×); \
pre-fix baseline was ~200× due to 70% concentration — I#16"
);
let total: f32 = ranks_focused.iter().sum();
assert!(
(total - 1.0).abs() < 0.01,
"ranks must sum to ≈1.0; got {total}"
);
}
#[test]
fn test_focus_file_topic_pagerank_does_not_collapse_other_files() {
let n = 10_usize;
#[expect(clippy::cast_possible_truncation, reason = "test: n << u32::MAX")]
let edges: Vec<(u32, u32, u32)> = (0..(n - 1))
.map(|i| (i as u32, (i + 1) as u32, 1_u32))
.collect();
let ranks = pagerank(n, &edges, Some(0));
let focus_rank = ranks[0];
for (i, &r) in ranks.iter().enumerate().skip(1) {
assert!(
r >= focus_rank * 0.10,
"rank[{i}] = {r:.6} is < 10% of focus rank {focus_rank:.6}; \
non-focus files must not collapse to near-zero (I#16)"
);
}
}
#[test]
fn test_focus_file_returns_neighborhood_not_just_focus() {
let n = 12_usize;
#[expect(clippy::cast_possible_truncation, reason = "test: n << u32::MAX")]
let edges: Vec<(u32, u32, u32)> = (1..n).map(|i| (i as u32, 0_u32, 1_u32)).collect();
let base_ranks = pagerank(n, &edges, None);
let (callers, callees) = build_neighbor_lists(n, &edges);
let file_nodes: Vec<FileNode> = (0..n)
.map(|i| FileNode {
path: format!("src/file_{i}.rs"),
defs: vec![Definition {
name: format!("func_{i}"),
kind: "function_item".to_string(),
start_line: 1,
end_line: 5,
scope: String::new(),
signature: Some(format!("fn func_{i}() -> i32")),
start_byte: 0,
end_byte: 100,
calls: vec![],
}],
imports: vec![],
})
.collect();
let graph = RepoGraph {
files: file_nodes,
edges,
base_ranks,
callers,
callees,
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0],
alpha: 0.5,
};
let budget = 2000; let unfocused = render_json_budgeted(&graph, budget, None, false);
let focused = render_json_budgeted(&graph, budget, Some(1), false);
let unfocused_n = unfocused.files.len();
let focused_n = focused.files.len();
#[expect(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
reason = "unfocused_n is a file count (small, positive); f32 multiplication \
by 0.80 and ceil produce a value in [0, n]; truncation to usize is safe"
)]
let min_expected = (unfocused_n as f32 * 0.80).ceil() as usize;
eprintln!(
"J2 neighborhood: unfocused={unfocused_n} files, focused={focused_n} files \
(need ≥ {min_expected})"
);
assert!(
focused_n >= min_expected,
"focused call returned {focused_n} files; expected ≥ {min_expected} \
(80% of unfocused {unfocused_n}); soft personalization must preserve \
rank dispersion across files (I#16/J2)"
);
}
#[test]
fn test_focus_delta_topic_fingerprinting_works() {
let n = 8_usize;
#[expect(clippy::cast_possible_truncation, reason = "test: n << u32::MAX")]
let edges: Vec<(u32, u32, u32)> = (0..n)
.flat_map(|i| {
let next = ((i + 1) % n) as u32;
let curr = i as u32;
[(curr, next, 1_u32), (next, curr, 1_u32)]
})
.collect();
let ranks_uniform = pagerank(n, &edges, None);
let ranks_focused = pagerank(n, &edges, Some(3));
let top_idx = ranks_focused
.iter()
.enumerate()
.max_by(|a, b| a.1.total_cmp(b.1))
.map(|(i, _)| i)
.unwrap();
assert_eq!(
top_idx, 3,
"with focus=Some(3), node 3 must have highest rank; top was {top_idx}"
);
let uniform_max = ranks_uniform
.iter()
.copied()
.fold(f32::NEG_INFINITY, f32::max);
let uniform_min = ranks_uniform.iter().copied().fold(f32::INFINITY, f32::min);
assert!(
(uniform_max - uniform_min).abs() < 0.01,
"on a ring without focus all ranks should be ≈equal; max={uniform_max:.6} min={uniform_min:.6}"
);
let focus_rank = ranks_focused[3];
for (i, &r) in ranks_focused.iter().enumerate().filter(|&(i, _)| i != 3) {
assert!(
r >= focus_rank * 0.05,
"rank[{i}]={r:.6} is < 5% of focus rank {focus_rank:.6}; \
soft personalization must preserve non-focus ranks"
);
}
}
fn focus_resolver_graph() -> RepoGraph {
let file_nodes: Vec<FileNode> = vec![
FileNode {
path: "device_opt/services/storage.py".to_string(),
defs: vec![],
imports: vec![],
},
FileNode {
path: "device_opt/ui/textual/screens/settings.py".to_string(),
defs: vec![],
imports: vec![],
},
FileNode {
path: "device_opt/services/registry.py".to_string(),
defs: vec![],
imports: vec![],
},
FileNode {
path: "tests/test_storage.py".to_string(),
defs: vec![],
imports: vec![],
},
];
let n = file_nodes.len();
RepoGraph {
files: file_nodes,
edges: vec![],
base_ranks: vec![1.0 / n as f32; n],
callers: vec![vec![]; n],
callees: vec![vec![]; n],
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0; n + 1],
alpha: 0.5,
}
}
#[test]
fn test_focus_file_resolver_accepts_lsp_location_path() {
let g = focus_resolver_graph();
let res = g.resolve_focus_file("./device_opt/ui/textual/screens/settings.py");
match res {
FocusResolution::Found(idx) => {
assert_eq!(
g.files[idx].path, "device_opt/ui/textual/screens/settings.py",
"resolver must accept the ./-prefixed LSP path form (I#20)"
);
}
FocusResolution::NotFound | FocusResolution::Ambiguous(_) => {
panic!(
"resolver returned {res:?} for ./device_opt/ui/textual/screens/settings.py; \
the LSP-shaped path form must resolve to exactly one file (I#20)"
);
}
}
}
#[test]
fn test_focus_file_resolver_accepts_bare_stored_path() {
let g = focus_resolver_graph();
let res = g.resolve_focus_file("device_opt/services/storage.py");
match res {
FocusResolution::Found(idx) => {
assert_eq!(g.files[idx].path, "device_opt/services/storage.py");
}
other => panic!("expected Found, got {other:?}"),
}
}
#[test]
fn test_focus_file_resolver_strict_suffix_and_ambiguity() {
let g = focus_resolver_graph();
let res = g.resolve_focus_file("storage.py");
assert!(
matches!(res, FocusResolution::Found(_)),
"strict-suffix `storage.py` must match exactly one file (the `_` in \
test_storage.py blocks the strict-suffix), got {res:?}"
);
let mut g2 = g.clone();
g2.files.push(FileNode {
path: "vendored/services/storage.py".to_string(),
defs: vec![],
imports: vec![],
});
g2.base_ranks.push(0.0);
g2.callers.push(vec![]);
g2.callees.push(vec![]);
g2.def_offsets.push(*g2.def_offsets.last().unwrap());
let res = g2.resolve_focus_file("storage.py");
match res {
FocusResolution::Ambiguous(cands) => {
assert_eq!(cands.len(), 2, "expected two candidates, got {cands:?}");
}
other => panic!("expected Ambiguous, got {other:?}"),
}
}
#[test]
fn test_focus_file_resolver_not_found() {
let g = focus_resolver_graph();
let res = g.resolve_focus_file("./does/not/exist.py");
assert!(
matches!(res, FocusResolution::NotFound),
"expected NotFound, got {res:?}"
);
}
#[test]
fn test_focus_file_resolver_empty_input_is_not_found() {
let g = focus_resolver_graph();
let res = g.resolve_focus_file("");
assert!(
matches!(res, FocusResolution::NotFound),
"empty focus must not match anything, got {res:?}"
);
}
#[test]
#[expect(
clippy::too_many_lines,
reason = "synthetic Python-shaped graph (five FileNodes with defs + \
CallRefs + ImportRefs) plus the two-call assertion sequence \
is inherently long; splitting into helpers would obscure the \
one-shot reproduction the test is locking in."
)]
fn test_focus_file_rank_delta_visible_on_python_corpus() {
let mut files: Vec<FileNode> = vec![
FileNode {
path: "device_opt/services/storage.py".to_string(),
defs: vec![
Definition {
name: "ScanStore".to_string(),
kind: "class_definition".to_string(),
start_line: 1,
end_line: 80,
scope: String::new(),
signature: None,
start_byte: 0,
end_byte: 2000,
calls: vec![],
},
Definition {
name: "save_scan".to_string(),
kind: "function_definition".to_string(),
start_line: 20,
end_line: 40,
scope: "class_definition ScanStore".to_string(),
signature: Some("def save_scan(self, scan)".to_string()),
start_byte: 200,
end_byte: 600,
calls: vec![],
},
],
imports: vec![],
},
FileNode {
path: "device_opt/services/registry.py".to_string(),
defs: vec![Definition {
name: "register".to_string(),
kind: "function_definition".to_string(),
start_line: 1,
end_line: 30,
scope: String::new(),
signature: Some("def register(svc)".to_string()),
start_byte: 0,
end_byte: 600,
calls: vec![CallRef {
name: "save_scan".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 100,
resolved: None,
}],
}],
imports: vec![ImportRef {
raw_path: "from device_opt.services import storage".to_string(),
resolved_idx: Some(0),
}],
},
FileNode {
path: "device_opt/ui/screens/browse.py".to_string(),
defs: vec![Definition {
name: "browse_scans".to_string(),
kind: "function_definition".to_string(),
start_line: 1,
end_line: 50,
scope: String::new(),
signature: Some("def browse_scans(app)".to_string()),
start_byte: 0,
end_byte: 1000,
calls: vec![CallRef {
name: "save_scan".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 200,
resolved: None,
}],
}],
imports: vec![ImportRef {
raw_path: "from device_opt.services import storage".to_string(),
resolved_idx: Some(0),
}],
},
FileNode {
path: "device_opt/ui/screens/settings.py".to_string(),
defs: vec![Definition {
name: "open_settings".to_string(),
kind: "function_definition".to_string(),
start_line: 1,
end_line: 40,
scope: String::new(),
signature: Some("def open_settings(app)".to_string()),
start_byte: 0,
end_byte: 800,
calls: vec![CallRef {
name: "register".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 150,
resolved: None,
}],
}],
imports: vec![ImportRef {
raw_path: "from device_opt.services import registry".to_string(),
resolved_idx: Some(1),
}],
},
FileNode {
path: "tests/test_storage.py".to_string(),
defs: vec![Definition {
name: "test_save".to_string(),
kind: "function_definition".to_string(),
start_line: 1,
end_line: 20,
scope: String::new(),
signature: Some("def test_save()".to_string()),
start_byte: 0,
end_byte: 400,
calls: vec![CallRef {
name: "save_scan".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 50,
resolved: None,
}],
}],
imports: vec![ImportRef {
raw_path: "from device_opt.services import storage".to_string(),
resolved_idx: Some(0),
}],
},
];
let def_index = build_def_index(&files);
resolve_calls(&mut files, &def_index, &HashMap::new());
let graph = build_graph_from_files_pub(files);
assert!(
!graph.edges.is_empty(),
"Python-shaped synthetic graph must produce file-level edges; got 0. \
The CallRefs may have failed to resolve."
);
let focus_idx = match graph.resolve_focus_file("./device_opt/ui/screens/settings.py") {
FocusResolution::Found(i) => i,
other => panic!("resolver must find settings.py via LSP-shaped path, got {other:?}"),
};
let budget = 4000;
let unfocused = render_json_budgeted(&graph, budget, None, false);
let focused = render_json_budgeted(&graph, budget, Some(focus_idx), false);
let unfocused_ranks: std::collections::HashMap<String, f32> = unfocused
.files
.iter()
.map(|f| (f.lsp_location.file_path.clone(), f.rank))
.collect();
let focused_ranks: std::collections::HashMap<String, f32> = focused
.files
.iter()
.map(|f| (f.lsp_location.file_path.clone(), f.rank))
.collect();
eprintln!("T1 Python — unfocused ranks: {unfocused_ranks:#?}");
eprintln!("T1 Python — focused ranks: {focused_ranks:#?}");
let focus_path = "./device_opt/ui/screens/settings.py";
let mut max_delta_ratio = 0.0_f32;
for (path, &u_rank) in &unfocused_ranks {
if path == focus_path {
continue;
}
if let Some(&f_rank) = focused_ranks.get(path)
&& u_rank > 0.0
{
let ratio = (f_rank - u_rank).abs() / u_rank;
if ratio > max_delta_ratio {
max_delta_ratio = ratio;
}
}
}
assert!(
max_delta_ratio >= 0.05,
"focus_file must rebias non-focus file ranks by ≥ 5%; \
max observed delta ratio = {max_delta_ratio:.3} \
(I#20: focus_file invisible on Python corpora)"
);
let any_changed = unfocused_ranks.iter().any(|(path, &u_rank)| {
path != focus_path
&& focused_ranks
.get(path)
.is_some_and(|&f_rank| f_rank.to_bits() != u_rank.to_bits())
});
assert!(
any_changed,
"no non-focus file rank changed across focused/unfocused calls — \
bit-identical pathology (I#20). unfocused={unfocused_ranks:#?} \
focused={focused_ranks:#?}"
);
}
#[test]
#[expect(
clippy::too_many_lines,
reason = "synthetic Rust-shaped graph with four FileNodes plus the \
two-call assertion sequence inherently exceeds the 100-line \
cap; the test mirrors the Python-shaped sibling."
)]
fn test_focus_file_rank_delta_preserved_on_rust_corpus() {
let mut files: Vec<FileNode> = vec![
FileNode {
path: "src/lib.rs".to_string(),
defs: vec![Definition {
name: "Engine".to_string(),
kind: "struct_item".to_string(),
start_line: 1,
end_line: 30,
scope: String::new(),
signature: None,
start_byte: 0,
end_byte: 600,
calls: vec![],
}],
imports: vec![],
},
FileNode {
path: "src/encoder/mod.rs".to_string(),
defs: vec![Definition {
name: "encode".to_string(),
kind: "function_item".to_string(),
start_line: 1,
end_line: 40,
scope: String::new(),
signature: Some("fn encode(input: &str) -> Vec<f32>".to_string()),
start_byte: 0,
end_byte: 800,
calls: vec![],
}],
imports: vec![ImportRef {
raw_path: "use crate::lib;".to_string(),
resolved_idx: Some(0),
}],
},
FileNode {
path: "src/search.rs".to_string(),
defs: vec![Definition {
name: "search".to_string(),
kind: "function_item".to_string(),
start_line: 1,
end_line: 30,
scope: String::new(),
signature: Some("fn search(q: &str) -> Hits".to_string()),
start_byte: 0,
end_byte: 600,
calls: vec![CallRef {
name: "encode".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 100,
resolved: None,
}],
}],
imports: vec![ImportRef {
raw_path: "use crate::encoder;".to_string(),
resolved_idx: Some(1),
}],
},
FileNode {
path: "src/cli.rs".to_string(),
defs: vec![Definition {
name: "main".to_string(),
kind: "function_item".to_string(),
start_line: 1,
end_line: 20,
scope: String::new(),
signature: Some("fn main()".to_string()),
start_byte: 0,
end_byte: 400,
calls: vec![CallRef {
name: "search".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 50,
resolved: None,
}],
}],
imports: vec![ImportRef {
raw_path: "use crate::search;".to_string(),
resolved_idx: Some(2),
}],
},
];
let def_index = build_def_index(&files);
resolve_calls(&mut files, &def_index, &HashMap::new());
let graph = build_graph_from_files_pub(files);
assert!(
!graph.edges.is_empty(),
"Rust-shaped synthetic graph must produce edges"
);
let focus_idx = match graph.resolve_focus_file("./src/encoder/mod.rs") {
FocusResolution::Found(i) => i,
other => panic!("resolver must find encoder/mod.rs via LSP path, got {other:?}"),
};
let budget = 4000;
let unfocused = render_json_budgeted(&graph, budget, None, false);
let focused = render_json_budgeted(&graph, budget, Some(focus_idx), false);
let unfocused_ranks: std::collections::HashMap<String, f32> = unfocused
.files
.iter()
.map(|f| (f.lsp_location.file_path.clone(), f.rank))
.collect();
let focused_ranks: std::collections::HashMap<String, f32> = focused
.files
.iter()
.map(|f| (f.lsp_location.file_path.clone(), f.rank))
.collect();
eprintln!("T1 Rust — unfocused: {unfocused_ranks:#?}");
eprintln!("T1 Rust — focused: {focused_ranks:#?}");
let focus_path = "./src/encoder/mod.rs";
let mut max_delta_ratio = 0.0_f32;
for (path, &u_rank) in &unfocused_ranks {
if path == focus_path {
continue;
}
if let Some(&f_rank) = focused_ranks.get(path)
&& u_rank > 0.0
{
let ratio = (f_rank - u_rank).abs() / u_rank;
if ratio > max_delta_ratio {
max_delta_ratio = ratio;
}
}
}
assert!(
max_delta_ratio >= 0.05,
"focus_file must rebias non-focus file ranks by ≥ 5% on Rust shapes; \
max observed delta = {max_delta_ratio:.3}"
);
}
#[test]
fn test_pagerank_empty() {
let ranks = pagerank(0, &[], None);
assert!(ranks.is_empty());
}
#[test]
fn test_render_tiers() {
let files: Vec<FileNode> = (0..10)
.map(|i| FileNode {
path: format!("src/file_{i}.rs"),
defs: vec![Definition {
name: format!("func_{i}"),
kind: "function_item".to_string(),
start_line: 1,
end_line: 5,
scope: String::new(),
signature: Some(format!("func_{i}(x: i32) -> i32")),
start_byte: 0,
end_byte: 0,
calls: vec![],
}],
imports: vec![],
})
.collect();
let edges: Vec<(u32, u32, u32)> = (1..10).map(|i| (i, 0, 1)).collect();
let base_ranks = pagerank(10, &edges, None);
let (top_callers, top_callees) = build_neighbor_lists(10, &edges);
let graph = RepoGraph {
files,
edges,
base_ranks,
callers: top_callers,
callees: top_callees,
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0],
alpha: 0.5,
};
let full = render(&graph, 10_000, None);
assert!(
full.contains("file_0"),
"output should contain the top-ranked file"
);
assert!(
full.contains("## src/file_0.rs"),
"top file should have tier 0 heading"
);
let small = render(&graph, 10, None);
assert!(
!small.is_empty(),
"even tiny budget should produce some output"
);
let full_lines = full.lines().count();
let small_lines = small.lines().count();
assert!(
small_lines < full_lines,
"small budget ({small_lines} lines) should have fewer lines than full ({full_lines})"
);
}
#[test]
fn test_render_empty_graph() {
let graph = RepoGraph {
files: vec![],
edges: vec![],
base_ranks: vec![],
callers: vec![],
callees: vec![],
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0],
alpha: 0.5,
};
let output = render(&graph, 1000, None);
assert!(output.is_empty(), "empty graph should render empty string");
}
#[test]
fn test_build_graph_on_fixtures() {
let fixtures = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap()
.join("tests")
.join("fixtures");
let graph = build_graph(&fixtures).expect("build_graph should succeed on fixtures");
assert!(
!graph.files.is_empty(),
"graph should contain files from fixtures"
);
let rs_file = graph.files.iter().find(|f| f.path.ends_with("sample.rs"));
assert!(rs_file.is_some(), "should find sample.rs");
let rs_file = rs_file.unwrap();
assert!(
!rs_file.defs.is_empty(),
"sample.rs should have definitions"
);
assert!(
rs_file.defs.iter().any(|d| d.name == "hello"),
"should find 'hello' function in sample.rs"
);
let py_file = graph.files.iter().find(|f| f.path.ends_with("sample.py"));
assert!(py_file.is_some(), "should find sample.py");
let py_file = py_file.unwrap();
assert!(
!py_file.defs.is_empty(),
"sample.py should have definitions"
);
assert!(
py_file.defs.iter().any(|d| d.name == "greet"),
"should find 'greet' function in sample.py"
);
assert_eq!(graph.base_ranks.len(), graph.files.len());
let sum: f32 = graph.base_ranks.iter().sum();
assert!(
(sum - 1.0).abs() < 0.01,
"PageRank scores should sum to ~1.0, got {sum}"
);
}
#[test]
fn test_extract_imports_rust() {
let source = "use crate::foo::bar;\nuse std::collections::HashMap;\n";
let (lang, query) = import_query_for_extension("rs").unwrap();
let imports = extract_imports(source, &lang, &query);
assert_eq!(imports.len(), 2);
assert!(imports[0].contains("crate::foo::bar"));
}
#[test]
fn test_extract_imports_python_stub() {
let source = "from typing import Protocol\nimport pkg.types\n";
let (lang, query) = import_query_for_extension("pyi").unwrap();
let imports = extract_imports(source, &lang, &query);
assert_eq!(imports.len(), 2);
assert!(imports[0].contains("from typing import Protocol"));
assert!(imports[1].contains("import pkg.types"));
}
#[test]
fn test_resolve_python_import_to_stub_file() {
let root = PathBuf::from("/project");
let mut file_index = HashMap::new();
file_index.insert(PathBuf::from("/project/pkg/types.pyi"), 1);
let result = resolve_python_import("import pkg.types", &root, &file_index);
assert_eq!(result, Some(1));
}
#[test]
fn test_resolve_rust_crate_import() {
let root = PathBuf::from("/project");
let file_path = PathBuf::from("/project/src/main.rs");
let mut file_index = HashMap::new();
file_index.insert(PathBuf::from("/project/src/foo/bar.rs"), 1);
file_index.insert(PathBuf::from("/project/src/main.rs"), 0);
let result = resolve_rust_import("use crate::foo::bar;", &file_path, &root, &file_index);
assert_eq!(result, Some(1));
}
#[test]
fn test_resolve_rust_external_crate_dropped() {
let root = PathBuf::from("/project");
let file_path = PathBuf::from("/project/src/main.rs");
let file_index = HashMap::new();
let result = resolve_rust_import(
"use std::collections::HashMap;",
&file_path,
&root,
&file_index,
);
assert_eq!(result, None, "external crate imports should be dropped");
}
#[test]
fn test_neighbor_lists() {
let edges = vec![(0, 1, 1), (0, 2, 1), (1, 2, 1)];
let (incoming, outgoing) = build_neighbor_lists(3, &edges);
assert!(incoming[2].contains(&0));
assert!(incoming[2].contains(&1));
assert!(outgoing[0].contains(&1));
assert!(outgoing[0].contains(&2));
}
#[test]
fn test_scoped_identifier_calls_preserve_path() {
use crate::languages;
use streaming_iterator::StreamingIterator as _;
let source = "
mod mod_a {
pub fn foo() {}
}
mod mod_b {
pub fn foo() {}
}
fn caller() {
mod_a::foo();
mod_b::foo();
}
";
let call_config =
languages::call_query_for_extension("rs").expect("Rust call config must exist");
let lang_config =
languages::config_for_extension("rs").expect("Rust lang config must exist");
let mut defs = {
let mut parser = tree_sitter::Parser::new();
parser.set_language(&lang_config.language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();
let mut out = Vec::new();
let mut matches =
cursor.matches(&lang_config.query, tree.root_node(), source.as_bytes());
while let Some(m) = matches.next() {
let mut name = String::new();
let mut def_node = None;
for cap in m.captures {
let cname = &lang_config.query.capture_names()[cap.index as usize];
if *cname == "name" {
name = source[cap.node.start_byte()..cap.node.end_byte()].to_string();
} else if *cname == "def" {
def_node = Some(cap.node);
}
}
if let Some(node) = def_node {
#[expect(clippy::cast_possible_truncation)]
out.push(Definition {
name,
kind: node.kind().to_string(),
start_line: node.start_position().row as u32 + 1,
end_line: node.end_position().row as u32 + 1,
scope: String::new(),
signature: None,
start_byte: node.start_byte() as u32,
end_byte: node.end_byte() as u32,
calls: vec![],
});
}
}
out
};
extract_calls(source, &call_config, &mut defs);
let caller_def = defs
.iter()
.find(|d| d.name == "caller")
.expect("caller def");
let call_names: Vec<&str> = caller_def.calls.iter().map(|c| c.name.as_str()).collect();
let qualified_paths: Vec<Option<&str>> = caller_def
.calls
.iter()
.map(|c| c.qualified_path.as_deref())
.collect();
assert!(
call_names.contains(&"foo"),
"bare name 'foo' must appear for scoped calls; got: {call_names:?}"
);
assert!(
qualified_paths.contains(&Some("mod_a::foo")),
"qualified_path 'mod_a::foo' must appear; got: {qualified_paths:?}"
);
assert!(
qualified_paths.contains(&Some("mod_b::foo")),
"qualified_path 'mod_b::foo' must appear; got: {qualified_paths:?}"
);
assert!(
!call_names.contains(&"mod_a::foo"),
"full path 'mod_a::foo' must not appear in bare name; got: {call_names:?}"
);
}
#[test]
fn test_ambiguous_name_resolution_returns_all_or_none() {
let file_a = FileNode {
path: "mod_a.rs".to_string(),
defs: vec![Definition {
name: "Read".to_string(),
kind: "trait_item".to_string(),
start_line: 1,
end_line: 3,
scope: String::new(),
signature: None,
start_byte: 0,
end_byte: 50,
calls: vec![],
}],
imports: vec![],
};
let file_b = FileNode {
path: "mod_b.rs".to_string(),
defs: vec![Definition {
name: "Read".to_string(),
kind: "trait_item".to_string(),
start_line: 1,
end_line: 3,
scope: String::new(),
signature: None,
start_byte: 0,
end_byte: 50,
calls: vec![],
}],
imports: vec![],
};
let file_c = FileNode {
path: "caller.rs".to_string(),
defs: vec![Definition {
name: "do_thing".to_string(),
kind: "function_item".to_string(),
start_line: 1,
end_line: 5,
scope: String::new(),
signature: None,
start_byte: 0,
end_byte: 100,
calls: vec![CallRef {
name: "Read".to_string(),
qualified_path: None,
receiver_type: None,
byte_offset: 10,
resolved: None,
}],
}],
imports: vec![],
};
let mut files = vec![file_a, file_b, file_c];
let def_index = build_def_index(&files);
resolve_calls(&mut files, &def_index, &HashMap::new());
let resolved = files[2].defs[0].calls[0].resolved;
assert_eq!(
resolved, None,
"ambiguous unqualified call with no import context must resolve to None, not silently pick first"
);
}
fn build_test_graph(n_code: usize, include_json: bool) -> (RepoGraph, Vec<usize>) {
let mut file_nodes: Vec<FileNode> = (0..n_code)
.map(|i| FileNode {
path: format!("src/file_{i}.rs"),
defs: vec![
Definition {
name: format!("func_{i}"),
kind: "function_item".to_string(),
start_line: 1,
end_line: 5,
scope: String::new(),
signature: Some(format!("fn func_{i}() -> i32")),
start_byte: 0,
end_byte: 100,
calls: vec![],
},
Definition {
name: format!("MyStruct{i}"),
kind: "struct_item".to_string(),
start_line: 7,
end_line: 10,
scope: String::new(),
signature: None,
start_byte: 110,
end_byte: 200,
calls: vec![],
},
],
imports: vec![],
})
.collect();
let json_idx = if include_json {
let idx = file_nodes.len();
file_nodes.push(FileNode {
path: "data/config.json".to_string(),
defs: vec![],
imports: vec![],
});
vec![idx]
} else {
vec![]
};
let n = file_nodes.len();
#[expect(clippy::cast_possible_truncation, reason = "test: n_code << u32::MAX")]
let edges: Vec<(u32, u32, u32)> = (1..n_code).map(|i| (i as u32, 0, 1)).collect();
let base_ranks = pagerank(n, &edges, None);
let (callers, callees) = build_neighbor_lists(n, &edges);
let graph = RepoGraph {
files: file_nodes,
edges,
base_ranks,
callers,
callees,
def_edges: vec![],
def_ranks: vec![],
def_callers: vec![],
def_callees: vec![],
def_offsets: vec![0],
alpha: 0.5,
};
(graph, json_idx)
}
#[test]
fn get_repo_map_returns_json_with_files_array() {
let (graph, _) = build_test_graph(5, false);
let response = render_json(&graph, 50, None, false);
assert!(
!response.files.is_empty(),
"files array should be non-empty for a non-empty graph"
);
let json = serde_json::to_string(&response).expect("serialize");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("parse");
assert!(
parsed["files"].is_array(),
"serialized response must have a `files` JSON array; got: {parsed}"
);
}
#[test]
fn get_repo_map_each_file_has_lsp_location() {
let (graph, _) = build_test_graph(5, false);
let response = render_json(&graph, 50, None, false);
for file in &response.files {
assert!(
!file.lsp_location.file_path.is_empty(),
"each file must have a non-empty lsp_location.file_path"
);
}
let json = serde_json::to_string(&response).expect("serialize");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("parse");
for entry in parsed["files"].as_array().expect("files array") {
assert!(
entry["lsp_location"]["file_path"].is_string(),
"each file entry must have lsp_location.file_path string; entry: {entry}"
);
}
}
#[test]
fn get_repo_map_each_symbol_has_kind_and_lsp_location() {
let (graph, _) = build_test_graph(3, false);
let response = render_json(&graph, 50, None, false);
for file in &response.files {
for sym in &file.symbols {
assert!(
sym.kind > 0,
"symbol kind must be a positive LSP SymbolKind; got 0 for '{}'",
sym.name
);
assert!(
!sym.lsp_location.file_path.is_empty(),
"symbol must have lsp_location.file_path"
);
}
}
let json = serde_json::to_string(&response).expect("serialize");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("parse");
for file_entry in parsed["files"].as_array().expect("files") {
for sym_entry in file_entry["symbols"].as_array().expect("symbols") {
assert!(
sym_entry["kind"].is_number(),
"symbol `kind` must be a JSON number; sym: {sym_entry}"
);
assert!(
sym_entry["lsp_location"]["file_path"].is_string(),
"symbol must have lsp_location.file_path; sym: {sym_entry}"
);
}
}
}
#[test]
fn get_repo_map_calls_field_is_array_of_lsp_locations() {
let (graph, _) = build_test_graph(5, false);
let response = render_json(&graph, 50, None, false);
let json = serde_json::to_string(&response).expect("serialize");
let parsed: serde_json::Value = serde_json::from_str(&json).expect("parse");
for file_entry in parsed["files"].as_array().expect("files") {
let calls = file_entry["calls"]
.as_array()
.expect("calls must be an array");
for call in calls {
assert!(
call["lsp_location"]["file_path"].is_string(),
"each call entry must have lsp_location.file_path string; call: {call}"
);
assert!(
call["rank"].is_number(),
"each call entry must have a numeric rank; call: {call}"
);
}
}
}
#[test]
fn get_repo_map_returns_at_most_max_files_files() {
let (graph, _) = build_test_graph(10, false);
let response = render_json_budgeted(&graph, 150, None, false);
assert!(
response.files.len() <= 3,
"files.len() = {} must be <= 3 for a 600-byte budget",
response.files.len()
);
assert_eq!(
response.total_files, 10,
"total_files must reflect the full eligible count before budget cap"
);
assert!(
response.capped,
"capped must be true when total_files > files.len()"
);
}
#[test]
fn get_repo_map_excludes_meta_by_default() {
let (graph, _) = build_test_graph(3, true);
let response = render_json(&graph, 50, None, false);
for file in &response.files {
assert!(
!std::path::Path::new(&file.lsp_location.file_path)
.extension()
.is_some_and(|e| e.eq_ignore_ascii_case("json")),
"JSON (Meta) files must be excluded when include_metadata=false; found: {}",
file.lsp_location.file_path
);
}
}
#[test]
fn get_repo_map_include_metadata_true_includes_json() {
let (graph, _) = build_test_graph(3, true);
let response = render_json(&graph, 50, None, true);
let has_json = response.files.iter().any(|f| {
std::path::Path::new(&f.lsp_location.file_path)
.extension()
.is_some_and(|e| e.eq_ignore_ascii_case("json"))
});
assert!(
has_json,
"JSON file must be present when include_metadata=true"
);
}
#[test]
#[ignore = "runs on flask corpus at tests/corpus/code/flask; use --ignored --nocapture"]
#[expect(
clippy::too_many_lines,
reason = "end-to-end corpus measurement test; assertion sequence is sequential and cannot be meaningfully split"
)]
fn test_flask_focus_blueprints_rank_dispersion() {
let corpus_root = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap()
.join("tests/corpus/code/flask");
assert!(
corpus_root.exists(),
"flask corpus not found at {}",
corpus_root.display()
);
let graph = build_graph(&corpus_root).expect("build_graph on flask corpus");
eprintln!("Flask corpus: {} files in graph", graph.files.len());
let focus_path = "src/flask/blueprints.py";
let focus_idx = graph.files.iter().position(|f| f.path == focus_path);
eprintln!("Focus file '{focus_path}' -> idx: {focus_idx:?}");
assert!(
focus_idx.is_some(),
"blueprints.py not found in graph; available files: {:?}",
graph
.files
.iter()
.map(|f| &f.path)
.take(20)
.collect::<Vec<_>>()
);
let response = render_json_budgeted(&graph, 4000, focus_idx, false);
eprintln!(
"Focused response: {} files (total_files={})",
response.files.len(),
response.total_files
);
assert!(
response.files.len() >= 8,
"expected >= 8 files in focused response; got {} — I#16 winner-take-all collapse",
response.files.len()
);
eprintln!("\nTop 10 focused files:");
for (i, f) in response.files.iter().take(10).enumerate() {
eprintln!(" [{i}] rank={:.6} {}", f.rank, f.lsp_location.file_path);
}
let focus_file_rank = response
.files
.iter()
.find(|f| {
f.lsp_location.file_path.contains("blueprints.py")
&& !f.lsp_location.file_path.contains("test_")
&& !f.lsp_location.file_path.contains("sansio")
})
.map(|f| f.rank)
.unwrap_or(0.0);
let focus_position = response
.files
.iter()
.position(|f| {
f.lsp_location.file_path.contains("blueprints.py")
&& !f.lsp_location.file_path.contains("test_")
&& !f.lsp_location.file_path.contains("sansio")
})
.unwrap_or(usize::MAX);
eprintln!(
"\nblueprinets.py position: #{} rank={:.6}",
focus_position + 1,
focus_file_rank
);
assert!(
focus_position < 3,
"blueprints.py must be in top-3 focused results (got position {}); \
soft personalization must rebias toward focus neighborhood — I#16",
focus_position + 1
);
let top_rank = response.files[0].rank;
let non_focus_min_5 = response
.files
.iter()
.filter(|f| {
!(f.lsp_location.file_path.contains("blueprints.py")
&& !f.lsp_location.file_path.contains("test_")
&& !f.lsp_location.file_path.contains("sansio"))
})
.take(5)
.map(|f| f.rank)
.fold(f32::INFINITY, f32::min);
let pct = non_focus_min_5 / top_rank * 100.0;
eprintln!(
"\nNext-5 (non-focus) min rank: {non_focus_min_5:.6} = {pct:.1}% of top rank {top_rank:.6}"
);
assert!(
pct >= 10.0,
"next-5 non-focus files min rank is {pct:.1}% of top rank (need ≥ 10%); \
files are collapsing to near-zero floor — I#16"
);
let related_names = ["app.py", "scaffold.py", "sansio"];
let found_related: Vec<&str> = related_names
.iter()
.copied()
.filter(|name| {
response
.files
.iter()
.any(|f| f.lsp_location.file_path.contains(name))
})
.collect();
eprintln!("\nNeighborhood quality: found related files: {found_related:?}");
if found_related.is_empty() {
eprintln!(
"WARNING: no expected related files (app.py, scaffold.py) found in neighborhood"
);
}
}
#[test]
#[ignore = "runs on full ripvec codebase; use --nocapture to see output"]
fn test_full_repo_map() {
use std::time::Instant;
let root = Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.unwrap()
.parent()
.unwrap();
let t0 = Instant::now();
let graph = build_graph(root).expect("build_graph on ripvec root");
let build_ms = t0.elapsed().as_secs_f64() * 1000.0;
let t1 = Instant::now();
let rendered = render(&graph, 2000, None);
let render_ms = t1.elapsed().as_secs_f64() * 1000.0;
let t2 = Instant::now();
let focus_idx = graph
.base_ranks
.iter()
.enumerate()
.max_by(|a, b| a.1.total_cmp(b.1))
.map(|(i, _)| i);
let focused = render(&graph, 2000, focus_idx);
let focus_ms = t2.elapsed().as_secs_f64() * 1000.0;
eprintln!("\n=== Repo Map Performance ===");
eprintln!(
"Files: {}, Edges: {}, Defs: {}",
graph.files.len(),
graph.edges.len(),
graph.files.iter().map(|f| f.defs.len()).sum::<usize>()
);
eprintln!("build_graph: {build_ms:.1}ms (walk + parse + resolve + PageRank)");
eprintln!(
"render(default): {render_ms:.3}ms ({} chars, ~{} tokens)",
rendered.len(),
rendered.len() / 4
);
eprintln!(
"render(focused): {focus_ms:.3}ms ({} chars, ~{} tokens)",
focused.len(),
focused.len() / 4
);
eprintln!("\nTop 5 by PageRank:");
let mut ranked: Vec<(usize, f32)> = graph.base_ranks.iter().copied().enumerate().collect();
ranked.sort_by(|a, b| b.1.total_cmp(&a.1));
for (i, rank) in ranked.iter().take(5) {
eprintln!(" {:.4} {}", rank, graph.files[*i].path);
}
eprintln!("\n=== Default Render ===\n{rendered}");
eprintln!(
"\n=== Focused Render (on {}) ===\n{focused}",
focus_idx
.map(|i| graph.files[i].path.as_str())
.unwrap_or("none")
);
}
}