1use anyhow::{Context, Result};
4use ignore::WalkBuilder;
5use regex::Regex;
6use rig::completion::ToolDefinition;
7use rig::tool::Tool;
8use schemars::JsonSchema;
9use serde::{Deserialize, Serialize};
10use std::cmp::Reverse;
11use std::path::{Path, PathBuf};
12use std::process::Command;
13use std::sync::LazyLock;
14
15use super::common::{current_repo_root, parameters_schema};
16
// Invokes the crate-level `define_tool_error!` macro with the name `RepoMapError`.
// NOTE(review): presumably this declares the error type used as `Tool::Error` for
// `RepoMapTool` further down — confirm against the macro definition.
crate::define_tool_error!(RepoMapError);
18
// --- Output sizing ---------------------------------------------------------

/// Token budget used when the caller does not supply `token_budget`.
const DEFAULT_TOKEN_BUDGET: u32 = 2_000;
/// Largest token budget a request may ask for; higher values are clamped down to this.
const MAX_TOKEN_BUDGET: u32 = 8_000;

// --- File selection --------------------------------------------------------

/// Size threshold in bytes; files strictly larger than this are treated as too large to scan.
const MAX_FILE_BYTES: u64 = 400_000;
/// Number of files shown when the caller does not supply `max_files`.
const DEFAULT_MAX_FILES: usize = 60;

// --- Per-file extraction caps ----------------------------------------------

/// Cap on how many definition names are collected from a single file.
const MAX_DEFINITIONS_PER_FILE: usize = 12;
/// Cap on how many import/reference entries are collected from a single file.
const MAX_IMPORTS_PER_FILE: usize = 6;
25
26static DEFINITION_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
27 [
28 r"^\s*(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:fn|struct|enum|trait|type|mod)\s+([A-Za-z_][A-Za-z0-9_]*)",
29 r"^\s*(?:export\s+)?(?:async\s+)?(?:function|class|interface|type|enum)\s+([A-Za-z_$][A-Za-z0-9_$]*)",
30 r"^\s*(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=",
31 r"^\s*(?:async\s+)?(?:def|class)\s+([A-Za-z_][A-Za-z0-9_]*)",
32 r"^\s*(?:func|type)\s+([A-Za-z_][A-Za-z0-9_]*)",
33 r"^\s*(?:public\s+|private\s+|internal\s+|open\s+)?(?:fun|class|object|interface|struct|enum|protocol)\s+([A-Za-z_][A-Za-z0-9_]*)",
34 ]
35 .into_iter()
36 .map(|pattern| Regex::new(pattern).expect("definition regex should compile"))
37 .collect()
38});
39
40static IMPORT_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
41 [
42 r"^\s*(?:pub\s+)?use\s+(.+?);",
43 r"^\s*mod\s+([A-Za-z_][A-Za-z0-9_]*);",
44 r#"^\s*import\s+.+?\s+from\s+['"](.+?)['"]"#,
45 r#"^\s*export\s+.+?\s+from\s+['"](.+?)['"]"#,
46 r"^\s*from\s+([A-Za-z_][A-Za-z0-9_.]*)\s+import\s+",
47 r"^\s*import\s+([A-Za-z_][A-Za-z0-9_.]*)",
48 r#"^\s*import\s+["'](.+?)["']"#,
49 ]
50 .into_iter()
51 .map(|pattern| Regex::new(pattern).expect("import regex should compile"))
52 .collect()
53});
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct RepoMapTool;
57
58#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
59pub struct RepoMapArgs {
60 #[serde(default = "default_token_budget")]
61 pub token_budget: u32,
62 #[serde(default)]
63 pub mentioned_files: Vec<PathBuf>,
64 #[serde(default = "default_max_files")]
65 pub max_files: usize,
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct RepoMap {
70 pub files_analyzed: usize,
71 pub files_shown: usize,
72 pub changed_files: Vec<PathBuf>,
73 pub mentioned_files: Vec<PathBuf>,
74 pub content: String,
75}
76
/// Everything the ranker and renderer need to know about one scanned file.
#[derive(Clone, Debug)]
struct FileSummary {
    /// Path relative to the repository root.
    path: PathBuf,
    /// Ranking score; higher sorts earlier.
    score: usize,
    /// Names captured by the definition patterns, in file order.
    definitions: Vec<String>,
    /// Targets captured by the import patterns, in file order.
    imports: Vec<String>,
    /// Whether the path appears in the changed-files list.
    changed: bool,
    /// Whether the path appears in the mentioned-files list.
    mentioned: bool,
}
86
87impl Default for RepoMapTool {
88 fn default() -> Self {
89 Self
90 }
91}
92
93fn default_token_budget() -> u32 {
94 DEFAULT_TOKEN_BUDGET
95}
96
97fn default_max_files() -> usize {
98 DEFAULT_MAX_FILES
99}
100
101impl RepoMapTool {
102 #[must_use]
103 pub fn new() -> Self {
104 Self
105 }
106
107 pub(super) fn build(repo_root: &Path, args: &RepoMapArgs) -> Result<RepoMap> {
108 let changed_files = changed_files(repo_root);
109 let mentioned_files = normalize_mentions(&args.mentioned_files);
110 let mut summaries = collect_file_summaries(repo_root, &changed_files, &mentioned_files)?;
111 let files_analyzed = summaries.len();
112 summaries.sort_by_key(|summary| {
113 (
114 Reverse(summary.score),
115 summary.path.components().count(),
116 summary.path.clone(),
117 )
118 });
119
120 let max_files = args.max_files.clamp(1, 200);
121 summaries.truncate(max_files);
122 let content = render_repo_map(
123 &summaries,
124 args.token_budget.clamp(50, MAX_TOKEN_BUDGET),
125 files_analyzed,
126 );
127
128 Ok(RepoMap {
129 files_analyzed,
130 files_shown: summaries.len(),
131 changed_files,
132 mentioned_files,
133 content,
134 })
135 }
136}
137
138impl Tool for RepoMapTool {
139 const NAME: &'static str = "repo_map";
140 type Error = RepoMapError;
141 type Args = RepoMapArgs;
142 type Output = RepoMap;
143
144 async fn definition(&self, _: String) -> ToolDefinition {
145 ToolDefinition {
146 name: Self::NAME.to_string(),
147 description: "Build a compact repository map with ranked source files, definitions, imports, and changed or mentioned-file signals. Use this before broad cross-file analysis when you need the codebase skeleton without reading every file.".to_string(),
148 parameters: parameters_schema::<RepoMapArgs>(),
149 }
150 }
151
152 async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
153 let repo_root = current_repo_root().map_err(RepoMapError::from)?;
154 Self::build(&repo_root, &args).map_err(RepoMapError::from)
155 }
156}
157
158fn collect_file_summaries(
159 repo_root: &Path,
160 changed_files: &[PathBuf],
161 mentioned_files: &[PathBuf],
162) -> Result<Vec<FileSummary>> {
163 let mut summaries = Vec::new();
164 for entry in WalkBuilder::new(repo_root)
165 .hidden(false)
166 .git_ignore(true)
167 .git_global(true)
168 .build()
169 .filter_map(std::result::Result::ok)
170 {
171 let path = entry.path();
172 if !entry
173 .file_type()
174 .is_some_and(|file_type| file_type.is_file())
175 || !is_source_file(path)
176 || is_large_file(path)
177 {
178 continue;
179 }
180
181 let relative_path = path
182 .strip_prefix(repo_root)
183 .context("walked path should be inside repo root")?
184 .to_path_buf();
185 let content = std::fs::read_to_string(path).unwrap_or_default();
186 let definitions = extract_matches(&content, &DEFINITION_PATTERNS, MAX_DEFINITIONS_PER_FILE);
187 let imports = extract_matches(&content, &IMPORT_PATTERNS, MAX_IMPORTS_PER_FILE);
188 let changed = changed_files
189 .iter()
190 .any(|changed| changed == &relative_path);
191 let mentioned = mentioned_files
192 .iter()
193 .any(|mentioned| mentioned == &relative_path);
194 let score = score_file(
195 &relative_path,
196 definitions.len(),
197 imports.len(),
198 changed,
199 mentioned,
200 );
201
202 summaries.push(FileSummary {
203 path: relative_path,
204 score,
205 definitions,
206 imports,
207 changed,
208 mentioned,
209 });
210 }
211
212 Ok(summaries)
213}
214
/// Lists the paths git reports as modified, staged, renamed, or untracked.
///
/// Runs `git status --porcelain=v1 -z --untracked-files=all` in `repo_root`. The
/// porcelain format is independent of user configuration, `-z` disables path
/// quoting so names containing spaces or special characters come through verbatim,
/// and `--untracked-files=all` lists new files individually instead of collapsing
/// them into their directory.
///
/// For renames and copies only the destination path is returned. Paths are as git
/// prints them in porcelain mode, i.e. relative to the repository root.
///
/// This is best effort: if git cannot be started or exits unsuccessfully, an empty
/// list is returned.
fn changed_files(repo_root: &Path) -> Vec<PathBuf> {
    let output = Command::new("git")
        .args(["status", "--porcelain=v1", "-z", "--untracked-files=all"])
        .current_dir(repo_root)
        .output();
    let Ok(output) = output else {
        return Vec::new();
    };
    if !output.status.success() {
        return Vec::new();
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut records = stdout.split('\0');
    let mut paths = Vec::new();

    // Each record is `XY <path>`; the two status characters and the separating
    // space are ASCII, so the path starts at byte offset 3.
    while let Some(record) = records.next() {
        let (Some(status), Some(path)) = (record.get(..2), record.get(3..)) else {
            continue;
        };
        if path.is_empty() {
            continue;
        }
        paths.push(PathBuf::from(path));

        // In `-z` mode a rename/copy record is followed by a second NUL-terminated
        // field holding the original path; consume it so it is not parsed as a record.
        if status.contains(['R', 'C']) {
            records.next();
        }
    }

    paths
}
231
/// Normalises caller-supplied paths so they can be compared with repo-relative paths.
///
/// For each path: backslashes become forward slashes, then every leading `./` is
/// removed, then every leading `/` is removed. Non-UTF-8 portions are replaced
/// lossily. The output has the same length and order as the input.
fn normalize_mentions(paths: &[PathBuf]) -> Vec<PathBuf> {
    let mut normalized = Vec::with_capacity(paths.len());
    for raw in paths {
        let unified = raw.to_string_lossy().replace('\\', "/");
        let without_dot_prefix = unified.trim_start_matches("./");
        let relative = without_dot_prefix.trim_start_matches('/');
        normalized.push(PathBuf::from(relative));
    }
    normalized
}
245
246fn is_large_file(path: &Path) -> bool {
247 path.metadata()
248 .map_or(true, |metadata| metadata.len() > MAX_FILE_BYTES)
249}
250
/// Reports whether `path` has one of the recognised source-code extensions.
///
/// The comparison is case-sensitive. Paths without an extension, or whose
/// extension is not valid UTF-8, are not source files.
fn is_source_file(path: &Path) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "go", "java", "kt", "kts", "swift",
        "rb", "lua", "sh", "zsh", "bash",
    ];

    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    SOURCE_EXTENSIONS.contains(&extension)
}
277
278fn extract_matches(content: &str, patterns: &[Regex], limit: usize) -> Vec<String> {
279 let mut matches = Vec::new();
280 for line in content.lines() {
281 for pattern in patterns {
282 if let Some(captures) = pattern.captures(line)
283 && let Some(item) = captures.get(1)
284 {
285 matches.push(item.as_str().trim().to_string());
286 break;
287 }
288 }
289 if matches.len() >= limit {
290 break;
291 }
292 }
293 matches
294}
295
/// Computes the ranking score for one file; higher scores sort earlier.
///
/// The score is the sum of:
/// - 12 per definition and 3 per import,
/// - 1000 if the file was mentioned by the caller,
/// - 250 if the file is changed,
/// - 40 if the path has at most two components,
/// - 20 for each keyword that occurs as a substring of the lowercased path.
fn score_file(
    path: &Path,
    definitions_count: usize,
    imports_count: usize,
    changed: bool,
    mentioned: bool,
) -> usize {
    const KEYWORDS: [&str; 13] = [
        "main", "lib", "mod", "app", "config", "router", "agent", "tool", "auth", "api", "db",
        "state", "service",
    ];

    let lowered = path.to_string_lossy().to_ascii_lowercase();
    let keyword_hits = KEYWORDS
        .iter()
        .filter(|keyword| lowered.contains(**keyword))
        .count();

    let content_points = definitions_count * 12 + imports_count * 3;
    let mention_points = if mentioned { 1_000 } else { 0 };
    let change_points = if changed { 250 } else { 0 };
    let depth_points = if path.components().count() <= 2 { 40 } else { 0 };

    content_points + mention_points + change_points + depth_points + keyword_hits * 20
}
326
327fn render_repo_map(summaries: &[FileSummary], token_budget: u32, files_analyzed: usize) -> String {
328 let mut output = format!(
329 "Repository map: showing {} of {} analyzed source files.\n",
330 summaries.len(),
331 files_analyzed
332 );
333
334 for summary in summaries {
335 let mut markers = Vec::new();
336 if summary.changed {
337 markers.push("changed");
338 }
339 if summary.mentioned {
340 markers.push("mentioned");
341 }
342 let marker = if markers.is_empty() {
343 String::new()
344 } else {
345 format!(" [{}]", markers.join(", "))
346 };
347
348 output.push_str(&format!(
349 "\n{}{} (score {})",
350 summary.path.display(),
351 marker,
352 summary.score
353 ));
354 if !summary.definitions.is_empty() {
355 output.push_str(&format!("\n defs: {}", summary.definitions.join(", ")));
356 }
357 if !summary.imports.is_empty() {
358 output.push_str(&format!("\n refs: {}", summary.imports.join(", ")));
359 }
360 output.push('\n');
361 }
362
363 let char_budget = usize::try_from(token_budget).map_or(usize::MAX / 4, |budget| budget * 4);
364 truncate_chars(&output, char_budget)
365}
366
/// Limits `text` to `max_chars` Unicode scalar values.
///
/// Text that already fits is returned unchanged. Otherwise the first `max_chars`
/// characters are kept and the marker `\n[repo_map truncated]` is appended, so the
/// result is longer than `max_chars` by the length of the marker.
fn truncate_chars(text: &str, max_chars: usize) -> String {
    // The character at index `max_chars` exists only when the text is too long;
    // its byte offset is exactly where the kept prefix ends.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut shortened = String::from(&text[..cut]);
            shortened.push_str("\n[repo_map truncated]");
            shortened
        }
    }
}