Skip to main content

git_iris/agents/tools/
repo_map.rs

1//! Repository map tool for compact codebase orientation.
2
3use anyhow::{Context, Result};
4use ignore::WalkBuilder;
5use regex::Regex;
6use rig::completion::ToolDefinition;
7use rig::tool::Tool;
8use schemars::JsonSchema;
9use serde::{Deserialize, Serialize};
10use std::cmp::Reverse;
11use std::path::{Path, PathBuf};
12use std::process::Command;
13use std::sync::LazyLock;
14
15use super::common::{current_repo_root, parameters_schema};
16
// Declares `RepoMapError`, which is used below as this tool's `Tool::Error` type.
// NOTE(review): the macro is defined elsewhere in the crate — confirm exactly what it generates.
crate::define_tool_error!(RepoMapError);

/// Token budget applied when the caller does not supply `token_budget`.
const DEFAULT_TOKEN_BUDGET: u32 = 2_000;
/// Upper bound that a requested `token_budget` is clamped to before rendering.
const MAX_TOKEN_BUDGET: u32 = 8_000;
/// Files whose size in bytes exceeds this value are skipped during the walk.
const MAX_FILE_BYTES: u64 = 400_000;
/// File cap applied when the caller does not supply `max_files`.
const DEFAULT_MAX_FILES: usize = 60;
/// Maximum number of definition names collected from a single file.
const MAX_DEFINITIONS_PER_FILE: usize = 12;
/// Maximum number of import/reference strings collected from a single file.
const MAX_IMPORTS_PER_FILE: usize = 6;
25
26static DEFINITION_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
27    [
28        r"^\s*(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?(?:unsafe\s+)?(?:fn|struct|enum|trait|type|mod)\s+([A-Za-z_][A-Za-z0-9_]*)",
29        r"^\s*(?:export\s+)?(?:async\s+)?(?:function|class|interface|type|enum)\s+([A-Za-z_$][A-Za-z0-9_$]*)",
30        r"^\s*(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=",
31        r"^\s*(?:async\s+)?(?:def|class)\s+([A-Za-z_][A-Za-z0-9_]*)",
32        r"^\s*(?:func|type)\s+([A-Za-z_][A-Za-z0-9_]*)",
33        r"^\s*(?:public\s+|private\s+|internal\s+|open\s+)?(?:fun|class|object|interface|struct|enum|protocol)\s+([A-Za-z_][A-Za-z0-9_]*)",
34    ]
35    .into_iter()
36    .map(|pattern| Regex::new(pattern).expect("definition regex should compile"))
37    .collect()
38});
39
40static IMPORT_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
41    [
42        r"^\s*(?:pub\s+)?use\s+(.+?);",
43        r"^\s*mod\s+([A-Za-z_][A-Za-z0-9_]*);",
44        r#"^\s*import\s+.+?\s+from\s+['"](.+?)['"]"#,
45        r#"^\s*export\s+.+?\s+from\s+['"](.+?)['"]"#,
46        r"^\s*from\s+([A-Za-z_][A-Za-z0-9_.]*)\s+import\s+",
47        r"^\s*import\s+([A-Za-z_][A-Za-z0-9_.]*)",
48        r#"^\s*import\s+["'](.+?)["']"#,
49    ]
50    .into_iter()
51    .map(|pattern| Regex::new(pattern).expect("import regex should compile"))
52    .collect()
53});
54
/// Stateless handle for the repository-map tool, registered under the name `repo_map`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoMapTool;
57
58#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
59pub struct RepoMapArgs {
60    #[serde(default = "default_token_budget")]
61    pub token_budget: u32,
62    #[serde(default)]
63    pub mentioned_files: Vec<PathBuf>,
64    #[serde(default = "default_max_files")]
65    pub max_files: usize,
66}
67
/// Result returned by the `repo_map` tool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoMap {
    // Number of source files summarized before the `max_files` cap was applied.
    pub files_analyzed: usize,
    // Number of summaries kept after the `max_files` cap. The rendered `content` can still be
    // cut short by the character budget, so fewer entries may actually be visible in it.
    pub files_shown: usize,
    // Paths reported as changed by `git status`.
    pub changed_files: Vec<PathBuf>,
    // The caller's mentioned paths after normalization.
    pub mentioned_files: Vec<PathBuf>,
    // The rendered, possibly truncated, text of the map.
    pub content: String,
}
76
/// Per-file data gathered during the walk and consumed by ranking and rendering.
#[derive(Debug, Clone)]
struct FileSummary {
    // Path relative to the repository root.
    path: PathBuf,
    // Ranking score computed by `score_file`; higher scores sort first.
    score: usize,
    // Names captured by the definition patterns, capped per file.
    definitions: Vec<String>,
    // Targets captured by the import patterns, capped per file.
    imports: Vec<String>,
    // True when the path appears in the changed-file list.
    changed: bool,
    // True when the path appears in the normalized mentioned-file list.
    mentioned: bool,
}
86
87impl Default for RepoMapTool {
88    fn default() -> Self {
89        Self
90    }
91}
92
/// Serde default for `RepoMapArgs::token_budget`; a function is needed because
/// `#[serde(default = "...")]` takes a function path rather than a constant.
fn default_token_budget() -> u32 {
    DEFAULT_TOKEN_BUDGET
}
96
/// Serde default for `RepoMapArgs::max_files`; a function is needed because
/// `#[serde(default = "...")]` takes a function path rather than a constant.
fn default_max_files() -> usize {
    DEFAULT_MAX_FILES
}
100
101impl RepoMapTool {
102    #[must_use]
103    pub fn new() -> Self {
104        Self
105    }
106
107    pub(super) fn build(repo_root: &Path, args: &RepoMapArgs) -> Result<RepoMap> {
108        let changed_files = changed_files(repo_root);
109        let mentioned_files = normalize_mentions(&args.mentioned_files);
110        let mut summaries = collect_file_summaries(repo_root, &changed_files, &mentioned_files)?;
111        let files_analyzed = summaries.len();
112        summaries.sort_by_key(|summary| {
113            (
114                Reverse(summary.score),
115                summary.path.components().count(),
116                summary.path.clone(),
117            )
118        });
119
120        let max_files = args.max_files.clamp(1, 200);
121        summaries.truncate(max_files);
122        let content = render_repo_map(
123            &summaries,
124            args.token_budget.clamp(50, MAX_TOKEN_BUDGET),
125            files_analyzed,
126        );
127
128        Ok(RepoMap {
129            files_analyzed,
130            files_shown: summaries.len(),
131            changed_files,
132            mentioned_files,
133            content,
134        })
135    }
136}
137
138impl Tool for RepoMapTool {
139    const NAME: &'static str = "repo_map";
140    type Error = RepoMapError;
141    type Args = RepoMapArgs;
142    type Output = RepoMap;
143
144    async fn definition(&self, _: String) -> ToolDefinition {
145        ToolDefinition {
146            name: Self::NAME.to_string(),
147            description: "Build a compact repository map with ranked source files, definitions, imports, and changed or mentioned-file signals. Use this before broad cross-file analysis when you need the codebase skeleton without reading every file.".to_string(),
148            parameters: parameters_schema::<RepoMapArgs>(),
149        }
150    }
151
152    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
153        let repo_root = current_repo_root().map_err(RepoMapError::from)?;
154        Self::build(&repo_root, &args).map_err(RepoMapError::from)
155    }
156}
157
158fn collect_file_summaries(
159    repo_root: &Path,
160    changed_files: &[PathBuf],
161    mentioned_files: &[PathBuf],
162) -> Result<Vec<FileSummary>> {
163    let mut summaries = Vec::new();
164    for entry in WalkBuilder::new(repo_root)
165        .hidden(false)
166        .git_ignore(true)
167        .git_global(true)
168        .build()
169        .filter_map(std::result::Result::ok)
170    {
171        let path = entry.path();
172        if !entry
173            .file_type()
174            .is_some_and(|file_type| file_type.is_file())
175            || !is_source_file(path)
176            || is_large_file(path)
177        {
178            continue;
179        }
180
181        let relative_path = path
182            .strip_prefix(repo_root)
183            .context("walked path should be inside repo root")?
184            .to_path_buf();
185        let content = std::fs::read_to_string(path).unwrap_or_default();
186        let definitions = extract_matches(&content, &DEFINITION_PATTERNS, MAX_DEFINITIONS_PER_FILE);
187        let imports = extract_matches(&content, &IMPORT_PATTERNS, MAX_IMPORTS_PER_FILE);
188        let changed = changed_files
189            .iter()
190            .any(|changed| changed == &relative_path);
191        let mentioned = mentioned_files
192            .iter()
193            .any(|mentioned| mentioned == &relative_path);
194        let score = score_file(
195            &relative_path,
196            definitions.len(),
197            imports.len(),
198            changed,
199            mentioned,
200        );
201
202        summaries.push(FileSummary {
203            path: relative_path,
204            score,
205            definitions,
206            imports,
207            changed,
208            mentioned,
209        });
210    }
211
212    Ok(summaries)
213}
214
/// Lists the paths git reports as changed (staged, unstaged, or untracked) in `repo_root`.
///
/// Uses `git status --porcelain -z`, the machine-readable form of the status output:
/// paths are NUL-terminated and never quoted or colourised, so names containing spaces,
/// special characters, or a literal `" -> "` are returned intact. Paths are relative to the
/// top level of the repository, which is what `repo_root` is expected to be.
///
/// For renames and copies the destination path is returned and the source path is dropped.
/// An untracked directory is reported by git as a single `dir/` entry, not per file.
///
/// This is best effort: if git cannot be run or exits unsuccessfully, the result is empty.
fn changed_files(repo_root: &Path) -> Vec<PathBuf> {
    let output = Command::new("git")
        .args(["status", "--porcelain", "-z"])
        .current_dir(repo_root)
        .output();
    let Ok(output) = output else {
        return Vec::new();
    };
    if !output.status.success() {
        return Vec::new();
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut entries = stdout.split('\0');
    let mut paths = Vec::new();

    // Each entry is `XY <path>`; a rename or copy entry is followed by one extra
    // NUL-terminated field holding the original path.
    while let Some(entry) = entries.next() {
        let (Some(status), Some(path)) = (entry.get(..2), entry.get(3..)) else {
            continue;
        };
        if status.contains('R') || status.contains('C') {
            // Consume the source path so it is not misread as a status entry.
            entries.next();
        }
        if !path.is_empty() {
            paths.push(PathBuf::from(path));
        }
    }

    paths
}
231
/// Normalizes caller-supplied paths so they can be compared with repository-relative paths.
///
/// For each path: backslashes become forward slashes, every leading `./` is removed, and
/// then every leading `/` is removed. Nothing else about the path is changed.
fn normalize_mentions(paths: &[PathBuf]) -> Vec<PathBuf> {
    let mut normalized = Vec::with_capacity(paths.len());
    for raw in paths {
        let forward_slashed = raw.to_string_lossy().replace('\\', "/");
        let without_dot_prefix = forward_slashed.trim_start_matches("./");
        let relative = without_dot_prefix.trim_start_matches('/');
        normalized.push(PathBuf::from(relative));
    }
    normalized
}
245
246fn is_large_file(path: &Path) -> bool {
247    path.metadata()
248        .map_or(true, |metadata| metadata.len() > MAX_FILE_BYTES)
249}
250
/// Reports whether `path` has one of the recognised source-file extensions.
///
/// The comparison is case-sensitive, and a path with no extension or a non-UTF-8 extension
/// is not a source file.
fn is_source_file(path: &Path) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "go", "java", "kt", "kts", "swift",
        "rb", "lua", "sh", "zsh", "bash",
    ];

    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    SOURCE_EXTENSIONS.contains(&extension)
}
277
278fn extract_matches(content: &str, patterns: &[Regex], limit: usize) -> Vec<String> {
279    let mut matches = Vec::new();
280    for line in content.lines() {
281        for pattern in patterns {
282            if let Some(captures) = pattern.captures(line)
283                && let Some(item) = captures.get(1)
284            {
285                matches.push(item.as_str().trim().to_string());
286                break;
287            }
288        }
289        if matches.len() >= limit {
290            break;
291        }
292    }
293    matches
294}
295
/// Computes the ranking score for one file; higher scores are shown first.
///
/// The score is the sum of:
/// - 12 per definition and 3 per import found in the file,
/// - 1000 if the file was mentioned by the caller,
/// - 250 if the file is changed,
/// - 40 if the path has at most two components,
/// - 20 for each keyword that occurs anywhere in the lowercased path (substring match).
fn score_file(
    path: &Path,
    definitions_count: usize,
    imports_count: usize,
    changed: bool,
    mentioned: bool,
) -> usize {
    const KEYWORDS: &[&str] = &[
        "main", "lib", "mod", "app", "config", "router", "agent", "tool", "auth", "api", "db",
        "state", "service",
    ];

    let lowered = path.to_string_lossy().to_ascii_lowercase();
    let keyword_hits = KEYWORDS
        .iter()
        .filter(|keyword| lowered.contains(**keyword))
        .count();

    let content_score = definitions_count * 12 + imports_count * 3;
    let mention_bonus = if mentioned { 1_000 } else { 0 };
    let change_bonus = if changed { 250 } else { 0 };
    let shallow_bonus = if path.components().count() <= 2 { 40 } else { 0 };

    content_score + mention_bonus + change_bonus + shallow_bonus + keyword_hits * 20
}
326
327fn render_repo_map(summaries: &[FileSummary], token_budget: u32, files_analyzed: usize) -> String {
328    let mut output = format!(
329        "Repository map: showing {} of {} analyzed source files.\n",
330        summaries.len(),
331        files_analyzed
332    );
333
334    for summary in summaries {
335        let mut markers = Vec::new();
336        if summary.changed {
337            markers.push("changed");
338        }
339        if summary.mentioned {
340            markers.push("mentioned");
341        }
342        let marker = if markers.is_empty() {
343            String::new()
344        } else {
345            format!(" [{}]", markers.join(", "))
346        };
347
348        output.push_str(&format!(
349            "\n{}{} (score {})",
350            summary.path.display(),
351            marker,
352            summary.score
353        ));
354        if !summary.definitions.is_empty() {
355            output.push_str(&format!("\n  defs: {}", summary.definitions.join(", ")));
356        }
357        if !summary.imports.is_empty() {
358            output.push_str(&format!("\n  refs: {}", summary.imports.join(", ")));
359        }
360        output.push('\n');
361    }
362
363    let char_budget = usize::try_from(token_budget).map_or(usize::MAX / 4, |budget| budget * 4);
364    truncate_chars(&output, char_budget)
365}
366
/// Returns `text` unchanged when it has at most `max_chars` characters; otherwise returns
/// its first `max_chars` characters followed by a `[repo_map truncated]` marker line.
///
/// Lengths are counted in Unicode scalar values, not bytes, and the marker is appended on
/// top of the limit.
fn truncate_chars(text: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars`, if there is one, is exactly
    // where the kept prefix ends.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut shortened = String::from(&text[..cut]);
            shortened.push_str("\n[repo_map truncated]");
            shortened
        }
    }
}