winx_code_agent/utils/
repo.rs1use crate::errors::Result;
12use crate::utils::display_tree::DirectoryTree;
13use ignore::WalkBuilder;
14use std::collections::HashSet;
15use std::path::{Path, PathBuf};
16use std::process::Command;
17
/// Upper bound on how many file paths the directory walk collects before it stops.
const MAX_ENTRIES_CHECK: usize = 100_000;

/// Maximum directory depth handed to the file walker.
const MAX_WALK_DEPTH: usize = 11;

/// Number of most recent commits requested from `git log` when looking for recently touched files.
const MAX_COMMITS_WALK: usize = 500;
24
25pub fn get_repo_context(path: &Path) -> Result<(String, Vec<String>)> {
30 let context_dir = context_dir(path);
31 let is_git_repo = find_git_root(&context_dir).is_some();
32
33 let mut all_files = get_all_files_max_depth(&context_dir, is_git_repo);
34 all_files.sort(); let dynamic_max_files =
37 if is_git_repo { calculate_dynamic_file_limit(all_files.len()) } else { 50 };
38
39 let existing: HashSet<&str> = all_files.iter().map(String::as_str).collect();
40
41 let recent_git_files = if is_git_repo {
42 let count = std::cmp::max(10, (dynamic_max_files as f64 * 0.2) as usize);
43 get_recent_git_files(&context_dir, count, &existing)
44 } else {
45 Vec::new()
46 };
47
48 let ranked = rank_files(&all_files);
49 let active = crate::utils::workspace_stats::active_files_for_context(&context_dir);
50
51 let mut top_files: Vec<String> = Vec::new();
53 let mut seen: HashSet<String> = HashSet::new();
54 let mut push = |file: String, top: &mut Vec<String>, seen: &mut HashSet<String>| {
55 if existing.contains(file.as_str()) && seen.insert(file.clone()) {
56 top.push(file);
57 }
58 };
59
60 for file in active {
61 push(file, &mut top_files, &mut seen);
62 }
63 for file in recent_git_files {
64 push(file, &mut top_files, &mut seen);
65 }
66 if top_files.len() < dynamic_max_files {
67 for file in ranked {
68 if top_files.len() >= dynamic_max_files {
69 break;
70 }
71 if seen.insert(file.clone()) {
72 top_files.push(file);
73 }
74 }
75 }
76
77 let mut tree = DirectoryTree::new(&context_dir);
78 for file in top_files.iter().take(dynamic_max_files) {
79 tree.expand(file);
80 }
81
82 Ok((tree.display(), top_files))
83}
84
/// Picks the directory that repository context should be built from.
///
/// Returns the enclosing git root when [`find_git_root`] finds one. Otherwise returns the
/// parent directory of `path` when `path` is a file, or `path` itself.
///
/// The result is never the empty path: a relative file such as `notes.txt` has the empty
/// path as its lexical parent, which is not a usable directory, so it is reported as `.`.
fn context_dir(path: &Path) -> PathBuf {
    if let Some(git_root) = find_git_root(path) {
        return git_root;
    }
    let dir = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };
    or_current_dir(dir)
}

/// Walks from `path` up through its lexical ancestors and returns the first directory that
/// contains an entry named `.git` (file or directory).
///
/// When `path` is a file the search starts at its parent. The walk is purely lexical, so a
/// relative `path` is only searched up to the current working directory.
///
/// Returns `None` when no ancestor contains `.git`. A match on the empty path (the current
/// directory of a relative input) is returned as `.` rather than as an empty `PathBuf`.
fn find_git_root(path: &Path) -> Option<PathBuf> {
    let start = if path.is_file() { path.parent()? } else { path };
    start
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(or_current_dir)
}

/// Converts `dir` to an owned path, replacing the empty path with `.`.
fn or_current_dir(dir: &Path) -> PathBuf {
    if dir.as_os_str().is_empty() {
        PathBuf::from(".")
    } else {
        dir.to_path_buf()
    }
}
110
111fn get_all_files_max_depth(root: &Path, is_git_repo: bool) -> Vec<String> {
118 let walker = WalkBuilder::new(root)
119 .max_depth(Some(MAX_WALK_DEPTH))
120 .hidden(false)
121 .parents(true)
122 .ignore(false)
123 .git_ignore(is_git_repo)
124 .git_global(is_git_repo)
125 .git_exclude(is_git_repo)
126 .require_git(true)
127 .filter_entry(|entry| entry.file_name() != ".git")
128 .build();
129
130 let mut files = Vec::new();
131 for entry in walker.flatten() {
132 if files.len() >= MAX_ENTRIES_CHECK {
133 break;
134 }
135 if entry.file_type().is_some_and(|file_type| file_type.is_file()) {
136 if let Ok(relative) = entry.path().strip_prefix(root) {
137 files.push(relative.to_string_lossy().to_string());
138 }
139 }
140 }
141 files
142}
143
144fn get_recent_git_files(root: &Path, count: usize, existing: &HashSet<&str>) -> Vec<String> {
147 let output = Command::new("git")
148 .arg("-C")
149 .arg(root)
150 .args([
151 "log",
152 "--name-only",
153 "--no-merges",
154 "--topo-order",
155 "--format=",
156 "-n",
157 &MAX_COMMITS_WALK.to_string(),
158 ])
159 .output();
160
161 let Ok(output) = output else {
162 return Vec::new();
163 };
164 if !output.status.success() {
165 return Vec::new();
166 }
167
168 let mut recent = Vec::new();
169 let mut seen = HashSet::new();
170 for line in String::from_utf8_lossy(&output.stdout).lines().map(str::trim) {
171 if line.is_empty() || !existing.contains(line) {
172 continue;
173 }
174 if seen.insert(line.to_string()) {
175 recent.push(line.to_string());
176 if recent.len() >= count {
177 break;
178 }
179 }
180 }
181 recent
182}
183
/// Scales the number of files to show with the size of the repository.
///
/// Repositories with at most 50 files get a limit of 50. Above that the limit grows
/// linearly, at a rate that would reach 400 for a repository of 30,000 files, and is
/// capped at 400.
fn calculate_dynamic_file_limit(total_files: usize) -> usize {
    const MIN_FILES: usize = 50;
    const MAX_FILES: usize = 400;

    // Files beyond the minimum; zero for small repositories, which then stay at MIN_FILES.
    let surplus = total_files.saturating_sub(MIN_FILES);
    let scale = (MAX_FILES - MIN_FILES) as f64 / (30_000.0 - MIN_FILES as f64);
    // Truncation toward zero is intended: the limit only grows in whole files.
    let extra = (surplus as f64 * scale) as usize;
    std::cmp::min(MIN_FILES + extra, MAX_FILES)
}
195
196fn rank_files(all_files: &[String]) -> Vec<String> {
199 if let Some(scores) = crate::utils::path_prob::score_paths(all_files) {
200 let mut indexed: Vec<(usize, f64)> = scores.into_iter().enumerate().collect();
201 indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
203 return indexed.into_iter().map(|(index, _)| all_files[index].clone()).collect();
204 }
205
206 let mut files = all_files.to_vec();
207 files.sort_by_key(|path| (heuristic_score(path), path.clone()));
208 files
209}
210
/// Relative paths that are treated as important by [`heuristic_score`].
///
/// The comparison is against the whole relative path, so only entries located directly in
/// the walked root match.
const IMPORTANT_NAMES: &[&str] = &[
    "Cargo.toml",
    "README.md",
    "AGENTS.md",
    "package.json",
    "pnpm-workspace.yaml",
    "pyproject.toml",
    "go.mod",
    "Dockerfile",
    "docker-compose.yml",
];

/// Scores a relative path for the fallback ranking; lower scores sort first.
///
/// The score is the number of `/` separators in the path, plus 10 unless the path is
/// listed in [`IMPORTANT_NAMES`], plus 1 when the path contains `test` or `spec` anywhere.
fn heuristic_score(path: &str) -> usize {
    let mut score = path.bytes().filter(|&byte| byte == b'/').count();
    if !IMPORTANT_NAMES.iter().any(|name| *name == path) {
        score += 10;
    }
    if ["test", "spec"].iter().any(|needle| path.contains(needle)) {
        score += 1;
    }
    score
}
230
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A small workspace yields a tree mentioning the manifest and a list with the source file.
    #[test]
    fn builds_repo_context_from_files() -> Result<()> {
        let workspace = TempDir::new()?;
        let root = workspace.path();
        std::fs::write(root.join("Cargo.toml"), "[package]\nname='x'\n")?;
        std::fs::create_dir(root.join("src"))?;
        std::fs::write(root.join("src/lib.rs"), "pub fn x() {}\n")?;

        let (context, files) = get_repo_context(root)?;

        assert!(context.contains("Cargo.toml"));
        assert!(files.iter().map(String::as_str).any(|file| file == "src/lib.rs"));
        Ok(())
    }

    /// The limit stays at the floor for small inputs, grows in between, and saturates.
    #[test]
    fn dynamic_limit_scales_between_bounds() {
        assert_eq!(calculate_dynamic_file_limit(10), 50);
        assert_eq!(calculate_dynamic_file_limit(50), 50);
        assert!(calculate_dynamic_file_limit(1000) > 50);
        assert_eq!(calculate_dynamic_file_limit(1_000_000), 400);
    }

    /// With a `.git` directory present, paths listed in `.gitignore` are left out of the walk.
    #[test]
    fn respects_gitignore_in_git_repo() -> Result<()> {
        let workspace = TempDir::new()?;
        let root = workspace.path();
        std::fs::create_dir(root.join(".git"))?;
        std::fs::write(root.join(".gitignore"), "ignored.txt\n")?;
        std::fs::write(root.join("ignored.txt"), "secret\n")?;
        std::fs::write(root.join("kept.rs"), "fn x() {}\n")?;

        let listed = get_all_files_max_depth(root, true);

        assert!(listed.iter().any(|file| file == "kept.rs"));
        assert!(!listed.iter().any(|file| file == "ignored.txt"), "gitignore must hide ignored.txt");
        Ok(())
    }
}