claude_agent/context/
import_extractor.rs1use regex::Regex;
7use std::collections::HashSet;
8use std::path::{Path, PathBuf};
9
10pub struct ImportExtractor {
20 regex: Regex,
21 code_block_regex: Regex,
22 inline_code_regex: Regex,
23}
24
25impl ImportExtractor {
26 pub fn new() -> Self {
28 Self {
29 regex: Regex::new(r"(?:^|\s)@((?:[^\s\\]|\\ )+)").expect("Invalid regex pattern"),
32 code_block_regex: Regex::new(r"(?s)(?:```|~~~).*?(?:```|~~~)").expect("Invalid regex"),
34 inline_code_regex: Regex::new(r"`[^`]+`").expect("Invalid regex"),
36 }
37 }
38
39 pub fn extract(&self, content: &str, base_dir: &Path) -> Vec<PathBuf> {
48 let without_fenced = self.code_block_regex.replace_all(content, " ");
50 let clean_content = self.inline_code_regex.replace_all(&without_fenced, " ");
52
53 let mut seen = HashSet::new();
55 let mut paths = Vec::new();
56 self.extract_from_text_dedup(&clean_content, base_dir, &mut seen, &mut paths);
57 paths
58 }
59
60 fn extract_from_text_dedup(
62 &self,
63 text: &str,
64 base_dir: &Path,
65 seen: &mut HashSet<PathBuf>,
66 paths: &mut Vec<PathBuf>,
67 ) {
68 for cap in self.regex.captures_iter(text) {
69 if let Some(m) = cap.get(1) {
70 let raw_path = m.as_str().replace("\\ ", " ");
72 if let Some(resolved) = self.resolve_path(&raw_path, base_dir) {
73 if seen.insert(resolved.clone()) {
75 paths.push(resolved);
76 }
77 }
78 }
79 }
80 }
81
82 fn resolve_path(&self, path: &str, base_dir: &Path) -> Option<PathBuf> {
89 if !self.is_valid_path(path) {
90 return None;
91 }
92
93 Some(if let Some(rest) = path.strip_prefix("~/") {
94 crate::common::home_dir()?.join(rest)
95 } else if path.starts_with('/') {
96 PathBuf::from(path)
97 } else {
98 base_dir.join(path)
99 })
100 }
101
102 fn is_valid_path(&self, path: &str) -> bool {
112 if path.is_empty() {
113 return false;
114 }
115
116 path.starts_with("./")
118 || path.starts_with("~/")
119 || (path.starts_with('/') && path != "/")
120 || (!path.starts_with('@')
121 && !path.starts_with(|c| "#%^&*()".contains(c))
122 && path
123 .starts_with(|c: char| c.is_alphanumeric() || c == '.' || c == '_' || c == '-'))
124 }
125}
126
127impl Default for ImportExtractor {
128 fn default() -> Self {
129 Self::new()
130 }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh extractor over `content`, resolving relative imports against `/project`.
    fn imports_in(content: &str) -> Vec<PathBuf> {
        ImportExtractor::new().extract(content, Path::new("/project"))
    }

    #[test]
    fn test_extract_line_start() {
        let imports = imports_in("@docs/api.md\n@config/settings.md");
        assert_eq!(imports.len(), 2);
        assert!(imports[0].ends_with("docs/api.md"));
        assert!(imports[1].ends_with("config/settings.md"));
    }

    #[test]
    fn test_extract_inline() {
        let imports = imports_in("Prerequisites: @docs/guide.md for details");
        assert_eq!(imports.len(), 1);
        assert!(imports[0].ends_with("docs/guide.md"));
    }

    #[test]
    fn test_skip_fenced_code_block() {
        let imports = imports_in("```\n@should/not/import.md\n```\n@should/import.md");
        assert_eq!(imports.len(), 1);
        assert!(imports[0].ends_with("should/import.md"));
    }

    #[test]
    fn test_skip_indented_code_block() {
        // NOTE: `extract` only strips fenced blocks and inline code, not indented blocks.
        // This test therefore pins just that the imports around the indented line survive;
        // it makes no claim about the indented `@` token itself.
        let imports = imports_in(
            "Normal text @real/import.md\n\n    @indented/code.md\n\nMore @another/import.md",
        );
        assert!(imports.iter().any(|p| p.ends_with("real/import.md")));
        assert!(imports.iter().any(|p| p.ends_with("another/import.md")));
    }

    #[test]
    fn test_skip_inline_code() {
        let imports = imports_in("Use `@decorator` syntax and @real/import.md file");
        assert_eq!(imports.len(), 1);
        assert!(imports[0].ends_with("real/import.md"));
    }

    #[test]
    fn test_home_expansion() {
        let imports = imports_in("@~/shared/config.md");
        assert_eq!(imports.len(), 1);
        assert!(!imports[0].to_string_lossy().contains('~'));
    }

    #[test]
    fn test_relative_paths() {
        let extractor = ImportExtractor::new();
        let imports = extractor.extract("@./local/file.md", Path::new("/project/subdir"));
        assert_eq!(imports.len(), 1);
        assert!(imports[0].starts_with("/project/subdir"));
    }

    #[test]
    fn test_absolute_path() {
        let imports = imports_in("@/absolute/path/file.md");
        assert_eq!(imports.len(), 1);
        assert_eq!(imports[0], PathBuf::from("/absolute/path/file.md"));
    }

    #[test]
    fn test_invalid_paths_ignored() {
        let imports = imports_in("@#invalid @%also-invalid @^nope @&bad @*bad @(bad @)bad");
        assert!(imports.is_empty());
    }

    #[test]
    fn test_escaped_at_ignored() {
        let imports = imports_in("@@escaped @valid/path.md");
        // The `@@escaped` token is captured as `@escaped`, which is not a valid path, so the
        // only import left is the real one.
        assert_eq!(imports.len(), 1, "`@@escaped` must not yield an import");
        assert!(imports[0].ends_with("valid/path.md"));
    }

    #[test]
    fn test_escaped_spaces_in_path() {
        let imports = imports_in(r"@docs/my\ file.md");
        assert_eq!(imports.len(), 1);
        assert!(imports[0].ends_with("docs/my file.md"));
    }

    #[test]
    fn test_root_slash_only_invalid() {
        let extractor = ImportExtractor::new();
        assert!(!extractor.is_valid_path("/"));
        // The same rejection must hold when going through the public entry point.
        assert!(imports_in("@/").is_empty());
    }

    #[test]
    fn test_implicit_relative_path() {
        let extractor = ImportExtractor::new();

        // (path, message for the validity check, message for the extraction check)
        let cases = [
            ("docs/file.md", "alphanumeric start", "alphanumeric path"),
            ("_private/config.md", "underscore start", "underscore path"),
            (".hidden/file.md", "dot start (not ./)", "dot path"),
        ];

        for &(path, start_kind, path_kind) in cases.iter() {
            assert!(extractor.is_valid_path(path), "{}", start_kind);

            let content = format!("@{}", path);
            let imports = extractor.extract(&content, Path::new("/project"));
            assert_eq!(imports.len(), 1, "{}", path_kind);
        }
    }

    #[test]
    fn test_multiple_imports_same_line() {
        let imports = imports_in("Include @first.md and @second.md and @third.md");
        assert_eq!(imports.len(), 3);
    }

    #[test]
    fn test_empty_content() {
        assert!(imports_in("").is_empty());
    }

    #[test]
    fn test_no_imports() {
        let imports = imports_in("# Title\n\nJust regular content without any imports.");
        assert!(imports.is_empty());
    }

    #[test]
    fn test_markdown_link_not_imported() {
        // Neither `@` follows whitespace or the start of the text, so nothing matches.
        let imports = imports_in("See [@.agents/docs.md](@.agents/docs.md) for details");
        assert!(
            imports.is_empty(),
            "Markdown links should not be extracted as imports"
        );
    }

    #[test]
    fn test_duplicate_import_paths_deduped() {
        let imports = imports_in("@docs/api.md\nSome text\n@docs/api.md");
        assert_eq!(imports.len(), 1, "Duplicates should be removed");
        assert!(imports[0].ends_with("docs/api.md"));
    }

    #[test]
    fn test_same_file_inline_twice_deduped() {
        let imports = imports_in("See @docs/api.md and also @docs/api.md");
        assert_eq!(imports.len(), 1, "Duplicates should be removed");
    }

    #[test]
    fn test_different_paths_not_deduped() {
        let imports = imports_in("@docs/api.md\n@docs/guide.md\n@docs/api.md");
        assert_eq!(imports.len(), 2, "Different paths should be preserved");
    }
}