Skip to main content

aster/context/
agents_md_parser.rs

//! AGENTS.md Parser
//!
//! This module provides functionality to parse AGENTS.md files and inject
//! their content into system prompts. It supports:
//!
//! - Searching for AGENTS.md in the project root and in the `.kiro`,
//!   `.claude` and `.github` subdirectories
//! - Parsing markdown content and extracting instructions
//! - Extracting file references from markdown links, fenced code block
//!   headers (```` ```lang:path ````) and inline code spans
//! - Injecting parsed content into system prompts
//!
//! # Example
//!
//! ```ignore
//! use aster::context::agents_md_parser::AgentsMdParser;
//! use std::path::Path;
//!
//! let config = AgentsMdParser::parse(Path::new("/path/to/project")).await?;
//! if let Some(config) = config {
//!     println!("Found AGENTS.md with {} file references", config.files.len());
//! }
//! ```
21
22use crate::context::types::{AgentsMdConfig, ContextError};
23use regex::Regex;
24use std::path::{Path, PathBuf};
25use tokio::fs;
26
/// Accepted spellings of the instructions file, in lookup priority order.
///
/// Both the plural and singular names are accepted, each in upper and lower
/// case, so the lookup also works on case-sensitive file systems.
const AGENTS_MD_FILENAMES: &[&str] = &[
    "AGENTS.md",
    "agents.md",
    "AGENT.md",
    "agent.md",
];

/// Tool-specific subdirectories that are searched after the project root,
/// in lookup priority order.
const AGENTS_MD_SUBDIRS: &[&str] = &[
    ".kiro",
    ".claude",
    ".github",
];
32
/// AGENTS.md parser for extracting project-specific instructions.
///
/// The parser searches for AGENTS.md files in standard locations and
/// extracts their content along with any file references found in the
/// markdown (links, fenced code block headers and inline code spans).
///
/// This is a stateless unit type: all functionality is exposed through
/// associated functions, so no instance is required to use it.
#[derive(Debug, Clone, Copy, Default)]
pub struct AgentsMdParser;
39
40impl AgentsMdParser {
41    /// Parse AGENTS.md file from the given directory.
42    ///
43    /// This method searches for AGENTS.md in:
44    /// 1. Project root directory
45    /// 2. .kiro subdirectory
46    /// 3. .claude subdirectory
47    /// 4. .github subdirectory
48    ///
49    /// # Arguments
50    ///
51    /// * `cwd` - The current working directory (project root)
52    ///
53    /// # Returns
54    ///
55    /// `Some(AgentsMdConfig)` if AGENTS.md is found, `None` otherwise
56    ///
57    /// # Errors
58    ///
59    /// Returns an error if file reading fails
60    pub async fn parse(cwd: &Path) -> Result<Option<AgentsMdConfig>, ContextError> {
61        let possible_paths = Self::get_possible_paths(cwd);
62
63        for path in possible_paths {
64            if path.exists() && path.is_file() {
65                match fs::read_to_string(&path).await {
66                    Ok(content) => {
67                        let files = Self::extract_file_references(&content, cwd);
68                        return Ok(Some(AgentsMdConfig::new(content, files)));
69                    }
70                    Err(e) => {
71                        tracing::warn!("Failed to read AGENTS.md at {}: {}", path.display(), e);
72                        // Continue searching other paths
73                    }
74                }
75            }
76        }
77
78        Ok(None)
79    }
80
81    /// Parse AGENTS.md file synchronously (blocking).
82    ///
83    /// This is a convenience method for contexts where async is not available.
84    ///
85    /// # Arguments
86    ///
87    /// * `cwd` - The current working directory (project root)
88    ///
89    /// # Returns
90    ///
91    /// `Some(AgentsMdConfig)` if AGENTS.md is found, `None` otherwise
92    pub fn parse_sync(cwd: &Path) -> Result<Option<AgentsMdConfig>, ContextError> {
93        let possible_paths = Self::get_possible_paths(cwd);
94
95        for path in possible_paths {
96            if path.exists() && path.is_file() {
97                match std::fs::read_to_string(&path) {
98                    Ok(content) => {
99                        let files = Self::extract_file_references(&content, cwd);
100                        return Ok(Some(AgentsMdConfig::new(content, files)));
101                    }
102                    Err(e) => {
103                        tracing::warn!("Failed to read AGENTS.md at {}: {}", path.display(), e);
104                    }
105                }
106            }
107        }
108
109        Ok(None)
110    }
111
112    /// Get all possible paths where AGENTS.md might be located.
113    ///
114    /// Returns paths in priority order:
115    /// 1. Root directory AGENTS.md variants
116    /// 2. Subdirectory AGENTS.md variants (.kiro, .claude, .github)
117    ///
118    /// # Arguments
119    ///
120    /// * `cwd` - The current working directory (project root)
121    ///
122    /// # Returns
123    ///
124    /// A vector of possible paths to check
125    pub fn get_possible_paths(cwd: &Path) -> Vec<PathBuf> {
126        let mut paths = Vec::new();
127
128        // First, check root directory
129        for filename in AGENTS_MD_FILENAMES {
130            paths.push(cwd.join(filename));
131        }
132
133        // Then check subdirectories
134        for subdir in AGENTS_MD_SUBDIRS {
135            for filename in AGENTS_MD_FILENAMES {
136                paths.push(cwd.join(subdir).join(filename));
137            }
138        }
139
140        paths
141    }
142
143    /// Extract file references from markdown content.
144    ///
145    /// This method extracts file paths from:
146    /// - Markdown links: [text](path/to/file)
147    /// - Code block file references: ```language:path/to/file
148    /// - Explicit file mentions: `path/to/file`
149    ///
150    /// # Arguments
151    ///
152    /// * `text` - The markdown content to parse
153    /// * `cwd` - The current working directory for resolving relative paths
154    ///
155    /// # Returns
156    ///
157    /// A vector of resolved file paths
158    pub fn extract_file_references(text: &str, cwd: &Path) -> Vec<PathBuf> {
159        let mut files = Vec::new();
160
161        // Pattern 1: Markdown links [text](path)
162        // Matches: [any text](relative/path/to/file.ext)
163        let link_pattern = Regex::new(r"\[([^\]]*)\]\(([^)]+)\)").unwrap();
164        for cap in link_pattern.captures_iter(text) {
165            if let Some(path_match) = cap.get(2) {
166                let path_str = path_match.as_str();
167                // Filter out URLs and anchors
168                if !path_str.starts_with("http")
169                    && !path_str.starts_with('#')
170                    && !path_str.starts_with("mailto:")
171                {
172                    let path = Self::resolve_path(path_str, cwd);
173                    if path.exists() && !files.contains(&path) {
174                        files.push(path);
175                    }
176                }
177            }
178        }
179
180        // Pattern 2: Code block with file path ```language:path/to/file
181        let code_block_pattern = Regex::new(r"```\w+:([^\s`]+)").unwrap();
182        for cap in code_block_pattern.captures_iter(text) {
183            if let Some(path_match) = cap.get(1) {
184                let path = Self::resolve_path(path_match.as_str(), cwd);
185                if path.exists() && !files.contains(&path) {
186                    files.push(path);
187                }
188            }
189        }
190
191        // Pattern 3: Inline code file references `path/to/file.ext`
192        // Only match paths that look like file paths (contain / or have extension)
193        let inline_code_pattern = Regex::new(r"`([^`]+\.[a-zA-Z0-9]+)`").unwrap();
194        for cap in inline_code_pattern.captures_iter(text) {
195            if let Some(path_match) = cap.get(1) {
196                let path_str = path_match.as_str();
197                // Filter out code snippets and commands
198                if !path_str.contains(' ')
199                    && !path_str.starts_with('-')
200                    && !path_str.starts_with('$')
201                {
202                    let path = Self::resolve_path(path_str, cwd);
203                    if path.exists() && !files.contains(&path) {
204                        files.push(path);
205                    }
206                }
207            }
208        }
209
210        files
211    }
212
213    /// Resolve a path string relative to the working directory.
214    ///
215    /// Handles both absolute and relative paths.
216    fn resolve_path(path_str: &str, cwd: &Path) -> PathBuf {
217        let path = Path::new(path_str);
218        if path.is_absolute() {
219            path.to_path_buf()
220        } else {
221            cwd.join(path)
222        }
223    }
224
225    /// Inject AGENTS.md content into a system prompt.
226    ///
227    /// This method:
228    /// 1. Searches for AGENTS.md in the project
229    /// 2. If found, appends its content to the system prompt
230    /// 3. Returns the modified system prompt
231    ///
232    /// # Arguments
233    ///
234    /// * `system_prompt` - The original system prompt
235    /// * `cwd` - The current working directory (project root)
236    ///
237    /// # Returns
238    ///
239    /// The system prompt with AGENTS.md content injected (if found)
240    ///
241    /// # Example
242    ///
243    /// ```ignore
244    /// let enhanced_prompt = AgentsMdParser::inject_to_system_prompt(
245    ///     "You are a helpful assistant.",
246    ///     Path::new("/path/to/project")
247    /// ).await?;
248    /// ```
249    pub async fn inject_to_system_prompt(
250        system_prompt: &str,
251        cwd: &Path,
252    ) -> Result<String, ContextError> {
253        match Self::parse(cwd).await? {
254            Some(config) => {
255                let injected = format!(
256                    "{}\n\n## Project Instructions (from AGENTS.md)\n\n{}",
257                    system_prompt, config.content
258                );
259                Ok(injected)
260            }
261            None => Ok(system_prompt.to_string()),
262        }
263    }
264
265    /// Inject AGENTS.md content into a system prompt synchronously.
266    ///
267    /// This is a convenience method for contexts where async is not available.
268    pub fn inject_to_system_prompt_sync(
269        system_prompt: &str,
270        cwd: &Path,
271    ) -> Result<String, ContextError> {
272        match Self::parse_sync(cwd)? {
273            Some(config) => {
274                let injected = format!(
275                    "{}\n\n## Project Instructions (from AGENTS.md)\n\n{}",
276                    system_prompt, config.content
277                );
278                Ok(injected)
279            }
280            None => Ok(system_prompt.to_string()),
281        }
282    }
283
284    /// Check if AGENTS.md exists in the given directory.
285    ///
286    /// # Arguments
287    ///
288    /// * `cwd` - The current working directory (project root)
289    ///
290    /// # Returns
291    ///
292    /// `true` if AGENTS.md exists, `false` otherwise
293    pub fn exists(cwd: &Path) -> bool {
294        Self::get_possible_paths(cwd)
295            .iter()
296            .any(|p| p.exists() && p.is_file())
297    }
298
299    /// Find the first existing AGENTS.md path.
300    ///
301    /// # Arguments
302    ///
303    /// * `cwd` - The current working directory (project root)
304    ///
305    /// # Returns
306    ///
307    /// `Some(PathBuf)` if found, `None` otherwise
308    pub fn find_path(cwd: &Path) -> Option<PathBuf> {
309        Self::get_possible_paths(cwd)
310            .into_iter()
311            .find(|p| p.exists() && p.is_file())
312    }
313}
314
// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Create an empty temporary directory that stands in for a project root.
    fn project_root() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Write `body` to `relative` below `root`, creating any missing parent
    /// directories, and return the full path of the new file.
    fn write_file(root: &Path, relative: &str, body: &str) -> PathBuf {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(&target, body).unwrap();
        target
    }

    #[test]
    fn test_get_possible_paths() {
        let paths = AgentsMdParser::get_possible_paths(Path::new("/test/project"));

        // Root variants plus one representative per searched subdirectory.
        let expected = [
            "/test/project/AGENTS.md",
            "/test/project/agents.md",
            "/test/project/.kiro/AGENTS.md",
            "/test/project/.claude/AGENTS.md",
            "/test/project/.github/AGENTS.md",
        ];
        for candidate in expected {
            assert!(paths.contains(&PathBuf::from(candidate)));
        }
    }

    #[test]
    fn test_extract_file_references_markdown_links() {
        let root = project_root();
        let main_rs = write_file(root.path(), "src/main.rs", "fn main() {}");

        let files = AgentsMdParser::extract_file_references(
            "Check [main file](src/main.rs) for details",
            root.path(),
        );

        assert_eq!(files.len(), 1);
        assert_eq!(files[0], main_rs);
    }

    #[test]
    fn test_extract_file_references_ignores_urls() {
        let root = project_root();

        let files = AgentsMdParser::extract_file_references(
            "See [docs](https://example.com) and [anchor](#section)",
            root.path(),
        );

        assert!(files.is_empty());
    }

    #[test]
    fn test_extract_file_references_inline_code() {
        let root = project_root();
        let config_json = write_file(root.path(), "config.json", "{}");

        let files =
            AgentsMdParser::extract_file_references("Edit `config.json` to configure", root.path());

        assert_eq!(files.len(), 1);
        assert_eq!(files[0], config_json);
    }

    #[test]
    fn test_extract_file_references_no_duplicates() {
        let root = project_root();
        write_file(root.path(), "main.rs", "fn main() {}");

        // The same file is referenced once as a link and once as inline code.
        let files = AgentsMdParser::extract_file_references(
            "See [main](main.rs) and also `main.rs` for details",
            root.path(),
        );

        assert_eq!(files.len(), 1);
    }

    #[test]
    fn test_extract_file_references_nonexistent_files() {
        let root = project_root();

        let files = AgentsMdParser::extract_file_references(
            "See [missing](nonexistent.rs) file",
            root.path(),
        );

        assert!(files.is_empty());
    }

    #[tokio::test]
    async fn test_parse_root_agents_md() {
        let root = project_root();
        let content = "# Project Instructions\n\nBuild with `cargo build`";
        write_file(root.path(), "AGENTS.md", content);

        let parsed = AgentsMdParser::parse(root.path()).await.unwrap();

        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().content, content);
    }

    #[tokio::test]
    async fn test_parse_kiro_agents_md() {
        let root = project_root();
        let content = "# Kiro Instructions";
        write_file(root.path(), ".kiro/AGENTS.md", content);

        let parsed = AgentsMdParser::parse(root.path()).await.unwrap();

        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().content, content);
    }

    #[tokio::test]
    async fn test_parse_prefers_root_over_subdir() {
        let root = project_root();
        write_file(root.path(), "AGENTS.md", "Root instructions");
        write_file(root.path(), ".kiro/AGENTS.md", "Kiro instructions");

        let parsed = AgentsMdParser::parse(root.path()).await.unwrap();

        assert!(parsed.is_some());
        // The root directory has priority over every subdirectory.
        assert_eq!(parsed.unwrap().content, "Root instructions");
    }

    #[tokio::test]
    async fn test_parse_not_found() {
        let root = project_root();

        let parsed = AgentsMdParser::parse(root.path()).await.unwrap();

        assert!(parsed.is_none());
    }

    #[tokio::test]
    async fn test_parse_with_file_references() {
        let root = project_root();

        // The referenced file must exist for it to be reported.
        let main_rs = write_file(root.path(), "src/main.rs", "fn main() {}");
        write_file(
            root.path(),
            "AGENTS.md",
            "# Instructions\n\nSee [main](src/main.rs) for entry point",
        );

        let parsed = AgentsMdParser::parse(root.path()).await.unwrap();

        assert!(parsed.is_some());
        let config = parsed.unwrap();
        assert_eq!(config.files.len(), 1);
        assert_eq!(config.files[0], main_rs);
    }

    #[test]
    fn test_parse_sync() {
        let root = project_root();
        let content = "# Sync Test";
        write_file(root.path(), "AGENTS.md", content);

        let parsed = AgentsMdParser::parse_sync(root.path()).unwrap();

        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().content, content);
    }

    #[tokio::test]
    async fn test_inject_to_system_prompt_with_agents() {
        let root = project_root();
        let agents_content = "Build with cargo";
        write_file(root.path(), "AGENTS.md", agents_content);

        let system_prompt = "You are a helpful assistant.";
        let injected = AgentsMdParser::inject_to_system_prompt(system_prompt, root.path())
            .await
            .unwrap();

        for fragment in [system_prompt, agents_content, "Project Instructions"] {
            assert!(injected.contains(fragment));
        }
    }

    #[tokio::test]
    async fn test_inject_to_system_prompt_without_agents() {
        let root = project_root();
        let system_prompt = "You are a helpful assistant.";

        let injected = AgentsMdParser::inject_to_system_prompt(system_prompt, root.path())
            .await
            .unwrap();

        assert_eq!(injected, system_prompt);
    }

    #[test]
    fn test_inject_to_system_prompt_sync() {
        let root = project_root();
        write_file(root.path(), "AGENTS.md", "Sync instructions");

        let system_prompt = "Base prompt";
        let injected =
            AgentsMdParser::inject_to_system_prompt_sync(system_prompt, root.path()).unwrap();

        assert!(injected.contains(system_prompt));
        assert!(injected.contains("Sync instructions"));
    }

    #[test]
    fn test_exists() {
        let root = project_root();

        // Nothing has been written yet.
        assert!(!AgentsMdParser::exists(root.path()));

        write_file(root.path(), "AGENTS.md", "test");

        // The root-level file is now detected.
        assert!(AgentsMdParser::exists(root.path()));
    }

    #[test]
    fn test_find_path() {
        let root = project_root();

        // Nothing has been written yet.
        assert!(AgentsMdParser::find_path(root.path()).is_none());

        let agents_path = write_file(root.path(), "AGENTS.md", "test");

        // The root-level file is now returned.
        let found = AgentsMdParser::find_path(root.path());
        assert!(found.is_some());
        assert_eq!(found.unwrap(), agents_path);
    }

    #[test]
    fn test_lowercase_agents_md() {
        let root = project_root();
        write_file(root.path(), "agents.md", "lowercase");

        let parsed = AgentsMdParser::parse_sync(root.path()).unwrap();

        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().content, "lowercase");
    }

    #[test]
    fn test_claude_subdir() {
        let root = project_root();
        write_file(root.path(), ".claude/AGENTS.md", "claude instructions");

        let parsed = AgentsMdParser::parse_sync(root.path()).unwrap();

        assert!(parsed.is_some());
        assert_eq!(parsed.unwrap().content, "claude instructions");
    }
}