Skip to main content

vtcode_core/skills/
file_references.rs

//! File reference validation for Agent Skills
//!
//! Validates file references in SKILL.md bodies to ensure they meet
//! the Agent Skills specification requirements.
5
6use hashbrown::HashSet;
7use regex::Regex;
8use std::path::{Path, PathBuf};
9
/// Validates that file references in skill instructions follow the Agent Skills spec
///
/// Requirements:
/// - References must be relative paths from the skill root
/// - Must be one level deep (no nested chains like `references/subdir/file.md`)
/// - Must reference files in supported directories: scripts/, references/, assets/
/// - Referenced files must exist
#[derive(Debug, Clone)]
pub struct FileReferenceValidator {
    /// Root directory of the skill; relative references are resolved against it.
    skill_root: PathBuf,
}
20
21impl FileReferenceValidator {
22    /// Create a new validator for a skill at the given root path
23    pub fn new(skill_root: PathBuf) -> Self {
24        Self { skill_root }
25    }
26
27    /// Validate all file references in the instruction text
28    ///
29    /// Returns a list of validation errors (empty if valid)
30    pub fn validate_references(&self, instructions: &str) -> Vec<String> {
31        let mut errors = Vec::new();
32        let references = self.extract_references(instructions);
33
34        for reference in &references {
35            if let Err(e) = self.validate_reference(reference) {
36                errors.push(format!("Invalid reference '{}': {}", reference, e));
37            }
38        }
39
40        errors
41    }
42
43    /// Extract file references from instruction text
44    ///
45    /// Looks for patterns like:
46    /// - `[text](references/FILE.md)`
47    /// - `scripts/script.py`
48    /// - `assets/image.png`
49    fn extract_references(&self, instructions: &str) -> HashSet<String> {
50        let mut references = HashSet::new();
51        let Ok(md_link_regex) = Regex::new(r"\[.*?\]\((.*?)\)") else {
52            return references;
53        };
54        let Ok(plain_path_regex) = Regex::new(r"\b(scripts|references|assets)/[^\s\),\]]+") else {
55            return references;
56        };
57
58        // Extract markdown links
59        for cap in md_link_regex.captures_iter(instructions) {
60            if let Some(path_match) = cap.get(1) {
61                let path = path_match.as_str();
62                references.insert(path.to_string());
63            }
64        }
65
66        // Extract plain paths
67        for cap in plain_path_regex.captures_iter(instructions) {
68            if let Some(path_match) = cap.get(0) {
69                references.insert(path_match.as_str().to_string());
70            }
71        }
72
73        references
74    }
75
76    /// Validate a single file reference
77    fn validate_reference(&self, reference: &str) -> Result<(), String> {
78        // Check if it's a valid path format
79        let path = Path::new(reference);
80
81        // Must be relative (no absolute paths)
82        if path.is_absolute() {
83            return Err("Absolute paths are not allowed".to_string());
84        }
85
86        // Must be within supported directories
87        let components: Vec<_> = path.components().collect();
88        if components.is_empty() {
89            return Err("Empty path".to_string());
90        }
91
92        // Check first component is a supported directory
93        if let Some(first_component) = components.first() {
94            let first_dir = first_component.as_os_str().to_string_lossy();
95            if !matches!(first_dir.as_ref(), "scripts" | "references" | "assets") {
96                return Err(format!(
97                    "Invalid directory '{}'. Must be 'scripts/', 'references/', or 'assets/'",
98                    first_dir
99                ));
100            }
101        }
102
103        // Check depth - must be one level deep (e.g., scripts/file.py, not scripts/subdir/file.py)
104        if components.len() > 2 {
105            return Err(format!(
106                "Path is too deep: '{}'. Per Agent Skills spec, references must be one level deep.",
107                reference
108            ));
109        }
110
111        // For paths with 2 components (dir + file), validate file exists
112        if components.len() == 2 {
113            let full_path = self.skill_root.join(path);
114            if !full_path.exists() {
115                return Err(format!("Referenced file does not exist: {:?}", full_path));
116            }
117        }
118
119        Ok(())
120    }
121
122    /// Get all valid references from a skill directory
123    pub fn list_valid_references(&self) -> Vec<PathBuf> {
124        let mut references = Vec::new();
125
126        for subdir in &["scripts", "references", "assets"] {
127            let dir = self.skill_root.join(subdir);
128            if dir.is_dir()
129                && let Ok(entries) = std::fs::read_dir(&dir)
130            {
131                for entry in entries.flatten() {
132                    let path = entry.path();
133                    if path.is_file() {
134                        references.push(
135                            path.strip_prefix(&self.skill_root)
136                                .unwrap_or(&path)
137                                .to_path_buf(),
138                        );
139                    }
140                }
141            }
142        }
143
144        references
145    }
146}
147
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// One existing plain-path reference and one missing Markdown-link
    /// reference: only the missing one is reported.
    #[test]
    fn test_valid_file_references() {
        let temp_dir = TempDir::new().unwrap();
        let skill_root = temp_dir.path().to_path_buf();

        // Create test files
        fs::create_dir(skill_root.join("scripts")).unwrap();
        fs::write(skill_root.join("scripts/helper.py"), "# test").unwrap();

        let validator = FileReferenceValidator::new(skill_root);
        let instructions = r#"
            See [the reference](references/REFERENCE.md) for details.
            Run the extraction script: scripts/helper.py
        "#;

        let errors = validator.validate_references(instructions);
        // Should have an error for references/REFERENCE.md (doesn't exist)
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("references/REFERENCE.md"));
    }

    /// A link into a directory other than scripts/, references/ or assets/
    /// is rejected with an "Invalid directory" error.
    #[test]
    fn test_invalid_directory() {
        let temp_dir = TempDir::new().unwrap();
        let validator = FileReferenceValidator::new(temp_dir.path().to_path_buf());

        let errors = validator.validate_references("See [the guide](docs/guide.md)");
        assert_eq!(errors.len(), 1);
        assert!(errors[0].contains("Invalid directory 'docs'"));
    }

    /// A reference into a supported directory whose file is missing is
    /// reported as non-existent.
    #[test]
    fn test_nonexistent_file() {
        // A fresh temporary root guarantees the file cannot exist by accident.
        let temp_dir = TempDir::new().unwrap();
        let validator = FileReferenceValidator::new(temp_dir.path().to_path_buf());

        let errors = validator.validate_references("See `scripts/nonexistent.py`");
        assert!(!errors.is_empty());
        assert!(errors[0].contains("nonexistent.py"));
        assert!(errors[0].contains("does not exist"));
    }

    /// References nested more than one level deep are rejected.
    #[test]
    fn test_deep_path_error() {
        // The depth check runs before any filesystem access, so the root
        // does not need to exist.
        let validator = FileReferenceValidator::new(PathBuf::from("unused-skill-root"));
        let errors = validator.validate_references("See `scripts/subdir/deep.py`");
        assert!(!errors.is_empty());
        assert!(errors[0].contains("too deep"));
    }

    /// Files directly inside the supported directories are listed relative
    /// to the skill root.
    #[test]
    fn test_list_valid_references() {
        let temp_dir = TempDir::new().unwrap();
        let skill_root = temp_dir.path().to_path_buf();

        fs::create_dir(skill_root.join("scripts")).unwrap();
        fs::create_dir(skill_root.join("references")).unwrap();
        fs::write(skill_root.join("scripts/test.py"), "# test").unwrap();
        fs::write(skill_root.join("references/ref.md"), "# ref").unwrap();

        let validator = FileReferenceValidator::new(skill_root);
        let refs = validator.list_valid_references();

        assert_eq!(refs.len(), 2);
        // Compare as paths rather than strings so the separator is not
        // hard-coded to '/'.
        assert!(refs.contains(&Path::new("scripts").join("test.py")));
        assert!(refs.contains(&Path::new("references").join("ref.md")));
    }
}