Skip to main content

thulp_skill_files/
parser.rs

1//! SKILL.md file parser.
2//!
3//! Parses SKILL.md files with optional YAML frontmatter and markdown content.
4
5use crate::error::{Result, SkillFileError};
6use crate::frontmatter::SkillFrontmatter;
7use std::path::{Path, PathBuf};
8use walkdir::WalkDir;
9
/// Marker that opens and closes the YAML frontmatter section of a SKILL.md file.
const FRONTMATTER_DELIMITER: &str = "---";
11
12/// A supporting file in the skill directory.
13#[derive(Debug, Clone, PartialEq)]
14pub struct SupportingFile {
15    /// File name.
16    pub name: String,
17    /// Full path to the file.
18    pub path: PathBuf,
19    /// Classification of the file.
20    pub file_type: SupportingFileType,
21}
22
/// Type classification for supporting files.
///
/// A file's location takes precedence over its extension: anything nested
/// under `examples/`, `scripts/` or `templates/` is classified by that
/// directory, and only the remaining files are classified by extension.
///
/// The enum is a plain tag, so it is `Copy` and can be used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SupportingFileType {
    /// Template files (anything in `templates/`, or `.txt` files elsewhere).
    Template,
    /// Example files (anything in `examples/`).
    Example,
    /// Script files (anything in `scripts/`, or `.sh`/`.py`/`.js`/`.ts` files elsewhere).
    Script,
    /// Reference documentation (`.md` files outside the special directories).
    Reference,
    /// Other file types.
    Other,
}
37
38/// Parsed skill file with frontmatter and content.
39#[derive(Debug, Clone)]
40pub struct SkillFile {
41    /// Parsed YAML frontmatter.
42    pub frontmatter: SkillFrontmatter,
43
44    /// Markdown content (instructions).
45    pub content: String,
46
47    /// Path to the SKILL.md file.
48    pub path: PathBuf,
49
50    /// Directory containing the skill.
51    pub directory: PathBuf,
52
53    /// Supporting files discovered in the directory.
54    pub supporting_files: Vec<SupportingFile>,
55}
56
57impl SkillFile {
58    /// Parse a SKILL.md file from the given path.
59    pub fn parse<P: AsRef<Path>>(path: P) -> Result<Self> {
60        let path = path.as_ref();
61        let content = std::fs::read_to_string(path)?;
62
63        Self::parse_content(&content, path)
64    }
65
66    /// Parse SKILL.md content with path context.
67    pub fn parse_content(content: &str, path: &Path) -> Result<Self> {
68        let (frontmatter, body) = Self::split_frontmatter(content)?;
69
70        let directory = path
71            .parent()
72            .ok_or_else(|| SkillFileError::InvalidPath("No parent directory".into()))?
73            .to_path_buf();
74
75        let supporting_files = Self::discover_supporting_files(&directory)?;
76
77        Ok(Self {
78            frontmatter,
79            content: body,
80            path: path.to_path_buf(),
81            directory,
82            supporting_files,
83        })
84    }
85
86    /// Parse SKILL.md content without path context (for testing).
87    pub fn parse_content_only(content: &str) -> Result<(SkillFrontmatter, String)> {
88        Self::split_frontmatter(content)
89    }
90
91    /// Split content into frontmatter and body.
92    fn split_frontmatter(content: &str) -> Result<(SkillFrontmatter, String)> {
93        let trimmed = content.trim_start();
94
95        if !trimmed.starts_with(FRONTMATTER_DELIMITER) {
96            // No frontmatter, entire content is body
97            return Ok((SkillFrontmatter::default(), content.to_string()));
98        }
99
100        // Skip the opening delimiter and find the closing one
101        let rest = &trimmed[FRONTMATTER_DELIMITER.len()..];
102
103        // Skip any newline after opening delimiter
104        let rest = rest.trim_start_matches('\n').trim_start_matches('\r');
105
106        // Find the closing delimiter
107        let end_pos = rest
108            .find(FRONTMATTER_DELIMITER)
109            .ok_or_else(|| SkillFileError::Parse("Missing closing frontmatter delimiter".into()))?;
110
111        let yaml_content = rest[..end_pos].trim();
112        let body = rest[end_pos + FRONTMATTER_DELIMITER.len()..]
113            .trim_start_matches('\n')
114            .trim_start_matches('\r');
115
116        // Parse YAML, allowing empty frontmatter
117        let frontmatter: SkillFrontmatter = if yaml_content.is_empty() {
118            SkillFrontmatter::default()
119        } else {
120            serde_yaml::from_str(yaml_content)?
121        };
122
123        Ok((frontmatter, body.to_string()))
124    }
125
126    /// Discover supporting files in the skill directory.
127    fn discover_supporting_files(directory: &Path) -> Result<Vec<SupportingFile>> {
128        let mut files = Vec::new();
129
130        if !directory.exists() {
131            return Ok(files);
132        }
133
134        for entry in WalkDir::new(directory)
135            .max_depth(2)
136            .into_iter()
137            .filter_map(|e| e.ok())
138        {
139            let path = entry.path();
140            if path.is_file() && path.file_name() != Some(std::ffi::OsStr::new("SKILL.md")) {
141                let name = path
142                    .file_name()
143                    .and_then(|n| n.to_str())
144                    .unwrap_or("")
145                    .to_string();
146
147                let file_type = Self::classify_supporting_file(path, directory);
148
149                files.push(SupportingFile {
150                    name,
151                    path: path.to_path_buf(),
152                    file_type,
153                });
154            }
155        }
156
157        Ok(files)
158    }
159
160    /// Classify a supporting file by its location and extension.
161    fn classify_supporting_file(path: &Path, base: &Path) -> SupportingFileType {
162        let relative = path.strip_prefix(base).unwrap_or(path);
163        let components: Vec<_> = relative.components().collect();
164
165        // Check if file is in a special subdirectory
166        if components.len() > 1 {
167            let first_dir = components[0].as_os_str().to_str().unwrap_or("");
168            match first_dir {
169                "examples" => return SupportingFileType::Example,
170                "scripts" => return SupportingFileType::Script,
171                "templates" => return SupportingFileType::Template,
172                _ => {}
173            }
174        }
175
176        // Classify by extension
177        match path.extension().and_then(|e| e.to_str()) {
178            Some("md") => SupportingFileType::Reference,
179            Some("txt") => SupportingFileType::Template,
180            Some("sh") | Some("py") | Some("js") | Some("ts") => SupportingFileType::Script,
181            _ => SupportingFileType::Other,
182        }
183    }
184
185    /// Get effective name (from frontmatter or directory name).
186    pub fn effective_name(&self) -> String {
187        self.frontmatter.name.clone().unwrap_or_else(|| {
188            self.directory
189                .file_name()
190                .and_then(|n| n.to_str())
191                .unwrap_or("unnamed")
192                .to_string()
193        })
194    }
195
196    /// Get effective description (from frontmatter or first paragraph).
197    pub fn effective_description(&self) -> String {
198        self.frontmatter.description.clone().unwrap_or_else(|| {
199            // Extract first paragraph as description
200            self.content
201                .split("\n\n")
202                .next()
203                .unwrap_or("")
204                .lines()
205                .filter(|l| !l.starts_with('#'))
206                .collect::<Vec<_>>()
207                .join(" ")
208                .trim()
209                .to_string()
210        })
211    }
212
213    /// Check if a tool is allowed for this skill.
214    ///
215    /// Supports wildcard patterns with `*` which matches any characters.
216    /// Examples:
217    /// - `Bash` matches exactly `Bash`
218    /// - `Bash*` matches `Bash`, `Bash(python:test.py)`, etc.
219    /// - `Bash(python:*)` matches `Bash(python:foo)`, `Bash(python:bar)`, etc.
220    pub fn is_tool_allowed(&self, tool_name: &str) -> bool {
221        match &self.frontmatter.allowed_tools {
222            Some(allowed) => {
223                allowed.iter().any(|pattern| {
224                    if pattern.contains('*') {
225                        // Convert glob pattern to regex
226                        // Escape regex special chars except *, then replace * with .*
227                        let regex_pattern = regex::escape(pattern).replace(r"\*", ".*");
228                        regex::Regex::new(&format!("^{}$", regex_pattern))
229                            .map(|re| re.is_match(tool_name))
230                            .unwrap_or(false)
231                    } else {
232                        tool_name == pattern
233                    }
234                })
235            }
236            None => true, // No restrictions
237        }
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an in-memory `SkillFile` whose frontmatter is parsed from `source`.
    ///
    /// No path context is involved, so the body, paths and supporting files
    /// are left empty.
    fn skill_from(source: &str) -> SkillFile {
        let (frontmatter, _) = SkillFile::parse_content_only(source).unwrap();
        SkillFile {
            frontmatter,
            content: String::new(),
            path: PathBuf::new(),
            directory: PathBuf::new(),
            supporting_files: Vec::new(),
        }
    }

    #[test]
    fn test_parse_no_frontmatter() {
        let (fm, body) =
            SkillFile::parse_content_only("# My Skill\n\nThis is the skill content.").unwrap();

        assert!(fm.name.is_none());
        assert!(body.contains("My Skill"));
    }

    #[test]
    fn test_parse_with_frontmatter() {
        let source = "---\nname: test-skill\ndescription: A test skill\n---\n\
                      # Instructions\n\nDo something useful.\n";
        let (fm, body) = SkillFile::parse_content_only(source).unwrap();

        assert_eq!(fm.name, Some("test-skill".to_string()));
        assert_eq!(fm.description, Some("A test skill".to_string()));
        assert!(body.contains("Instructions"));
        assert!(body.contains("Do something useful"));
    }

    #[test]
    fn test_parse_empty_frontmatter() {
        let (fm, body) =
            SkillFile::parse_content_only("---\n---\n# Just content here\n").unwrap();

        assert!(fm.name.is_none());
        assert!(body.contains("Just content here"));
    }

    #[test]
    fn test_missing_closing_delimiter() {
        let source = "---\nname: broken\nThis has no closing delimiter\n";

        assert!(SkillFile::parse_content_only(source).is_err());
    }

    #[test]
    fn test_tool_allowed_exact_match() {
        let skill = skill_from("---\nallowed-tools:\n  - Read\n  - Write\n---\nContent\n");

        assert!(skill.is_tool_allowed("Read"));
        assert!(skill.is_tool_allowed("Write"));
        assert!(!skill.is_tool_allowed("Bash"));
    }

    #[test]
    fn test_tool_allowed_wildcard() {
        let skill = skill_from(
            "---\nallowed-tools:\n  - \"Bash(python:*)\"\n  - Read\n---\nContent\n",
        );

        // The `*` in "Bash(python:*)" stands for whatever is inside the parentheses.
        assert!(skill.is_tool_allowed("Bash(python:test.py)"));
        assert!(skill.is_tool_allowed("Bash(python:run.py)"));
        assert!(skill.is_tool_allowed("Bash(python:)")); // Nothing after the colon is fine
        assert!(!skill.is_tool_allowed("Bash(node:test.js)"));
        assert!(!skill.is_tool_allowed("Bash")); // Bare name lacks the parentheses
        assert!(skill.is_tool_allowed("Read"));
    }

    #[test]
    fn test_tool_allowed_no_restrictions() {
        let skill = skill_from("---\nname: unrestricted\n---\nContent\n");

        for tool in ["Anything", "Read", "Bash"].iter() {
            assert!(skill.is_tool_allowed(tool));
        }
    }
}
361}