agpm_cli/skills/mod.rs
1//! Skills module for AGPM
2//!
3//! This module provides functionality for managing Claude Skills, which are
4//! directory-based resources containing a SKILL.md file with frontmatter and
5//! optional supporting files.
6//!
7//! ## What are Skills?
8//!
9//! Skills are directories that:
10//! - Contain a SKILL.md file with required YAML frontmatter
11//! - May include additional files (REFERENCE.md, scripts, examples)
12//! - Install to `.claude/skills/<name>/` as directories
13//! - Can declare dependencies on other resources
14//! - Support patching for customization
15//!
16//! ## SKILL.md Format
17//!
//! ```yaml
//! ---
//! name: Skill Name
//! description: What this skill does
//! version: 1.0.0 # optional
//! allowed-tools: [Read, Grep] # optional, must be a YAML list
//! dependencies: # optional
//!   agents:
//!     - path: agents/helper.md
//! ---
//! # Skill content in markdown
//! ```
30//!
31//! ## Async vs Sync Functions
32//!
33//! This module uses a hybrid async/sync approach for performance and compatibility:
34//!
35//! - **`validate_skill_size`**: Async wrapper around sync `walkdir`. Integrates with
36//! the async installer pipeline while using `spawn_blocking` for the actual I/O.
37//! This prevents blocking the Tokio runtime during directory traversal.
38//!
39//! - **`extract_skill_metadata`**: Async for the same reason - wraps sync directory
40//! iteration in `spawn_blocking` to avoid blocking async contexts.
41//!
42//! - **`collect_skill_directory_info`**: Sync helper that performs the actual directory
43//! walk. Called via `spawn_blocking` from async functions. Uses `walkdir` which is
44//! inherently synchronous.
45//!
46//! - **`validate_skill_frontmatter`**: Pure sync function that only parses in-memory
47//! YAML. No I/O, so no need for async.
48//!
49//! The `walkdir` crate is synchronous, so we wrap it in `spawn_blocking` rather than
50//! using a fake async interface. This is the recommended Tokio pattern for CPU-bound
51//! or blocking I/O operations.
52
53pub mod patches;
54
55use crate::core::file_error::{FileOperation, FileResultExt};
56use anyhow::{Result, anyhow};
57use serde::{Deserialize, Serialize};
58use std::path::{Path, PathBuf};
59
/// Hard cap on how many files a single skill directory may contain.
const MAX_SKILL_FILES: usize = 1_000;

/// Hard cap, in bytes, on the combined size of all files in a skill (100 MB).
const MAX_SKILL_SIZE_BYTES: u64 = 1024 * 1024 * 100;

/// Largest YAML frontmatter, in bytes, that is handed to the parser (64 KB).
///
/// Acts as defense-in-depth against denial-of-service via oversized metadata.
const MAX_FRONTMATTER_SIZE_BYTES: usize = 1024 * 64;

/// Upper bound on the length of a skill's `name`, kept short for filesystem
/// compatibility.
const MAX_NAME_LENGTH: usize = 100;

/// Upper bound on the length of a skill's `description`; a reasonable ceiling
/// for a metadata field.
const MAX_DESCRIPTION_LENGTH: usize = 1_000;
74
/// Everything gathered while walking a skill directory once.
///
/// Keeping the file list, the byte total, and the `SKILL.md` contents together
/// lets validation and metadata extraction share a single traversal instead of
/// each walking the directory on its own.
#[derive(Clone, Debug)]
pub struct SkillDirectoryInfo {
    /// Paths of the skill's files, relative to the skill root, in sorted order
    pub files: Vec<String>,
    /// Combined size of those files, in bytes
    pub total_size: u64,
    /// Location of the `SKILL.md` file, or `None` when the skill has none
    pub skill_md_path: Option<PathBuf>,
    /// Text of the `SKILL.md` file, or `None` when the file was not found
    pub skill_md_content: Option<String>,
}
90
91/// Iterate over a skill directory and collect file information.
92///
93/// This function performs a single pass over the skill directory, collecting:
94/// - All file paths (relative to skill root)
95/// - Total size of all files
96/// - The SKILL.md content (if present)
97///
98/// # Arguments
99///
100/// * `skill_path` - Path to the skill directory
101///
102/// # Returns
103///
104/// Returns `SkillDirectoryInfo` with all collected information
105///
106/// # Errors
107///
108/// Returns an error if:
109/// - The path is not a directory
110/// - Directory traversal fails
111/// - Symlinks are found (security risk)
112/// - File count exceeds `MAX_SKILL_FILES`
113/// - Total size exceeds `MAX_SKILL_SIZE_BYTES`
114///
115/// # Security
116///
117/// This function rejects symlinks to prevent data exfiltration and
118/// path traversal attacks.
119fn collect_skill_directory_info(skill_path: &Path) -> Result<SkillDirectoryInfo> {
120 use walkdir::WalkDir;
121
122 if !skill_path.is_dir() {
123 return Err(anyhow!("Skill path {} is not a directory", skill_path.display()));
124 }
125
126 let mut files = Vec::new();
127 let mut total_size = 0u64;
128 let mut skill_md_path = None;
129 let mut skill_md_content = None;
130
131 for entry in WalkDir::new(skill_path).follow_links(false) {
132 let entry = entry?;
133
134 // Reject symlinks (security: could point to /etc/passwd, etc.)
135 if entry.file_type().is_symlink() {
136 return Err(anyhow!(
137 "Skill at {} contains symlinks, which are not allowed for security reasons. \
138 Symlinks could point to sensitive files or cause unexpected behavior across platforms.",
139 skill_path.display()
140 ));
141 }
142
143 if entry.file_type().is_file() {
144 let file_path = entry.path();
145 let relative_path = file_path
146 .strip_prefix(skill_path)
147 .map_err(|e| anyhow!("Failed to get relative path: {}", e))?
148 .to_string_lossy()
149 .to_string();
150
151 // Check if this is the SKILL.md file
152 if relative_path == "SKILL.md" {
153 skill_md_path = Some(file_path.to_path_buf());
154 // Read SKILL.md content while we're iterating
155 // BLOCKING I/O is safe here: called via spawn_blocking from async context
156 // (see validate_skill_size and extract_skill_metadata which wrap this function)
157 skill_md_content = Some(std::fs::read_to_string(file_path).with_file_context(
158 FileOperation::Read,
159 file_path,
160 "loading skill metadata",
161 "collect_skill_directory_info",
162 )?);
163 }
164
165 let metadata = entry.metadata()?;
166 total_size += metadata.len();
167 files.push(relative_path);
168
169 // Check file count limit
170 if files.len() > MAX_SKILL_FILES {
171 return Err(anyhow!(
172 "Skill at {} contains {} files, which exceeds the maximum limit of {} files. \
173 Skills should be focused and minimal. Consider splitting into multiple skills.",
174 skill_path.display(),
175 files.len(),
176 MAX_SKILL_FILES
177 ));
178 }
179
180 // Check size limit
181 if total_size > MAX_SKILL_SIZE_BYTES {
182 let size_mb = total_size as f64 / (1024.0 * 1024.0);
183 let limit_mb = MAX_SKILL_SIZE_BYTES as f64 / (1024.0 * 1024.0);
184 return Err(anyhow!(
185 "Skill at {} total size is {:.2} MB, which exceeds the maximum limit of {:.0} MB. \
186 Skills should be focused and minimal. Consider optimizing file sizes or removing unnecessary files.",
187 skill_path.display(),
188 size_mb,
189 limit_mb
190 ));
191 }
192 }
193 }
194
195 // Sort files for consistent ordering
196 files.sort();
197
198 Ok(SkillDirectoryInfo {
199 files,
200 total_size,
201 skill_md_path,
202 skill_md_content,
203 })
204}
205
206/// Frontmatter structure for SKILL.md files
207///
208/// This struct represents the YAML frontmatter that must be present
209/// in every SKILL.md file. It defines the skill's metadata and
210/// configuration.
211#[derive(Debug, Clone, Serialize, Deserialize)]
212pub struct SkillFrontmatter {
213 /// Human-readable name of the skill
214 pub name: String,
215
216 /// Description of what the skill does
217 pub description: String,
218
219 /// Optional version identifier
220 #[serde(skip_serializing_if = "Option::is_none")]
221 pub version: Option<String>,
222
223 /// Optional list of tools the skill is allowed to use
224 #[serde(rename = "allowed-tools", skip_serializing_if = "Option::is_none")]
225 pub allowed_tools: Option<Vec<String>>,
226
227 /// Optional dependencies on other resources
228 #[serde(skip_serializing_if = "Option::is_none")]
229 pub dependencies: Option<serde_yaml::Value>,
230}
231
232/// Validate and extract frontmatter from SKILL.md content
233///
234/// This function parses the YAML frontmatter from a SKILL.md file,
235/// validates that required fields are present, and returns the
236/// structured frontmatter data.
237///
238/// # Arguments
239///
240/// * `content` - The full content of the SKILL.md file
241///
242/// # Returns
243///
244/// Returns the parsed frontmatter if valid
245///
246/// # Errors
247///
248/// Returns an error if:
249/// - The file doesn't have proper YAML frontmatter (missing --- markers)
250/// - The YAML is invalid
251/// - Required fields (name, description) are missing or empty
252///
253/// # Examples
254///
255/// ```
256/// use agpm_cli::skills::validate_skill_frontmatter;
257///
258/// # fn example() -> anyhow::Result<()> {
259/// let content = r#"---
260/// name: My Skill
261/// description: A helpful skill
262/// ---
263/// # My Skill
264///
265/// This skill helps with...
266/// "#;
267///
268/// let frontmatter = validate_skill_frontmatter(content)?;
269/// assert_eq!(frontmatter.name, "My Skill");
270/// assert_eq!(frontmatter.description, "A helpful skill");
271/// # Ok(())
272/// # }
273/// ```
274pub fn validate_skill_frontmatter(content: &str) -> Result<SkillFrontmatter> {
275 // Split content by --- markers
276 let parts: Vec<&str> = content.splitn(3, "---").collect();
277
278 if parts.len() < 3 {
279 return Err(anyhow!(
280 "SKILL.md missing required YAML frontmatter. Format:\n---\nname: Skill Name\ndescription: What it does\n---\n# Content"
281 ));
282 }
283
284 // Parse YAML frontmatter
285 let frontmatter_str = parts[1].trim();
286
287 // Validate frontmatter size (defense-in-depth against DoS)
288 if frontmatter_str.len() > MAX_FRONTMATTER_SIZE_BYTES {
289 return Err(anyhow!(
290 "SKILL.md frontmatter exceeds maximum size of {} KB",
291 MAX_FRONTMATTER_SIZE_BYTES / 1024
292 ));
293 }
294
295 let frontmatter: SkillFrontmatter = serde_yaml::from_str(frontmatter_str).map_err(|e| {
296 // Truncate YAML content in error messages to avoid leaking sensitive data from patches
297 // Use 80 chars (single line) to minimize potential exposure of API keys or secrets
298 // Use chars().take() to avoid splitting UTF-8 character boundaries
299 let char_count = frontmatter_str.chars().count();
300 let yaml_preview = if char_count > 80 {
301 let truncated: String = frontmatter_str.chars().take(80).collect();
302 format!("{}... ({} chars total)", truncated, char_count)
303 } else {
304 frontmatter_str.to_string()
305 };
306 anyhow!("Invalid SKILL.md frontmatter: {}\nYAML content:\n{}", e, yaml_preview)
307 })?;
308
309 // Validate required fields
310 if frontmatter.name.trim().is_empty() {
311 return Err(anyhow!("SKILL.md frontmatter missing required 'name' field"));
312 }
313
314 if frontmatter.description.trim().is_empty() {
315 return Err(anyhow!("SKILL.md frontmatter missing required 'description' field"));
316 }
317
318 // Validate field lengths
319 if frontmatter.name.len() > MAX_NAME_LENGTH {
320 return Err(anyhow!("Skill name exceeds maximum length of {} characters", MAX_NAME_LENGTH));
321 }
322
323 if frontmatter.description.len() > MAX_DESCRIPTION_LENGTH {
324 return Err(anyhow!(
325 "Skill description exceeds maximum length of {} characters",
326 MAX_DESCRIPTION_LENGTH
327 ));
328 }
329
330 // Validate name contains only allowed ASCII characters for cross-platform filename compatibility
331 // Defense-in-depth: explicitly check for path traversal sequences even though
332 // the allowlist below would block them anyway
333 if frontmatter.name.contains("..")
334 || frontmatter.name.contains('/')
335 || frontmatter.name.contains('\\')
336 {
337 return Err(anyhow!(
338 "Skill name contains path traversal sequences or path separators. \
339 Use ASCII letters, numbers, spaces, hyphens, and underscores only"
340 ));
341 }
342
343 if !frontmatter
344 .name
345 .chars()
346 .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == ' ')
347 {
348 return Err(anyhow!(
349 "Skill name contains invalid characters. Use ASCII letters, numbers, spaces, hyphens, and underscores only"
350 ));
351 }
352
353 Ok(frontmatter)
354}
355
356/// Validate skill directory size and file count before installation.
357///
358/// This prevents malicious or accidentally large skills from consuming
359/// excessive disk space or inodes. Checks:
360/// - File count ≤ MAX_SKILL_FILES (1000)
361/// - Total size ≤ MAX_SKILL_SIZE_BYTES (100MB)
362/// - No symlinks (security risk: could point to sensitive files)
363///
364/// # Arguments
365///
366/// * `skill_path` - Path to the skill directory to validate
367///
368/// # Returns
369///
370/// * `Ok(SkillDirectoryInfo)` - Skill passes all checks, returns collected info
371/// * `Err(anyhow::Error)` - Skill exceeds limits or contains symlinks
372///
373/// # Security
374///
375/// This function rejects symlinks to prevent:
376/// - Data exfiltration (symlink to /etc/passwd, ~/.ssh/id_rsa)
377/// - Path traversal attacks
378/// - Unexpected behavior across platforms
379///
380/// # Examples
381///
382/// ```no_run
383/// use agpm_cli::skills::validate_skill_size;
384/// use std::path::Path;
385///
386/// # async fn example() -> anyhow::Result<()> {
387/// let info = validate_skill_size(Path::new("my-skill")).await?;
388/// println!("Skill has {} files totaling {} bytes", info.files.len(), info.total_size);
389/// # Ok(())
390/// # }
391/// ```
392pub async fn validate_skill_size(skill_path: &Path) -> Result<SkillDirectoryInfo> {
393 let path = skill_path.to_path_buf();
394
395 // Run blocking directory iteration in a separate thread
396 tokio::task::spawn_blocking(move || collect_skill_directory_info(&path))
397 .await
398 .map_err(|e| anyhow!("Task join error during skill validation: {}", e))?
399}
400
401/// Extract metadata from a skill directory.
402///
403/// This function reads a skill directory, validates its structure,
404/// and extracts metadata including the frontmatter and file list.
405/// Uses the shared `SkillDirectoryInfo` to perform validation and
406/// metadata extraction in a single pass.
407///
408/// # Arguments
409///
410/// * `skill_path` - Path to the skill directory
411///
412/// # Returns
413///
414/// Returns a tuple of (frontmatter, file_list) if valid
415///
416/// # Examples
417///
418/// ```no_run
419/// use agpm_cli::skills::extract_skill_metadata;
420/// use std::path::Path;
421///
422/// # async fn example() -> anyhow::Result<()> {
423/// let (frontmatter, files) = extract_skill_metadata(Path::new("my-skill")).await?;
424/// println!("Skill: {}", frontmatter.name);
425/// println!("Files: {:?}", files);
426/// # Ok(())
427/// # }
428/// ```
429pub async fn extract_skill_metadata(skill_path: &Path) -> Result<(SkillFrontmatter, Vec<String>)> {
430 tracing::debug!("extract_skill_metadata called with path: {}", skill_path.display());
431
432 let path = skill_path.to_path_buf();
433 let display_path = skill_path.display().to_string();
434
435 // Run blocking directory iteration in a separate thread
436 let info = tokio::task::spawn_blocking(move || collect_skill_directory_info(&path))
437 .await
438 .map_err(|e| anyhow!("Task join error during skill metadata extraction: {}", e))??;
439
440 // Validate that SKILL.md was found and read
441 let skill_md_content = info
442 .skill_md_content
443 .ok_or_else(|| anyhow!("Skill at {} missing required SKILL.md file", display_path))?;
444
445 // Parse and validate frontmatter
446 let frontmatter = validate_skill_frontmatter(&skill_md_content)?;
447
448 tracing::debug!(
449 "Extracted metadata for skill '{}': {} files, {} bytes",
450 frontmatter.name,
451 info.files.len(),
452 info.total_size
453 );
454
455 Ok((frontmatter, info.files))
456}
457
458/// Extract metadata from pre-collected skill directory info.
459///
460/// This is a synchronous helper that extracts frontmatter from already-collected
461/// directory information. Use this when you have already called `validate_skill_size`
462/// and want to avoid re-iterating the directory.
463///
464/// # Arguments
465///
466/// * `info` - Pre-collected directory information from `validate_skill_size`
467/// * `skill_path` - Path to the skill directory (for error messages)
468///
469/// # Returns
470///
471/// Returns a tuple of (frontmatter, file_list) if valid
472pub fn extract_skill_metadata_from_info(
473 info: &SkillDirectoryInfo,
474 skill_path: &Path,
475) -> Result<(SkillFrontmatter, Vec<String>)> {
476 let skill_md_content = info.skill_md_content.as_ref().ok_or_else(|| {
477 anyhow!("Skill at {} missing required SKILL.md file", skill_path.display())
478 })?;
479
480 let frontmatter = validate_skill_frontmatter(skill_md_content)?;
481
482 Ok((frontmatter, info.files.clone()))
483}
484
#[cfg(test)]
mod tests {
    use super::*;

    /// Complete SKILL.md exercising every frontmatter field.
    const FULL_SKILL_MD: &str = r#"---
name: Test Skill
description: A test skill
version: 1.0.0
allowed-tools:
 - Read
 - Write
dependencies:
 agents:
 - path: helper.md
---
# Test Skill

This is a test skill.
"#;

    /// Frontmatter that omits the mandatory `description` field.
    const NO_DESCRIPTION_SKILL_MD: &str = r#"---
name: Test Skill
---
# Test Skill
"#;

    /// Plain markdown with no frontmatter block at all.
    const NO_FRONTMATTER_SKILL_MD: &str = r#"# Test Skill

This skill has no frontmatter.
"#;

    /// Frontmatter whose YAML is malformed (unterminated flow sequence).
    const BROKEN_YAML_SKILL_MD: &str = r#"---
name: Test Skill
description: Invalid YAML
unclosed: [ "item1", "item2"
---
# Test Skill
"#;

    /// Run validation on input that must be rejected and return the error text.
    fn rejection_message(skill_md: &str) -> String {
        let outcome = validate_skill_frontmatter(skill_md);
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    #[test]
    fn test_validate_skill_frontmatter_valid() {
        // Valid frontmatter must parse successfully
        let parsed = validate_skill_frontmatter(FULL_SKILL_MD).unwrap();

        assert_eq!(parsed.name, "Test Skill");
        assert_eq!(parsed.description, "A test skill");
        assert_eq!(parsed.version, Some("1.0.0".to_string()));
        assert_eq!(parsed.allowed_tools, Some(vec!["Read".to_string(), "Write".to_string()]));
    }

    #[test]
    fn test_validate_skill_frontmatter_missing_fields() {
        let message = rejection_message(NO_DESCRIPTION_SKILL_MD);
        assert!(message.contains("description"));
    }

    #[test]
    fn test_validate_skill_frontmatter_no_frontmatter() {
        let message = rejection_message(NO_FRONTMATTER_SKILL_MD);
        assert!(message.contains("missing required YAML frontmatter"));
    }

    #[test]
    fn test_validate_skill_frontmatter_invalid_yaml() {
        let message = rejection_message(BROKEN_YAML_SKILL_MD);
        assert!(message.contains("Invalid SKILL.md frontmatter"));
    }
}