agpm_cli/skills/
mod.rs

1//! Skills module for AGPM
2//!
3//! This module provides functionality for managing Claude Skills, which are
4//! directory-based resources containing a SKILL.md file with frontmatter and
5//! optional supporting files.
6//!
7//! ## What are Skills?
8//!
9//! Skills are directories that:
10//! - Contain a SKILL.md file with required YAML frontmatter
11//! - May include additional files (REFERENCE.md, scripts, examples)
12//! - Install to `.claude/skills/<name>/` as directories
13//! - Can declare dependencies on other resources
14//! - Support patching for customization
15//!
16//! ## SKILL.md Format
17//!
18//! ```yaml
19//! ---
20//! name: Skill Name
21//! description: What this skill does
22//! version: 1.0.0  # optional
23//! allowed-tools: Read, Grep  # optional
24//! dependencies:  # optional
25//!   agents:
26//!     - path: agents/helper.md
27//! ---
28//! # Skill content in markdown
29//! ```
30//!
31//! ## Async vs Sync Functions
32//!
33//! This module uses a hybrid async/sync approach for performance and compatibility:
34//!
35//! - **`validate_skill_size`**: Async wrapper around sync `walkdir`. Integrates with
36//!   the async installer pipeline while using `spawn_blocking` for the actual I/O.
37//!   This prevents blocking the Tokio runtime during directory traversal.
38//!
39//! - **`extract_skill_metadata`**: Async for the same reason - wraps sync directory
40//!   iteration in `spawn_blocking` to avoid blocking async contexts.
41//!
42//! - **`collect_skill_directory_info`**: Sync helper that performs the actual directory
43//!   walk. Called via `spawn_blocking` from async functions. Uses `walkdir` which is
44//!   inherently synchronous.
45//!
46//! - **`validate_skill_frontmatter`**: Pure sync function that only parses in-memory
47//!   YAML. No I/O, so no need for async.
48//!
49//! The `walkdir` crate is synchronous, so we wrap it in `spawn_blocking` rather than
50//! using a fake async interface. This is the recommended Tokio pattern for CPU-bound
51//! or blocking I/O operations.
52
53pub mod patches;
54
55use crate::core::file_error::{FileOperation, FileResultExt};
56use anyhow::{Result, anyhow};
57use serde::{Deserialize, Serialize};
58use std::path::{Path, PathBuf};
59
/// Hard cap on the number of files a single skill directory may contain.
const MAX_SKILL_FILES: usize = 1_000;

/// Hard cap, in bytes, on the combined size of all files in a skill
/// (100 * 1024 * 1024 bytes, reported to users as 100 MB).
const MAX_SKILL_SIZE_BYTES: u64 = 100 << 20;

/// Largest YAML frontmatter, in bytes, that is handed to the YAML parser
/// (64 * 1024 bytes). Defense-in-depth against denial-of-service input.
const MAX_FRONTMATTER_SIZE_BYTES: usize = 64 << 10;

/// Longest accepted skill name, kept short for filesystem compatibility.
const MAX_NAME_LENGTH: usize = 100;

/// Longest accepted skill description; a reasonable bound for metadata.
const MAX_DESCRIPTION_LENGTH: usize = 1_000;
74
/// Everything learned about a skill directory from one traversal.
///
/// Both size validation and metadata extraction consume this value, so the
/// directory only has to be walked once.
#[derive(Debug, Clone)]
pub struct SkillDirectoryInfo {
    /// Paths of every regular file, relative to the skill root, in sorted order.
    pub files: Vec<String>,
    /// Combined size of all regular files, in bytes.
    pub total_size: u64,
    /// Location of the root-level `SKILL.md`, or `None` when the skill has none.
    pub skill_md_path: Option<PathBuf>,
    /// Text of the root-level `SKILL.md`, or `None` when the skill has none.
    pub skill_md_content: Option<String>,
}
90
91/// Iterate over a skill directory and collect file information.
92///
93/// This function performs a single pass over the skill directory, collecting:
94/// - All file paths (relative to skill root)
95/// - Total size of all files
96/// - The SKILL.md content (if present)
97///
98/// # Arguments
99///
100/// * `skill_path` - Path to the skill directory
101///
102/// # Returns
103///
104/// Returns `SkillDirectoryInfo` with all collected information
105///
106/// # Errors
107///
108/// Returns an error if:
109/// - The path is not a directory
110/// - Directory traversal fails
111/// - Symlinks are found (security risk)
112/// - File count exceeds `MAX_SKILL_FILES`
113/// - Total size exceeds `MAX_SKILL_SIZE_BYTES`
114///
115/// # Security
116///
117/// This function rejects symlinks to prevent data exfiltration and
118/// path traversal attacks.
119fn collect_skill_directory_info(skill_path: &Path) -> Result<SkillDirectoryInfo> {
120    use walkdir::WalkDir;
121
122    if !skill_path.is_dir() {
123        return Err(anyhow!("Skill path {} is not a directory", skill_path.display()));
124    }
125
126    let mut files = Vec::new();
127    let mut total_size = 0u64;
128    let mut skill_md_path = None;
129    let mut skill_md_content = None;
130
131    for entry in WalkDir::new(skill_path).follow_links(false) {
132        let entry = entry?;
133
134        // Reject symlinks (security: could point to /etc/passwd, etc.)
135        if entry.file_type().is_symlink() {
136            return Err(anyhow!(
137                "Skill at {} contains symlinks, which are not allowed for security reasons. \
138                Symlinks could point to sensitive files or cause unexpected behavior across platforms.",
139                skill_path.display()
140            ));
141        }
142
143        if entry.file_type().is_file() {
144            let file_path = entry.path();
145            let relative_path = file_path
146                .strip_prefix(skill_path)
147                .map_err(|e| anyhow!("Failed to get relative path: {}", e))?
148                .to_string_lossy()
149                .to_string();
150
151            // Check if this is the SKILL.md file
152            if relative_path == "SKILL.md" {
153                skill_md_path = Some(file_path.to_path_buf());
154                // Read SKILL.md content while we're iterating
155                // BLOCKING I/O is safe here: called via spawn_blocking from async context
156                // (see validate_skill_size and extract_skill_metadata which wrap this function)
157                skill_md_content = Some(std::fs::read_to_string(file_path).with_file_context(
158                    FileOperation::Read,
159                    file_path,
160                    "loading skill metadata",
161                    "collect_skill_directory_info",
162                )?);
163            }
164
165            let metadata = entry.metadata()?;
166            total_size += metadata.len();
167            files.push(relative_path);
168
169            // Check file count limit
170            if files.len() > MAX_SKILL_FILES {
171                return Err(anyhow!(
172                    "Skill at {} contains {} files, which exceeds the maximum limit of {} files. \
173                    Skills should be focused and minimal. Consider splitting into multiple skills.",
174                    skill_path.display(),
175                    files.len(),
176                    MAX_SKILL_FILES
177                ));
178            }
179
180            // Check size limit
181            if total_size > MAX_SKILL_SIZE_BYTES {
182                let size_mb = total_size as f64 / (1024.0 * 1024.0);
183                let limit_mb = MAX_SKILL_SIZE_BYTES as f64 / (1024.0 * 1024.0);
184                return Err(anyhow!(
185                    "Skill at {} total size is {:.2} MB, which exceeds the maximum limit of {:.0} MB. \
186                    Skills should be focused and minimal. Consider optimizing file sizes or removing unnecessary files.",
187                    skill_path.display(),
188                    size_mb,
189                    limit_mb
190                ));
191            }
192        }
193    }
194
195    // Sort files for consistent ordering
196    files.sort();
197
198    Ok(SkillDirectoryInfo {
199        files,
200        total_size,
201        skill_md_path,
202        skill_md_content,
203    })
204}
205
206/// Frontmatter structure for SKILL.md files
207///
208/// This struct represents the YAML frontmatter that must be present
209/// in every SKILL.md file. It defines the skill's metadata and
210/// configuration.
211#[derive(Debug, Clone, Serialize, Deserialize)]
212pub struct SkillFrontmatter {
213    /// Human-readable name of the skill
214    pub name: String,
215
216    /// Description of what the skill does
217    pub description: String,
218
219    /// Optional version identifier
220    #[serde(skip_serializing_if = "Option::is_none")]
221    pub version: Option<String>,
222
223    /// Optional list of tools the skill is allowed to use
224    #[serde(rename = "allowed-tools", skip_serializing_if = "Option::is_none")]
225    pub allowed_tools: Option<Vec<String>>,
226
227    /// Optional dependencies on other resources
228    #[serde(skip_serializing_if = "Option::is_none")]
229    pub dependencies: Option<serde_yaml::Value>,
230}
231
232/// Validate and extract frontmatter from SKILL.md content
233///
234/// This function parses the YAML frontmatter from a SKILL.md file,
235/// validates that required fields are present, and returns the
236/// structured frontmatter data.
237///
238/// # Arguments
239///
240/// * `content` - The full content of the SKILL.md file
241///
242/// # Returns
243///
244/// Returns the parsed frontmatter if valid
245///
246/// # Errors
247///
248/// Returns an error if:
249/// - The file doesn't have proper YAML frontmatter (missing --- markers)
250/// - The YAML is invalid
251/// - Required fields (name, description) are missing or empty
252///
253/// # Examples
254///
255/// ```
256/// use agpm_cli::skills::validate_skill_frontmatter;
257///
258/// # fn example() -> anyhow::Result<()> {
259/// let content = r#"---
260/// name: My Skill
261/// description: A helpful skill
262/// ---
263/// # My Skill
264///
265/// This skill helps with...
266/// "#;
267///
268/// let frontmatter = validate_skill_frontmatter(content)?;
269/// assert_eq!(frontmatter.name, "My Skill");
270/// assert_eq!(frontmatter.description, "A helpful skill");
271/// # Ok(())
272/// # }
273/// ```
274pub fn validate_skill_frontmatter(content: &str) -> Result<SkillFrontmatter> {
275    // Split content by --- markers
276    let parts: Vec<&str> = content.splitn(3, "---").collect();
277
278    if parts.len() < 3 {
279        return Err(anyhow!(
280            "SKILL.md missing required YAML frontmatter. Format:\n---\nname: Skill Name\ndescription: What it does\n---\n# Content"
281        ));
282    }
283
284    // Parse YAML frontmatter
285    let frontmatter_str = parts[1].trim();
286
287    // Validate frontmatter size (defense-in-depth against DoS)
288    if frontmatter_str.len() > MAX_FRONTMATTER_SIZE_BYTES {
289        return Err(anyhow!(
290            "SKILL.md frontmatter exceeds maximum size of {} KB",
291            MAX_FRONTMATTER_SIZE_BYTES / 1024
292        ));
293    }
294
295    let frontmatter: SkillFrontmatter = serde_yaml::from_str(frontmatter_str).map_err(|e| {
296        // Truncate YAML content in error messages to avoid leaking sensitive data from patches
297        // Use 80 chars (single line) to minimize potential exposure of API keys or secrets
298        // Use chars().take() to avoid splitting UTF-8 character boundaries
299        let char_count = frontmatter_str.chars().count();
300        let yaml_preview = if char_count > 80 {
301            let truncated: String = frontmatter_str.chars().take(80).collect();
302            format!("{}... ({} chars total)", truncated, char_count)
303        } else {
304            frontmatter_str.to_string()
305        };
306        anyhow!("Invalid SKILL.md frontmatter: {}\nYAML content:\n{}", e, yaml_preview)
307    })?;
308
309    // Validate required fields
310    if frontmatter.name.trim().is_empty() {
311        return Err(anyhow!("SKILL.md frontmatter missing required 'name' field"));
312    }
313
314    if frontmatter.description.trim().is_empty() {
315        return Err(anyhow!("SKILL.md frontmatter missing required 'description' field"));
316    }
317
318    // Validate field lengths
319    if frontmatter.name.len() > MAX_NAME_LENGTH {
320        return Err(anyhow!("Skill name exceeds maximum length of {} characters", MAX_NAME_LENGTH));
321    }
322
323    if frontmatter.description.len() > MAX_DESCRIPTION_LENGTH {
324        return Err(anyhow!(
325            "Skill description exceeds maximum length of {} characters",
326            MAX_DESCRIPTION_LENGTH
327        ));
328    }
329
330    // Validate name contains only allowed ASCII characters for cross-platform filename compatibility
331    // Defense-in-depth: explicitly check for path traversal sequences even though
332    // the allowlist below would block them anyway
333    if frontmatter.name.contains("..")
334        || frontmatter.name.contains('/')
335        || frontmatter.name.contains('\\')
336    {
337        return Err(anyhow!(
338            "Skill name contains path traversal sequences or path separators. \
339             Use ASCII letters, numbers, spaces, hyphens, and underscores only"
340        ));
341    }
342
343    if !frontmatter
344        .name
345        .chars()
346        .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == ' ')
347    {
348        return Err(anyhow!(
349            "Skill name contains invalid characters. Use ASCII letters, numbers, spaces, hyphens, and underscores only"
350        ));
351    }
352
353    Ok(frontmatter)
354}
355
356/// Validate skill directory size and file count before installation.
357///
358/// This prevents malicious or accidentally large skills from consuming
359/// excessive disk space or inodes. Checks:
360/// - File count ≤ MAX_SKILL_FILES (1000)
361/// - Total size ≤ MAX_SKILL_SIZE_BYTES (100MB)
362/// - No symlinks (security risk: could point to sensitive files)
363///
364/// # Arguments
365///
366/// * `skill_path` - Path to the skill directory to validate
367///
368/// # Returns
369///
370/// * `Ok(SkillDirectoryInfo)` - Skill passes all checks, returns collected info
371/// * `Err(anyhow::Error)` - Skill exceeds limits or contains symlinks
372///
373/// # Security
374///
375/// This function rejects symlinks to prevent:
376/// - Data exfiltration (symlink to /etc/passwd, ~/.ssh/id_rsa)
377/// - Path traversal attacks
378/// - Unexpected behavior across platforms
379///
380/// # Examples
381///
382/// ```no_run
383/// use agpm_cli::skills::validate_skill_size;
384/// use std::path::Path;
385///
386/// # async fn example() -> anyhow::Result<()> {
387/// let info = validate_skill_size(Path::new("my-skill")).await?;
388/// println!("Skill has {} files totaling {} bytes", info.files.len(), info.total_size);
389/// # Ok(())
390/// # }
391/// ```
392pub async fn validate_skill_size(skill_path: &Path) -> Result<SkillDirectoryInfo> {
393    let path = skill_path.to_path_buf();
394
395    // Run blocking directory iteration in a separate thread
396    tokio::task::spawn_blocking(move || collect_skill_directory_info(&path))
397        .await
398        .map_err(|e| anyhow!("Task join error during skill validation: {}", e))?
399}
400
401/// Extract metadata from a skill directory.
402///
403/// This function reads a skill directory, validates its structure,
404/// and extracts metadata including the frontmatter and file list.
405/// Uses the shared `SkillDirectoryInfo` to perform validation and
406/// metadata extraction in a single pass.
407///
408/// # Arguments
409///
410/// * `skill_path` - Path to the skill directory
411///
412/// # Returns
413///
414/// Returns a tuple of (frontmatter, file_list) if valid
415///
416/// # Examples
417///
418/// ```no_run
419/// use agpm_cli::skills::extract_skill_metadata;
420/// use std::path::Path;
421///
422/// # async fn example() -> anyhow::Result<()> {
423/// let (frontmatter, files) = extract_skill_metadata(Path::new("my-skill")).await?;
424/// println!("Skill: {}", frontmatter.name);
425/// println!("Files: {:?}", files);
426/// # Ok(())
427/// # }
428/// ```
429pub async fn extract_skill_metadata(skill_path: &Path) -> Result<(SkillFrontmatter, Vec<String>)> {
430    tracing::debug!("extract_skill_metadata called with path: {}", skill_path.display());
431
432    let path = skill_path.to_path_buf();
433    let display_path = skill_path.display().to_string();
434
435    // Run blocking directory iteration in a separate thread
436    let info = tokio::task::spawn_blocking(move || collect_skill_directory_info(&path))
437        .await
438        .map_err(|e| anyhow!("Task join error during skill metadata extraction: {}", e))??;
439
440    // Validate that SKILL.md was found and read
441    let skill_md_content = info
442        .skill_md_content
443        .ok_or_else(|| anyhow!("Skill at {} missing required SKILL.md file", display_path))?;
444
445    // Parse and validate frontmatter
446    let frontmatter = validate_skill_frontmatter(&skill_md_content)?;
447
448    tracing::debug!(
449        "Extracted metadata for skill '{}': {} files, {} bytes",
450        frontmatter.name,
451        info.files.len(),
452        info.total_size
453    );
454
455    Ok((frontmatter, info.files))
456}
457
458/// Extract metadata from pre-collected skill directory info.
459///
460/// This is a synchronous helper that extracts frontmatter from already-collected
461/// directory information. Use this when you have already called `validate_skill_size`
462/// and want to avoid re-iterating the directory.
463///
464/// # Arguments
465///
466/// * `info` - Pre-collected directory information from `validate_skill_size`
467/// * `skill_path` - Path to the skill directory (for error messages)
468///
469/// # Returns
470///
471/// Returns a tuple of (frontmatter, file_list) if valid
472pub fn extract_skill_metadata_from_info(
473    info: &SkillDirectoryInfo,
474    skill_path: &Path,
475) -> Result<(SkillFrontmatter, Vec<String>)> {
476    let skill_md_content = info.skill_md_content.as_ref().ok_or_else(|| {
477        anyhow!("Skill at {} missing required SKILL.md file", skill_path.display())
478    })?;
479
480    let frontmatter = validate_skill_frontmatter(skill_md_content)?;
481
482    Ok((frontmatter, info.files.clone()))
483}
484
#[cfg(test)]
mod tests {
    use super::*;

    /// Validate `content`, which must be rejected, and return the error text.
    fn rejection_message(content: &str) -> String {
        let outcome = validate_skill_frontmatter(content);
        assert!(outcome.is_err());
        outcome.unwrap_err().to_string()
    }

    /// A complete, well-formed frontmatter block parses into its fields.
    #[test]
    fn test_validate_skill_frontmatter_valid() {
        let content = r#"---
name: Test Skill
description: A test skill
version: 1.0.0
allowed-tools:
  - Read
  - Write
dependencies:
  agents:
    - path: helper.md
---
# Test Skill

This is a test skill.
"#;

        let parsed = validate_skill_frontmatter(content).unwrap();
        let expected_tools = vec![String::from("Read"), String::from("Write")];

        assert_eq!(parsed.name, "Test Skill");
        assert_eq!(parsed.description, "A test skill");
        assert_eq!(parsed.version, Some(String::from("1.0.0")));
        assert_eq!(parsed.allowed_tools, Some(expected_tools));
    }

    /// Omitting the required `description` field is reported by name.
    #[test]
    fn test_validate_skill_frontmatter_missing_fields() {
        let content = r#"---
name: Test Skill
---
# Test Skill
"#;

        assert!(rejection_message(content).contains("description"));
    }

    /// Plain markdown without any `---` markers is rejected.
    #[test]
    fn test_validate_skill_frontmatter_no_frontmatter() {
        let content = r#"# Test Skill

This skill has no frontmatter.
"#;

        assert!(rejection_message(content).contains("missing required YAML frontmatter"));
    }

    /// Syntactically broken YAML surfaces as an invalid-frontmatter error.
    #[test]
    fn test_validate_skill_frontmatter_invalid_yaml() {
        let content = r#"---
name: Test Skill
description: Invalid YAML
unclosed: [ "item1", "item2"
---
# Test Skill
"#;

        assert!(rejection_message(content).contains("Invalid SKILL.md frontmatter"));
    }
}