use crate::text::{fnv1a_64, tokenize};
use std::fs;
use std::path::{Path, PathBuf};
/// One skill parsed from a `SKILL.md` file.
///
/// Values are produced by `parse_skill`; the per-field notes below describe
/// what that function stores.
#[derive(Clone, Debug)]
pub struct Skill {
    /// Identifier of the skill; `parse_skill` sets it to a copy of `name`.
    pub id: String,
    /// The frontmatter `name:` value.
    pub name: String,
    /// The frontmatter `description:` value.
    pub description: String,
    /// Leading body text after the frontmatter, flattened to a single line.
    pub body_head: String,
    /// Frontmatter `keywords:` / `aliases:` entries plus the tokens of `name`.
    pub keywords: Vec<String>,
    /// Quoted multi-word phrases pulled out of `description`.
    pub trigger_phrases: Vec<String>,
    /// Location of the file this skill was read from.
    pub path: PathBuf,
    /// 16-hex-digit FNV-1a hash of the file content.
    pub hash: String,
}

impl Skill {
    /// Returns the text that represents this skill as a document.
    ///
    /// That is the description alone when there is no body head; otherwise the
    /// description and the body head separated by a single newline.
    pub fn doc_text(&self) -> String {
        match self.body_head.as_str() {
            "" => self.description.clone(),
            head => format!("{}\n{}", self.description, head),
        }
    }
}
/// Outcome of a discovery pass as returned by `discover_all`.
pub struct Discovery {
/// Skills that parsed successfully; `discover_all` sorts them by `id` and
/// drops later entries that repeat an `id`.
pub skills: Vec<Skill>,
/// Files that were found but rejected, each paired with the reason string.
pub skipped: Vec<(PathBuf, String)>,
}
/// Walks every root, parses each `SKILL.md` found and reports both outcomes.
///
/// Candidate paths are sorted and de-duplicated before parsing, so a file that
/// is reachable from several roots is parsed once. Files that fail to parse
/// are traced at debug level and returned in `skipped` with the reason; they
/// never abort the pass. The resulting skills are ordered by `id`, and when
/// several files declare the same `id` only the first one in that (stable)
/// order — i.e. the one whose path sorts first — is kept.
pub fn discover_all(roots: &[PathBuf]) -> Discovery {
    let mut candidates: Vec<PathBuf> = Vec::new();
    roots
        .iter()
        .for_each(|root| collect(root, &mut candidates, 0));
    candidates.sort();
    candidates.dedup();

    let mut found = Discovery {
        skills: Vec::new(),
        skipped: Vec::new(),
    };
    for file in candidates {
        match parse_skill(&file) {
            Ok(skill) => found.skills.push(skill),
            Err(why) => {
                crate::trace::debug(&format!("skipping skill file {}", file.display()), &why);
                found.skipped.push((file, why));
            }
        }
    }

    found.skills.sort_by(|left, right| left.id.cmp(&right.id));
    found.skills.dedup_by(|later, earlier| later.id == earlier.id);
    found
}
pub fn discover(roots: &[PathBuf]) -> anyhow::Result<Vec<Skill>> {
Ok(discover_all(roots).skills)
}
/// Maximum directory depth `collect` descends to; the root counts as depth 0.
const MAX_WALK_DEPTH: usize = 12;

/// Recursively gathers every file named exactly `SKILL.md` under `dir`.
///
/// * `dir` – directory to scan; unreadable or missing directories are
///   silently ignored.
/// * `out` – receives the paths that were found (in directory-listing order).
/// * `depth` – depth of `dir` relative to the walk root; nothing is scanned
///   once it reaches `MAX_WALK_DEPTH`.
///
/// Directories with well-known non-skill names (VCS data, build output,
/// dependencies, tests, fixtures, examples, templates) are not entered.
fn collect(dir: &Path, out: &mut Vec<PathBuf>, depth: usize) {
    const PRUNED_DIRS: [&str; 8] = [
        ".git",
        "target",
        "node_modules",
        "tests",
        "fixtures",
        "examples",
        "template",
        "templates",
    ];

    if depth >= MAX_WALK_DEPTH {
        return;
    }
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    // `flatten` drops entries whose metadata could not be read.
    for path in entries.flatten().map(|entry| entry.path()) {
        let leaf = path.file_name().and_then(|name| name.to_str());
        if path.is_dir() {
            let pruned = leaf.is_some_and(|name| PRUNED_DIRS.contains(&name));
            if !pruned {
                collect(&path, out, depth + 1);
            }
            continue;
        }
        if leaf == Some("SKILL.md") {
            out.push(path);
        }
    }
}
/// Parses a single skill file, mapping every rejection to `Ok(None)`.
///
/// The rejection reason produced by `parse_skill` is dropped; as written this
/// function never returns `Err`.
pub fn parse_file(path: &Path) -> anyhow::Result<Option<Skill>> {
    match parse_skill(path) {
        Ok(skill) => Ok(Some(skill)),
        Err(_) => Ok(None),
    }
}
/// Reads and parses one `SKILL.md` file.
///
/// The file is decoded lossily (invalid UTF-8 becomes U+FFFD) and a leading
/// byte-order mark is removed before parsing.
///
/// # Errors
/// Returns a human-readable reason when the file cannot be read, has no
/// leading frontmatter, lacks a `name:` or `description:`, or still carries
/// the unfilled template description.
fn parse_skill(path: &Path) -> Result<Skill, String> {
    let raw = fs::read(path).map_err(|e| format!("read failed: {e}"))?;
    let decoded = String::from_utf8_lossy(&raw);
    let content: &str = decoded.strip_prefix('\u{feff}').unwrap_or(&decoded);

    let (name, description, mut keywords) = match parse_frontmatter(content) {
        Some(fields) => fields,
        None => return Err("no leading `--- ... ---` YAML frontmatter".into()),
    };
    if name.is_empty() {
        return Err("frontmatter has no `name:`".into());
    }
    if description.is_empty() {
        return Err("frontmatter has no `description:`".into());
    }
    if is_placeholder(&description) {
        return Err("unfilled template placeholder description".into());
    }

    // The tokens of the skill's own name always count as keywords.
    for token in tokenize(&name) {
        if !keywords.contains(&token) {
            keywords.push(token);
        }
    }

    let hash = format!("{:016x}", fnv1a_64(content.as_bytes()));
    let trigger_phrases = extract_phrases(&description);
    let head = body_head(content, 8, 600);
    let id = name.clone();
    Ok(Skill {
        id,
        name,
        description,
        body_head: head,
        keywords,
        trigger_phrases,
        path: path.to_path_buf(),
        hash,
    })
}
/// Fewest content tokens a quoted span needs to count as a trigger phrase.
const MIN_PHRASE_TOKENS: usize = 2;
/// Most content tokens a quoted span may have to count as a trigger phrase.
const MAX_PHRASE_TOKENS: usize = 10;

/// Extracts quoted multi-word trigger phrases from a skill description.
///
/// The description is scanned left to right for quoted spans (straight or
/// curly, single or double quotes). Each span is reduced to its content
/// tokens via `crate::text::content_tokens`; spans with between
/// `MIN_PHRASE_TOKENS` and `MAX_PHRASE_TOKENS` tokens are kept, joined with
/// single spaces. Phrases are returned in order of first appearance, without
/// duplicates.
pub fn extract_phrases(description: &str) -> Vec<String> {
    // Work on chars so that indices stay valid around multi-byte quote marks.
    let chars: Vec<char> = description.chars().collect();
    let mut phrases: Vec<String> = Vec::new();
    let mut i = 0;
    while i < chars.len() {
        let span_end = opens_quote(&chars, i).and_then(|close| find_close(&chars, i + 1, close));
        let Some(end) = span_end else {
            // Not an opening quote, or no matching close: move on one char.
            i += 1;
            continue;
        };
        let span: String = chars[i + 1..end].iter().collect();
        let toks = crate::text::content_tokens(&span);
        if (MIN_PHRASE_TOKENS..=MAX_PHRASE_TOKENS).contains(&toks.len()) {
            let phrase = toks.join(" ");
            if !phrases.contains(&phrase) {
                phrases.push(phrase);
            }
        }
        // Resume after the closing quote, whether or not the span was kept,
        // so the closing mark is never re-read as an opening one.
        i = end + 1;
    }
    phrases
}
/// Decides whether `chars[i]` opens a quoted span.
///
/// Returns the matching closing character when `chars[i]` is a left curly
/// quote or a straight quote that is not directly preceded by an alphanumeric
/// character (which rules out apostrophes such as the one in `don't`).
/// Returns `None` otherwise. Panics if `i` is out of bounds.
fn opens_quote(chars: &[char], i: usize) -> Option<char> {
    let here = chars[i];
    let closer = match here {
        '\u{201c}' => '\u{201d}',
        '\u{2018}' => '\u{2019}',
        '"' | '\'' => here,
        _ => return None,
    };
    let glued_to_word = i > 0 && chars[i - 1].is_alphanumeric();
    if glued_to_word {
        None
    } else {
        Some(closer)
    }
}
/// Finds the index of the closing quote for a span that starts at `from`.
///
/// Any occurrence of a curly `close` character matches. A straight quote only
/// matches when it is the last character or is followed by a non-alphanumeric
/// character, so apostrophes inside words are skipped. Returns `None` when no
/// closing quote exists at or after `from`.
fn find_close(chars: &[char], from: usize, close: char) -> Option<usize> {
    let needs_boundary = matches!(close, '"' | '\'');
    chars
        .iter()
        .enumerate()
        .skip(from)
        .find(|&(j, &c)| {
            let followed_by_word = chars.get(j + 1).is_some_and(|next| next.is_alphanumeric());
            c == close && !(needs_boundary && followed_by_word)
        })
        .map(|(j, _)| j)
}
/// Builds a one-line preview of the document body.
///
/// * `content` – full file text; a leading byte-order mark is ignored.
/// * `max_lines` – maximum number of non-empty body lines to include.
/// * `max_chars` – maximum length of the result, counted in characters.
///
/// When the first line is a `---` fence, everything up to and including the
/// next `---` line is skipped as frontmatter (if the fence is never closed the
/// body is empty). When there is no leading fence the whole text is treated as
/// body, first line included. Each body line is trimmed and stripped of
/// leading Markdown markers (`#`, `-`, `*`, `>`); lines left empty are
/// ignored. The surviving lines are joined with single spaces and truncated on
/// a character boundary.
fn body_head(content: &str, max_lines: usize, max_chars: usize) -> String {
    let content = content.strip_prefix('\u{feff}').unwrap_or(content);
    let mut lines = content.lines().peekable();

    // Consume the first line only if it really is the opening fence, so that
    // fence-less content keeps its first body line.
    if lines.next_if(|first| first.trim() == "---").is_some() {
        for line in lines.by_ref() {
            if line.trim() == "---" {
                break;
            }
        }
    }

    let head = lines
        .map(|line| {
            line.trim()
                .trim_start_matches(['#', '-', '*', '>', ' '])
                .trim()
        })
        .filter(|text| !text.is_empty())
        .take(max_lines)
        .collect::<Vec<_>>()
        .join(" ");

    // Cut at the byte offset of character number `max_chars`, if there is one.
    match head.char_indices().nth(max_chars) {
        Some((cut, _)) => head[..cut].to_string(),
        None => head,
    }
}
/// Extracts `(name, description, keywords)` from a leading frontmatter block.
///
/// Returns `None` when the first line (after an optional byte-order mark) is
/// not a `---` fence. Otherwise lines are read until the closing `---` fence
/// or the end of input. Only keys that start in column 0 are recognised:
/// `name:`, `description:`, `keywords:` and `aliases:`; anything else is
/// ignored. Missing scalar keys yield empty strings.
///
/// The returned keyword list is the `keywords:` entries followed by all
/// `aliases:` entries, regardless of which key appears first in the file.
pub fn parse_frontmatter(content: &str) -> Option<(String, String, Vec<String>)> {
    let content = content.strip_prefix('\u{FEFF}').unwrap_or(content);
    let mut lines = content.lines().peekable();
    if lines.next()?.trim() != "---" {
        return None;
    }

    let mut name = String::new();
    let mut description = String::new();
    let mut keywords: Vec<String> = Vec::new();
    // Kept apart from `keywords` so that an `aliases:` key written before
    // `keywords:` is not overwritten when the keyword list is assigned.
    let mut aliases: Vec<String> = Vec::new();

    // `while let` rather than `for`: the value readers below need to pull
    // continuation lines from the same iterator.
    while let Some(line) = lines.next() {
        let t = line.trim_end();
        if t.trim() == "---" {
            break;
        }
        if let Some(v) = t.strip_prefix("name:") {
            name = scalar_value(v, &mut lines);
        } else if let Some(v) = t.strip_prefix("description:") {
            description = scalar_value(v, &mut lines);
        } else if let Some(v) = t.strip_prefix("keywords:") {
            keywords = list_value(v, &mut lines);
        } else if let Some(v) = t.strip_prefix("aliases:") {
            aliases.extend(list_value(v, &mut lines));
        }
    }

    keywords.extend(aliases);
    Some((name, description, keywords))
}
/// Peekable line iterator shared by the frontmatter value readers, so they can
/// look at the next line before deciding whether to consume it.
type FrontmatterLines<'a> = std::iter::Peekable<std::str::Lines<'a>>;
/// Tells whether `head` is a YAML block-scalar header such as `|`, `>-` or
/// `>2`: a `|` or `>` followed only by `+`, `-` or ASCII digits.
fn is_block_scalar_header(head: &str) -> bool {
    let Some(indicators) = head.strip_prefix(['|', '>']) else {
        return false;
    };
    indicators
        .chars()
        .all(|c| c == '+' || c == '-' || c.is_ascii_digit())
}
/// Reads a scalar frontmatter value, including its continuation lines.
///
/// * `first` – the text that follows `key:` on the key's own line.
/// * `lines` – the remaining frontmatter lines; continuation lines are
///   consumed, the first line that does not belong to the value is left in
///   place.
///
/// A block-scalar header (`|`, `>`, …) contributes no text itself; any other
/// non-empty head is unquoted and becomes the first part. Following lines are
/// absorbed while they are indented and are not a `---` fence. A blank line
/// ends a plain scalar but is skipped inside a block scalar. All parts are
/// trimmed and joined with single spaces.
fn scalar_value(first: &str, lines: &mut FrontmatterLines) -> String {
    let head = first.trim();
    let block = is_block_scalar_header(head);

    let mut parts: Vec<String> = Vec::new();
    if !(block || head.is_empty()) {
        parts.push(unquote(head));
    }

    // `next_if` consumes a line only when it still belongs to this value.
    while let Some(line) = lines.next_if(|candidate| {
        let text = candidate.trim();
        if text.is_empty() {
            block
        } else {
            text != "---" && candidate.starts_with([' ', '\t'])
        }
    }) {
        let text = line.trim();
        if !text.is_empty() {
            parts.push(text.to_string());
        }
    }
    parts.join(" ")
}
/// Reads a list-valued frontmatter key.
///
/// * `first` – the text that follows `key:` on the key's own line.
/// * `lines` – the remaining frontmatter lines; list items are consumed, the
///   first line that is not an item is left in place.
///
/// A non-empty `first` is parsed as an inline list (`[a, b]`). Otherwise the
/// following lines are read as a block sequence. Items may be indented, or —
/// as YAML also permits under a mapping key — start in column 0 with `- `.
/// An unindented line only counts as an item when the dash is followed by
/// whitespace or nothing, so the closing `---` fence is never consumed.
/// Items are unquoted and ASCII-lowercased; empty items are dropped.
fn list_value(first: &str, lines: &mut FrontmatterLines) -> Vec<String> {
    let head = first.trim();
    if !head.is_empty() {
        return parse_list(head);
    }
    let mut out = Vec::new();
    while let Some(next) = lines.peek() {
        let trimmed = next.trim();
        let Some(rest) = trimmed.strip_prefix('-') else {
            break;
        };
        let indented = next.starts_with([' ', '\t']);
        let dash_stands_alone = rest.is_empty() || rest.starts_with([' ', '\t']);
        if !indented && !dash_stands_alone {
            break;
        }
        let item = unquote(rest).to_ascii_lowercase();
        lines.next();
        if !item.is_empty() {
            out.push(item);
        }
    }
    out
}
/// Detects the unfilled template description: text that, after leading
/// whitespace, begins with "replace with" in any ASCII letter case.
fn is_placeholder(description: &str) -> bool {
    const MARKER: &str = "replace with";
    // `get` yields `None` when the text is too short or the cut would split a
    // multi-byte character; neither case can match the all-ASCII marker.
    description
        .trim_start()
        .get(..MARKER.len())
        .is_some_and(|lead| lead.eq_ignore_ascii_case(MARKER))
}
/// Trims `s` and removes one pair of matching surrounding quotes, if present.
///
/// Both straight double and straight single quotes are recognised, but the
/// opening and closing marks must be the same kind. No escape sequences are
/// interpreted. Text without a matching pair is returned trimmed but
/// otherwise unchanged.
fn unquote(s: &str) -> String {
    let s = s.trim();
    let inner = s
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| s.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\'')));
    inner.unwrap_or(s).to_string()
}
/// Parses an inline list such as `[Foo, "bar baz"]` into lowercase items.
///
/// Leading `[` and trailing `]` characters are removed, the rest is split on
/// commas, and every piece is trimmed, unquoted and ASCII-lowercased. Empty
/// pieces are dropped. Commas inside quotes are not treated specially.
fn parse_list(s: &str) -> Vec<String> {
    let inner = s.trim_start_matches('[').trim_end_matches(']');
    let mut items = Vec::new();
    for piece in inner.split(',') {
        let item = unquote(piece.trim()).to_ascii_lowercase();
        if !item.is_empty() {
            items.push(item);
        }
    }
    items
}
// Unit tests for frontmatter parsing, phrase extraction and directory
// discovery. The filesystem tests build scratch trees under the system temp
// directory and remove them afterwards on a best-effort basis.
#[cfg(test)]
mod tests {
use super::*;
// A minimal frontmatter yields its name and description.
#[test]
fn parses_basic_frontmatter() {
let md = "---\nname: git-attribution\ndescription: Credit AI in commits.\n---\nbody\n";
let (name, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(name, "git-attribution");
assert_eq!(desc, "Credit AI in commits.");
}
// Quoted scalars are unquoted; inline keyword lists are lowercased.
#[test]
fn parses_quotes_and_keywords() {
let md = "---\nname: \"x\"\ndescription: 'd'\nkeywords: [Foo, bar]\n---\n";
let (name, desc, kw) = parse_frontmatter(md).unwrap();
assert_eq!(name, "x");
assert_eq!(desc, "d");
assert_eq!(kw, ["foo", "bar"]);
}
// Text that does not start with a `---` fence has no frontmatter.
#[test]
fn rejects_without_frontmatter() {
assert!(parse_frontmatter("no frontmatter here").is_none());
}
// A folded block scalar (`>-`) is joined into one line and ends at the next
// top-level key.
#[test]
fn parses_folded_block_scalar_description() {
let md = "---\nname: web-scraper\ndescription: >-\n Scrape structured data from web pages.\n Use when the user wants tables extracted from HTML.\nversion: 1\n---\nbody\n";
let (name, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(name, "web-scraper");
assert_eq!(
desc,
"Scrape structured data from web pages. Use when the user wants tables extracted from HTML."
);
}
// Literal block scalars (`|`) and indented plain continuation lines are both
// flattened with single spaces.
#[test]
fn parses_literal_block_scalar_and_plain_continuation() {
let md = "---\nname: x\ndescription: |\n Line one.\n Line two.\n---\n";
let (_, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(desc, "Line one. Line two.");
let md = "---\nname: x\ndescription: Edit Word documents\n with tracked changes.\n---\n";
let (_, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(desc, "Edit Word documents with tracked changes.");
}
// A block scalar must not swallow the key that follows it.
#[test]
fn block_scalar_stops_at_next_key_and_fence() {
let md = "---\ndescription: >\n folded text\nname: real-name\n---\n";
let (name, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(desc, "folded text");
assert_eq!(name, "real-name");
}
// Indented `- item` lines under `keywords:` are read as a list.
#[test]
fn parses_indented_keyword_list() {
let md = "---\nname: x\ndescription: d\nkeywords:\n - Foo\n - \"Bar Baz\"\n---\n";
let (_, _, kw) = parse_frontmatter(md).unwrap();
assert_eq!(kw, ["foo", "bar baz"]);
}
// Keys nested under another mapping are not mistaken for top-level keys.
#[test]
fn nested_indented_keys_are_not_top_level() {
let md = "---\nname: x\nmetadata:\n description: nested, not ours\n---\n";
let (name, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(name, "x");
assert_eq!(desc, "");
}
// A leading byte-order mark does not hide the opening fence.
#[test]
fn tolerates_utf8_bom() {
let md = "\u{feff}---\nname: x\ndescription: d\n---\n";
let (name, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(name, "x");
assert_eq!(desc, "d");
}
// Accepted and rejected spellings of a block-scalar header.
#[test]
fn block_scalar_header_detection() {
for h in ["|", ">", "|-", ">-", "|+", ">2", ">-2"] {
assert!(is_block_scalar_header(h), "{h}");
}
for h in ["", "text", "> text", "|x"] {
assert!(!is_block_scalar_header(h), "{h}");
}
}
// The template description is recognised regardless of case and leading
// whitespace.
#[test]
fn detects_template_placeholder() {
assert!(is_placeholder(
"Replace with description of the skill and when Claude should use it."
));
assert!(is_placeholder(" replace WITH something"));
assert!(!is_placeholder("Credit AI assistance in git commits."));
}
// Double-quoted spans become phrases made of their content tokens.
#[test]
fn extracts_multiword_trigger_phrases() {
let desc = "Use when the user says \"find that page online\" or asks to \"search the public web archive\".";
let ph = extract_phrases(desc);
assert!(ph.contains(&"find page online".to_string()), "got {ph:?}");
assert!(
ph.contains(&"search public web archive".to_string()),
"got {ph:?}"
);
}
// Quoted spans with too few content tokens produce no phrase.
#[test]
fn ignores_short_and_common_quoted_spans() {
let desc = "Triggers include 'report', 'memo', 'set up', and \"the file\".";
assert!(
extract_phrases(desc).is_empty(),
"short/common quotes leaked: {:?}",
extract_phrases(desc)
);
}
// The YAML quotes around a whole description are removed before phrase
// extraction, so only the inner quoted span becomes a phrase.
#[test]
fn extraction_ignores_yaml_outer_quoting() {
let md = "---\nname: docx\ndescription: \"Edit Word docs. Triggers include any mention of 'word document export'.\"\n---\nbody\n";
let s = parse_file_from_str(md);
assert!(
s.trigger_phrases
.iter()
.all(|p| p.split_whitespace().count() <= 4),
"outer YAML quote captured as phrase: {:?}",
s.trigger_phrases
);
assert!(s
.trigger_phrases
.contains(&"word document export".to_string()));
}
// Test helper: writes `md` to a scratch `SKILL.md`, parses it with
// `parse_file`, removes the scratch directory and returns the skill.
// Panics if the file cannot be written or does not parse.
fn parse_file_from_str(md: &str) -> Skill {
use std::io::Write;
let dir = std::env::temp_dir().join(format!(
"ski-phrase-{}-{}",
std::process::id(),
fnv1a_64(md.as_bytes())
));
fs::create_dir_all(&dir).unwrap();
let path = dir.join("SKILL.md");
let mut f = fs::File::create(&path).unwrap();
write!(f, "{md}").unwrap();
let s = parse_file(&path).unwrap().unwrap();
let _ = fs::remove_dir_all(&dir);
s
}
// A file containing invalid UTF-8 is still parsed (lossily) and does not
// prevent a sibling skill from being discovered.
#[test]
fn non_utf8_skill_neither_dies_nor_kills_discovery() {
let dir = std::env::temp_dir().join(format!(
"ski-utf8-{}-{}",
std::process::id(),
fnv1a_64(b"non-utf8")
));
let bad = dir.join("bad");
let good = dir.join("good");
fs::create_dir_all(&bad).unwrap();
fs::create_dir_all(&good).unwrap();
fs::write(
bad.join("SKILL.md"),
b"---\nname: latin\ndescription: caf\xe9 menus\n---\nbody\n",
)
.unwrap();
fs::write(
good.join("SKILL.md"),
"---\nname: fine\ndescription: works\n---\n",
)
.unwrap();
let d = discover_all(std::slice::from_ref(&dir));
let ids: Vec<&str> = d.skills.iter().map(|s| s.id.as_str()).collect();
assert!(ids.contains(&"fine"), "good skill lost: {ids:?}");
assert!(ids.contains(&"latin"), "lossy parse dropped: {ids:?}");
let _ = fs::remove_dir_all(&dir);
}
// A skill file without a description is reported in `skipped` with a reason
// that mentions the missing field.
#[test]
fn discover_all_reports_skipped_files_with_reason() {
let dir = std::env::temp_dir().join(format!(
"ski-skip-{}-{}",
std::process::id(),
fnv1a_64(b"skipped")
));
let broken = dir.join("broken");
fs::create_dir_all(&broken).unwrap();
fs::write(broken.join("SKILL.md"), "---\nname: no-desc\n---\n").unwrap();
let d = discover_all(std::slice::from_ref(&dir));
assert!(d.skills.is_empty());
assert_eq!(d.skipped.len(), 1);
assert!(d.skipped[0].1.contains("description"), "{:?}", d.skipped);
let _ = fs::remove_dir_all(&dir);
}
// A skill nested deeper than `MAX_WALK_DEPTH` is not discovered.
#[test]
fn collect_caps_recursion_depth() {
let root = std::env::temp_dir().join(format!(
"ski-depth-{}-{}",
std::process::id(),
fnv1a_64(b"depth")
));
let mut deep = root.clone();
for i in 0..(MAX_WALK_DEPTH + 3) {
deep = deep.join(format!("d{i}"));
}
fs::create_dir_all(&deep).unwrap();
fs::write(
deep.join("SKILL.md"),
"---\nname: deep\ndescription: too deep\n---\n",
)
.unwrap();
let d = discover_all(std::slice::from_ref(&root));
assert!(d.skills.is_empty());
let _ = fs::remove_dir_all(&root);
}
// `parse_file` returns `Ok(None)` for a skill that still has the template
// description.
#[test]
fn parse_file_rejects_placeholder_skill() {
use std::io::Write;
let dir = std::env::temp_dir().join(format!("ski-tpl-{}", std::process::id()));
fs::create_dir_all(&dir).unwrap();
let path = dir.join("SKILL.md");
let mut f = fs::File::create(&path).unwrap();
write!(
f,
"---\nname: template-skill\ndescription: Replace with description of the skill.\n---\nbody\n"
)
.unwrap();
assert!(parse_file(&path).unwrap().is_none());
let _ = fs::remove_dir_all(&dir);
}
// Arbitrary non-UTF-8 bytes must not make `parse_file` return an error.
#[test]
fn parse_file_tolerates_non_utf8_bytes() {
let dir = std::env::temp_dir().join(format!("ski-nonutf8-{}", std::process::id()));
fs::create_dir_all(&dir).unwrap();
let path = dir.join("SKILL.md");
fs::write(&path, [0xff, 0xfe, b'-', b'-', b'-', 0x00]).unwrap();
assert!(parse_file(&path).is_ok());
let _ = fs::remove_dir_all(&dir);
}
// A directory that happens to be named `SKILL.md` does not stop discovery of
// a valid sibling skill.
#[test]
fn discover_skips_unreadable_file_instead_of_aborting() {
let dir = std::env::temp_dir().join(format!("ski-discover-skip-{}", std::process::id()));
let good = dir.join("good");
fs::create_dir_all(&good).unwrap();
fs::write(
good.join("SKILL.md"),
"---\nname: good-skill\ndescription: A perfectly fine skill.\n---\nbody\n",
)
.unwrap();
let bad = dir.join("bad");
fs::create_dir_all(&bad).unwrap();
fs::create_dir_all(bad.join("SKILL.md")).unwrap();
let found = discover(std::slice::from_ref(&dir)).unwrap();
assert_eq!(found.len(), 1);
assert_eq!(found[0].id, "good-skill");
let _ = fs::remove_dir_all(&dir);
}
// Same BOM scenario as `tolerates_utf8_bom`, written with an upper-case
// escape.
#[test]
fn parse_frontmatter_strips_leading_bom() {
let md = "\u{FEFF}---\nname: x\ndescription: d\n---\n";
let (name, desc, _) = parse_frontmatter(md).unwrap();
assert_eq!(name, "x");
assert_eq!(desc, "d");
}
// Calls `collect` directly: a file beyond the depth cap is never pushed.
#[test]
fn collect_bounds_recursion_depth() {
let root = std::env::temp_dir().join(format!("ski-deep-{}", std::process::id()));
let mut dir = root.clone();
for i in 0..MAX_WALK_DEPTH + 5 {
dir = dir.join(format!("d{i}"));
}
fs::create_dir_all(&dir).unwrap();
fs::write(
dir.join("SKILL.md"),
"---\nname: too-deep\ndescription: unreachable.\n---\n",
)
.unwrap();
let mut out = Vec::new();
collect(&root, &mut out, 0);
assert!(out.is_empty(), "found a file past the depth cap: {out:?}");
let _ = fs::remove_dir_all(&root);
}
}