1use crate::text::{fnv1a_64, tokenize};
4use std::fs;
5use std::path::{Path, PathBuf};
6
/// A skill loaded from a `SKILL.md` file.
///
/// Values are produced by `parse_skill`; every field is plain owned data, so
/// the type can be cloned and compared freely.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Skill {
    /// Identifier of the skill; `parse_skill` sets it to a copy of `name`.
    pub id: String,
    /// Value of the frontmatter `name:` key.
    pub name: String,
    /// Value of the frontmatter `description:` key, folded onto one line.
    pub description: String,
    /// Short excerpt from the start of the markdown body; may be empty.
    pub body_head: String,
    /// `keywords:` / `aliases:` entries (ASCII-lower-cased), followed by any
    /// tokens of the name that were not already listed.
    pub keywords: Vec<String>,
    /// Multi-word quoted phrases extracted from the description.
    pub trigger_phrases: Vec<String>,
    /// Path of the file this skill was read from.
    pub path: PathBuf,
    /// Hash of the decoded file content, rendered as 16 hex digits.
    pub hash: String,
}
30
31impl Skill {
32 pub fn doc_text(&self) -> String {
37 if self.body_head.is_empty() {
38 self.description.clone()
39 } else {
40 format!("{}\n{}", self.description, self.body_head)
41 }
42 }
43}
44
45pub struct Discovery {
50 pub skills: Vec<Skill>,
51 pub skipped: Vec<(PathBuf, String)>,
52}
53
54pub fn discover_all(roots: &[PathBuf]) -> Discovery {
58 let mut files = Vec::new();
59 for r in roots {
60 collect(r, &mut files, 0);
61 }
62 files.sort();
63 files.dedup();
64
65 let mut skills = Vec::new();
66 let mut skipped = Vec::new();
67 for f in files {
68 match parse_skill(&f) {
69 Ok(s) => skills.push(s),
70 Err(reason) => {
71 crate::trace::debug(&format!("skipping skill file {}", f.display()), &reason);
72 skipped.push((f, reason));
73 }
74 }
75 }
76 skills.sort_by(|a, b| a.id.cmp(&b.id));
77 skills.dedup_by(|a, b| a.id == b.id);
78 Discovery { skills, skipped }
79}
80
81pub fn discover(roots: &[PathBuf]) -> anyhow::Result<Vec<Skill>> {
84 Ok(discover_all(roots).skills)
85}
86
/// Maximum directory nesting `collect` descends into; a call made with
/// `depth` at or beyond this value returns without reading anything.
const MAX_WALK_DEPTH: usize = 12;

/// Recursively gathers the paths of all files named exactly `SKILL.md`
/// beneath `dir` into `out`.
///
/// Directories that cannot be read and entries that fail to enumerate are
/// ignored. Sub-directories with one of the pruned names below are never
/// entered; the starting directory itself is not checked against that list.
fn collect(dir: &Path, out: &mut Vec<PathBuf>, depth: usize) {
    // Directory names that are skipped during the walk.
    const PRUNED: [&str; 8] = [
        ".git",
        "target",
        "node_modules",
        "tests",
        "fixtures",
        "examples",
        "template",
        "templates",
    ];

    if depth >= MAX_WALK_DEPTH {
        return;
    }
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    for path in entries.flatten().map(|entry| entry.path()) {
        let name = path.file_name().and_then(|n| n.to_str());
        if path.is_dir() {
            let pruned = matches!(name, Some(n) if PRUNED.contains(&n));
            if !pruned {
                collect(&path, out, depth + 1);
            }
        } else if name == Some("SKILL.md") {
            out.push(path);
        }
    }
}
127
128pub fn parse_file(path: &Path) -> anyhow::Result<Option<Skill>> {
134 Ok(parse_skill(path).ok())
135}
136
137fn parse_skill(path: &Path) -> Result<Skill, String> {
142 let bytes = fs::read(path).map_err(|e| format!("read failed: {e}"))?;
143 let content = String::from_utf8_lossy(&bytes);
144 let content = content.strip_prefix('\u{feff}').unwrap_or(&content);
145 let Some((name, description, mut keywords)) = parse_frontmatter(content) else {
146 return Err("no leading `--- ... ---` YAML frontmatter".into());
147 };
148 if name.is_empty() {
149 return Err("frontmatter has no `name:`".into());
150 }
151 if description.is_empty() {
152 return Err("frontmatter has no `description:`".into());
153 }
154 if is_placeholder(&description) {
155 return Err("unfilled template placeholder description".into());
156 }
157 for tok in tokenize(&name) {
158 if !keywords.contains(&tok) {
159 keywords.push(tok);
160 }
161 }
162 let hash = format!("{:016x}", fnv1a_64(content.as_bytes()));
163 let trigger_phrases = extract_phrases(&description);
164 Ok(Skill {
165 id: name.clone(),
166 name,
167 description,
168 body_head: body_head(content, 8, 600),
169 keywords,
170 trigger_phrases,
171 path: path.to_path_buf(),
172 hash,
173 })
174}
175
176const MIN_PHRASE_TOKENS: usize = 2;
184
185const MAX_PHRASE_TOKENS: usize = 10;
189
190pub fn extract_phrases(description: &str) -> Vec<String> {
205 let mut out: Vec<String> = Vec::new();
206 let chars: Vec<char> = description.chars().collect();
207 let mut i = 0;
208 while i < chars.len() {
209 let c = chars[i];
210 if let Some(close) = opens_quote(&chars, i) {
211 if let Some(end) = find_close(&chars, i + 1, close) {
213 let span: String = chars[i + 1..end].iter().collect();
214 let toks = crate::text::content_tokens(&span);
215 if (MIN_PHRASE_TOKENS..=MAX_PHRASE_TOKENS).contains(&toks.len()) {
216 let phrase = toks.join(" ");
217 if !out.contains(&phrase) {
218 out.push(phrase);
219 }
220 }
221 i = end + 1;
222 continue;
223 }
224 }
225 let _ = c;
226 i += 1;
227 }
228 out
229}
230
/// Decides whether the character at index `i` opens a quoted span and, if so,
/// returns the character that closes it.
///
/// Curly double and single opening quotes map to their curly closing
/// counterparts; straight `"` and `'` close with themselves. A quote directly
/// preceded by an alphanumeric character (such as the apostrophe in `it's`)
/// is not treated as an opener.
///
/// # Panics
///
/// Panics if `i` is out of bounds for `chars`.
fn opens_quote(chars: &[char], i: usize) -> Option<char> {
    let opener = chars[i];
    let closer = if opener == '\u{201c}' {
        '\u{201d}'
    } else if opener == '\u{2018}' {
        '\u{2019}'
    } else if opener == '"' || opener == '\'' {
        opener
    } else {
        return None;
    };

    let glued_to_word = i > 0 && chars[i - 1].is_alphanumeric();
    if glued_to_word {
        None
    } else {
        Some(closer)
    }
}
244
/// Finds the index of the first occurrence of `close` at or after `from` that
/// can end a quoted span.
///
/// For straight quotes (`"` and `'`) a candidate immediately followed by an
/// alphanumeric character is passed over, so apostrophes inside words do not
/// end the span. Curly closing quotes are accepted wherever they appear.
/// Returns `None` when no suitable character exists, including when `from`
/// lies past the end of `chars`.
fn find_close(chars: &[char], from: usize, close: char) -> Option<usize> {
    let needs_boundary = matches!(close, '"' | '\'');
    for (idx, &ch) in chars.iter().enumerate().skip(from) {
        if ch != close {
            continue;
        }
        let followed_by_word =
            matches!(chars.get(idx + 1), Some(next) if next.is_alphanumeric());
        if !(needs_boundary && followed_by_word) {
            return Some(idx);
        }
    }
    None
}
253
/// Builds a short single-line excerpt from the start of the markdown body.
///
/// If `content` begins with a `---` fence, everything up to and including the
/// closing `---` is skipped; content without a leading fence is treated as
/// body from its first line. Each remaining line is trimmed and stripped of
/// leading markdown markers (`#`, `-`, `*`, `>` and spaces); blank results are
/// dropped. At most `max_lines` such lines are joined with single spaces, and
/// the result is cut to at most `max_chars` characters (not bytes), so the cut
/// never splits a multi-byte character.
///
/// An opening fence with no closing fence consumes the whole input and
/// yields an empty string.
fn body_head(content: &str, max_lines: usize, max_chars: usize) -> String {
    let mut lines = content.lines().peekable();
    // Only consume the first line when it really is the opening fence;
    // otherwise it belongs to the body and must be kept.
    if lines.next_if(|l| l.trim() == "---").is_some() {
        for l in lines.by_ref() {
            if l.trim() == "---" {
                break;
            }
        }
    }

    let mut joined = lines
        .map(|l| {
            l.trim()
                .trim_start_matches(['#', '-', '*', '>', ' '])
                .trim()
        })
        .filter(|t| !t.is_empty())
        .take(max_lines)
        .collect::<Vec<_>>()
        .join(" ");

    // Byte offset of the first character past the limit, if the text is longer.
    let cut = joined.char_indices().nth(max_chars).map(|(at, _)| at);
    if let Some(at) = cut {
        joined.truncate(at);
    }
    joined
}
287
288pub fn parse_frontmatter(content: &str) -> Option<(String, String, Vec<String>)> {
299 let content = content.strip_prefix('\u{FEFF}').unwrap_or(content);
302 let mut lines = content.lines().peekable();
303 if lines.next()?.trim() != "---" {
304 return None;
305 }
306 let (mut name, mut description, mut keywords) = (String::new(), String::new(), Vec::new());
307 while let Some(line) = lines.next() {
308 let t = line.trim_end();
309 if t.trim() == "---" {
310 break;
311 }
312 if let Some(v) = t.strip_prefix("name:") {
313 name = scalar_value(v, &mut lines);
314 } else if let Some(v) = t.strip_prefix("description:") {
315 description = scalar_value(v, &mut lines);
316 } else if let Some(v) = t.strip_prefix("keywords:") {
317 keywords = list_value(v, &mut lines);
318 } else if let Some(v) = t.strip_prefix("aliases:") {
319 keywords.extend(list_value(v, &mut lines));
320 }
321 }
322 Some((name, description, keywords))
323}
324
/// Line cursor shared by the frontmatter value parsers; peeking lets them
/// inspect the next line before deciding whether it belongs to the value.
type FrontmatterLines<'a> = std::iter::Peekable<std::str::Lines<'a>>;

/// Returns `true` when `head` is a YAML block-scalar header: a `|` or `>`
/// followed only by `+`, `-` or ASCII digits (for example `|`, `>-`, `>2`).
fn is_block_scalar_header(head: &str) -> bool {
    match head.strip_prefix(['|', '>']) {
        Some(indicators) => indicators
            .chars()
            .all(|c| c == '+' || c == '-' || c.is_ascii_digit()),
        None => false,
    }
}
335
336fn scalar_value(first: &str, lines: &mut FrontmatterLines) -> String {
342 let head = first.trim();
343 let block = is_block_scalar_header(head);
344 let mut parts: Vec<String> = Vec::new();
345 if !block && !head.is_empty() {
346 parts.push(unquote(head));
347 }
348 while let Some(next) = lines.peek() {
349 let trimmed = next.trim();
350 let indented = next.starts_with([' ', '\t']);
351 if trimmed == "---" || (!indented && !trimmed.is_empty()) {
352 break; }
354 if trimmed.is_empty() && !block {
355 break; }
357 lines.next();
358 if !trimmed.is_empty() {
359 parts.push(trimmed.to_string());
360 }
361 }
362 parts.join(" ")
363}
364
365fn list_value(first: &str, lines: &mut FrontmatterLines) -> Vec<String> {
369 let head = first.trim();
370 if !head.is_empty() {
371 return parse_list(head);
372 }
373 let mut out = Vec::new();
374 while let Some(next) = lines.peek() {
375 let trimmed = next.trim();
376 if !next.starts_with([' ', '\t']) || !trimmed.starts_with('-') {
377 break;
378 }
379 let item = trimmed.strip_prefix('-').unwrap_or(trimmed).trim();
380 let item = unquote(item).to_ascii_lowercase();
381 lines.next();
382 if !item.is_empty() {
383 out.push(item);
384 }
385 }
386 out
387}
388
/// Returns `true` when `description` is still template filler, i.e. it starts
/// (after leading whitespace, ignoring ASCII case) with `replace with`.
fn is_placeholder(description: &str) -> bool {
    const MARKER: &str = "replace with";
    let lead = description.trim_start();
    // `get` yields `None` when the text is shorter than the marker or the cut
    // would split a multi-byte character; neither can match an ASCII marker.
    matches!(
        lead.get(..MARKER.len()),
        Some(prefix) if prefix.eq_ignore_ascii_case(MARKER)
    )
}
398
/// Trims `s` and removes one layer of matching surrounding quotes.
///
/// Only a pair of the same kind is removed: `"..."` or `'...'`. Mismatched
/// or lone quote characters are left in place.
fn unquote(s: &str) -> String {
    let s = s.trim();
    let inner = s
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            s.strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        });
    inner.unwrap_or(s).to_string()
}
411
412fn parse_list(s: &str) -> Vec<String> {
413 s.trim_start_matches('[')
414 .trim_end_matches(']')
415 .split(',')
416 .map(|x| unquote(x.trim()).to_ascii_lowercase())
417 .filter(|x| !x.is_empty())
418 .collect()
419}
420
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch directory under the system temp dir for filesystem tests.
    ///
    /// The directory is removed when the value is dropped, which also happens
    /// while a failed assertion unwinds, so failing tests do not leave files
    /// behind.
    struct Scratch {
        root: PathBuf,
    }

    impl Scratch {
        /// Creates an empty `ski-<tag>-<pid>` directory. Tests run in
        /// parallel, so every test must pass its own distinct `tag`.
        fn new(tag: &str) -> Self {
            let root = std::env::temp_dir().join(format!("ski-{tag}-{}", std::process::id()));
            // An aborted earlier run may have left a directory of this name.
            let _ = fs::remove_dir_all(&root);
            fs::create_dir_all(&root).unwrap();
            Scratch { root }
        }

        /// Writes `content` to `<root>/<subdir>/SKILL.md`, creating `subdir`.
        fn write_skill(&self, subdir: &str, content: impl AsRef<[u8]>) -> PathBuf {
            let dir = self.root.join(subdir);
            fs::create_dir_all(&dir).unwrap();
            let path = dir.join("SKILL.md");
            fs::write(&path, content).unwrap();
            path
        }
    }

    impl Drop for Scratch {
        fn drop(&mut self) {
            let _ = fs::remove_dir_all(&self.root);
        }
    }

    /// Writes `md` to a scratch `SKILL.md` and parses it through `parse_file`.
    fn parse_file_from_str(md: &str) -> Skill {
        let scratch = Scratch::new(&format!("phrase-{}", fnv1a_64(md.as_bytes())));
        let path = scratch.root.join("SKILL.md");
        fs::write(&path, md).unwrap();
        parse_file(&path).unwrap().unwrap()
    }

    #[test]
    fn parses_basic_frontmatter() {
        let md = "---\nname: git-attribution\ndescription: Credit AI in commits.\n---\nbody\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "git-attribution");
        assert_eq!(desc, "Credit AI in commits.");
    }

    #[test]
    fn parses_quotes_and_keywords() {
        let md = "---\nname: \"x\"\ndescription: 'd'\nkeywords: [Foo, bar]\n---\n";
        let (name, desc, kw) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "d");
        assert_eq!(kw, ["foo", "bar"]);
    }

    #[test]
    fn rejects_without_frontmatter() {
        assert!(parse_frontmatter("no frontmatter here").is_none());
    }

    #[test]
    fn parses_folded_block_scalar_description() {
        let md = "---\nname: web-scraper\ndescription: >-\n  Scrape structured data from web pages.\n  Use when the user wants tables extracted from HTML.\nversion: 1\n---\nbody\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "web-scraper");
        assert_eq!(
            desc,
            "Scrape structured data from web pages. Use when the user wants tables extracted from HTML."
        );
    }

    #[test]
    fn parses_literal_block_scalar_and_plain_continuation() {
        let md = "---\nname: x\ndescription: |\n  Line one.\n  Line two.\n---\n";
        let (_, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(desc, "Line one. Line two.");
        let md = "---\nname: x\ndescription: Edit Word documents\n  with tracked changes.\n---\n";
        let (_, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(desc, "Edit Word documents with tracked changes.");
    }

    #[test]
    fn block_scalar_stops_at_next_key_and_fence() {
        let md = "---\ndescription: >\n  folded text\nname: real-name\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(desc, "folded text");
        assert_eq!(name, "real-name");
    }

    #[test]
    fn parses_indented_keyword_list() {
        let md = "---\nname: x\ndescription: d\nkeywords:\n  - Foo\n  - \"Bar Baz\"\n---\n";
        let (_, _, kw) = parse_frontmatter(md).unwrap();
        assert_eq!(kw, ["foo", "bar baz"]);
    }

    #[test]
    fn nested_indented_keys_are_not_top_level() {
        let md = "---\nname: x\nmetadata:\n  description: nested, not ours\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "");
    }

    #[test]
    fn tolerates_utf8_bom() {
        let md = "\u{feff}---\nname: x\ndescription: d\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "d");
    }

    #[test]
    fn block_scalar_header_detection() {
        for h in ["|", ">", "|-", ">-", "|+", ">2", ">-2"] {
            assert!(is_block_scalar_header(h), "{h}");
        }
        for h in ["", "text", "> text", "|x"] {
            assert!(!is_block_scalar_header(h), "{h}");
        }
    }

    #[test]
    fn detects_template_placeholder() {
        assert!(is_placeholder(
            "Replace with description of the skill and when Claude should use it."
        ));
        assert!(is_placeholder("  replace WITH something"));
        assert!(!is_placeholder("Credit AI assistance in git commits."));
    }

    #[test]
    fn extracts_multiword_trigger_phrases() {
        let desc = "Use when the user says \"find that page online\" or asks to \"search the public web archive\".";
        let ph = extract_phrases(desc);
        assert!(ph.contains(&"find page online".to_string()), "got {ph:?}");
        assert!(
            ph.contains(&"search public web archive".to_string()),
            "got {ph:?}"
        );
    }

    #[test]
    fn ignores_short_and_common_quoted_spans() {
        let desc = "Triggers include 'report', 'memo', 'set up', and \"the file\".";
        assert!(
            extract_phrases(desc).is_empty(),
            "short/common quotes leaked: {:?}",
            extract_phrases(desc)
        );
    }

    #[test]
    fn extraction_ignores_yaml_outer_quoting() {
        let md = "---\nname: docx\ndescription: \"Edit Word docs. Triggers include any mention of 'word document export'.\"\n---\nbody\n";
        let s = parse_file_from_str(md);
        assert!(
            s.trigger_phrases
                .iter()
                .all(|p| p.split_whitespace().count() <= 4),
            "outer YAML quote captured as phrase: {:?}",
            s.trigger_phrases
        );
        assert!(s
            .trigger_phrases
            .contains(&"word document export".to_string()));
    }

    #[test]
    fn non_utf8_skill_neither_dies_nor_kills_discovery() {
        let scratch = Scratch::new("utf8");
        scratch.write_skill(
            "bad",
            b"---\nname: latin\ndescription: caf\xe9 menus\n---\nbody\n",
        );
        scratch.write_skill("good", "---\nname: fine\ndescription: works\n---\n");
        let d = discover_all(std::slice::from_ref(&scratch.root));
        let ids: Vec<&str> = d.skills.iter().map(|s| s.id.as_str()).collect();
        assert!(ids.contains(&"fine"), "good skill lost: {ids:?}");
        assert!(ids.contains(&"latin"), "lossy parse dropped: {ids:?}");
    }

    #[test]
    fn discover_all_reports_skipped_files_with_reason() {
        let scratch = Scratch::new("skip");
        scratch.write_skill("broken", "---\nname: no-desc\n---\n");
        let d = discover_all(std::slice::from_ref(&scratch.root));
        assert!(d.skills.is_empty());
        assert_eq!(d.skipped.len(), 1);
        assert!(d.skipped[0].1.contains("description"), "{:?}", d.skipped);
    }

    #[test]
    fn collect_caps_recursion_depth() {
        let scratch = Scratch::new("depth");
        let mut deep = scratch.root.clone();
        for i in 0..(MAX_WALK_DEPTH + 3) {
            deep = deep.join(format!("d{i}"));
        }
        fs::create_dir_all(&deep).unwrap();
        fs::write(
            deep.join("SKILL.md"),
            "---\nname: deep\ndescription: too deep\n---\n",
        )
        .unwrap();
        let d = discover_all(std::slice::from_ref(&scratch.root));
        assert!(d.skills.is_empty());
    }

    #[test]
    fn parse_file_rejects_placeholder_skill() {
        let scratch = Scratch::new("tpl");
        let path = scratch.root.join("SKILL.md");
        fs::write(
            &path,
            "---\nname: template-skill\ndescription: Replace with description of the skill.\n---\nbody\n",
        )
        .unwrap();
        assert!(parse_file(&path).unwrap().is_none());
    }

    #[test]
    fn parse_file_tolerates_non_utf8_bytes() {
        let scratch = Scratch::new("nonutf8");
        let path = scratch.root.join("SKILL.md");
        fs::write(&path, [0xff, 0xfe, b'-', b'-', b'-', 0x00]).unwrap();
        // `parse_file` never returns `Err`, so also check the outcome: the
        // lossily decoded bytes have no frontmatter fence and must be rejected.
        let parsed = parse_file(&path).expect("lossy decoding must not surface an error");
        assert!(parsed.is_none(), "garbage bytes parsed as a skill: {parsed:?}");
    }

    #[test]
    fn discover_skips_unreadable_file_instead_of_aborting() {
        let scratch = Scratch::new("discover-skip");
        scratch.write_skill(
            "good",
            "---\nname: good-skill\ndescription: A perfectly fine skill.\n---\nbody\n",
        );
        // A directory named `SKILL.md` can never be read as a skill file.
        fs::create_dir_all(scratch.root.join("bad").join("SKILL.md")).unwrap();

        let found = discover(std::slice::from_ref(&scratch.root)).unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].id, "good-skill");
    }

    #[test]
    fn parse_frontmatter_strips_leading_bom() {
        let md = "\u{FEFF}---\nname: x\ndescription: d\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "d");
    }

    #[test]
    fn collect_bounds_recursion_depth() {
        let scratch = Scratch::new("deep");
        let mut dir = scratch.root.clone();
        for i in 0..MAX_WALK_DEPTH + 5 {
            dir = dir.join(format!("d{i}"));
        }
        fs::create_dir_all(&dir).unwrap();
        fs::write(
            dir.join("SKILL.md"),
            "---\nname: too-deep\ndescription: unreachable.\n---\n",
        )
        .unwrap();
        let mut out = Vec::new();
        collect(&scratch.root, &mut out, 0);
        assert!(out.is_empty(), "found a file past the depth cap: {out:?}");
    }
}