Skip to main content

ski/
skill.rs

1//! Skill discovery and `SKILL.md` frontmatter parsing.
2
3use crate::text::{fnv1a_64, tokenize};
4use std::fs;
5use std::path::{Path, PathBuf};
6
/// One skill parsed from a `SKILL.md` file.
#[derive(Clone, Debug)]
pub struct Skill {
    /// Unique id; always equal to the skill's declared `name`.
    pub id: String,
    /// The `name` declared in the frontmatter.
    pub name: String,
    /// The `description` declared in the frontmatter.
    pub description: String,
    /// The opening prose lines of the body: dense topical signal that helps
    /// tell confusable descriptions apart without the dilution of the whole
    /// document. The stage-2 cross-encoder reads it together with
    /// `description` (see [`Skill::doc_text`]); the stage-1 index embeds the
    /// description alone.
    pub body_head: String,
    /// Keywords for the hybrid keyword boost: the explicit `keywords`/`aliases`
    /// frontmatter entries plus tokens derived from the name.
    pub keywords: Vec<String>,
    /// Multi-word trigger phrases mined from quoted spans in the description
    /// (the literal wording a skill says to invoke it on, e.g.
    /// `"find that online"`). Each phrase is normalized to its content tokens;
    /// the ranker boosts a skill when a prompt contains every token of a
    /// phrase. See [`extract_phrases`].
    pub trigger_phrases: Vec<String>,
    /// Location of the `SKILL.md` this skill was parsed from.
    pub path: PathBuf,
    /// Content hash, used to invalidate the index cache.
    pub hash: String,
}

impl Skill {
    /// Text handed to the stage-2 cross-encoder: the curated description,
    /// followed on a new line by the body head when there is one. This gives
    /// the reranker more topical signal than the description alone. (Stage-1
    /// retrieval embeds only the description; its thresholds are calibrated to
    /// that distribution.)
    pub fn doc_text(&self) -> String {
        match self.body_head.as_str() {
            "" => self.description.clone(),
            head => format!("{}\n{}", self.description, head),
        }
    }
}
44
45/// A discovery pass: the parsed skills, plus every `SKILL.md` that was found but
46/// yielded no skill (unreadable, unusable frontmatter, placeholder) with the
47/// reason — so `ski index` can say *why* a skill is missing instead of silently
48/// indexing without it.
49pub struct Discovery {
50    pub skills: Vec<Skill>,
51    pub skipped: Vec<(PathBuf, String)>,
52}
53
54/// Walk `roots` and parse every `SKILL.md` found. One bad file never aborts the
55/// pass — it is recorded in `skipped` (and traced under `SKI_DEBUG`) and the
56/// rest of the library survives.
57pub fn discover_all(roots: &[PathBuf]) -> Discovery {
58    let mut files = Vec::new();
59    for r in roots {
60        collect(r, &mut files, 0);
61    }
62    files.sort();
63    files.dedup();
64
65    let mut skills = Vec::new();
66    let mut skipped = Vec::new();
67    for f in files {
68        match parse_skill(&f) {
69            Ok(s) => skills.push(s),
70            Err(reason) => {
71                crate::trace::debug(&format!("skipping skill file {}", f.display()), &reason);
72                skipped.push((f, reason));
73            }
74        }
75    }
76    skills.sort_by(|a, b| a.id.cmp(&b.id));
77    skills.dedup_by(|a, b| a.id == b.id);
78    Discovery { skills, skipped }
79}
80
81/// Walk `roots` and parse every `SKILL.md` found (skills only; see
82/// [`discover_all`] for the skip diagnostics).
83pub fn discover(roots: &[PathBuf]) -> anyhow::Result<Vec<Skill>> {
84    Ok(discover_all(roots).skills)
85}
86
/// Recursion cap for `collect`, mirroring `context::PROJECT_WALK_LEVELS`: a call
/// whose `depth` has reached this value returns without reading the directory.
/// It keeps the walk bounded on a pathologically deep tree.
const MAX_WALK_DEPTH: usize = 12;

/// Recursively gather the paths of files named exactly `SKILL.md` under `dir`
/// into `out`.
///
/// `depth` is the current nesting level (callers start at 0). Directories that
/// cannot be read and entries that fail to enumerate are silently ignored.
fn collect(dir: &Path, out: &mut Vec<PathBuf>, depth: usize) {
    // Build/VCS directories, plus the test/example/template trees that ship
    // inside checked-out skill repos. Their `SKILL.md` files are fixtures and
    // placeholders rather than installed skills, and indexing them only adds
    // noise (e.g. a repo's `tests/fixtures/skills/*` cloned under
    // `~/.claude/plugins`).
    const PRUNED_DIRS: [&str; 8] = [
        ".git",
        "target",
        "node_modules",
        "tests",
        "fixtures",
        "examples",
        "template",
        "templates",
    ];

    if depth >= MAX_WALK_DEPTH {
        return;
    }
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };
    for path in entries.flatten().map(|entry| entry.path()) {
        let name = path.file_name().and_then(|s| s.to_str());
        if path.is_dir() {
            let pruned = name.is_some_and(|n| PRUNED_DIRS.contains(&n));
            if !pruned {
                collect(&path, out, depth + 1);
            }
        } else if name == Some("SKILL.md") {
            out.push(path);
        }
    }
}
127
128/// Parse a single `SKILL.md`. Returns `None` when the file yields no usable
129/// skill for *any* reason — unreadable file as well as missing/placeholder
130/// frontmatter — so a caller holding a possibly-stale path (e.g. the reranker's
131/// doc-text read) degrades gracefully. Use [`discover_all`] when the skip
132/// *reason* matters; the `Result` wrapper is kept for signature compatibility.
133pub fn parse_file(path: &Path) -> anyhow::Result<Option<Skill>> {
134    Ok(parse_skill(path).ok())
135}
136
137/// Parse a single `SKILL.md`, or the reason it yields no skill. The content is
138/// read lossily (one stray non-UTF8 byte must not disqualify a skill, let alone
139/// abort discovery of the whole library) and a leading UTF-8 BOM is stripped so
140/// BOM-saved files still match the `---` frontmatter fence.
141fn parse_skill(path: &Path) -> Result<Skill, String> {
142    let bytes = fs::read(path).map_err(|e| format!("read failed: {e}"))?;
143    let content = String::from_utf8_lossy(&bytes);
144    let content = content.strip_prefix('\u{feff}').unwrap_or(&content);
145    let Some((name, description, mut keywords)) = parse_frontmatter(content) else {
146        return Err("no leading `--- ... ---` YAML frontmatter".into());
147    };
148    if name.is_empty() {
149        return Err("frontmatter has no `name:`".into());
150    }
151    if description.is_empty() {
152        return Err("frontmatter has no `description:`".into());
153    }
154    if is_placeholder(&description) {
155        return Err("unfilled template placeholder description".into());
156    }
157    for tok in tokenize(&name) {
158        if !keywords.contains(&tok) {
159            keywords.push(tok);
160        }
161    }
162    let hash = format!("{:016x}", fnv1a_64(content.as_bytes()));
163    let trigger_phrases = extract_phrases(&description);
164    Ok(Skill {
165        id: name.clone(),
166        name,
167        description,
168        body_head: body_head(content, 8, 600),
169        keywords,
170        trigger_phrases,
171        path: path.to_path_buf(),
172        hash,
173    })
174}
175
/// Minimum content tokens (stopwords excluded) for a quoted span to qualify as a
/// trigger phrase; the bound is inclusive. Two is the floor: a full two-token
/// match (e.g. `connect mysql`, `screen reader support`) requires *both*
/// discriminative tokens present, which stays high-precision on realistic
/// prompts while covering the many two-word triggers skills actually ship.
/// Single-token spans ("set up" → `set`, "report", "the file") collapse below
/// this and are dropped — they are common-word noise that belongs to the
/// dense/keyword channels, not here.
const MIN_PHRASE_TOKENS: usize = 2;

/// Upper bound (inclusive) on content tokens. A quoted span longer than this is
/// a sentence or a wholly-quoted description, not a trigger phrase — reject it
/// so the channel stays a *phrase* matcher and never demands a paragraph-length
/// token overlap.
const MAX_PHRASE_TOKENS: usize = 10;
189
190/// Mine multi-word trigger phrases from a skill description. Scans the *already
191/// unquoted* description for inner quoted spans (single or double quotes, ASCII or
192/// curly), keeps those with [`MIN_PHRASE_TOKENS`]..=[`MAX_PHRASE_TOKENS`] content
193/// tokens, and returns each normalized to a space-joined string of its content
194/// tokens (the form the ranker matches against a prompt). De-duplicated,
195/// order-preserving.
196///
197/// Runs on the parsed description, never the raw YAML line, so a wholly
198/// double-quoted `description:` value does not surface its entire text as one
199/// phrase — only the genuinely inner quotes remain.
200///
201/// A straight `'` only opens/closes a span at a word boundary (preceded/followed
202/// by a non-alphanumeric or the string edge), so apostrophes in contractions and
203/// possessives — `don't`, `user's` — are not mistaken for quotes.
204pub fn extract_phrases(description: &str) -> Vec<String> {
205    let mut out: Vec<String> = Vec::new();
206    let chars: Vec<char> = description.chars().collect();
207    let mut i = 0;
208    while i < chars.len() {
209        let c = chars[i];
210        if let Some(close) = opens_quote(&chars, i) {
211            // Find the matching close at a word boundary.
212            if let Some(end) = find_close(&chars, i + 1, close) {
213                let span: String = chars[i + 1..end].iter().collect();
214                let toks = crate::text::content_tokens(&span);
215                if (MIN_PHRASE_TOKENS..=MAX_PHRASE_TOKENS).contains(&toks.len()) {
216                    let phrase = toks.join(" ");
217                    if !out.contains(&phrase) {
218                        out.push(phrase);
219                    }
220                }
221                i = end + 1;
222                continue;
223            }
224        }
225        let _ = c;
226        i += 1;
227    }
228    out
229}
230
/// Decide whether `chars[i]` starts a quoted span and, if so, which character
/// ends it: `”` for `“`, `’` for `‘`, and the same character for a straight `"`
/// or `'`.
///
/// The quote only counts when it sits at a left word boundary — the start of
/// the text, or right after a non-alphanumeric character. This is what keeps
/// an apostrophe inside a word from being read as an opener; the check is
/// applied to every quote kind. Panics if `i` is out of bounds.
fn opens_quote(chars: &[char], i: usize) -> Option<char> {
    let quote = chars[i];
    let closer = match quote {
        '"' | '\'' => quote,      // straight quotes close themselves
        '\u{201c}' => '\u{201d}', // “ ”
        '\u{2018}' => '\u{2019}', // ‘ ’
        _ => return None,
    };
    let follows_word = i > 0 && chars[i - 1].is_alphanumeric();
    if follows_word {
        None
    } else {
        Some(closer)
    }
}
244
/// Index of the first closing quote `close` at or after `from`, or `None` when
/// the span never closes.
///
/// Straight quotes (`"`, `'`) and the curly `’` (U+2019) must sit at a right
/// word boundary — followed by a non-alphanumeric character or the end of the
/// text — to count as a close. U+2019 needs the rule as much as `'` does,
/// because it is also the typographic apostrophe: without it a span like
/// `‘don’t stop now’` would close early at `don’`. The curly `”` has no such
/// ambiguity and closes wherever it appears.
fn find_close(chars: &[char], from: usize, close: char) -> Option<usize> {
    let needs_boundary = matches!(close, '"' | '\'' | '\u{2019}');
    (from..chars.len()).find(|&j| {
        if chars[j] != close {
            return false;
        }
        let inside_word = matches!(chars.get(j + 1), Some(next) if next.is_alphanumeric());
        !(needs_boundary && inside_word)
    })
}
253
/// Pull the first `max_lines` non-blank body lines (after the frontmatter),
/// joined with single spaces and capped at `max_chars` characters. Leading
/// Markdown heading/list/quote markers (`#`, `-`, `*`, `>`) are stripped so the
/// consumer sees prose, not punctuation. Empty when there is no body.
///
/// The frontmatter block is skipped only when the first line really is a `---`
/// fence; a file without frontmatter keeps its first line as body text.
fn body_head(content: &str, max_lines: usize, max_chars: usize) -> String {
    let mut lines = content.lines().peekable();
    // Peek before consuming: only an actual opening fence is swallowed, then
    // everything up to and including the closing fence.
    if lines.next_if(|l| l.trim() == "---").is_some() {
        for l in lines.by_ref() {
            if l.trim() == "---" {
                break;
            }
        }
    }
    let mut out: Vec<String> = Vec::new();
    for l in lines {
        let t = l
            .trim()
            .trim_start_matches(['#', '-', '*', '>', ' '])
            .trim();
        if t.is_empty() {
            continue;
        }
        out.push(t.to_string());
        if out.len() >= max_lines {
            break;
        }
    }
    let joined = out.join(" ");
    // Cut on a char boundary: the index of the `max_chars`-th char is where the
    // kept prefix ends.
    match joined.char_indices().nth(max_chars) {
        Some((i, _)) => joined[..i].to_string(),
        None => joined,
    }
}
287
288/// Extract `name`, `description`, and `keywords`/`aliases` from a leading
289/// `--- ... ---` YAML frontmatter block. Intentionally minimal — not the full
290/// YAML grammar (no nested maps, anchors, flow maps) — but it covers every shape
291/// real skills ship: single-line `key: value`, quoted values, inline lists,
292/// block scalars (`description: >-` + indented lines — common in community
293/// skills, and previously parsed as the literal description `">-"`), multi-line
294/// plain scalars (indented continuation lines), and indented `- item` lists.
295///
296/// Keys are matched at column 0 only, so an indented key nested under another
297/// map is never mistaken for a top-level one.
298pub fn parse_frontmatter(content: &str) -> Option<(String, String, Vec<String>)> {
299    // A leading UTF-8 BOM (U+FEFF) is not whitespace to `str::trim`, so an
300    // untouched line 1 would never equal "---"; strip it before the check.
301    let content = content.strip_prefix('\u{FEFF}').unwrap_or(content);
302    let mut lines = content.lines().peekable();
303    if lines.next()?.trim() != "---" {
304        return None;
305    }
306    let (mut name, mut description, mut keywords) = (String::new(), String::new(), Vec::new());
307    while let Some(line) = lines.next() {
308        let t = line.trim_end();
309        if t.trim() == "---" {
310            break;
311        }
312        if let Some(v) = t.strip_prefix("name:") {
313            name = scalar_value(v, &mut lines);
314        } else if let Some(v) = t.strip_prefix("description:") {
315            description = scalar_value(v, &mut lines);
316        } else if let Some(v) = t.strip_prefix("keywords:") {
317            keywords = list_value(v, &mut lines);
318        } else if let Some(v) = t.strip_prefix("aliases:") {
319            keywords.extend(list_value(v, &mut lines));
320        }
321    }
322    Some((name, description, keywords))
323}
324
/// Peekable line cursor over the frontmatter text. The value parsers take it
/// by `&mut` so they can look at the next line and consume continuation lines
/// that belong to the key they are reading.
type FrontmatterLines<'a> = std::iter::Peekable<std::str::Lines<'a>>;
326
/// Tell whether the text following `key:` is a YAML block-scalar header: a
/// leading `|` or `>`, after which only chomping indicators (`+`/`-`) and ASCII
/// digits (an explicit indentation) may appear. Anything else, including the
/// empty string, is not a header.
fn is_block_scalar_header(head: &str) -> bool {
    let Some(indicators) = head.strip_prefix(['|', '>']) else {
        return false;
    };
    indicators
        .chars()
        .all(|c| c == '+' || c == '-' || c.is_ascii_digit())
}
335
336/// A scalar value that may continue past its key's line: a single-line value
337/// (`description: Edit files.`), a block scalar (`description: >-` plus
338/// indented lines), or a multi-line plain scalar (indented continuation lines).
339/// Multi-line forms are folded with single spaces — downstream consumers (the
340/// embedder, phrase extraction, BM25) want prose, not layout.
341fn scalar_value(first: &str, lines: &mut FrontmatterLines) -> String {
342    let head = first.trim();
343    let block = is_block_scalar_header(head);
344    let mut parts: Vec<String> = Vec::new();
345    if !block && !head.is_empty() {
346        parts.push(unquote(head));
347    }
348    while let Some(next) = lines.peek() {
349        let trimmed = next.trim();
350        let indented = next.starts_with([' ', '\t']);
351        if trimmed == "---" || (!indented && !trimmed.is_empty()) {
352            break; // closing fence or the next top-level key
353        }
354        if trimmed.is_empty() && !block {
355            break; // a blank line ends a plain scalar
356        }
357        lines.next();
358        if !trimmed.is_empty() {
359            parts.push(trimmed.to_string());
360        }
361    }
362    parts.join(" ")
363}
364
365/// A list value: inline (`keywords: [a, b]`) on the key's own line, or indented
366/// `- item` lines after a bare `keywords:`. Items are lowercased like
367/// [`parse_list`].
368fn list_value(first: &str, lines: &mut FrontmatterLines) -> Vec<String> {
369    let head = first.trim();
370    if !head.is_empty() {
371        return parse_list(head);
372    }
373    let mut out = Vec::new();
374    while let Some(next) = lines.peek() {
375        let trimmed = next.trim();
376        if !next.starts_with([' ', '\t']) || !trimmed.starts_with('-') {
377            break;
378        }
379        let item = trimmed.strip_prefix('-').unwrap_or(trimmed).trim();
380        let item = unquote(item).to_ascii_lowercase();
381        lines.next();
382        if !item.is_empty() {
383            out.push(item);
384        }
385    }
386    out
387}
388
/// Detect the unfilled skeleton description that ships in a `template/SKILL.md`
/// (e.g. "Replace with description of the skill…"). Files like that are
/// scaffolding rather than installed skills and must never be indexed or
/// injected. The match is an ASCII-case-insensitive "replace with" prefix,
/// checked after leading whitespace is skipped.
fn is_placeholder(description: &str) -> bool {
    const MARKER: &[u8] = b"replace with";
    description
        .trim_start()
        .as_bytes()
        .get(..MARKER.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(MARKER))
}
398
/// Trim `s` and, when the result is wrapped in one matching pair of straight
/// quotes (`"…"` or `'…'`), return what lies between them. Anything else —
/// mismatched quotes, a lone quote character, unquoted text — comes back
/// trimmed but otherwise untouched. Escape sequences are not interpreted.
fn unquote(s: &str) -> String {
    let trimmed = s.trim();
    ['"', '\'']
        .into_iter()
        .find_map(|quote| {
            // Removing the opener first means a lone quote character cannot
            // serve as both opener and closer.
            trimmed
                .strip_prefix(quote)
                .and_then(|rest| rest.strip_suffix(quote))
        })
        .unwrap_or(trimmed)
        .to_string()
}
411
412fn parse_list(s: &str) -> Vec<String> {
413    s.trim_start_matches('[')
414        .trim_end_matches(']')
415        .split(',')
416        .map(|x| unquote(x.trim()).to_ascii_lowercase())
417        .filter(|x| !x.is_empty())
418        .collect()
419}
420
// Unit tests: frontmatter parsing, trigger-phrase extraction, and the
// filesystem walk. The filesystem tests build throwaway trees under the system
// temp directory and remove them at the end.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_basic_frontmatter() {
        let md = "---\nname: git-attribution\ndescription: Credit AI in commits.\n---\nbody\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "git-attribution");
        assert_eq!(desc, "Credit AI in commits.");
    }

    #[test]
    fn parses_quotes_and_keywords() {
        let md = "---\nname: \"x\"\ndescription: 'd'\nkeywords: [Foo, bar]\n---\n";
        let (name, desc, kw) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "d");
        assert_eq!(kw, ["foo", "bar"]);
    }

    #[test]
    fn rejects_without_frontmatter() {
        assert!(parse_frontmatter("no frontmatter here").is_none());
    }

    #[test]
    fn parses_folded_block_scalar_description() {
        // The common community shape: `description: >-` with indented lines.
        // Previously parsed as the literal description ">-", which embedded
        // garbage and matched nothing.
        let md = "---\nname: web-scraper\ndescription: >-\n  Scrape structured data from web pages.\n  Use when the user wants tables extracted from HTML.\nversion: 1\n---\nbody\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "web-scraper");
        assert_eq!(
            desc,
            "Scrape structured data from web pages. Use when the user wants tables extracted from HTML."
        );
    }

    #[test]
    fn parses_literal_block_scalar_and_plain_continuation() {
        // `|` literal block.
        let md = "---\nname: x\ndescription: |\n  Line one.\n  Line two.\n---\n";
        let (_, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(desc, "Line one. Line two.");
        // Plain scalar continued on an indented next line (valid YAML,
        // previously truncated to the first line).
        let md = "---\nname: x\ndescription: Edit Word documents\n  with tracked changes.\n---\n";
        let (_, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(desc, "Edit Word documents with tracked changes.");
    }

    #[test]
    fn block_scalar_stops_at_next_key_and_fence() {
        let md = "---\ndescription: >\n  folded text\nname: real-name\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(desc, "folded text");
        assert_eq!(name, "real-name");
    }

    #[test]
    fn parses_indented_keyword_list() {
        let md = "---\nname: x\ndescription: d\nkeywords:\n  - Foo\n  - \"Bar Baz\"\n---\n";
        let (_, _, kw) = parse_frontmatter(md).unwrap();
        assert_eq!(kw, ["foo", "bar baz"]);
    }

    #[test]
    fn nested_indented_keys_are_not_top_level() {
        // An indented `description:` under some other map must not clobber the
        // real (absent) top-level one.
        let md = "---\nname: x\nmetadata:\n  description: nested, not ours\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "");
    }

    // NOTE(review): same scenario as `parse_frontmatter_strips_leading_bom`
    // further down; one of the two could be dropped.
    #[test]
    fn tolerates_utf8_bom() {
        let md = "\u{feff}---\nname: x\ndescription: d\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "d");
    }

    #[test]
    fn block_scalar_header_detection() {
        for h in ["|", ">", "|-", ">-", "|+", ">2", ">-2"] {
            assert!(is_block_scalar_header(h), "{h}");
        }
        for h in ["", "text", "> text", "|x"] {
            assert!(!is_block_scalar_header(h), "{h}");
        }
    }

    #[test]
    fn detects_template_placeholder() {
        assert!(is_placeholder(
            "Replace with description of the skill and when Claude should use it."
        ));
        assert!(is_placeholder("  replace WITH something"));
        assert!(!is_placeholder("Credit AI assistance in git commits."));
    }

    #[test]
    fn extracts_multiword_trigger_phrases() {
        // Inner-quoted spans in the (already unquoted) description that carry at
        // least `MIN_PHRASE_TOKENS` (2) content tokens become trigger phrases,
        // normalized to their content tokens.
        let desc = "Use when the user says \"find that page online\" or asks to \"search the public web archive\".";
        let ph = extract_phrases(desc);
        assert!(ph.contains(&"find page online".to_string()), "got {ph:?}");
        // "the" is a stopword and dropped; the rest survive as content tokens.
        assert!(
            ph.contains(&"search public web archive".to_string()),
            "got {ph:?}"
        );
    }

    #[test]
    fn ignores_short_and_common_quoted_spans() {
        // Single-word or all-stopword quotes are noise, not triggers, and must not
        // become phrases (they would over-fire the lexical channel).
        let desc = "Triggers include 'report', 'memo', 'set up', and \"the file\".";
        assert!(
            extract_phrases(desc).is_empty(),
            "short/common quotes leaked: {:?}",
            extract_phrases(desc)
        );
    }

    #[test]
    fn extraction_ignores_yaml_outer_quoting() {
        // A description whose YAML value is wholly double-quoted must not yield the
        // entire description as one giant "phrase": extraction runs on the parsed,
        // unquoted value, and the only real triggers are the inner single quotes.
        let md = "---\nname: docx\ndescription: \"Edit Word docs. Triggers include any mention of 'word document export'.\"\n---\nbody\n";
        let s = parse_file_from_str(md);
        assert!(
            s.trigger_phrases
                .iter()
                .all(|p| p.split_whitespace().count() <= 4),
            "outer YAML quote captured as phrase: {:?}",
            s.trigger_phrases
        );
        assert!(s
            .trigger_phrases
            .contains(&"word document export".to_string()));
    }

    /// Test helper: parse a SKILL.md from a string via a temp file. The
    /// directory name combines the process id with a hash of the content;
    /// panics if the content does not parse into a skill.
    fn parse_file_from_str(md: &str) -> Skill {
        use std::io::Write;
        let dir = std::env::temp_dir().join(format!(
            "ski-phrase-{}-{}",
            std::process::id(),
            fnv1a_64(md.as_bytes())
        ));
        fs::create_dir_all(&dir).unwrap();
        let path = dir.join("SKILL.md");
        let mut f = fs::File::create(&path).unwrap();
        write!(f, "{md}").unwrap();
        let s = parse_file(&path).unwrap().unwrap();
        let _ = fs::remove_dir_all(&dir);
        s
    }

    #[test]
    fn non_utf8_skill_neither_dies_nor_kills_discovery() {
        // One stray non-UTF8 byte used to abort `discover` for the WHOLE library
        // (read_to_string bubbled an error): zero injections for every skill.
        // Now the file reads lossily and still parses.
        let dir = std::env::temp_dir().join(format!(
            "ski-utf8-{}-{}",
            std::process::id(),
            fnv1a_64(b"non-utf8")
        ));
        let bad = dir.join("bad");
        let good = dir.join("good");
        fs::create_dir_all(&bad).unwrap();
        fs::create_dir_all(&good).unwrap();
        fs::write(
            bad.join("SKILL.md"),
            b"---\nname: latin\ndescription: caf\xe9 menus\n---\nbody\n",
        )
        .unwrap();
        fs::write(
            good.join("SKILL.md"),
            "---\nname: fine\ndescription: works\n---\n",
        )
        .unwrap();
        let d = discover_all(std::slice::from_ref(&dir));
        let ids: Vec<&str> = d.skills.iter().map(|s| s.id.as_str()).collect();
        assert!(ids.contains(&"fine"), "good skill lost: {ids:?}");
        assert!(ids.contains(&"latin"), "lossy parse dropped: {ids:?}");
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn discover_all_reports_skipped_files_with_reason() {
        let dir = std::env::temp_dir().join(format!(
            "ski-skip-{}-{}",
            std::process::id(),
            fnv1a_64(b"skipped")
        ));
        let broken = dir.join("broken");
        fs::create_dir_all(&broken).unwrap();
        // Frontmatter with a name but no description: must be skipped, and the
        // recorded reason must mention the missing description.
        fs::write(broken.join("SKILL.md"), "---\nname: no-desc\n---\n").unwrap();
        let d = discover_all(std::slice::from_ref(&dir));
        assert!(d.skills.is_empty());
        assert_eq!(d.skipped.len(), 1);
        assert!(d.skipped[0].1.contains("description"), "{:?}", d.skipped);
        let _ = fs::remove_dir_all(&dir);
    }

    // NOTE(review): overlaps with `collect_bounds_recursion_depth` below, which
    // checks the same cap by calling `collect` directly.
    #[test]
    fn collect_caps_recursion_depth() {
        // A tree deeper than the cap must terminate and simply not surface the
        // too-deep file.
        let root = std::env::temp_dir().join(format!(
            "ski-depth-{}-{}",
            std::process::id(),
            fnv1a_64(b"depth")
        ));
        let mut deep = root.clone();
        for i in 0..(MAX_WALK_DEPTH + 3) {
            deep = deep.join(format!("d{i}"));
        }
        fs::create_dir_all(&deep).unwrap();
        fs::write(
            deep.join("SKILL.md"),
            "---\nname: deep\ndescription: too deep\n---\n",
        )
        .unwrap();
        let d = discover_all(std::slice::from_ref(&root));
        assert!(d.skills.is_empty());
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn parse_file_rejects_placeholder_skill() {
        use std::io::Write;
        let dir = std::env::temp_dir().join(format!("ski-tpl-{}", std::process::id()));
        fs::create_dir_all(&dir).unwrap();
        let path = dir.join("SKILL.md");
        let mut f = fs::File::create(&path).unwrap();
        write!(
            f,
            "---\nname: template-skill\ndescription: Replace with description of the skill.\n---\nbody\n"
        )
        .unwrap();
        assert!(parse_file(&path).unwrap().is_none());
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn parse_file_tolerates_non_utf8_bytes() {
        // A non-UTF8 SKILL.md must not error `parse_file` (which would otherwise
        // bubble through `discover` and blank out the whole library) — it lossily
        // decodes, and since the mangled frontmatter check then fails, it degrades
        // to `Ok(None)` (skipped) rather than `Err`.
        let dir = std::env::temp_dir().join(format!("ski-nonutf8-{}", std::process::id()));
        fs::create_dir_all(&dir).unwrap();
        let path = dir.join("SKILL.md");
        fs::write(&path, [0xff, 0xfe, b'-', b'-', b'-', 0x00]).unwrap();
        assert!(parse_file(&path).is_ok());
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn discover_skips_unreadable_file_instead_of_aborting() {
        // One bad path among several must not blank out the rest of the library:
        // discover() should skip the unusable entry and still return the others.
        let dir = std::env::temp_dir().join(format!("ski-discover-skip-{}", std::process::id()));
        let good = dir.join("good");
        fs::create_dir_all(&good).unwrap();
        fs::write(
            good.join("SKILL.md"),
            "---\nname: good-skill\ndescription: A perfectly fine skill.\n---\nbody\n",
        )
        .unwrap();
        // A directory named SKILL.md is not a skill file. `collect` sees it as a
        // directory and walks into it rather than queueing it for parsing, so it
        // contributes nothing.
        let bad = dir.join("bad");
        fs::create_dir_all(&bad).unwrap();
        fs::create_dir_all(bad.join("SKILL.md")).unwrap();

        let found = discover(std::slice::from_ref(&dir)).unwrap();
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].id, "good-skill");
        let _ = fs::remove_dir_all(&dir);
    }

    // NOTE(review): same scenario as `tolerates_utf8_bom` above.
    #[test]
    fn parse_frontmatter_strips_leading_bom() {
        let md = "\u{FEFF}---\nname: x\ndescription: d\n---\n";
        let (name, desc, _) = parse_frontmatter(md).unwrap();
        assert_eq!(name, "x");
        assert_eq!(desc, "d");
    }

    #[test]
    fn collect_bounds_recursion_depth() {
        // Build a chain of nested dirs deeper than MAX_WALK_DEPTH with a SKILL.md
        // at the bottom; it must not be found (and, more importantly, must not
        // blow the stack on a real pathological tree).
        let root = std::env::temp_dir().join(format!("ski-deep-{}", std::process::id()));
        let mut dir = root.clone();
        for i in 0..MAX_WALK_DEPTH + 5 {
            dir = dir.join(format!("d{i}"));
        }
        fs::create_dir_all(&dir).unwrap();
        fs::write(
            dir.join("SKILL.md"),
            "---\nname: too-deep\ndescription: unreachable.\n---\n",
        )
        .unwrap();
        let mut out = Vec::new();
        collect(&root, &mut out, 0);
        assert!(out.is_empty(), "found a file past the depth cap: {out:?}");
        let _ = fs::remove_dir_all(&root);
    }
}