Skip to main content

harn_skills/
lib.rs

1//! Embedded Harn skill corpus.
2//!
3//! This crate exposes the bundled corpus as metadata plus `SKILL.md`
4//! bodies. CLI commands that enumerate, dump, or install these skills
5//! are layered above this foundation.
6
7use std::collections::BTreeMap;
8use std::env;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13use std::sync::OnceLock;
14
/// Name of the environment variable that redirects canonical skill
/// discovery to a directory on disk.
pub const HARN_SKILLS_DIR_ENV: &'static str = "HARN_SKILLS_DIR";
17
/// Borrowed frontmatter metadata of a skill compiled into the binary.
///
/// Every field is a `'static` slice, so the struct is `Copy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SkillFrontmatter {
    /// Value of the `name` frontmatter key.
    pub name: &'static str,
    /// Value of the `short` frontmatter key.
    pub short: &'static str,
    /// Value of the `description` frontmatter key.
    pub description: &'static str,
    /// Value of the `when_to_use` frontmatter key, if the key is present.
    pub when_to_use: Option<&'static str>,
}
26
27/// A single skill embedded into the Harn build.
28#[derive(Debug, Clone, Copy, PartialEq, Eq)]
29pub struct EmbeddedSkill {
30    pub name: &'static str,
31    pub frontmatter: SkillFrontmatter,
32    pub body: &'static str,
33    /// The full original SKILL.md source — frontmatter delimiter,
34    /// frontmatter block, blank line, and body — exactly as embedded.
35    /// Use this when round-tripping a skill back to disk so the dumped
36    /// copy is byte-identical to the binary's canonical record.
37    pub source: &'static str,
38}
39
/// Owned frontmatter metadata read from a `SKILL.md` file on disk.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DiskSkillFrontmatter {
    /// Value of the `name` frontmatter key.
    pub name: String,
    /// Value of the `short` frontmatter key; empty when the file omits it.
    pub short: String,
    /// Value of the `description` frontmatter key.
    pub description: String,
    /// Value of the `when_to_use` frontmatter key, if the key is present.
    pub when_to_use: Option<String>,
}
48
49/// A single skill discovered recursively from `HARN_SKILLS_DIR`.
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct DiskSkill {
52    pub name: String,
53    pub frontmatter: DiskSkillFrontmatter,
54    pub body: String,
55    pub source: String,
56    pub path: PathBuf,
57}
58
59/// The active canonical corpus used by `harn skills list/get`.
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub enum SkillCorpus {
62    Embedded(&'static [EmbeddedSkill]),
63    Disk(Vec<DiskSkill>),
64}
65
66impl SkillCorpus {
67    pub fn is_disk(&self) -> bool {
68        matches!(self, Self::Disk(_))
69    }
70
71    pub fn len(&self) -> usize {
72        match self {
73            Self::Embedded(skills) => skills.len(),
74            Self::Disk(skills) => skills.len(),
75        }
76    }
77
78    pub fn is_empty(&self) -> bool {
79        self.len() == 0
80    }
81}
82
/// Failure raised while discovering or parsing `SKILL.md` files on disk.
#[derive(Debug)]
pub enum SkillDiscoveryError {
    /// A filesystem operation on `path` failed with `source`.
    Io {
        path: PathBuf,
        source: io::Error,
    },
    /// The file at `path` has no recognizable frontmatter block.
    MissingFrontmatter {
        path: PathBuf,
    },
    /// The frontmatter of the file at `path` lacks the required `field`.
    MissingField {
        path: PathBuf,
        field: &'static str,
    },
    /// Two files declare the same skill `name`.
    DuplicateName {
        name: String,
        first: PathBuf,
        second: PathBuf,
    },
}
103
104impl fmt::Display for SkillDiscoveryError {
105    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106        match self {
107            Self::Io { path, source } => write!(f, "{}: {source}", path.display()),
108            Self::MissingFrontmatter { path } => {
109                write!(f, "{}: missing SKILL.md frontmatter", path.display())
110            }
111            Self::MissingField { path, field } => {
112                write!(f, "{}: missing `{field}` frontmatter field", path.display())
113            }
114            Self::DuplicateName {
115                name,
116                first,
117                second,
118            } => write!(
119                f,
120                "duplicate skill `{name}` in {} and {}",
121                first.display(),
122                second.display()
123            ),
124        }
125    }
126}
127
128impl std::error::Error for SkillDiscoveryError {}
129
130const SOURCES: &[&str] = &[
131    include_str!("corpus/harn-agent/SKILL.md"),
132    include_str!("corpus/harn-diagnostics/SKILL.md"),
133    include_str!("corpus/harn-language/SKILL.md"),
134    include_str!("corpus/harn-orchestration/SKILL.md"),
135    include_str!("corpus/harn-probe/SKILL.md"),
136    include_str!("corpus/harn-providers/SKILL.md"),
137    include_str!("corpus/harn-testing/SKILL.md"),
138    include_str!("corpus/harn-tracing/SKILL.md"),
139];
140
141static EMBEDDED_SKILLS: OnceLock<Box<[EmbeddedSkill]>> = OnceLock::new();
142
143/// Return every skill bundled into this build.
144pub fn list_embedded_skills() -> &'static [EmbeddedSkill] {
145    EMBEDDED_SKILLS
146        .get_or_init(|| SOURCES.iter().map(|source| parse_skill(source)).collect())
147        .as_ref()
148}
149
150/// Return one bundled skill by canonical skill name.
151pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
152    list_embedded_skills()
153        .iter()
154        .find(|skill| skill.name == name)
155}
156
157/// Return the active canonical corpus. `HARN_SKILLS_DIR` wins only
158/// when it contains at least one recursively discovered `SKILL.md`;
159/// otherwise callers fall back to the embedded corpus.
160pub fn resolve_skill_corpus_from_env() -> Result<SkillCorpus, SkillDiscoveryError> {
161    let Ok(dir) = env::var(HARN_SKILLS_DIR_ENV) else {
162        return Ok(SkillCorpus::Embedded(list_embedded_skills()));
163    };
164    if dir.trim().is_empty() {
165        return Ok(SkillCorpus::Embedded(list_embedded_skills()));
166    }
167
168    let skills = list_disk_skills(dir)?;
169    if skills.is_empty() {
170        Ok(SkillCorpus::Embedded(list_embedded_skills()))
171    } else {
172        Ok(SkillCorpus::Disk(skills))
173    }
174}
175
176/// Recursively discover `SKILL.md` files under `root`.
177///
178/// A missing root is treated as an empty disk corpus so
179/// `HARN_SKILLS_DIR` can fall back cleanly to embedded skills.
180pub fn list_disk_skills(root: impl AsRef<Path>) -> Result<Vec<DiskSkill>, SkillDiscoveryError> {
181    let root = root.as_ref();
182    if !root.exists() {
183        return Ok(Vec::new());
184    }
185
186    let mut paths = Vec::new();
187    collect_skill_paths(root, &mut paths)?;
188    paths.sort();
189
190    let mut by_name: BTreeMap<String, DiskSkill> = BTreeMap::new();
191    for path in paths {
192        let skill = parse_disk_skill(&path)?;
193        if let Some(first) = by_name.get(&skill.name) {
194            return Err(SkillDiscoveryError::DuplicateName {
195                name: skill.name,
196                first: first.path.clone(),
197                second: path,
198            });
199        }
200        by_name.insert(skill.name.clone(), skill);
201    }
202
203    Ok(by_name.into_values().collect())
204}
205
206fn parse_skill(source: &'static str) -> EmbeddedSkill {
207    let (frontmatter, body) = split_frontmatter(source);
208    let frontmatter = parse_frontmatter(frontmatter);
209    EmbeddedSkill {
210        name: frontmatter.name,
211        frontmatter,
212        body,
213        source,
214    }
215}
216
217fn collect_skill_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), SkillDiscoveryError> {
218    let entries = fs::read_dir(dir).map_err(|source| SkillDiscoveryError::Io {
219        path: dir.to_path_buf(),
220        source,
221    })?;
222    for entry in entries {
223        let entry = entry.map_err(|source| SkillDiscoveryError::Io {
224            path: dir.to_path_buf(),
225            source,
226        })?;
227        let path = entry.path();
228        let file_type = entry
229            .file_type()
230            .map_err(|source| SkillDiscoveryError::Io {
231                path: path.clone(),
232                source,
233            })?;
234        if file_type.is_dir() {
235            collect_skill_paths(&path, out)?;
236        } else if file_type.is_file() && entry.file_name() == "SKILL.md" {
237            out.push(path);
238        }
239    }
240    Ok(())
241}
242
243fn parse_disk_skill(path: &Path) -> Result<DiskSkill, SkillDiscoveryError> {
244    let source = fs::read_to_string(path).map_err(|source| SkillDiscoveryError::Io {
245        path: path.to_path_buf(),
246        source,
247    })?;
248    let (frontmatter, body) =
249        split_disk_frontmatter(&source).ok_or_else(|| SkillDiscoveryError::MissingFrontmatter {
250            path: path.to_path_buf(),
251        })?;
252    let frontmatter = parse_disk_frontmatter(path, frontmatter)?;
253    Ok(DiskSkill {
254        name: frontmatter.name.clone(),
255        frontmatter,
256        body: body.to_string(),
257        source,
258        path: path.to_path_buf(),
259    })
260}
261
262fn split_disk_frontmatter(source: &str) -> Option<(&str, &str)> {
263    split_frontmatter_parts(source)
264}
265
266fn parse_disk_frontmatter(
267    path: &Path,
268    frontmatter: &str,
269) -> Result<DiskSkillFrontmatter, SkillDiscoveryError> {
270    let mut name = None;
271    let mut short = None;
272    let mut description = None;
273    let mut when_to_use = None;
274
275    for line in frontmatter.lines() {
276        let Some((key, value)) = line.split_once(':') else {
277            continue;
278        };
279        let value = value.trim().to_string();
280        match key {
281            "name" => name = Some(value),
282            "short" => short = Some(value),
283            "description" => description = Some(value),
284            "when_to_use" => when_to_use = Some(value),
285            _ => {}
286        }
287    }
288
289    Ok(DiskSkillFrontmatter {
290        name: require_disk_field(path, name, "name")?,
291        short: short.unwrap_or_default(),
292        description: require_disk_field(path, description, "description")?,
293        when_to_use,
294    })
295}
296
297fn require_disk_field(
298    path: &Path,
299    value: Option<String>,
300    field: &'static str,
301) -> Result<String, SkillDiscoveryError> {
302    value.ok_or_else(|| SkillDiscoveryError::MissingField {
303        path: path.to_path_buf(),
304        field,
305    })
306}
307
308fn split_frontmatter(source: &'static str) -> (&'static str, &'static str) {
309    let Some((after_open, line_ending)) = split_opening_frontmatter(source) else {
310        panic!("embedded skill source is missing opening frontmatter delimiter");
311    };
312    let Some((frontmatter, body)) = split_closing_frontmatter(after_open, line_ending) else {
313        panic!("embedded skill source is missing closing frontmatter delimiter");
314    };
315    (frontmatter, body)
316}
317
318fn split_frontmatter_parts(source: &str) -> Option<(&str, &str)> {
319    let (after_open, line_ending) = split_opening_frontmatter(source)?;
320    split_closing_frontmatter(after_open, line_ending)
321}
322
/// Strips the opening `---` fence line from the very start of `source`.
///
/// On success returns the text after the fence line together with the line
/// ending (`"\n"` or `"\r\n"`) that terminated it. Returns `None` when
/// `source` does not begin with exactly `---` followed by a line ending.
fn split_opening_frontmatter(source: &str) -> Option<(&str, &str)> {
    let after_dashes = source.strip_prefix("---")?;
    if let Some(after_open) = after_dashes.strip_prefix("\r\n") {
        return Some((after_open, "\r\n"));
    }
    let after_open = after_dashes.strip_prefix('\n')?;
    Some((after_open, "\n"))
}
332
/// Locates the closing `---` fence in the text that follows the opening
/// fence and splits it into `(frontmatter, body)`.
///
/// `line_ending` is the terminator detected on the opening fence (`"\n"` or
/// `"\r\n"`); the closing fence must use the same one. Three layouts are
/// recognized, in this order:
///
/// 1. the fence is the very first line (empty frontmatter);
/// 2. the fence sits on its own line with a line ending on both sides (the
///    first such occurrence wins);
/// 3. the fence is the final line of the text with no trailing line ending
///    (the body is then empty).
///
/// Neither returned slice includes the fence or the line endings that
/// delimit it. Returns `None` when no closing fence is found.
fn split_closing_frontmatter<'a>(
    after_open: &'a str,
    line_ending: &str,
) -> Option<(&'a str, &'a str)> {
    let fence_line = format!("---{line_ending}");
    // Empty frontmatter: the closing fence immediately follows the opener,
    // so there is no preceding line ending to search for.
    if let Some(body) = after_open.strip_prefix(fence_line.as_str()) {
        return Some(("", body));
    }

    let close = format!("{line_ending}{fence_line}");
    if let Some(close_offset) = after_open.find(close.as_str()) {
        let body_start = close_offset + close.len();
        return Some((&after_open[..close_offset], &after_open[body_start..]));
    }

    // A file may end right after the fence without a final newline; accept
    // `<eol>---` at end of input and report an empty body.
    let bare_close = &close[..close.len() - line_ending.len()];
    after_open
        .strip_suffix(bare_close)
        .map(|frontmatter| (frontmatter, ""))
}
344
345fn parse_frontmatter(frontmatter: &'static str) -> SkillFrontmatter {
346    let mut name = None;
347    let mut short = None;
348    let mut description = None;
349    let mut when_to_use = None;
350
351    for line in frontmatter.lines() {
352        let Some((key, value)) = line.split_once(':') else {
353            continue;
354        };
355        let value = value.trim();
356        match key {
357            "name" => name = Some(value),
358            "short" => short = Some(value),
359            "description" => description = Some(value),
360            "when_to_use" => when_to_use = Some(value),
361            _ => {}
362        }
363    }
364
365    SkillFrontmatter {
366        name: name.expect("embedded skill frontmatter is missing `name`"),
367        short: short.expect("embedded skill frontmatter is missing `short`"),
368        description: description.expect("embedded skill frontmatter is missing `description`"),
369        when_to_use,
370    }
371}
372
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use tempfile::TempDir;

    /// The bundled corpus holds exactly the expected skills, in order.
    #[test]
    fn lists_expected_initial_corpus() {
        let expected = [
            "harn-agent",
            "harn-diagnostics",
            "harn-language",
            "harn-orchestration",
            "harn-probe",
            "harn-providers",
            "harn-testing",
            "harn-tracing",
        ];
        let skills = list_embedded_skills();
        let names: Vec<&str> = skills.iter().map(|skill| skill.name).collect();
        assert_eq!(names, expected);
        assert_eq!(skills.len(), SOURCES.len());
    }

    /// Lookup by name returns the matching skill with its body attached.
    #[test]
    fn can_fetch_harn_language_skill() {
        let found = get_embedded_skill("harn-language");
        let skill = found.expect("harn-language skill is embedded");
        assert_eq!(skill.frontmatter.name, "harn-language");
        assert!(skill.body.contains("Harn language"));
    }

    /// Names are unique, mirror the frontmatter, and bodies carry no fence.
    #[test]
    fn skills_have_unique_names_and_body_only_content() {
        let mut seen = BTreeSet::new();
        for skill in list_embedded_skills() {
            assert_eq!(skill.name, skill.frontmatter.name);
            assert!(seen.insert(skill.name), "duplicate skill {}", skill.name);

            let trimmed_is_empty = skill.body.trim().is_empty();
            assert!(!trimmed_is_empty, "{} body is empty", skill.name);

            let starts_with_fence = skill.body.trim_start().starts_with("---");
            assert!(
                !starts_with_fence,
                "{} body includes frontmatter",
                skill.name
            );
        }
    }

    /// The embedded listing is already in ascending name order.
    #[test]
    fn skills_are_sorted_by_name() {
        let mut names = Vec::new();
        for skill in list_embedded_skills() {
            names.push(skill.name);
        }
        let mut sorted = names.clone();
        sorted.sort_unstable();
        assert_eq!(names, sorted);
    }

    /// `source` can be split again and ends with the parsed body.
    #[test]
    fn source_round_trips_to_frontmatter_and_body() {
        for skill in list_embedded_skills() {
            let resplit = split_frontmatter_parts(skill.source);
            assert!(
                resplit.is_some(),
                "{} source missing opening fence",
                skill.name
            );
            assert!(
                skill.source.ends_with(skill.body),
                "{} source must end with the body so dump output is byte-stable",
                skill.name
            );
            let name_line = format!("name: {}\n", skill.name);
            assert!(
                skill.source.contains(&name_line),
                "{} source missing canonical name field",
                skill.name
            );
        }
    }

    /// CRLF line endings are honored on both fences.
    #[test]
    fn frontmatter_split_accepts_crlf_sources() {
        let source = "---\r\nname: crlf\r\n---\r\n# Body\r\n";
        let parts = split_frontmatter_parts(source).expect("CRLF frontmatter");
        assert_eq!(parts.0, "name: crlf");
        assert_eq!(parts.1, "# Body\r\n");
    }

    /// An opening fence without a closing one is not frontmatter.
    #[test]
    fn frontmatter_split_rejects_missing_closing_fence() {
        let parts = split_frontmatter_parts("---\nname: missing\n# Body\n");
        assert!(parts.is_none());
    }

    /// The total embedded text stays under the size budget.
    #[test]
    fn embedded_corpus_stays_within_binary_budget() {
        let bytes = SOURCES
            .iter()
            .fold(0usize, |total, source| total + source.len());
        assert!(
            bytes <= 200 * 1024,
            "embedded corpus is {bytes} bytes, expected <= 200 KiB"
        );
    }

    /// Every body mentions its topic's key terms and contains no stub text.
    #[test]
    fn skill_bodies_are_focused_and_not_placeholders() {
        let expectations = [
            ("harn-agent", ["agent_loop", "session id", "approval"]),
            ("harn-diagnostics", ["diagnostic", "repair", "conformance"]),
            ("harn-language", ["quickref", "type", "conformance"]),
            ("harn-orchestration", ["agent_loop", "workflow", "host"]),
            ("harn-probe", ["probe", "fact", "evidence"]),
            ("harn-providers", ["llm_call", "provider", "schema"]),
            (
                "harn-testing",
                ["conformance", "deterministic", "mock_time"],
            ),
            ("harn-tracing", ["replay", "receipts", "transcript"]),
        ];

        for (name, terms) in expectations {
            let skill = get_embedded_skill(name).expect("expected embedded skill");
            let lowered = skill.body.to_ascii_lowercase();
            let has_stub_wording =
                lowered.contains("embedded stub") || lowered.contains("placeholder");
            assert!(
                !has_stub_wording,
                "{name} should contain real guidance, not stub wording"
            );
            for term in terms {
                assert!(
                    lowered.contains(term),
                    "{name} body should mention focused term `{term}`"
                );
            }
        }
    }

    /// Body length stays within the 80..=300 line window.
    #[test]
    fn skill_bodies_match_split_skill_contract() {
        for skill in list_embedded_skills() {
            let lines = skill.body.lines().count();
            assert!(
                lines >= 80,
                "{} body is {lines} lines, expected at least 80",
                skill.name
            );
            assert!(
                lines <= 300,
                "{} body is {lines} lines, expected at most 300",
                skill.name
            );
        }
    }

    /// Every `[[name]]` cross-link points at a skill that is embedded.
    #[test]
    fn skill_cross_links_resolve_to_embedded_skills() {
        let skills = list_embedded_skills();
        let known: BTreeSet<&str> = skills.iter().map(|skill| skill.name).collect();
        for skill in skills {
            for reference in bracketed_skill_references(skill.body) {
                assert!(
                    known.contains(reference),
                    "{} links to unknown embedded skill [[{}]]",
                    skill.name,
                    reference
                );
            }
        }
    }

    /// The diagnostics skill names every diagnostic code category.
    #[test]
    fn diagnostics_skill_mentions_all_code_categories() {
        let categories = [
            "TYP", "PAR", "NAM", "CAP", "LLM", "ORC", "STD", "PRM", "MOD", "LNT", "FMT", "IMP",
            "OWN", "RCV", "MAT",
        ];
        let skill = get_embedded_skill("harn-diagnostics").expect("diagnostics skill");
        for category in categories {
            let needle = format!("`{category}`");
            assert!(
                skill.body.contains(&needle),
                "harn-diagnostics should mention diagnostic category `{category}`"
            );
        }
    }

    /// Nested `SKILL.md` files are found and returned in name order.
    #[test]
    fn disk_discovery_finds_recursive_skill_files_sorted_by_name() {
        let temp = TempDir::new().expect("temp dir");
        let zeta_path = temp.path().join("zeta").join("SKILL.md");
        let alpha_path = temp.path().join("nested").join("alpha").join("SKILL.md");
        write_skill(&zeta_path, "zeta-skill", "Zeta");
        write_skill(&alpha_path, "alpha-skill", "Alpha");

        let skills = list_disk_skills(temp.path()).expect("discover disk skills");
        let names: Vec<&str> = skills.iter().map(|skill| skill.name.as_str()).collect();
        assert_eq!(names, ["alpha-skill", "zeta-skill"]);

        let alpha = &skills[0];
        assert_eq!(alpha.frontmatter.description, "Alpha description");
        assert!(alpha.body.contains("Alpha body"));
    }

    /// A root that does not exist produces an empty result, not an error.
    #[test]
    fn disk_discovery_treats_missing_root_as_empty() {
        let temp = TempDir::new().expect("temp dir");
        let missing_root = temp.path().join("missing");
        let skills = list_disk_skills(missing_root).expect("discover disk skills");
        assert!(skills.is_empty());
    }

    /// Two files declaring the same name abort discovery with an error.
    #[test]
    fn disk_discovery_rejects_duplicate_skill_names() {
        let temp = TempDir::new().expect("temp dir");
        for (dir, label) in [("one", "One"), ("two", "Two")] {
            let path = temp.path().join(dir).join("SKILL.md");
            write_skill(&path, "same-skill", label);
        }

        let error = list_disk_skills(temp.path()).expect_err("duplicate name should fail");
        let message = error.to_string();
        assert!(
            message.contains("duplicate skill `same-skill`"),
            "unexpected error: {error}"
        );
    }

    /// Writes a minimal, well-formed `SKILL.md` at `path`, creating parent
    /// directories as needed.
    fn write_skill(path: &Path, name: &str, label: &str) {
        let parent = path.parent().expect("skill parent");
        fs::create_dir_all(parent).expect("create skill parent");
        let contents = format!(
            "---\nname: {name}\nshort: {label} short\ndescription: {label} description\n---\n# {label}\n\n{label} body\n"
        );
        fs::write(path, contents).expect("write SKILL.md");
    }

    /// Collects the text inside every `[[...]]` pair in `body`, in order of
    /// appearance. Scanning stops at an opener that has no closing `]]`.
    fn bracketed_skill_references(body: &str) -> Vec<&str> {
        let mut found = Vec::new();
        let mut cursor = body;
        while let Some((_, after_open)) = cursor.split_once("[[") {
            let Some((reference, after_close)) = after_open.split_once("]]") else {
                break;
            };
            found.push(reference);
            cursor = after_close;
        }
        found
    }
}