Skip to main content

harn_skills/
lib.rs

1//! Embedded Harn skill corpus.
2//!
3//! This crate exposes the bundled corpus as metadata plus `SKILL.md`
4//! bodies. CLI commands that enumerate, dump, or install these skills
5//! are layered above this foundation.
6
7use std::collections::BTreeMap;
8use std::env;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13use std::sync::OnceLock;
14
/// Name of the environment variable that points skill discovery at a
/// directory on disk instead of the corpus compiled into the binary.
pub const HARN_SKILLS_DIR_ENV: &str = "HARN_SKILLS_DIR";
17
/// Metadata read from the frontmatter block of a bundled `SKILL.md`.
///
/// Every field is a `&'static str`, so the whole struct is `Copy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SkillFrontmatter {
    /// Value of the `name` key.
    pub name: &'static str,
    /// Value of the `short` key.
    pub short: &'static str,
    /// Value of the `description` key.
    pub description: &'static str,
    /// Value of the `when_to_use` key, when the frontmatter has one.
    pub when_to_use: Option<&'static str>,
}
26
27/// A single skill embedded into the Harn build.
28#[derive(Debug, Clone, Copy, PartialEq, Eq)]
29pub struct EmbeddedSkill {
30    pub name: &'static str,
31    pub frontmatter: SkillFrontmatter,
32    pub body: &'static str,
33    /// The full original SKILL.md source — frontmatter delimiter,
34    /// frontmatter block, blank line, and body — exactly as embedded.
35    /// Use this when round-tripping a skill back to disk so the dumped
36    /// copy is byte-identical to the binary's canonical record.
37    pub source: &'static str,
38}
39
/// Owned counterpart of the bundled frontmatter, filled from a `SKILL.md`
/// that was read from disk.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DiskSkillFrontmatter {
    /// Value of the `name` key.
    pub name: String,
    /// Value of the `short` key.
    pub short: String,
    /// Value of the `description` key.
    pub description: String,
    /// Value of the `when_to_use` key, when the frontmatter has one.
    pub when_to_use: Option<String>,
}
48
49/// A single skill discovered recursively from `HARN_SKILLS_DIR`.
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct DiskSkill {
52    pub name: String,
53    pub frontmatter: DiskSkillFrontmatter,
54    pub body: String,
55    pub source: String,
56    pub path: PathBuf,
57}
58
59/// The active canonical corpus used by `harn skills list/get`.
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub enum SkillCorpus {
62    Embedded(&'static [EmbeddedSkill]),
63    Disk(Vec<DiskSkill>),
64}
65
66impl SkillCorpus {
67    pub fn is_disk(&self) -> bool {
68        matches!(self, Self::Disk(_))
69    }
70
71    pub fn len(&self) -> usize {
72        match self {
73            Self::Embedded(skills) => skills.len(),
74            Self::Disk(skills) => skills.len(),
75        }
76    }
77
78    pub fn is_empty(&self) -> bool {
79        self.len() == 0
80    }
81}
82
/// Failure modes of on-disk skill discovery.
#[derive(Debug)]
pub enum SkillDiscoveryError {
    /// A filesystem operation on `path` failed with `source`.
    Io {
        path: PathBuf,
        source: io::Error,
    },
    /// The file at `path` has no delimited frontmatter block.
    MissingFrontmatter {
        path: PathBuf,
    },
    /// The frontmatter of the file at `path` lacks the required `field`.
    MissingField {
        path: PathBuf,
        field: &'static str,
    },
    /// The files `first` and `second` both declare the skill `name`.
    DuplicateName {
        name: String,
        first: PathBuf,
        second: PathBuf,
    },
}

impl fmt::Display for SkillDiscoveryError {
    /// Render a one-line message that leads with the offending path (or,
    /// for duplicates, names both paths).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Io { path, source } => {
                let location = path.display();
                write!(f, "{}: {source}", location)
            }
            Self::MissingFrontmatter { path } => {
                let location = path.display();
                write!(f, "{}: missing SKILL.md frontmatter", location)
            }
            Self::MissingField { path, field } => {
                let location = path.display();
                write!(f, "{}: missing `{field}` frontmatter field", location)
            }
            Self::DuplicateName {
                name,
                first,
                second,
            } => {
                let (earlier, later) = (first.display(), second.display());
                write!(f, "duplicate skill `{name}` in {} and {}", earlier, later)
            }
        }
    }
}

// The I/O cause is already part of the `Display` text, so no separate
// `source()` is reported.
impl std::error::Error for SkillDiscoveryError {}
129
/// Raw `SKILL.md` text of every bundled skill, embedded at compile time.
///
/// The order of this list is the order in which embedded skills are
/// returned; the test suite expects it to stay sorted by skill name.
const SOURCES: &[&str] = &[
    include_str!("corpus/harn-agent/SKILL.md"),
    include_str!("corpus/harn-diagnostics/SKILL.md"),
    include_str!("corpus/harn-language/SKILL.md"),
    include_str!("corpus/harn-orchestration/SKILL.md"),
    include_str!("corpus/harn-probe/SKILL.md"),
    include_str!("corpus/harn-providers/SKILL.md"),
    include_str!("corpus/harn-testing/SKILL.md"),
    include_str!("corpus/harn-tracing/SKILL.md"),
    include_str!("corpus/release-harn/SKILL.md"),
];
141
142static EMBEDDED_SKILLS: OnceLock<Box<[EmbeddedSkill]>> = OnceLock::new();
143
144/// Return every skill bundled into this build.
145pub fn list_embedded_skills() -> &'static [EmbeddedSkill] {
146    EMBEDDED_SKILLS
147        .get_or_init(|| SOURCES.iter().map(|source| parse_skill(source)).collect())
148        .as_ref()
149}
150
151/// Return one bundled skill by canonical skill name.
152pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
153    list_embedded_skills()
154        .iter()
155        .find(|skill| skill.name == name)
156}
157
158/// Return the active canonical corpus. `HARN_SKILLS_DIR` wins only
159/// when it contains at least one recursively discovered `SKILL.md`;
160/// otherwise callers fall back to the embedded corpus.
161pub fn resolve_skill_corpus_from_env() -> Result<SkillCorpus, SkillDiscoveryError> {
162    let Ok(dir) = env::var(HARN_SKILLS_DIR_ENV) else {
163        return Ok(SkillCorpus::Embedded(list_embedded_skills()));
164    };
165    if dir.trim().is_empty() {
166        return Ok(SkillCorpus::Embedded(list_embedded_skills()));
167    }
168
169    let skills = list_disk_skills(dir)?;
170    if skills.is_empty() {
171        Ok(SkillCorpus::Embedded(list_embedded_skills()))
172    } else {
173        Ok(SkillCorpus::Disk(skills))
174    }
175}
176
177/// Recursively discover `SKILL.md` files under `root`.
178///
179/// A missing root is treated as an empty disk corpus so
180/// `HARN_SKILLS_DIR` can fall back cleanly to embedded skills.
181pub fn list_disk_skills(root: impl AsRef<Path>) -> Result<Vec<DiskSkill>, SkillDiscoveryError> {
182    let root = root.as_ref();
183    if !root.exists() {
184        return Ok(Vec::new());
185    }
186
187    let mut paths = Vec::new();
188    collect_skill_paths(root, &mut paths)?;
189    paths.sort();
190
191    let mut by_name: BTreeMap<String, DiskSkill> = BTreeMap::new();
192    for path in paths {
193        let skill = parse_disk_skill(&path)?;
194        if let Some(first) = by_name.get(&skill.name) {
195            return Err(SkillDiscoveryError::DuplicateName {
196                name: skill.name,
197                first: first.path.clone(),
198                second: path,
199            });
200        }
201        by_name.insert(skill.name.clone(), skill);
202    }
203
204    Ok(by_name.into_values().collect())
205}
206
207fn parse_skill(source: &'static str) -> EmbeddedSkill {
208    let (frontmatter, body) = split_frontmatter(source);
209    let frontmatter = parse_frontmatter(frontmatter);
210    EmbeddedSkill {
211        name: frontmatter.name,
212        frontmatter,
213        body,
214        source,
215    }
216}
217
218fn collect_skill_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), SkillDiscoveryError> {
219    let entries = fs::read_dir(dir).map_err(|source| SkillDiscoveryError::Io {
220        path: dir.to_path_buf(),
221        source,
222    })?;
223    for entry in entries {
224        let entry = entry.map_err(|source| SkillDiscoveryError::Io {
225            path: dir.to_path_buf(),
226            source,
227        })?;
228        let path = entry.path();
229        let file_type = entry
230            .file_type()
231            .map_err(|source| SkillDiscoveryError::Io {
232                path: path.clone(),
233                source,
234            })?;
235        if file_type.is_dir() {
236            collect_skill_paths(&path, out)?;
237        } else if file_type.is_file() && entry.file_name() == "SKILL.md" {
238            out.push(path);
239        }
240    }
241    Ok(())
242}
243
244fn parse_disk_skill(path: &Path) -> Result<DiskSkill, SkillDiscoveryError> {
245    let source = fs::read_to_string(path).map_err(|source| SkillDiscoveryError::Io {
246        path: path.to_path_buf(),
247        source,
248    })?;
249    let (frontmatter, body) =
250        split_disk_frontmatter(&source).ok_or_else(|| SkillDiscoveryError::MissingFrontmatter {
251            path: path.to_path_buf(),
252        })?;
253    let frontmatter = parse_disk_frontmatter(path, frontmatter)?;
254    Ok(DiskSkill {
255        name: frontmatter.name.clone(),
256        frontmatter,
257        body: body.to_string(),
258        source,
259        path: path.to_path_buf(),
260    })
261}
262
/// Split the text of an on-disk `SKILL.md` into `(frontmatter, body)`.
///
/// Delegates to `split_frontmatter_parts`; `None` means the frontmatter
/// delimiters were not found.
fn split_disk_frontmatter(source: &str) -> Option<(&str, &str)> {
    split_frontmatter_parts(source)
}
266
267fn parse_disk_frontmatter(
268    path: &Path,
269    frontmatter: &str,
270) -> Result<DiskSkillFrontmatter, SkillDiscoveryError> {
271    let mut name = None;
272    let mut short = None;
273    let mut description = None;
274    let mut when_to_use = None;
275
276    for line in frontmatter.lines() {
277        let Some((key, value)) = line.split_once(':') else {
278            continue;
279        };
280        let value = value.trim().to_string();
281        match key {
282            "name" => name = Some(value),
283            "short" => short = Some(value),
284            "description" => description = Some(value),
285            "when_to_use" => when_to_use = Some(value),
286            _ => {}
287        }
288    }
289
290    Ok(DiskSkillFrontmatter {
291        name: require_disk_field(path, name, "name")?,
292        short: short.unwrap_or_default(),
293        description: require_disk_field(path, description, "description")?,
294        when_to_use,
295    })
296}
297
298fn require_disk_field(
299    path: &Path,
300    value: Option<String>,
301    field: &'static str,
302) -> Result<String, SkillDiscoveryError> {
303    value.ok_or_else(|| SkillDiscoveryError::MissingField {
304        path: path.to_path_buf(),
305        field,
306    })
307}
308
309fn split_frontmatter(source: &'static str) -> (&'static str, &'static str) {
310    let Some((after_open, line_ending)) = split_opening_frontmatter(source) else {
311        panic!("embedded skill source is missing opening frontmatter delimiter");
312    };
313    let Some((frontmatter, body)) = split_closing_frontmatter(after_open, line_ending) else {
314        panic!("embedded skill source is missing closing frontmatter delimiter");
315    };
316    (frontmatter, body)
317}
318
319fn split_frontmatter_parts(source: &str) -> Option<(&str, &str)> {
320    let (after_open, line_ending) = split_opening_frontmatter(source)?;
321    split_closing_frontmatter(after_open, line_ending)
322}
323
/// Strip the opening `---` line from `source`.
///
/// Returns the text after that line together with the line ending that
/// terminated it (`"\n"` or `"\r\n"`), or `None` when `source` does not
/// begin with a `---` line.
fn split_opening_frontmatter(source: &str) -> Option<(&str, &str)> {
    let after_fence = source.strip_prefix("---")?;
    ["\n", "\r\n"].iter().find_map(|&line_ending| {
        after_fence
            .strip_prefix(line_ending)
            .map(|after_open| (after_open, line_ending))
    })
}
333
/// Locate the closing `---` line and split `after_open` around it.
///
/// `after_open` is the text that follows the opening delimiter line and
/// `line_ending` is the line ending that terminated that line. Returns
/// `(frontmatter, body)` with the delimiter line and the line endings
/// around it removed, or `None` when no closing delimiter exists.
///
/// Besides the usual layout (a `---` line between frontmatter and body),
/// two edge cases are accepted when the usual search finds nothing:
///
/// * an empty frontmatter block, where the closing delimiter immediately
///   follows the opening one;
/// * a closing delimiter that is the last line of the input and has no
///   trailing line ending, which yields an empty body.
fn split_closing_frontmatter<'a>(
    after_open: &'a str,
    line_ending: &str,
) -> Option<(&'a str, &'a str)> {
    // Usual case: the delimiter sits on its own line after the frontmatter.
    let close = format!("{line_ending}---{line_ending}");
    if let Some(close_offset) = after_open.find(&close) {
        return Some((
            &after_open[..close_offset],
            &after_open[close_offset + close.len()..],
        ));
    }

    // Empty frontmatter: nothing precedes the closing delimiter, so there
    // is no leading line ending for the search above to match.
    let fence_line = &close[line_ending.len()..];
    if let Some(body) = after_open.strip_prefix(fence_line) {
        return Some(("", body));
    }
    if after_open == "---" {
        return Some(("", ""));
    }

    // The delimiter is the final line and the input lacks a trailing line
    // ending: everything before it is frontmatter and the body is empty.
    let fence_at_end = &close[..close.len() - line_ending.len()];
    after_open
        .strip_suffix(fence_at_end)
        .map(|frontmatter| (frontmatter, ""))
}
345
346fn parse_frontmatter(frontmatter: &'static str) -> SkillFrontmatter {
347    let mut name = None;
348    let mut short = None;
349    let mut description = None;
350    let mut when_to_use = None;
351
352    for line in frontmatter.lines() {
353        let Some((key, value)) = line.split_once(':') else {
354            continue;
355        };
356        let value = value.trim();
357        match key {
358            "name" => name = Some(value),
359            "short" => short = Some(value),
360            "description" => description = Some(value),
361            "when_to_use" => when_to_use = Some(value),
362            _ => {}
363        }
364    }
365
366    SkillFrontmatter {
367        name: name.expect("embedded skill frontmatter is missing `name`"),
368        short: short.expect("embedded skill frontmatter is missing `short`"),
369        description: description.expect("embedded skill frontmatter is missing `description`"),
370        when_to_use,
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use tempfile::TempDir;

    /// Upper bound on the combined size of all embedded sources.
    const CORPUS_BUDGET_BYTES: usize = 200 * 1024;

    /// The embedded corpus contains exactly the expected skills, in order.
    #[test]
    fn lists_expected_initial_corpus() {
        let expected = [
            "harn-agent",
            "harn-diagnostics",
            "harn-language",
            "harn-orchestration",
            "harn-probe",
            "harn-providers",
            "harn-testing",
            "harn-tracing",
            "release-harn",
        ];

        let skills = list_embedded_skills();
        let names: Vec<&str> = skills.iter().map(|skill| skill.name).collect();
        assert_eq!(names, expected);
        assert_eq!(skills.len(), SOURCES.len());
    }

    /// Lookup by name returns the matching skill with its parsed body.
    #[test]
    fn can_fetch_harn_language_skill() {
        let skill = get_embedded_skill("harn-language").expect("harn-language skill is embedded");
        assert_eq!(skill.frontmatter.name, "harn-language");
        assert!(skill.body.contains("Harn language"));
    }

    /// Names are unique and bodies hold content only, never frontmatter.
    #[test]
    fn skills_have_unique_names_and_body_only_content() {
        let mut seen = BTreeSet::new();
        for skill in list_embedded_skills() {
            let name = skill.name;
            let body = skill.body;

            assert_eq!(name, skill.frontmatter.name);
            assert!(seen.insert(name), "duplicate skill {}", name);
            assert!(!body.trim().is_empty(), "{} body is empty", name);
            assert!(
                !body.trim_start().starts_with("---"),
                "{} body includes frontmatter",
                name
            );
        }
    }

    /// The embedded listing is already in name order.
    #[test]
    fn skills_are_sorted_by_name() {
        let names: Vec<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        let sorted = {
            let mut copy = names.clone();
            copy.sort_unstable();
            copy
        };
        assert_eq!(names, sorted);
    }

    /// `source` is the full file: it splits cleanly and ends with `body`.
    #[test]
    fn source_round_trips_to_frontmatter_and_body() {
        for skill in list_embedded_skills() {
            let name_line = format!("name: {}\n", skill.name);

            assert!(
                split_frontmatter_parts(skill.source).is_some(),
                "{} source missing opening fence",
                skill.name
            );
            assert!(
                skill.source.ends_with(skill.body),
                "{} source must end with the body so dump output is byte-stable",
                skill.name
            );
            assert!(
                skill.source.contains(&name_line),
                "{} source missing canonical name field",
                skill.name
            );
        }
    }

    /// CRLF line endings are recognised for both delimiters.
    #[test]
    fn frontmatter_split_accepts_crlf_sources() {
        let source = "---\r\nname: crlf\r\n---\r\n# Body\r\n";
        let (frontmatter, body) = split_frontmatter_parts(source).expect("CRLF frontmatter");
        assert_eq!(frontmatter, "name: crlf");
        assert_eq!(body, "# Body\r\n");
    }

    /// Without a closing delimiter there is no frontmatter at all.
    #[test]
    fn frontmatter_split_rejects_missing_closing_fence() {
        let unterminated = "---\nname: missing\n# Body\n";
        assert!(split_frontmatter_parts(unterminated).is_none());
    }

    /// Guard against the embedded corpus bloating the binary.
    #[test]
    fn embedded_corpus_stays_within_binary_budget() {
        let bytes = SOURCES
            .iter()
            .fold(0usize, |total, source| total + source.len());
        assert!(
            bytes <= CORPUS_BUDGET_BYTES,
            "embedded corpus is {bytes} bytes, expected <= 200 KiB"
        );
    }

    /// Every skill mentions its focus terms and contains no stub wording.
    #[test]
    fn skill_bodies_are_focused_and_not_placeholders() {
        let expectations = [
            ("harn-agent", ["agent_loop", "session id", "approval"]),
            ("harn-diagnostics", ["diagnostic", "repair", "conformance"]),
            ("harn-language", ["quickref", "type", "conformance"]),
            ("harn-orchestration", ["agent_loop", "workflow", "host"]),
            ("harn-probe", ["probe", "fact", "evidence"]),
            ("harn-providers", ["llm_call", "provider", "schema"]),
            (
                "harn-testing",
                ["conformance", "deterministic", "mock_time"],
            ),
            ("harn-tracing", ["replay", "receipts", "transcript"]),
            ("release-harn", ["release_ship", "merge queue", "tag"]),
        ];

        for (name, terms) in expectations {
            let skill = get_embedded_skill(name).expect("expected embedded skill");
            // Terms are lower-case, so compare against a lower-cased body.
            let body = skill.body.to_ascii_lowercase();

            let has_stub_wording = body.contains("embedded stub") || body.contains("placeholder");
            assert!(
                !has_stub_wording,
                "{name} should contain real guidance, not stub wording"
            );
            for term in terms {
                assert!(
                    body.contains(term),
                    "{name} body should mention focused term `{term}`"
                );
            }
        }
    }

    /// Bodies stay within the agreed 80..=300 line window.
    #[test]
    fn skill_bodies_match_split_skill_contract() {
        for skill in list_embedded_skills() {
            let lines = skill.body.lines().count();
            assert!(
                lines >= 80,
                "{} body is {lines} lines, expected at least 80",
                skill.name
            );
            assert!(
                lines <= 300,
                "{} body is {lines} lines, expected at most 300",
                skill.name
            );
        }
    }

    /// Every `[[skill]]` cross-link points at a skill that is embedded.
    #[test]
    fn skill_cross_links_resolve_to_embedded_skills() {
        let skills = list_embedded_skills();
        let known: BTreeSet<&str> = skills.iter().map(|skill| skill.name).collect();

        for skill in skills {
            for reference in bracketed_skill_references(skill.body) {
                assert!(
                    known.contains(reference),
                    "{} links to unknown embedded skill [[{}]]",
                    skill.name,
                    reference
                );
            }
        }
    }

    /// The diagnostics skill names every diagnostic code category.
    #[test]
    fn diagnostics_skill_mentions_all_code_categories() {
        let categories = [
            "TYP", "PAR", "NAM", "CAP", "LLM", "ORC", "STD", "PRM", "MOD", "LNT", "FMT", "IMP",
            "OWN", "RCV", "MAT",
        ];

        let skill = get_embedded_skill("harn-diagnostics").expect("diagnostics skill");
        for category in categories {
            let quoted = format!("`{category}`");
            assert!(
                skill.body.contains(&quoted),
                "harn-diagnostics should mention diagnostic category `{category}`"
            );
        }
    }

    /// Discovery descends into nested directories and orders by skill name.
    #[test]
    fn disk_discovery_finds_recursive_skill_files_sorted_by_name() {
        let temp = TempDir::new().expect("temp dir");
        let root = temp.path();
        write_skill(&root.join("zeta").join("SKILL.md"), "zeta-skill", "Zeta");
        write_skill(
            &root.join("nested").join("alpha").join("SKILL.md"),
            "alpha-skill",
            "Alpha",
        );

        let skills = list_disk_skills(root).expect("discover disk skills");
        let names: Vec<&str> = skills.iter().map(|skill| skill.name.as_str()).collect();
        assert_eq!(names, ["alpha-skill", "zeta-skill"]);

        let alpha = &skills[0];
        assert_eq!(alpha.frontmatter.description, "Alpha description");
        assert!(alpha.body.contains("Alpha body"));
    }

    /// A root that does not exist is an empty corpus, not an error.
    #[test]
    fn disk_discovery_treats_missing_root_as_empty() {
        let temp = TempDir::new().expect("temp dir");
        let absent_root = temp.path().join("missing");
        let skills = list_disk_skills(absent_root).expect("discover disk skills");
        assert!(skills.is_empty());
    }

    /// Two files declaring the same name abort discovery.
    #[test]
    fn disk_discovery_rejects_duplicate_skill_names() {
        let temp = TempDir::new().expect("temp dir");
        let root = temp.path();
        write_skill(&root.join("one").join("SKILL.md"), "same-skill", "One");
        write_skill(&root.join("two").join("SKILL.md"), "same-skill", "Two");

        let error = list_disk_skills(root).expect_err("duplicate name should fail");
        let message = error.to_string();
        assert!(
            message.contains("duplicate skill `same-skill`"),
            "unexpected error: {error}"
        );
    }

    /// Write a minimal, well-formed `SKILL.md` at `path`, creating parent
    /// directories as needed. `label` seeds the short, description,
    /// heading, and body text.
    fn write_skill(path: &Path, name: &str, label: &str) {
        let parent = path.parent().expect("skill parent");
        fs::create_dir_all(parent).expect("create skill parent");

        let contents = format!(
            "---\nname: {name}\nshort: {label} short\ndescription: {label} description\n---\n# {label}\n\n{label} body\n"
        );
        fs::write(path, contents).expect("write SKILL.md");
    }

    /// Collect the text inside every `[[...]]` pair in `body`, in order.
    /// Scanning stops at an opening `[[` that has no closing `]]`.
    fn bracketed_skill_references(body: &str) -> Vec<&str> {
        let mut references = Vec::new();
        let mut rest = body;
        while let Some((_, after_open)) = rest.split_once("[[") {
            let Some((reference, remainder)) = after_open.split_once("]]") else {
                break;
            };
            references.push(reference);
            rest = remainder;
        }
        references
    }
}