Skip to main content

harn_skills/
lib.rs

1//! Embedded Harn skill corpus.
2//!
3//! This crate exposes the bundled corpus as metadata plus `SKILL.md`
4//! bodies. CLI commands that enumerate, dump, or install these skills
5//! are layered above this foundation.
6
7use std::collections::BTreeMap;
8use std::env;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13use std::sync::OnceLock;
14
/// Name of the environment variable that overrides canonical Harn skill
/// discovery.
///
/// [`resolve_skill_corpus_from_env`] reads this variable and, when it holds a
/// non-blank value, hands that value to [`list_disk_skills`] as the root
/// directory to scan.
pub const HARN_SKILLS_DIR_ENV: &str = "HARN_SKILLS_DIR";
17
/// Frontmatter metadata parsed out of a bundled skill's `SKILL.md`.
///
/// All fields borrow `'static` text, so the struct is cheap to copy.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct SkillFrontmatter {
    /// Value of the `name` frontmatter key.
    pub name: &'static str,
    /// Value of the `short` frontmatter key.
    pub short: &'static str,
    /// Value of the `description` frontmatter key.
    pub description: &'static str,
    /// Value of the `when_to_use` frontmatter key, when the key is present.
    pub when_to_use: Option<&'static str>,
}
26
27/// A single skill embedded into the Harn build.
28#[derive(Debug, Clone, Copy, PartialEq, Eq)]
29pub struct EmbeddedSkill {
30    pub name: &'static str,
31    pub frontmatter: SkillFrontmatter,
32    pub body: &'static str,
33    /// The full original SKILL.md source — frontmatter delimiter,
34    /// frontmatter block, blank line, and body — exactly as embedded.
35    /// Use this when round-tripping a skill back to disk so the dumped
36    /// copy is byte-identical to the binary's canonical record.
37    pub source: &'static str,
38}
39
/// Owned counterpart of the frontmatter fields, read from a `SKILL.md` on
/// disk.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DiskSkillFrontmatter {
    /// Value of the `name` frontmatter key (required when loading).
    pub name: String,
    /// Value of the `short` frontmatter key; the loader substitutes an empty
    /// string when the key is absent.
    pub short: String,
    /// Value of the `description` frontmatter key (required when loading).
    pub description: String,
    /// Value of the `when_to_use` frontmatter key, when the key is present.
    pub when_to_use: Option<String>,
}
48
49/// A single skill discovered recursively from `HARN_SKILLS_DIR`.
50#[derive(Debug, Clone, PartialEq, Eq)]
51pub struct DiskSkill {
52    pub name: String,
53    pub frontmatter: DiskSkillFrontmatter,
54    pub body: String,
55    pub source: String,
56    pub path: PathBuf,
57}
58
59/// The active canonical corpus used by `harn skills list/get`.
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub enum SkillCorpus {
62    Embedded(&'static [EmbeddedSkill]),
63    Disk(Vec<DiskSkill>),
64}
65
66impl SkillCorpus {
67    pub fn is_disk(&self) -> bool {
68        matches!(self, Self::Disk(_))
69    }
70
71    pub fn len(&self) -> usize {
72        match self {
73            Self::Embedded(skills) => skills.len(),
74            Self::Disk(skills) => skills.len(),
75        }
76    }
77
78    pub fn is_empty(&self) -> bool {
79        self.len() == 0
80    }
81}
82
/// Failure raised while discovering or parsing `SKILL.md` files on disk.
#[derive(Debug)]
pub enum SkillDiscoveryError {
    /// A filesystem operation on `path` failed with `source`.
    Io { path: PathBuf, source: io::Error },
    /// The file at `path` has no recognisable frontmatter block.
    MissingFrontmatter { path: PathBuf },
    /// The frontmatter of the file at `path` lacks the required `field`.
    MissingField { path: PathBuf, field: &'static str },
    /// Two files declare the same skill `name`; `first` was seen before
    /// `second` during discovery.
    DuplicateName {
        name: String,
        first: PathBuf,
        second: PathBuf,
    },
}
103
104impl fmt::Display for SkillDiscoveryError {
105    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106        match self {
107            Self::Io { path, source } => write!(f, "{}: {source}", path.display()),
108            Self::MissingFrontmatter { path } => {
109                write!(f, "{}: missing SKILL.md frontmatter", path.display())
110            }
111            Self::MissingField { path, field } => {
112                write!(f, "{}: missing `{field}` frontmatter field", path.display())
113            }
114            Self::DuplicateName {
115                name,
116                first,
117                second,
118            } => write!(
119                f,
120                "duplicate skill `{name}` in {} and {}",
121                first.display(),
122                second.display()
123            ),
124        }
125    }
126}
127
// `Error::source` keeps its default implementation: the `Io` variant's
// underlying `io::Error` is already rendered inline by the `Display` impl.
impl std::error::Error for SkillDiscoveryError {}
129
// Raw `SKILL.md` text of every bundled skill, pulled in at compile time.
//
// `list_embedded_skills` parses these in the order listed here, and the
// `skills_are_sorted_by_name` test requires the resulting names to be sorted,
// so keep new entries in alphabetical order (assuming each file's `name`
// field matches its directory name — verify when adding a skill).
const SOURCES: &[&str] = &[
    include_str!("corpus/harn-agent/SKILL.md"),
    include_str!("corpus/harn-diagnostics/SKILL.md"),
    include_str!("corpus/harn-language/SKILL.md"),
    include_str!("corpus/harn-orchestration/SKILL.md"),
    include_str!("corpus/harn-providers/SKILL.md"),
    include_str!("corpus/harn-testing/SKILL.md"),
    include_str!("corpus/harn-tracing/SKILL.md"),
];
139
// Cache of the parsed embedded corpus; filled by the first call to
// `list_embedded_skills` and reused by every later call.
static EMBEDDED_SKILLS: OnceLock<Box<[EmbeddedSkill]>> = OnceLock::new();
141
142/// Return every skill bundled into this build.
143pub fn list_embedded_skills() -> &'static [EmbeddedSkill] {
144    EMBEDDED_SKILLS
145        .get_or_init(|| SOURCES.iter().map(|source| parse_skill(source)).collect())
146        .as_ref()
147}
148
149/// Return one bundled skill by canonical skill name.
150pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
151    list_embedded_skills()
152        .iter()
153        .find(|skill| skill.name == name)
154}
155
156/// Return the active canonical corpus. `HARN_SKILLS_DIR` wins only
157/// when it contains at least one recursively discovered `SKILL.md`;
158/// otherwise callers fall back to the embedded corpus.
159pub fn resolve_skill_corpus_from_env() -> Result<SkillCorpus, SkillDiscoveryError> {
160    let Ok(dir) = env::var(HARN_SKILLS_DIR_ENV) else {
161        return Ok(SkillCorpus::Embedded(list_embedded_skills()));
162    };
163    if dir.trim().is_empty() {
164        return Ok(SkillCorpus::Embedded(list_embedded_skills()));
165    }
166
167    let skills = list_disk_skills(dir)?;
168    if skills.is_empty() {
169        Ok(SkillCorpus::Embedded(list_embedded_skills()))
170    } else {
171        Ok(SkillCorpus::Disk(skills))
172    }
173}
174
175/// Recursively discover `SKILL.md` files under `root`.
176///
177/// A missing root is treated as an empty disk corpus so
178/// `HARN_SKILLS_DIR` can fall back cleanly to embedded skills.
179pub fn list_disk_skills(root: impl AsRef<Path>) -> Result<Vec<DiskSkill>, SkillDiscoveryError> {
180    let root = root.as_ref();
181    if !root.exists() {
182        return Ok(Vec::new());
183    }
184
185    let mut paths = Vec::new();
186    collect_skill_paths(root, &mut paths)?;
187    paths.sort();
188
189    let mut by_name: BTreeMap<String, DiskSkill> = BTreeMap::new();
190    for path in paths {
191        let skill = parse_disk_skill(&path)?;
192        if let Some(first) = by_name.get(&skill.name) {
193            return Err(SkillDiscoveryError::DuplicateName {
194                name: skill.name,
195                first: first.path.clone(),
196                second: path,
197            });
198        }
199        by_name.insert(skill.name.clone(), skill);
200    }
201
202    Ok(by_name.into_values().collect())
203}
204
205fn parse_skill(source: &'static str) -> EmbeddedSkill {
206    let (frontmatter, body) = split_frontmatter(source);
207    let frontmatter = parse_frontmatter(frontmatter);
208    EmbeddedSkill {
209        name: frontmatter.name,
210        frontmatter,
211        body,
212        source,
213    }
214}
215
216fn collect_skill_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), SkillDiscoveryError> {
217    let entries = fs::read_dir(dir).map_err(|source| SkillDiscoveryError::Io {
218        path: dir.to_path_buf(),
219        source,
220    })?;
221    for entry in entries {
222        let entry = entry.map_err(|source| SkillDiscoveryError::Io {
223            path: dir.to_path_buf(),
224            source,
225        })?;
226        let path = entry.path();
227        let file_type = entry
228            .file_type()
229            .map_err(|source| SkillDiscoveryError::Io {
230                path: path.clone(),
231                source,
232            })?;
233        if file_type.is_dir() {
234            collect_skill_paths(&path, out)?;
235        } else if file_type.is_file() && entry.file_name() == "SKILL.md" {
236            out.push(path);
237        }
238    }
239    Ok(())
240}
241
242fn parse_disk_skill(path: &Path) -> Result<DiskSkill, SkillDiscoveryError> {
243    let source = fs::read_to_string(path).map_err(|source| SkillDiscoveryError::Io {
244        path: path.to_path_buf(),
245        source,
246    })?;
247    let (frontmatter, body) =
248        split_disk_frontmatter(&source).ok_or_else(|| SkillDiscoveryError::MissingFrontmatter {
249            path: path.to_path_buf(),
250        })?;
251    let frontmatter = parse_disk_frontmatter(path, frontmatter)?;
252    Ok(DiskSkill {
253        name: frontmatter.name.clone(),
254        frontmatter,
255        body: body.to_string(),
256        source,
257        path: path.to_path_buf(),
258    })
259}
260
/// Split an on-disk `SKILL.md` into `(frontmatter, body)`.
///
/// The file must open with a `---` fence line. The frontmatter runs up to
/// (not including) the next line that consists solely of `---`; the body is
/// everything after that line. Because these files are user-authored, the
/// splitter is tolerant of:
///
/// * `\r\n` as well as `\n` line endings,
/// * a closing fence that is the last line of the file without a trailing
///   newline (the body is then empty),
/// * an empty frontmatter block (closing fence directly after the opening).
///
/// The line terminator that precedes the closing fence is not part of the
/// returned frontmatter. Returns `None` when either fence is missing.
fn split_disk_frontmatter(source: &str) -> Option<(&str, &str)> {
    /// Drop one trailing `\n` or `\r\n`, if present.
    fn strip_line_ending(text: &str) -> &str {
        let text = text.strip_suffix('\n').unwrap_or(text);
        text.strip_suffix('\r').unwrap_or(text)
    }

    let after_open = source
        .strip_prefix("---\n")
        .or_else(|| source.strip_prefix("---\r\n"))?;

    // Track byte offsets by hand so the returned slices borrow from `source`.
    let mut line_start = 0;
    for line in after_open.split_inclusive('\n') {
        let line_end = line_start + line.len();
        if strip_line_ending(line) == "---" {
            let frontmatter = strip_line_ending(&after_open[..line_start]);
            return Some((frontmatter, &after_open[line_end..]));
        }
        line_start = line_end;
    }
    None
}
269
270fn parse_disk_frontmatter(
271    path: &Path,
272    frontmatter: &str,
273) -> Result<DiskSkillFrontmatter, SkillDiscoveryError> {
274    let mut name = None;
275    let mut short = None;
276    let mut description = None;
277    let mut when_to_use = None;
278
279    for line in frontmatter.lines() {
280        let Some((key, value)) = line.split_once(':') else {
281            continue;
282        };
283        let value = value.trim().to_string();
284        match key {
285            "name" => name = Some(value),
286            "short" => short = Some(value),
287            "description" => description = Some(value),
288            "when_to_use" => when_to_use = Some(value),
289            _ => {}
290        }
291    }
292
293    Ok(DiskSkillFrontmatter {
294        name: require_disk_field(path, name, "name")?,
295        short: short.unwrap_or_default(),
296        description: require_disk_field(path, description, "description")?,
297        when_to_use,
298    })
299}
300
301fn require_disk_field(
302    path: &Path,
303    value: Option<String>,
304    field: &'static str,
305) -> Result<String, SkillDiscoveryError> {
306    value.ok_or_else(|| SkillDiscoveryError::MissingField {
307        path: path.to_path_buf(),
308        field,
309    })
310}
311
/// Split an embedded `SKILL.md` into `(frontmatter, body)`.
///
/// The source must start with `---\n`. The frontmatter ends at the first
/// `\n---\n` that follows, and the body is everything after that fence.
///
/// # Panics
///
/// Panics when either fence is missing; embedded sources are expected to be
/// well-formed.
fn split_frontmatter(source: &'static str) -> (&'static str, &'static str) {
    let after_open = match source.strip_prefix("---\n") {
        Some(rest) => rest,
        None => panic!("embedded skill source is missing opening frontmatter delimiter"),
    };
    match after_open.split_once("\n---\n") {
        Some(parts) => parts,
        None => panic!("embedded skill source is missing closing frontmatter delimiter"),
    }
}
324
325fn parse_frontmatter(frontmatter: &'static str) -> SkillFrontmatter {
326    let mut name = None;
327    let mut short = None;
328    let mut description = None;
329    let mut when_to_use = None;
330
331    for line in frontmatter.lines() {
332        let Some((key, value)) = line.split_once(':') else {
333            continue;
334        };
335        let value = value.trim();
336        match key {
337            "name" => name = Some(value),
338            "short" => short = Some(value),
339            "description" => description = Some(value),
340            "when_to_use" => when_to_use = Some(value),
341            _ => {}
342        }
343    }
344
345    SkillFrontmatter {
346        name: name.expect("embedded skill frontmatter is missing `name`"),
347        short: short.expect("embedded skill frontmatter is missing `short`"),
348        description: description.expect("embedded skill frontmatter is missing `description`"),
349        when_to_use,
350    }
351}
352
// Unit tests for the embedded corpus and for disk discovery. The embedded
// tests read the real bundled `SKILL.md` files; the disk tests build
// throwaway directory trees with `tempfile`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use tempfile::TempDir;

    // The embedded corpus contains exactly the expected skills, in order, and
    // one parsed skill per entry in `SOURCES`.
    #[test]
    fn lists_expected_initial_corpus() {
        let skills = list_embedded_skills();
        let names: Vec<&str> = skills.iter().map(|skill| skill.name).collect();
        assert_eq!(
            names,
            [
                "harn-agent",
                "harn-diagnostics",
                "harn-language",
                "harn-orchestration",
                "harn-providers",
                "harn-testing",
                "harn-tracing",
            ]
        );
        assert_eq!(skills.len(), SOURCES.len());
    }

    // Lookup by name returns the matching skill with its parsed body.
    #[test]
    fn can_fetch_harn_language_skill() {
        let skill = get_embedded_skill("harn-language").expect("harn-language skill is embedded");
        assert_eq!(skill.frontmatter.name, "harn-language");
        assert!(skill.body.contains("Harn language"));
    }

    // Names are unique and mirror the frontmatter; bodies are non-empty and
    // do not begin with a leftover frontmatter fence.
    #[test]
    fn skills_have_unique_names_and_body_only_content() {
        let mut names = BTreeSet::new();
        for skill in list_embedded_skills() {
            assert_eq!(skill.name, skill.frontmatter.name);
            assert!(names.insert(skill.name), "duplicate skill {}", skill.name);
            assert!(
                !skill.body.trim().is_empty(),
                "{} body is empty",
                skill.name
            );
            assert!(
                !skill.body.trim_start().starts_with("---"),
                "{} body includes frontmatter",
                skill.name
            );
        }
    }

    // The embedded listing is already in ascending name order.
    #[test]
    fn skills_are_sorted_by_name() {
        let names: Vec<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        let mut sorted = names.clone();
        sorted.sort_unstable();
        assert_eq!(names, sorted);
    }

    // `source` starts with the opening fence, ends with `body`, and carries
    // the canonical `name:` line.
    #[test]
    fn source_round_trips_to_frontmatter_and_body() {
        for skill in list_embedded_skills() {
            assert!(
                skill.source.starts_with("---\n"),
                "{} source missing opening fence",
                skill.name
            );
            assert!(
                skill.source.ends_with(skill.body),
                "{} source must end with the body so dump output is byte-stable",
                skill.name
            );
            assert!(
                skill.source.contains(&format!("name: {}\n", skill.name)),
                "{} source missing canonical name field",
                skill.name
            );
        }
    }

    // Guard against the embedded corpus growing past 200 KiB in total.
    #[test]
    fn embedded_corpus_stays_within_binary_budget() {
        let bytes: usize = SOURCES.iter().map(|source| source.len()).sum();
        assert!(
            bytes <= 200 * 1024,
            "embedded corpus is {bytes} bytes, expected <= 200 KiB"
        );
    }

    // Each skill body mentions its focus terms (compared case-insensitively)
    // and contains no stub or placeholder wording.
    #[test]
    fn skill_bodies_are_focused_and_not_placeholders() {
        let expectations = [
            ("harn-agent", ["agent_loop", "session id", "approval"]),
            ("harn-diagnostics", ["diagnostic", "repair", "conformance"]),
            ("harn-language", ["quickref", "type", "conformance"]),
            ("harn-orchestration", ["agent_loop", "workflow", "host"]),
            ("harn-providers", ["llm_call", "provider", "schema"]),
            (
                "harn-testing",
                ["conformance", "deterministic", "mock_time"],
            ),
            ("harn-tracing", ["replay", "receipts", "transcript"]),
        ];

        for (name, terms) in expectations {
            let skill = get_embedded_skill(name).expect("expected embedded skill");
            let body = skill.body.to_ascii_lowercase();
            assert!(
                !body.contains("embedded stub") && !body.contains("placeholder"),
                "{name} should contain real guidance, not stub wording"
            );
            for term in terms {
                assert!(
                    body.contains(term),
                    "{name} body should mention focused term `{term}`"
                );
            }
        }
    }

    // Every skill body is between 80 and 300 lines long, inclusive.
    #[test]
    fn skill_bodies_match_split_skill_contract() {
        for skill in list_embedded_skills() {
            let lines = skill.body.lines().count();
            assert!(
                lines >= 80,
                "{} body is {lines} lines, expected at least 80",
                skill.name
            );
            assert!(
                lines <= 300,
                "{} body is {lines} lines, expected at most 300",
                skill.name
            );
        }
    }

    // Every `[[name]]` reference inside a body names an embedded skill.
    #[test]
    fn skill_cross_links_resolve_to_embedded_skills() {
        let names: BTreeSet<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        for skill in list_embedded_skills() {
            for reference in bracketed_skill_references(skill.body) {
                assert!(
                    names.contains(reference),
                    "{} links to unknown embedded skill [[{}]]",
                    skill.name,
                    reference
                );
            }
        }
    }

    // The diagnostics skill mentions each listed category code in backticks.
    #[test]
    fn diagnostics_skill_mentions_all_code_categories() {
        let skill = get_embedded_skill("harn-diagnostics").expect("diagnostics skill");
        for category in [
            "TYP", "PAR", "NAM", "CAP", "LLM", "ORC", "STD", "PRM", "MOD", "LNT", "FMT", "IMP",
            "OWN", "RCV", "MAT",
        ] {
            assert!(
                skill.body.contains(&format!("`{category}`")),
                "harn-diagnostics should mention diagnostic category `{category}`"
            );
        }
    }

    // Discovery descends into nested directories and returns skills ordered
    // by name rather than by path.
    #[test]
    fn disk_discovery_finds_recursive_skill_files_sorted_by_name() {
        let temp = TempDir::new().expect("temp dir");
        write_skill(
            &temp.path().join("zeta").join("SKILL.md"),
            "zeta-skill",
            "Zeta",
        );
        write_skill(
            &temp.path().join("nested").join("alpha").join("SKILL.md"),
            "alpha-skill",
            "Alpha",
        );

        let skills = list_disk_skills(temp.path()).expect("discover disk skills");
        let names: Vec<&str> = skills.iter().map(|skill| skill.name.as_str()).collect();
        assert_eq!(names, ["alpha-skill", "zeta-skill"]);
        assert_eq!(skills[0].frontmatter.description, "Alpha description");
        assert!(skills[0].body.contains("Alpha body"));
    }

    // A root directory that does not exist yields an empty list, not an error.
    #[test]
    fn disk_discovery_treats_missing_root_as_empty() {
        let temp = TempDir::new().expect("temp dir");
        let skills = list_disk_skills(temp.path().join("missing")).expect("discover disk skills");
        assert!(skills.is_empty());
    }

    // Two files declaring the same skill name make discovery fail.
    #[test]
    fn disk_discovery_rejects_duplicate_skill_names() {
        let temp = TempDir::new().expect("temp dir");
        write_skill(
            &temp.path().join("one").join("SKILL.md"),
            "same-skill",
            "One",
        );
        write_skill(
            &temp.path().join("two").join("SKILL.md"),
            "same-skill",
            "Two",
        );

        let error = list_disk_skills(temp.path()).expect_err("duplicate name should fail");
        assert!(
            error.to_string().contains("duplicate skill `same-skill`"),
            "unexpected error: {error}"
        );
    }

    // Test helper: create the parent directories of `path` and write a minimal
    // well-formed SKILL.md there, using `label` to fill in the short text,
    // description, heading, and body.
    fn write_skill(path: &Path, name: &str, label: &str) {
        fs::create_dir_all(path.parent().expect("skill parent")).expect("create skill parent");
        fs::write(
            path,
            format!(
                "---\nname: {name}\nshort: {label} short\ndescription: {label} description\n---\n# {label}\n\n{label} body\n"
            ),
        )
        .expect("write SKILL.md");
    }

    // Test helper: collect the text between each `[[` and the next `]]` in
    // `body`, in order of appearance. Scanning stops at an unterminated `[[`.
    fn bracketed_skill_references(body: &str) -> Vec<&str> {
        let mut references = Vec::new();
        let mut rest = body;
        while let Some(start) = rest.find("[[") {
            rest = &rest[start + 2..];
            let Some(end) = rest.find("]]") else {
                break;
            };
            references.push(&rest[..end]);
            rest = &rest[end + 2..];
        }
        references
    }
}