1use std::collections::BTreeMap;
8use std::env;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13use std::sync::OnceLock;
14
15pub const HARN_SKILLS_DIR_ENV: &str = "HARN_SKILLS_DIR";
17
/// Frontmatter fields of a skill that is compiled into the binary.
///
/// Every field borrows from the `'static` skill source text, which is why the
/// type can be `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SkillFrontmatter {
    /// Trimmed value of the `name:` frontmatter line (required).
    pub name: &'static str,
    /// Trimmed value of the `short:` frontmatter line (required for embedded skills).
    pub short: &'static str,
    /// Trimmed value of the `description:` frontmatter line (required).
    pub description: &'static str,
    /// Trimmed value of the `when_to_use:` frontmatter line, when one is present.
    pub when_to_use: Option<&'static str>,
}
26
/// A skill whose `SKILL.md` text is embedded in the binary.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EmbeddedSkill {
    /// Skill name; a copy of `frontmatter.name` kept at the top level for lookups.
    pub name: &'static str,
    /// Parsed frontmatter fields.
    pub frontmatter: SkillFrontmatter,
    /// Everything after the closing `---` frontmatter fence.
    pub body: &'static str,
    /// The complete, unmodified `SKILL.md` text, frontmatter included.
    pub source: &'static str,
}
39
/// Owned frontmatter fields of a skill that was read from disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiskSkillFrontmatter {
    /// Trimmed value of the `name:` frontmatter line (required).
    pub name: String,
    /// Trimmed value of the `short:` frontmatter line; empty when the line is absent.
    pub short: String,
    /// Trimmed value of the `description:` frontmatter line (required).
    pub description: String,
    /// Trimmed value of the `when_to_use:` frontmatter line, when one is present.
    pub when_to_use: Option<String>,
}
48
/// A skill parsed from a `SKILL.md` file on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiskSkill {
    /// Skill name; a clone of `frontmatter.name`.
    pub name: String,
    /// Parsed frontmatter fields.
    pub frontmatter: DiskSkillFrontmatter,
    /// Everything after the closing `---` frontmatter fence.
    pub body: String,
    /// The complete file contents, frontmatter included.
    pub source: String,
    /// Path of the `SKILL.md` file this skill was read from.
    pub path: PathBuf,
}
58
/// The set of skills in effect: either the built-in corpus or one loaded from disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkillCorpus {
    /// The skills compiled into the binary.
    Embedded(&'static [EmbeddedSkill]),
    /// Skills discovered under a directory on disk.
    Disk(Vec<DiskSkill>),
}
65
66impl SkillCorpus {
67 pub fn is_disk(&self) -> bool {
68 matches!(self, Self::Disk(_))
69 }
70
71 pub fn len(&self) -> usize {
72 match self {
73 Self::Embedded(skills) => skills.len(),
74 Self::Disk(skills) => skills.len(),
75 }
76 }
77
78 pub fn is_empty(&self) -> bool {
79 self.len() == 0
80 }
81}
82
/// Errors produced while discovering and parsing `SKILL.md` files on disk.
#[derive(Debug)]
pub enum SkillDiscoveryError {
    /// A filesystem operation on `path` failed with `source`.
    Io {
        path: PathBuf,
        source: io::Error,
    },
    /// The file at `path` does not contain a `---`-fenced frontmatter block.
    MissingFrontmatter {
        path: PathBuf,
    },
    /// The frontmatter of the file at `path` lacks the required `field`.
    MissingField {
        path: PathBuf,
        field: &'static str,
    },
    /// Two files declare the same skill `name`; `first` precedes `second` in
    /// sorted path order.
    DuplicateName {
        name: String,
        first: PathBuf,
        second: PathBuf,
    },
}
103
104impl fmt::Display for SkillDiscoveryError {
105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106 match self {
107 Self::Io { path, source } => write!(f, "{}: {source}", path.display()),
108 Self::MissingFrontmatter { path } => {
109 write!(f, "{}: missing SKILL.md frontmatter", path.display())
110 }
111 Self::MissingField { path, field } => {
112 write!(f, "{}: missing `{field}` frontmatter field", path.display())
113 }
114 Self::DuplicateName {
115 name,
116 first,
117 second,
118 } => write!(
119 f,
120 "duplicate skill `{name}` in {} and {}",
121 first.display(),
122 second.display()
123 ),
124 }
125 }
126}
127
// Marker impl only: `source()` is not overridden, so it keeps the trait's default.
// For the `Io` variant the underlying `io::Error` text is already part of the
// `Display` output.
impl std::error::Error for SkillDiscoveryError {}
129
/// Raw `SKILL.md` text of every embedded skill, pulled in at compile time with
/// `include_str!`.
///
/// `list_embedded_skills` returns skills in this order, and the unit tests expect
/// that order to be sorted by skill name.
const SOURCES: &[&str] = &[
    include_str!("corpus/harn-agent/SKILL.md"),
    include_str!("corpus/harn-diagnostics/SKILL.md"),
    include_str!("corpus/harn-language/SKILL.md"),
    include_str!("corpus/harn-orchestration/SKILL.md"),
    include_str!("corpus/harn-providers/SKILL.md"),
    include_str!("corpus/harn-testing/SKILL.md"),
    include_str!("corpus/harn-tracing/SKILL.md"),
];
139
/// Cache of the parsed embedded skills; filled on the first call to
/// `list_embedded_skills`.
static EMBEDDED_SKILLS: OnceLock<Box<[EmbeddedSkill]>> = OnceLock::new();
141
142pub fn list_embedded_skills() -> &'static [EmbeddedSkill] {
144 EMBEDDED_SKILLS
145 .get_or_init(|| SOURCES.iter().map(|source| parse_skill(source)).collect())
146 .as_ref()
147}
148
149pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
151 list_embedded_skills()
152 .iter()
153 .find(|skill| skill.name == name)
154}
155
156pub fn resolve_skill_corpus_from_env() -> Result<SkillCorpus, SkillDiscoveryError> {
160 let Ok(dir) = env::var(HARN_SKILLS_DIR_ENV) else {
161 return Ok(SkillCorpus::Embedded(list_embedded_skills()));
162 };
163 if dir.trim().is_empty() {
164 return Ok(SkillCorpus::Embedded(list_embedded_skills()));
165 }
166
167 let skills = list_disk_skills(dir)?;
168 if skills.is_empty() {
169 Ok(SkillCorpus::Embedded(list_embedded_skills()))
170 } else {
171 Ok(SkillCorpus::Disk(skills))
172 }
173}
174
175pub fn list_disk_skills(root: impl AsRef<Path>) -> Result<Vec<DiskSkill>, SkillDiscoveryError> {
180 let root = root.as_ref();
181 if !root.exists() {
182 return Ok(Vec::new());
183 }
184
185 let mut paths = Vec::new();
186 collect_skill_paths(root, &mut paths)?;
187 paths.sort();
188
189 let mut by_name: BTreeMap<String, DiskSkill> = BTreeMap::new();
190 for path in paths {
191 let skill = parse_disk_skill(&path)?;
192 if let Some(first) = by_name.get(&skill.name) {
193 return Err(SkillDiscoveryError::DuplicateName {
194 name: skill.name,
195 first: first.path.clone(),
196 second: path,
197 });
198 }
199 by_name.insert(skill.name.clone(), skill);
200 }
201
202 Ok(by_name.into_values().collect())
203}
204
205fn parse_skill(source: &'static str) -> EmbeddedSkill {
206 let (frontmatter, body) = split_frontmatter(source);
207 let frontmatter = parse_frontmatter(frontmatter);
208 EmbeddedSkill {
209 name: frontmatter.name,
210 frontmatter,
211 body,
212 source,
213 }
214}
215
216fn collect_skill_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), SkillDiscoveryError> {
217 let entries = fs::read_dir(dir).map_err(|source| SkillDiscoveryError::Io {
218 path: dir.to_path_buf(),
219 source,
220 })?;
221 for entry in entries {
222 let entry = entry.map_err(|source| SkillDiscoveryError::Io {
223 path: dir.to_path_buf(),
224 source,
225 })?;
226 let path = entry.path();
227 let file_type = entry
228 .file_type()
229 .map_err(|source| SkillDiscoveryError::Io {
230 path: path.clone(),
231 source,
232 })?;
233 if file_type.is_dir() {
234 collect_skill_paths(&path, out)?;
235 } else if file_type.is_file() && entry.file_name() == "SKILL.md" {
236 out.push(path);
237 }
238 }
239 Ok(())
240}
241
242fn parse_disk_skill(path: &Path) -> Result<DiskSkill, SkillDiscoveryError> {
243 let source = fs::read_to_string(path).map_err(|source| SkillDiscoveryError::Io {
244 path: path.to_path_buf(),
245 source,
246 })?;
247 let (frontmatter, body) =
248 split_disk_frontmatter(&source).ok_or_else(|| SkillDiscoveryError::MissingFrontmatter {
249 path: path.to_path_buf(),
250 })?;
251 let frontmatter = parse_disk_frontmatter(path, frontmatter)?;
252 Ok(DiskSkill {
253 name: frontmatter.name.clone(),
254 frontmatter,
255 body: body.to_string(),
256 source,
257 path: path.to_path_buf(),
258 })
259}
260
/// Splits the text of an on-disk `SKILL.md` into `(frontmatter, body)`.
///
/// The file must start with a `---` line. The frontmatter runs up to the next line
/// that is exactly `---`, and the body is everything after that line. Neither
/// returned slice includes the fence lines or the line ending directly before the
/// closing fence.
///
/// Because these files are edited by users, the splitter accepts:
/// * `\n` and `\r\n` line endings,
/// * a closing fence that is the last line of the file without a trailing newline,
/// * an empty frontmatter block (closing fence directly after the opening one).
///
/// Returns `None` when the opening or closing fence is missing.
fn split_disk_frontmatter(source: &str) -> Option<(&str, &str)> {
    /// Drops one trailing `\n` and then one trailing `\r`, if present.
    fn strip_eol(text: &str) -> &str {
        let text = text.strip_suffix('\n').unwrap_or(text);
        text.strip_suffix('\r').unwrap_or(text)
    }

    let after_open = source
        .strip_prefix("---\n")
        .or_else(|| source.strip_prefix("---\r\n"))?;

    // Walk line by line (keeping terminators) so byte offsets stay exact.
    let mut offset = 0;
    for line in after_open.split_inclusive('\n') {
        if strip_eol(line) == "---" {
            let frontmatter = strip_eol(&after_open[..offset]);
            let body = &after_open[offset + line.len()..];
            return Some((frontmatter, body));
        }
        offset += line.len();
    }
    None
}
269
270fn parse_disk_frontmatter(
271 path: &Path,
272 frontmatter: &str,
273) -> Result<DiskSkillFrontmatter, SkillDiscoveryError> {
274 let mut name = None;
275 let mut short = None;
276 let mut description = None;
277 let mut when_to_use = None;
278
279 for line in frontmatter.lines() {
280 let Some((key, value)) = line.split_once(':') else {
281 continue;
282 };
283 let value = value.trim().to_string();
284 match key {
285 "name" => name = Some(value),
286 "short" => short = Some(value),
287 "description" => description = Some(value),
288 "when_to_use" => when_to_use = Some(value),
289 _ => {}
290 }
291 }
292
293 Ok(DiskSkillFrontmatter {
294 name: require_disk_field(path, name, "name")?,
295 short: short.unwrap_or_default(),
296 description: require_disk_field(path, description, "description")?,
297 when_to_use,
298 })
299}
300
301fn require_disk_field(
302 path: &Path,
303 value: Option<String>,
304 field: &'static str,
305) -> Result<String, SkillDiscoveryError> {
306 value.ok_or_else(|| SkillDiscoveryError::MissingField {
307 path: path.to_path_buf(),
308 field,
309 })
310}
311
/// Splits an embedded `SKILL.md` into `(frontmatter, body)`.
///
/// The source must begin with `---\n`; the frontmatter ends at the first `\n---\n`
/// after that, and the body is everything following it. Neither slice contains the
/// fences.
///
/// # Panics
///
/// Panics when either fence is missing. Embedded sources are fixed at compile
/// time, so this is treated as a build defect and not a runtime error.
fn split_frontmatter(source: &'static str) -> (&'static str, &'static str) {
    const OPENING_FENCE: &str = "---\n";
    const CLOSING_FENCE: &str = "\n---\n";

    let after_open = match source.strip_prefix(OPENING_FENCE) {
        Some(rest) => rest,
        None => panic!("embedded skill source is missing opening frontmatter delimiter"),
    };
    match after_open.split_once(CLOSING_FENCE) {
        Some(parts) => parts,
        None => panic!("embedded skill source is missing closing frontmatter delimiter"),
    }
}
324
325fn parse_frontmatter(frontmatter: &'static str) -> SkillFrontmatter {
326 let mut name = None;
327 let mut short = None;
328 let mut description = None;
329 let mut when_to_use = None;
330
331 for line in frontmatter.lines() {
332 let Some((key, value)) = line.split_once(':') else {
333 continue;
334 };
335 let value = value.trim();
336 match key {
337 "name" => name = Some(value),
338 "short" => short = Some(value),
339 "description" => description = Some(value),
340 "when_to_use" => when_to_use = Some(value),
341 _ => {}
342 }
343 }
344
345 SkillFrontmatter {
346 name: name.expect("embedded skill frontmatter is missing `name`"),
347 short: short.expect("embedded skill frontmatter is missing `short`"),
348 description: description.expect("embedded skill frontmatter is missing `description`"),
349 when_to_use,
350 }
351}
352
// Unit tests. The first group checks the embedded corpus (names, ordering, size and
// content contracts); the second group exercises on-disk discovery in temp directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use tempfile::TempDir;

    // The embedded corpus contains exactly these skills, in this order.
    #[test]
    fn lists_expected_initial_corpus() {
        let skills = list_embedded_skills();
        let names: Vec<&str> = skills.iter().map(|skill| skill.name).collect();
        assert_eq!(
            names,
            [
                "harn-agent",
                "harn-diagnostics",
                "harn-language",
                "harn-orchestration",
                "harn-providers",
                "harn-testing",
                "harn-tracing",
            ]
        );
        assert_eq!(skills.len(), SOURCES.len());
    }

    // Lookup by name returns the matching skill with its body.
    #[test]
    fn can_fetch_harn_language_skill() {
        let skill = get_embedded_skill("harn-language").expect("harn-language skill is embedded");
        assert_eq!(skill.frontmatter.name, "harn-language");
        assert!(skill.body.contains("Harn language"));
    }

    // Names are unique, mirror the frontmatter, and bodies exclude the frontmatter.
    #[test]
    fn skills_have_unique_names_and_body_only_content() {
        let mut names = BTreeSet::new();
        for skill in list_embedded_skills() {
            assert_eq!(skill.name, skill.frontmatter.name);
            assert!(names.insert(skill.name), "duplicate skill {}", skill.name);
            assert!(
                !skill.body.trim().is_empty(),
                "{} body is empty",
                skill.name
            );
            assert!(
                !skill.body.trim_start().starts_with("---"),
                "{} body includes frontmatter",
                skill.name
            );
        }
    }

    // `SOURCES` must be listed so that the parsed names come out sorted.
    #[test]
    fn skills_are_sorted_by_name() {
        let names: Vec<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        let mut sorted = names.clone();
        sorted.sort_unstable();
        assert_eq!(names, sorted);
    }

    // `source` is the untouched file: opening fence first, body last, canonical name line inside.
    #[test]
    fn source_round_trips_to_frontmatter_and_body() {
        for skill in list_embedded_skills() {
            assert!(
                skill.source.starts_with("---\n"),
                "{} source missing opening fence",
                skill.name
            );
            assert!(
                skill.source.ends_with(skill.body),
                "{} source must end with the body so dump output is byte-stable",
                skill.name
            );
            assert!(
                skill.source.contains(&format!("name: {}\n", skill.name)),
                "{} source missing canonical name field",
                skill.name
            );
        }
    }

    // Guards the total size of the embedded text (200 KiB ceiling).
    #[test]
    fn embedded_corpus_stays_within_binary_budget() {
        let bytes: usize = SOURCES.iter().map(|source| source.len()).sum();
        assert!(
            bytes <= 200 * 1024,
            "embedded corpus is {bytes} bytes, expected <= 200 KiB"
        );
    }

    // Each body must mention its key terms (case-insensitively) and contain no stub wording.
    #[test]
    fn skill_bodies_are_focused_and_not_placeholders() {
        let expectations = [
            ("harn-agent", ["agent_loop", "session id", "approval"]),
            ("harn-diagnostics", ["diagnostic", "repair", "conformance"]),
            ("harn-language", ["quickref", "type", "conformance"]),
            ("harn-orchestration", ["agent_loop", "workflow", "host"]),
            ("harn-providers", ["llm_call", "provider", "schema"]),
            (
                "harn-testing",
                ["conformance", "deterministic", "mock_time"],
            ),
            ("harn-tracing", ["replay", "receipts", "transcript"]),
        ];

        for (name, terms) in expectations {
            let skill = get_embedded_skill(name).expect("expected embedded skill");
            let body = skill.body.to_ascii_lowercase();
            assert!(
                !body.contains("embedded stub") && !body.contains("placeholder"),
                "{name} should contain real guidance, not stub wording"
            );
            for term in terms {
                assert!(
                    body.contains(term),
                    "{name} body should mention focused term `{term}`"
                );
            }
        }
    }

    // Every body must be between 80 and 300 lines long.
    #[test]
    fn skill_bodies_match_split_skill_contract() {
        for skill in list_embedded_skills() {
            let lines = skill.body.lines().count();
            assert!(
                lines >= 80,
                "{} body is {lines} lines, expected at least 80",
                skill.name
            );
            assert!(
                lines <= 300,
                "{} body is {lines} lines, expected at most 300",
                skill.name
            );
        }
    }

    // Every `[[name]]` reference in a body must name an embedded skill.
    #[test]
    fn skill_cross_links_resolve_to_embedded_skills() {
        let names: BTreeSet<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        for skill in list_embedded_skills() {
            for reference in bracketed_skill_references(skill.body) {
                assert!(
                    names.contains(reference),
                    "{} links to unknown embedded skill [[{}]]",
                    skill.name,
                    reference
                );
            }
        }
    }

    // The diagnostics skill must mention each category code in backticks.
    #[test]
    fn diagnostics_skill_mentions_all_code_categories() {
        let skill = get_embedded_skill("harn-diagnostics").expect("diagnostics skill");
        for category in [
            "TYP", "PAR", "NAM", "CAP", "LLM", "ORC", "STD", "PRM", "MOD", "LNT", "FMT", "IMP",
            "OWN", "RCV", "MAT",
        ] {
            assert!(
                skill.body.contains(&format!("`{category}`")),
                "harn-diagnostics should mention diagnostic category `{category}`"
            );
        }
    }

    // Discovery recurses into subdirectories and orders results by skill name, not by path.
    #[test]
    fn disk_discovery_finds_recursive_skill_files_sorted_by_name() {
        let temp = TempDir::new().expect("temp dir");
        write_skill(
            &temp.path().join("zeta").join("SKILL.md"),
            "zeta-skill",
            "Zeta",
        );
        write_skill(
            &temp.path().join("nested").join("alpha").join("SKILL.md"),
            "alpha-skill",
            "Alpha",
        );

        let skills = list_disk_skills(temp.path()).expect("discover disk skills");
        let names: Vec<&str> = skills.iter().map(|skill| skill.name.as_str()).collect();
        assert_eq!(names, ["alpha-skill", "zeta-skill"]);
        assert_eq!(skills[0].frontmatter.description, "Alpha description");
        assert!(skills[0].body.contains("Alpha body"));
    }

    // A root directory that does not exist is not an error; it yields no skills.
    #[test]
    fn disk_discovery_treats_missing_root_as_empty() {
        let temp = TempDir::new().expect("temp dir");
        let skills = list_disk_skills(temp.path().join("missing")).expect("discover disk skills");
        assert!(skills.is_empty());
    }

    // Two files declaring the same name make discovery fail.
    #[test]
    fn disk_discovery_rejects_duplicate_skill_names() {
        let temp = TempDir::new().expect("temp dir");
        write_skill(
            &temp.path().join("one").join("SKILL.md"),
            "same-skill",
            "One",
        );
        write_skill(
            &temp.path().join("two").join("SKILL.md"),
            "same-skill",
            "Two",
        );

        let error = list_disk_skills(temp.path()).expect_err("duplicate name should fail");
        assert!(
            error.to_string().contains("duplicate skill `same-skill`"),
            "unexpected error: {error}"
        );
    }

    // Writes a minimal, valid SKILL.md at `path`, creating parent directories.
    // `label` seeds the short text, description, heading and body.
    fn write_skill(path: &Path, name: &str, label: &str) {
        fs::create_dir_all(path.parent().expect("skill parent")).expect("create skill parent");
        fs::write(
            path,
            format!(
                "---\nname: {name}\nshort: {label} short\ndescription: {label} description\n---\n# {label}\n\n{label} body\n"
            ),
        )
        .expect("write SKILL.md");
    }

    // Returns the text inside every `[[...]]` pair in `body`, in order of appearance.
    // Scanning stops at an opening `[[` that has no closing `]]`.
    fn bracketed_skill_references(body: &str) -> Vec<&str> {
        let mut references = Vec::new();
        let mut rest = body;
        while let Some(start) = rest.find("[[") {
            rest = &rest[start + 2..];
            let Some(end) = rest.find("]]") else {
                break;
            };
            references.push(&rest[..end]);
            rest = &rest[end + 2..];
        }
        references
    }
}