1use std::collections::BTreeMap;
8use std::env;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13use std::sync::OnceLock;
14
/// Name of the environment variable that `resolve_skill_corpus_from_env`
/// reads to find a directory of on-disk `SKILL.md` files.
pub const HARN_SKILLS_DIR_ENV: &str = "HARN_SKILLS_DIR";
17
/// Frontmatter fields of a skill whose source text is compiled into the binary.
///
/// All strings are `'static` slices of the embedded source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SkillFrontmatter {
    /// Value of the `name` frontmatter key.
    pub name: &'static str,
    /// Value of the `short` frontmatter key.
    pub short: &'static str,
    /// Value of the `description` frontmatter key.
    pub description: &'static str,
    /// Value of the `when_to_use` frontmatter key, if the key is present.
    pub when_to_use: Option<&'static str>,
}
26
/// A skill parsed from source text that is compiled into the binary.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EmbeddedSkill {
    /// Skill name; a copy of `frontmatter.name`.
    pub name: &'static str,
    /// Parsed frontmatter fields.
    pub frontmatter: SkillFrontmatter,
    /// Text that follows the closing frontmatter fence.
    pub body: &'static str,
    /// The complete, unmodified source text (frontmatter and body).
    pub source: &'static str,
}
39
/// Frontmatter fields of a skill read from disk; the owned counterpart of
/// [`SkillFrontmatter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiskSkillFrontmatter {
    /// Value of the `name` frontmatter key (required).
    pub name: String,
    /// Value of the `short` frontmatter key; empty when the key is absent.
    pub short: String,
    /// Value of the `description` frontmatter key (required).
    pub description: String,
    /// Value of the `when_to_use` frontmatter key, if the key is present.
    pub when_to_use: Option<String>,
}
48
/// A skill parsed from a `SKILL.md` file on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiskSkill {
    /// Skill name; a copy of `frontmatter.name`.
    pub name: String,
    /// Parsed frontmatter fields.
    pub frontmatter: DiskSkillFrontmatter,
    /// Text that follows the closing frontmatter fence.
    pub body: String,
    /// The complete file contents as read from disk.
    pub source: String,
    /// Path of the file the skill was read from.
    pub path: PathBuf,
}
58
/// The set of skills in use: either the embedded corpus or one loaded from disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkillCorpus {
    /// Skills compiled into the binary.
    Embedded(&'static [EmbeddedSkill]),
    /// Skills discovered in a directory on disk.
    Disk(Vec<DiskSkill>),
}
65
66impl SkillCorpus {
67 pub fn is_disk(&self) -> bool {
68 matches!(self, Self::Disk(_))
69 }
70
71 pub fn len(&self) -> usize {
72 match self {
73 Self::Embedded(skills) => skills.len(),
74 Self::Disk(skills) => skills.len(),
75 }
76 }
77
78 pub fn is_empty(&self) -> bool {
79 self.len() == 0
80 }
81}
82
/// Errors produced while discovering and parsing on-disk skills.
#[derive(Debug)]
pub enum SkillDiscoveryError {
    /// A filesystem operation on `path` failed.
    Io {
        /// Directory or file the failed operation was applied to.
        path: PathBuf,
        /// The underlying I/O error.
        source: io::Error,
    },
    /// The file at `path` has no recognisable frontmatter section.
    MissingFrontmatter {
        /// File that was being parsed.
        path: PathBuf,
    },
    /// A required frontmatter key is absent from the file at `path`.
    MissingField {
        /// File that was being parsed.
        path: PathBuf,
        /// Name of the missing frontmatter key.
        field: &'static str,
    },
    /// Two files declare the same skill name.
    DuplicateName {
        /// The skill name declared by both files.
        name: String,
        /// File in which the name was seen first.
        first: PathBuf,
        /// File in which the name was seen again.
        second: PathBuf,
    },
}
103
104impl fmt::Display for SkillDiscoveryError {
105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106 match self {
107 Self::Io { path, source } => write!(f, "{}: {source}", path.display()),
108 Self::MissingFrontmatter { path } => {
109 write!(f, "{}: missing SKILL.md frontmatter", path.display())
110 }
111 Self::MissingField { path, field } => {
112 write!(f, "{}: missing `{field}` frontmatter field", path.display())
113 }
114 Self::DuplicateName {
115 name,
116 first,
117 second,
118 } => write!(
119 f,
120 "duplicate skill `{name}` in {} and {}",
121 first.display(),
122 second.display()
123 ),
124 }
125 }
126}
127
// Uses the trait's default methods only; the I/O cause is already included in
// the `Display` text of the `Io` variant.
impl std::error::Error for SkillDiscoveryError {}
129
/// Raw `SKILL.md` texts compiled into the binary.
///
/// `list_embedded_skills` parses these in this order, and the tests in this
/// file assert that the resulting skill names are sorted, so new entries
/// should be inserted in name order.
const SOURCES: &[&str] = &[
    include_str!("corpus/harn-agent/SKILL.md"),
    include_str!("corpus/harn-diagnostics/SKILL.md"),
    include_str!("corpus/harn-language/SKILL.md"),
    include_str!("corpus/harn-orchestration/SKILL.md"),
    include_str!("corpus/harn-probe/SKILL.md"),
    include_str!("corpus/harn-providers/SKILL.md"),
    include_str!("corpus/harn-testing/SKILL.md"),
    include_str!("corpus/harn-tracing/SKILL.md"),
    include_str!("corpus/release-harn/SKILL.md"),
];
141
/// Cache of the parsed embedded skills; filled on the first call to
/// `list_embedded_skills` and reused afterwards.
static EMBEDDED_SKILLS: OnceLock<Box<[EmbeddedSkill]>> = OnceLock::new();
143
144pub fn list_embedded_skills() -> &'static [EmbeddedSkill] {
146 EMBEDDED_SKILLS
147 .get_or_init(|| SOURCES.iter().map(|source| parse_skill(source)).collect())
148 .as_ref()
149}
150
151pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
153 list_embedded_skills()
154 .iter()
155 .find(|skill| skill.name == name)
156}
157
158pub fn resolve_skill_corpus_from_env() -> Result<SkillCorpus, SkillDiscoveryError> {
162 let Ok(dir) = env::var(HARN_SKILLS_DIR_ENV) else {
163 return Ok(SkillCorpus::Embedded(list_embedded_skills()));
164 };
165 if dir.trim().is_empty() {
166 return Ok(SkillCorpus::Embedded(list_embedded_skills()));
167 }
168
169 let skills = list_disk_skills(dir)?;
170 if skills.is_empty() {
171 Ok(SkillCorpus::Embedded(list_embedded_skills()))
172 } else {
173 Ok(SkillCorpus::Disk(skills))
174 }
175}
176
177pub fn list_disk_skills(root: impl AsRef<Path>) -> Result<Vec<DiskSkill>, SkillDiscoveryError> {
182 let root = root.as_ref();
183 if !root.exists() {
184 return Ok(Vec::new());
185 }
186
187 let mut paths = Vec::new();
188 collect_skill_paths(root, &mut paths)?;
189 paths.sort();
190
191 let mut by_name: BTreeMap<String, DiskSkill> = BTreeMap::new();
192 for path in paths {
193 let skill = parse_disk_skill(&path)?;
194 if let Some(first) = by_name.get(&skill.name) {
195 return Err(SkillDiscoveryError::DuplicateName {
196 name: skill.name,
197 first: first.path.clone(),
198 second: path,
199 });
200 }
201 by_name.insert(skill.name.clone(), skill);
202 }
203
204 Ok(by_name.into_values().collect())
205}
206
207fn parse_skill(source: &'static str) -> EmbeddedSkill {
208 let (frontmatter, body) = split_frontmatter(source);
209 let frontmatter = parse_frontmatter(frontmatter);
210 EmbeddedSkill {
211 name: frontmatter.name,
212 frontmatter,
213 body,
214 source,
215 }
216}
217
218fn collect_skill_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), SkillDiscoveryError> {
219 let entries = fs::read_dir(dir).map_err(|source| SkillDiscoveryError::Io {
220 path: dir.to_path_buf(),
221 source,
222 })?;
223 for entry in entries {
224 let entry = entry.map_err(|source| SkillDiscoveryError::Io {
225 path: dir.to_path_buf(),
226 source,
227 })?;
228 let path = entry.path();
229 let file_type = entry
230 .file_type()
231 .map_err(|source| SkillDiscoveryError::Io {
232 path: path.clone(),
233 source,
234 })?;
235 if file_type.is_dir() {
236 collect_skill_paths(&path, out)?;
237 } else if file_type.is_file() && entry.file_name() == "SKILL.md" {
238 out.push(path);
239 }
240 }
241 Ok(())
242}
243
244fn parse_disk_skill(path: &Path) -> Result<DiskSkill, SkillDiscoveryError> {
245 let source = fs::read_to_string(path).map_err(|source| SkillDiscoveryError::Io {
246 path: path.to_path_buf(),
247 source,
248 })?;
249 let (frontmatter, body) =
250 split_disk_frontmatter(&source).ok_or_else(|| SkillDiscoveryError::MissingFrontmatter {
251 path: path.to_path_buf(),
252 })?;
253 let frontmatter = parse_disk_frontmatter(path, frontmatter)?;
254 Ok(DiskSkill {
255 name: frontmatter.name.clone(),
256 frontmatter,
257 body: body.to_string(),
258 source,
259 path: path.to_path_buf(),
260 })
261}
262
/// Splits the text of an on-disk skill into `(frontmatter, body)`.
///
/// Delegates to `split_frontmatter_parts`; returns `None` when either
/// frontmatter fence is missing.
fn split_disk_frontmatter(source: &str) -> Option<(&str, &str)> {
    split_frontmatter_parts(source)
}
266
267fn parse_disk_frontmatter(
268 path: &Path,
269 frontmatter: &str,
270) -> Result<DiskSkillFrontmatter, SkillDiscoveryError> {
271 let mut name = None;
272 let mut short = None;
273 let mut description = None;
274 let mut when_to_use = None;
275
276 for line in frontmatter.lines() {
277 let Some((key, value)) = line.split_once(':') else {
278 continue;
279 };
280 let value = value.trim().to_string();
281 match key {
282 "name" => name = Some(value),
283 "short" => short = Some(value),
284 "description" => description = Some(value),
285 "when_to_use" => when_to_use = Some(value),
286 _ => {}
287 }
288 }
289
290 Ok(DiskSkillFrontmatter {
291 name: require_disk_field(path, name, "name")?,
292 short: short.unwrap_or_default(),
293 description: require_disk_field(path, description, "description")?,
294 when_to_use,
295 })
296}
297
298fn require_disk_field(
299 path: &Path,
300 value: Option<String>,
301 field: &'static str,
302) -> Result<String, SkillDiscoveryError> {
303 value.ok_or_else(|| SkillDiscoveryError::MissingField {
304 path: path.to_path_buf(),
305 field,
306 })
307}
308
309fn split_frontmatter(source: &'static str) -> (&'static str, &'static str) {
310 let Some((after_open, line_ending)) = split_opening_frontmatter(source) else {
311 panic!("embedded skill source is missing opening frontmatter delimiter");
312 };
313 let Some((frontmatter, body)) = split_closing_frontmatter(after_open, line_ending) else {
314 panic!("embedded skill source is missing closing frontmatter delimiter");
315 };
316 (frontmatter, body)
317}
318
319fn split_frontmatter_parts(source: &str) -> Option<(&str, &str)> {
320 let (after_open, line_ending) = split_opening_frontmatter(source)?;
321 split_closing_frontmatter(after_open, line_ending)
322}
323
/// Strips the opening `---` fence line from the very start of `source`.
///
/// Returns the text after the fence line together with the line ending the
/// fence used (`"\n"` or `"\r\n"`), or `None` when `source` does not begin
/// with a fence line.
fn split_opening_frontmatter(source: &str) -> Option<(&str, &str)> {
    let after_dashes = source.strip_prefix("---")?;
    if let Some(after_open) = after_dashes.strip_prefix('\n') {
        return Some((after_open, "\n"));
    }
    after_dashes
        .strip_prefix("\r\n")
        .map(|after_open| (after_open, "\r\n"))
}
333
/// Finds the closing `---` fence and splits `after_open` around it.
///
/// `after_open` is the text that follows the opening fence line and
/// `line_ending` is the line ending that fence used. The result is
/// `(frontmatter, body)`; neither part contains the fence line itself, and
/// both are sub-slices of `after_open`.
///
/// Three layouts are recognised, in this order:
///
/// 1. the closing fence directly follows the opening one (empty frontmatter),
/// 2. the first fence line found after some frontmatter text,
/// 3. a fence that is the final line of the text with no trailing line ending
///    (empty body).
///
/// Returns `None` when no closing fence is present.
fn split_closing_frontmatter<'a>(
    after_open: &'a str,
    line_ending: &str,
) -> Option<(&'a str, &'a str)> {
    // Empty frontmatter: there is no preceding line ending to anchor on, so
    // this case has to be matched at the very start. Checking it first also
    // keeps a later `---` rule in the body from being taken for the fence.
    if let Some(rest) = after_open.strip_prefix("---") {
        if rest.is_empty() {
            return Some((&after_open[..0], rest));
        }
        if let Some(body) = rest.strip_prefix(line_ending) {
            return Some((&after_open[..0], body));
        }
    }

    let close = format!("{line_ending}---{line_ending}");
    if let Some(close_offset) = after_open.find(close.as_str()) {
        return Some((
            &after_open[..close_offset],
            &after_open[close_offset + close.len()..],
        ));
    }

    // The fence is the last line and the text ends right after it.
    let close_at_end = format!("{line_ending}---");
    after_open
        .strip_suffix(close_at_end.as_str())
        .map(|frontmatter| (frontmatter, &after_open[after_open.len()..]))
}
345
346fn parse_frontmatter(frontmatter: &'static str) -> SkillFrontmatter {
347 let mut name = None;
348 let mut short = None;
349 let mut description = None;
350 let mut when_to_use = None;
351
352 for line in frontmatter.lines() {
353 let Some((key, value)) = line.split_once(':') else {
354 continue;
355 };
356 let value = value.trim();
357 match key {
358 "name" => name = Some(value),
359 "short" => short = Some(value),
360 "description" => description = Some(value),
361 "when_to_use" => when_to_use = Some(value),
362 _ => {}
363 }
364 }
365
366 SkillFrontmatter {
367 name: name.expect("embedded skill frontmatter is missing `name`"),
368 short: short.expect("embedded skill frontmatter is missing `short`"),
369 description: description.expect("embedded skill frontmatter is missing `description`"),
370 when_to_use,
371 }
372}
373
// Unit tests for the embedded corpus, the frontmatter splitter, and on-disk
// skill discovery.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;
    use tempfile::TempDir;

    /// The embedded corpus exposes exactly the expected skill names, in order,
    /// with one skill per entry in `SOURCES`.
    #[test]
    fn lists_expected_initial_corpus() {
        let skills = list_embedded_skills();
        let names: Vec<&str> = skills.iter().map(|skill| skill.name).collect();
        assert_eq!(
            names,
            [
                "harn-agent",
                "harn-diagnostics",
                "harn-language",
                "harn-orchestration",
                "harn-probe",
                "harn-providers",
                "harn-testing",
                "harn-tracing",
                "release-harn",
            ]
        );
        assert_eq!(skills.len(), SOURCES.len());
    }

    /// A skill can be looked up by name and carries its frontmatter and body.
    #[test]
    fn can_fetch_harn_language_skill() {
        let skill = get_embedded_skill("harn-language").expect("harn-language skill is embedded");
        assert_eq!(skill.frontmatter.name, "harn-language");
        assert!(skill.body.contains("Harn language"));
    }

    /// Names are unique, mirror the frontmatter, and bodies are non-empty and
    /// do not start with a frontmatter fence.
    #[test]
    fn skills_have_unique_names_and_body_only_content() {
        let mut names = BTreeSet::new();
        for skill in list_embedded_skills() {
            assert_eq!(skill.name, skill.frontmatter.name);
            assert!(names.insert(skill.name), "duplicate skill {}", skill.name);
            assert!(
                !skill.body.trim().is_empty(),
                "{} body is empty",
                skill.name
            );
            assert!(
                !skill.body.trim_start().starts_with("---"),
                "{} body includes frontmatter",
                skill.name
            );
        }
    }

    /// The embedded listing is already in sorted-by-name order.
    #[test]
    fn skills_are_sorted_by_name() {
        let names: Vec<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        let mut sorted = names.clone();
        sorted.sort_unstable();
        assert_eq!(names, sorted);
    }

    /// `source` keeps the full original text: it still splits into
    /// frontmatter and body, ends with the body, and contains the name line.
    #[test]
    fn source_round_trips_to_frontmatter_and_body() {
        for skill in list_embedded_skills() {
            // `split_frontmatter_parts` requires both fences, not just the
            // opening one named in the failure message.
            assert!(
                split_frontmatter_parts(skill.source).is_some(),
                "{} source missing opening fence",
                skill.name
            );
            assert!(
                skill.source.ends_with(skill.body),
                "{} source must end with the body so dump output is byte-stable",
                skill.name
            );
            // Expects an LF line ending directly after the name value.
            assert!(
                skill.source.contains(&format!("name: {}\n", skill.name)),
                "{} source missing canonical name field",
                skill.name
            );
        }
    }

    /// CRLF line endings are accepted and preserved in the body.
    #[test]
    fn frontmatter_split_accepts_crlf_sources() {
        let source = "---\r\nname: crlf\r\n---\r\n# Body\r\n";
        let (frontmatter, body) = split_frontmatter_parts(source).expect("CRLF frontmatter");
        assert_eq!(frontmatter, "name: crlf");
        assert_eq!(body, "# Body\r\n");
    }

    /// Text with an opening fence but no closing fence is rejected.
    #[test]
    fn frontmatter_split_rejects_missing_closing_fence() {
        assert!(split_frontmatter_parts("---\nname: missing\n# Body\n").is_none());
    }

    /// The combined size of all embedded sources stays at or below 200 KiB.
    #[test]
    fn embedded_corpus_stays_within_binary_budget() {
        let bytes: usize = SOURCES.iter().map(|source| source.len()).sum();
        assert!(
            bytes <= 200 * 1024,
            "embedded corpus is {bytes} bytes, expected <= 200 KiB"
        );
    }

    /// Each skill body mentions its expected focus terms (compared in ASCII
    /// lowercase) and contains no stub or placeholder wording.
    #[test]
    fn skill_bodies_are_focused_and_not_placeholders() {
        let expectations = [
            ("harn-agent", ["agent_loop", "session id", "approval"]),
            ("harn-diagnostics", ["diagnostic", "repair", "conformance"]),
            ("harn-language", ["quickref", "type", "conformance"]),
            ("harn-orchestration", ["agent_loop", "workflow", "host"]),
            ("harn-probe", ["probe", "fact", "evidence"]),
            ("harn-providers", ["llm_call", "provider", "schema"]),
            (
                "harn-testing",
                ["conformance", "deterministic", "mock_time"],
            ),
            ("harn-tracing", ["replay", "receipts", "transcript"]),
            ("release-harn", ["release_ship", "merge queue", "tag"]),
        ];

        for (name, terms) in expectations {
            let skill = get_embedded_skill(name).expect("expected embedded skill");
            let body = skill.body.to_ascii_lowercase();
            assert!(
                !body.contains("embedded stub") && !body.contains("placeholder"),
                "{name} should contain real guidance, not stub wording"
            );
            for term in terms {
                assert!(
                    body.contains(term),
                    "{name} body should mention focused term `{term}`"
                );
            }
        }
    }

    /// Every skill body is between 80 and 300 lines long, inclusive.
    #[test]
    fn skill_bodies_match_split_skill_contract() {
        for skill in list_embedded_skills() {
            let lines = skill.body.lines().count();
            assert!(
                lines >= 80,
                "{} body is {lines} lines, expected at least 80",
                skill.name
            );
            assert!(
                lines <= 300,
                "{} body is {lines} lines, expected at most 300",
                skill.name
            );
        }
    }

    /// Every `[[name]]` reference in a body names an embedded skill.
    #[test]
    fn skill_cross_links_resolve_to_embedded_skills() {
        let names: BTreeSet<&str> = list_embedded_skills()
            .iter()
            .map(|skill| skill.name)
            .collect();
        for skill in list_embedded_skills() {
            for reference in bracketed_skill_references(skill.body) {
                assert!(
                    names.contains(reference),
                    "{} links to unknown embedded skill [[{}]]",
                    skill.name,
                    reference
                );
            }
        }
    }

    /// The diagnostics skill mentions each listed category code in backticks.
    #[test]
    fn diagnostics_skill_mentions_all_code_categories() {
        let skill = get_embedded_skill("harn-diagnostics").expect("diagnostics skill");
        for category in [
            "TYP", "PAR", "NAM", "CAP", "LLM", "ORC", "STD", "PRM", "MOD", "LNT", "FMT", "IMP",
            "OWN", "RCV", "MAT",
        ] {
            assert!(
                skill.body.contains(&format!("`{category}`")),
                "harn-diagnostics should mention diagnostic category `{category}`"
            );
        }
    }

    /// Discovery descends into nested directories and returns skills sorted
    /// by skill name rather than by path.
    #[test]
    fn disk_discovery_finds_recursive_skill_files_sorted_by_name() {
        let temp = TempDir::new().expect("temp dir");
        write_skill(
            &temp.path().join("zeta").join("SKILL.md"),
            "zeta-skill",
            "Zeta",
        );
        write_skill(
            &temp.path().join("nested").join("alpha").join("SKILL.md"),
            "alpha-skill",
            "Alpha",
        );

        let skills = list_disk_skills(temp.path()).expect("discover disk skills");
        let names: Vec<&str> = skills.iter().map(|skill| skill.name.as_str()).collect();
        assert_eq!(names, ["alpha-skill", "zeta-skill"]);
        assert_eq!(skills[0].frontmatter.description, "Alpha description");
        assert!(skills[0].body.contains("Alpha body"));
    }

    /// A root directory that does not exist yields an empty list, not an error.
    #[test]
    fn disk_discovery_treats_missing_root_as_empty() {
        let temp = TempDir::new().expect("temp dir");
        let skills = list_disk_skills(temp.path().join("missing")).expect("discover disk skills");
        assert!(skills.is_empty());
    }

    /// Two files that declare the same skill name make discovery fail.
    #[test]
    fn disk_discovery_rejects_duplicate_skill_names() {
        let temp = TempDir::new().expect("temp dir");
        write_skill(
            &temp.path().join("one").join("SKILL.md"),
            "same-skill",
            "One",
        );
        write_skill(
            &temp.path().join("two").join("SKILL.md"),
            "same-skill",
            "Two",
        );

        let error = list_disk_skills(temp.path()).expect_err("duplicate name should fail");
        assert!(
            error.to_string().contains("duplicate skill `same-skill`"),
            "unexpected error: {error}"
        );
    }

    /// Writes a minimal LF-terminated `SKILL.md` at `path`, creating parent
    /// directories as needed. `label` seeds the short text, description,
    /// heading, and body.
    fn write_skill(path: &Path, name: &str, label: &str) {
        fs::create_dir_all(path.parent().expect("skill parent")).expect("create skill parent");
        fs::write(
            path,
            format!(
                "---\nname: {name}\nshort: {label} short\ndescription: {label} description\n---\n# {label}\n\n{label} body\n"
            ),
        )
        .expect("write SKILL.md");
    }

    /// Returns the text inside every `[[...]]` pair in `body`, in order of
    /// appearance. Scanning stops at an opening `[[` that has no closing `]]`.
    fn bracketed_skill_references(body: &str) -> Vec<&str> {
        let mut references = Vec::new();
        let mut rest = body;
        while let Some(start) = rest.find("[[") {
            rest = &rest[start + 2..];
            let Some(end) = rest.find("]]") else {
                break;
            };
            references.push(&rest[..end]);
            rest = &rest[end + 2..];
        }
        references
    }
}