Skip to main content

mur_common/skill/
parser.rs

//! Dual-format parser. Canonical YAML is the source of truth; markdown
//! frontmatter is the human-authoring surface that round-trips via
//! `parse_markdown()` / `serialize_markdown()`.
4
5use super::manifest::SkillManifest;
6use std::fmt;
7
/// Errors produced while parsing or serialising skill manifests.
#[derive(Debug)]
pub enum ParseError {
    /// The YAML layer (`serde_yaml_ng`) reported an error.
    Yaml(serde_yaml_ng::Error),
    /// Markdown input does not begin with a `---` fence.
    MissingFrontmatter,
    /// Frontmatter was located but is unusable; the payload explains why.
    MalformedFrontmatter(String),
    /// A legacy markdown file could not be upgraded; the payload explains why.
    LegacyMarkdown(String),
}
15
16impl fmt::Display for ParseError {
17    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
18        match self {
19            ParseError::Yaml(e) => write!(f, "yaml parse: {e}"),
20            ParseError::MissingFrontmatter => write!(f, "missing `---` frontmatter delimiters"),
21            ParseError::MalformedFrontmatter(s) => write!(f, "malformed frontmatter: {s}"),
22            ParseError::LegacyMarkdown(s) => write!(f, "legacy markdown: {s}"),
23        }
24    }
25}
26
// Relies entirely on the trait's default methods: no `source()` override, so
// the wrapped YAML error is surfaced only through the `Display` text.
impl std::error::Error for ParseError {}
28
29impl From<serde_yaml_ng::Error> for ParseError {
30    fn from(e: serde_yaml_ng::Error) -> Self {
31        ParseError::Yaml(e)
32    }
33}
34
35/// Parse canonical `skill.yaml`.
36pub fn parse_canonical(yaml: &str) -> Result<SkillManifest, ParseError> {
37    let m: SkillManifest = serde_yaml_ng::from_str(yaml)?;
38    Ok(m)
39}
40
41/// Serialise a `SkillManifest` to canonical YAML. Deterministic field order
42/// matches the struct definition.
43pub fn serialize_canonical(m: &SkillManifest) -> Result<String, ParseError> {
44    Ok(serde_yaml_ng::to_string(m)?)
45}
46
47/// Parse markdown-frontmatter skill source. Frontmatter (between two `---`
48/// fences) is YAML; the body becomes `content.abstract` plus — if it has a
49/// `## Steps` heading — a synthesised `content.procedure`, or otherwise a
50/// `content.context`. This is the human-authoring surface; canonical YAML
51/// remains source of truth on disk.
52pub fn parse_markdown(input: &str) -> Result<SkillManifest, ParseError> {
53    let (frontmatter, body) = split_frontmatter(input)?;
54    let mut value: serde_yaml_ng::Value = serde_yaml_ng::from_str(frontmatter)?;
55    inject_content_from_body(&mut value, body)?;
56    let m: SkillManifest = serde_yaml_ng::from_value(value)?;
57    Ok(m)
58}
59
60fn split_frontmatter(input: &str) -> Result<(&str, &str), ParseError> {
61    let trimmed = input.trim_start_matches('\u{feff}');
62    let trimmed = trimmed
63        .strip_prefix("---")
64        .ok_or(ParseError::MissingFrontmatter)?;
65    let trimmed = trimmed.strip_prefix('\n').unwrap_or(trimmed);
66    let end = trimmed
67        .find("\n---")
68        .ok_or_else(|| ParseError::MalformedFrontmatter("missing closing `---`".into()))?;
69    let frontmatter = &trimmed[..end];
70    let after = &trimmed[end + 4..];
71    let body = after.strip_prefix('\n').unwrap_or(after);
72    Ok((frontmatter, body))
73}
74
75fn inject_content_from_body(
76    value: &mut serde_yaml_ng::Value,
77    body: &str,
78) -> Result<(), ParseError> {
79    use serde_yaml_ng::Value;
80
81    if let Some(map) = value.as_mapping_mut() {
82        if map.contains_key(Value::String("content".into())) {
83            return Ok(()); // frontmatter already supplied content
84        }
85        let abstract_text = body
86            .lines()
87            .take(3)
88            .collect::<Vec<_>>()
89            .join("\n")
90            .trim()
91            .to_string();
92        let mut content = serde_yaml_ng::Mapping::new();
93        content.insert(
94            Value::String("abstract".into()),
95            Value::String(abstract_text),
96        );
97
98        if body.contains("## Steps") {
99            let proc = build_procedure_from_steps(body);
100            content.insert(Value::String("procedure".into()), proc);
101        } else {
102            content.insert(
103                Value::String("context".into()),
104                Value::String(body.trim().to_string()),
105            );
106        }
107        map.insert(Value::String("content".into()), Value::Mapping(content));
108    } else {
109        return Err(ParseError::MalformedFrontmatter(
110            "frontmatter is not a mapping".into(),
111        ));
112    }
113    Ok(())
114}
115
116fn build_procedure_from_steps(body: &str) -> serde_yaml_ng::Value {
117    use serde_yaml_ng::{Mapping, Value};
118    let mut steps = Vec::new();
119    let mut in_steps = false;
120    for line in body.lines() {
121        if line.trim_start().starts_with("## Steps") {
122            in_steps = true;
123            continue;
124        }
125        if in_steps && line.starts_with("## ") {
126            break;
127        }
128        if in_steps {
129            let trimmed = line.trim();
130            if let Some(rest) = trimmed.strip_prefix("- ").or_else(|| {
131                trimmed.find(". ").and_then(|i| {
132                    let (n, r) = trimmed.split_at(i);
133                    n.chars().all(|c| c.is_ascii_digit()).then(|| &r[2..])
134                })
135            }) {
136                let mut step = Mapping::new();
137                step.insert(
138                    Value::String("description".into()),
139                    Value::String(rest.to_string()),
140                );
141                steps.push(Value::Mapping(step));
142            }
143        }
144    }
145    let mut procedure = Mapping::new();
146    procedure.insert(Value::String("steps".into()), Value::Sequence(steps));
147    Value::Mapping(procedure)
148}
149
150/// Render a `SkillManifest` back to markdown frontmatter form. The body is
151/// derived from the populated content mode: `context` → context body,
152/// `procedure` → "## Steps" list, `command` → fenced block.
153pub fn serialize_markdown(m: &SkillManifest) -> Result<String, ParseError> {
154    let frontmatter = serialize_canonical_frontmatter(m)?;
155    let mut out = String::new();
156    out.push_str("---\n");
157    out.push_str(&frontmatter);
158    out.push_str("---\n\n");
159    out.push_str(&format!("# {}\n\n", m.name));
160    out.push_str(&m.content.r#abstract);
161    out.push('\n');
162    if let Some(ctx) = &m.content.context {
163        out.push('\n');
164        out.push_str(ctx);
165        out.push('\n');
166    } else if let Some(proc) = &m.content.procedure {
167        out.push_str("\n## Steps\n");
168        for (i, s) in proc.steps.iter().enumerate() {
169            out.push_str(&format!("{}. {}\n", i + 1, s.description));
170        }
171    } else if let Some(cmd) = &m.content.command {
172        out.push_str("\n## Command\n\n```\n");
173        out.push_str(cmd);
174        out.push_str("\n```\n");
175    }
176    Ok(out)
177}
178
179/// Frontmatter is the manifest serialised *without* the `content` field —
180/// the content moves into the markdown body.
181fn serialize_canonical_frontmatter(m: &SkillManifest) -> Result<String, ParseError> {
182    let mut value = serde_yaml_ng::to_value(m)?;
183    if let Some(map) = value.as_mapping_mut() {
184        map.remove(serde_yaml_ng::Value::String("content".into()));
185    }
186    Ok(serde_yaml_ng::to_string(&value)?)
187}
188
189/// Parse a legacy skill file — pre-M0 markdown with minimal frontmatter
190/// (just `name` + `description`). Fills in defaults so the file can be
191/// loaded by the new pipeline without rewriting it.
192pub fn parse_legacy_markdown(input: &str) -> Result<SkillManifest, ParseError> {
193    let (frontmatter, body) = split_frontmatter(input)?;
194    let mut value: serde_yaml_ng::Value = serde_yaml_ng::from_str(frontmatter)?;
195    let map = value
196        .as_mapping_mut()
197        .ok_or_else(|| ParseError::LegacyMarkdown("frontmatter is not a mapping".into()))?;
198    use serde_yaml_ng::Value;
199    let key = |k: &str| Value::String(k.into());
200    map.entry(key("version"))
201        .or_insert(Value::String("0.0.0".into()));
202    map.entry(key("publisher"))
203        .or_insert(Value::String("human:mur".into()));
204    map.entry(key("category"))
205        .or_insert(Value::String("context".into()));
206    inject_content_from_body(&mut value, body)?;
207    let m: SkillManifest = serde_yaml_ng::from_value(value)?;
208    Ok(m)
209}
210
211/// Convenience: parse canonical YAML, serialise back to markdown.
212/// Used by `ensure_mur_skill` so built-in yaml skills produce
213/// AI-tool-consumable markdown at `SKILL.md`.
214pub fn yaml_to_markdown(yaml: &str) -> Result<String, ParseError> {
215    let m = parse_canonical(yaml)?;
216    serialize_markdown(&m)
217}
218
// Unit tests for both surfaces (canonical YAML and markdown frontmatter) and
// for conversions between them.
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal canonical manifest in context mode, shared by several tests.
    const SAMPLE: &str = r#"
name: demo-skill
version: 0.1.0
publisher: human:test
description: Demo
category: context
content:
  abstract: hello
  context: |
    body
"#;

    // Canonical YAML deserializes; the `|` block scalar keeps its trailing newline.
    #[test]
    fn parses_canonical_yaml() {
        let m = parse_canonical(SAMPLE).unwrap();
        assert_eq!(m.name, "demo-skill");
        assert_eq!(m.content.context.as_deref(), Some("body\n"));
    }

    // YAML -> manifest -> YAML -> manifest preserves name and context.
    #[test]
    fn serialize_then_reparse_is_identity() {
        let m = parse_canonical(SAMPLE).unwrap();
        let yaml = serialize_canonical(&m).unwrap();
        let m2 = parse_canonical(&yaml).unwrap();
        assert_eq!(m.name, m2.name);
        assert_eq!(m.content.context, m2.content.context);
    }

    // Input that is not a valid manifest document is rejected, not defaulted.
    #[test]
    fn rejects_non_yaml_input() {
        let r = parse_canonical("this is not yaml ::: {{");
        assert!(r.is_err());
    }

    // A body without a `## Steps` heading lands in `content.context`.
    #[test]
    fn parses_markdown_frontmatter_to_context_mode() {
        let md = r#"---
name: simple-md
version: 1.0.0
publisher: human:test
description: A markdown skill
category: context
---

# simple-md

Some context content here.
"#;
        let m = parse_markdown(md).unwrap();
        assert_eq!(m.name, "simple-md");
        assert!(m.content.context.is_some());
        assert!(m.content.procedure.is_none());
    }

    // A `## Steps` section becomes a procedure; numbered and `- ` items both count.
    #[test]
    fn parses_markdown_with_steps_to_workflow_mode() {
        let md = r#"---
name: with-steps
version: 1.0.0
publisher: human:test
description: A workflow
category: workflow
---

# with-steps

Does a thing.

## Steps
1. Navigate somewhere
2. Click the button
- Final extraction step
"#;
        let m = parse_markdown(md).unwrap();
        let proc = m.content.procedure.expect("procedure populated");
        assert_eq!(proc.steps.len(), 3);
        assert_eq!(proc.steps[0].description, "Navigate somewhere");
    }

    // No opening `---` fence yields the dedicated error variant.
    #[test]
    fn markdown_without_frontmatter_fails() {
        let md = "# just a heading\n";
        assert!(matches!(
            parse_markdown(md),
            Err(ParseError::MissingFrontmatter)
        ));
    }

    // Context-mode manifest survives YAML -> markdown -> manifest (name and
    // presence of context only; exact text is not compared).
    #[test]
    fn canonical_to_markdown_roundtrips_context() {
        let m = parse_canonical(SAMPLE).unwrap();
        let md = serialize_markdown(&m).unwrap();
        let m2 = parse_markdown(&md).unwrap();
        assert_eq!(m.name, m2.name);
        assert_eq!(m.content.context.is_some(), m2.content.context.is_some());
    }

    // Procedure steps survive YAML -> markdown -> manifest with order intact.
    #[test]
    fn canonical_to_markdown_roundtrips_workflow() {
        let yaml = r#"
name: w
version: 1.0.0
publisher: human:test
description: d
category: workflow
content:
  abstract: a
  procedure:
    steps:
      - description: First
      - description: Second
"#;
        let m = parse_canonical(yaml).unwrap();
        let md = serialize_markdown(&m).unwrap();
        let m2 = parse_markdown(&md).unwrap();
        let p2 = m2.content.procedure.unwrap();
        assert_eq!(p2.steps.len(), 2);
        assert_eq!(p2.steps[0].description, "First");
    }

    // Legacy files with only name + description get default version/publisher.
    #[test]
    fn legacy_minimal_frontmatter_loads() {
        let md =
            "---\nname: mur-context\ndescription: Background context\n---\n\n# MUR\n\nSome body.\n";
        let m = parse_legacy_markdown(md).unwrap();
        assert_eq!(m.name, "mur-context");
        assert_eq!(m.publisher, "human:mur");
        assert_eq!(m.version, "0.0.0");
        assert!(m.content.context.is_some());
    }

    // The YAML -> markdown shortcut emits fence, heading, abstract and body.
    #[test]
    fn yaml_to_markdown_yields_consumable_md() {
        let md = yaml_to_markdown(SAMPLE).unwrap();
        assert!(md.starts_with("---"), "should start with frontmatter fence");
        assert!(md.contains("# demo-skill"), "should contain heading");
        assert!(md.contains("hello"), "should contain abstract");
        assert!(md.contains("body"), "should contain context body");
    }
}