Skip to main content

docgen_core/
frontmatter.rs

1use serde_yml::Value;
2
3/// Result of splitting frontmatter from a markdown document.
4#[derive(Debug, Clone, PartialEq)]
5pub struct Parsed {
6    pub frontmatter: Value,
7    pub body: String,
8}
9
10/// Split an optional leading `---`-delimited YAML frontmatter block from the body.
11/// On malformed YAML, frontmatter is `Value::Null` and the whole input is the body.
12/// Handles both LF and CRLF line endings, empty frontmatter blocks, and requires the
13/// closing fence to be a line containing exactly `---` (ignoring trailing whitespace).
14pub fn parse_frontmatter(raw: &str) -> Parsed {
15    let input = raw.strip_prefix('\u{feff}').unwrap_or(raw);
16
17    // Match an opening `---` fence followed by a line break (LF or CRLF).
18    let after_open = input
19        .strip_prefix("---\n")
20        .or_else(|| input.strip_prefix("---\r\n"));
21
22    if let Some(rest) = after_open {
23        // Walk line by line looking for a closing fence that is exactly `---`.
24        let mut offset = 0usize;
25        for line in rest.split_inclusive('\n') {
26            let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
27            if trimmed.trim_end() == "---" {
28                let yaml = &rest[..offset];
29                let after = &rest[offset + line.len()..];
30                let frontmatter = serde_yml::from_str(yaml).unwrap_or(Value::Null);
31                return Parsed {
32                    frontmatter,
33                    body: after.to_string(),
34                };
35            }
36            offset += line.len();
37        }
38        // Also handle a closing fence with no trailing newline (EOF).
39        let last = &rest[offset..];
40        if last.trim_end_matches('\r').trim_end() == "---" {
41            let yaml = &rest[..offset];
42            let frontmatter = serde_yml::from_str(yaml).unwrap_or(Value::Null);
43            return Parsed {
44                frontmatter,
45                body: String::new(),
46            };
47        }
48    }
49
50    Parsed {
51        frontmatter: Value::Null,
52        body: input.to_string(),
53    }
54}
55
/// Unit tests for [`parse_frontmatter`], covering line endings, fence recognition,
/// malformed YAML, unterminated blocks and byte-order-mark handling.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_yaml_frontmatter_and_body() {
        let raw = "---\ntitle: Hello\n---\n# Body\n";
        let parsed = parse_frontmatter(raw);
        assert_eq!(parsed.frontmatter["title"].as_str(), Some("Hello"));
        assert_eq!(parsed.body, "# Body\n");
    }

    #[test]
    fn no_frontmatter_returns_null_and_full_body() {
        let raw = "# Just body\n";
        let parsed = parse_frontmatter(raw);
        assert!(parsed.frontmatter.is_null());
        assert_eq!(parsed.body, "# Just body\n");
    }

    #[test]
    fn parses_crlf_frontmatter() {
        let raw = "---\r\ntitle: X\r\n---\r\nbody\r\n";
        let parsed = parse_frontmatter(raw);
        assert_eq!(parsed.frontmatter["title"].as_str(), Some("X"));
        assert_eq!(parsed.body, "body\r\n");
    }

    #[test]
    fn parses_empty_frontmatter_block() {
        let raw = "---\n---\nbody\n";
        let parsed = parse_frontmatter(raw);
        assert!(parsed.frontmatter.is_null());
        assert_eq!(parsed.body, "body\n");
    }

    #[test]
    fn longer_dash_run_is_not_a_closing_fence() {
        // A `----` line is not a bare `---` fence; it must not be treated as the close,
        // and no stray dash should leak into the body.
        let raw = "---\ntitle: X\n----\nbody\n";
        let parsed = parse_frontmatter(raw);
        // No valid closing fence -> whole input is body, frontmatter null.
        assert!(parsed.frontmatter.is_null());
        assert_eq!(parsed.body, raw);
    }

    #[test]
    fn malformed_yaml_falls_back_to_null_with_body() {
        let raw = "---\n: not: valid: yaml\n---\nbody\n";
        let parsed = parse_frontmatter(raw);
        assert!(parsed.frontmatter.is_null());
        assert_eq!(parsed.body, "body\n");
    }

    #[test]
    fn unterminated_block_returns_full_input_as_body() {
        let raw = "---\ntitle: X\n";
        let parsed = parse_frontmatter(raw);
        assert!(parsed.frontmatter.is_null());
        assert_eq!(parsed.body, raw);
    }

    #[test]
    fn strips_leading_bom() {
        let raw = "\u{feff}---\ntitle: X\n---\nbody\n";
        let parsed = parse_frontmatter(raw);
        assert_eq!(parsed.frontmatter["title"].as_str(), Some("X"));
        assert_eq!(parsed.body, "body\n");
    }

    #[test]
    fn closing_fence_at_eof_without_newline() {
        // The closing fence is the last line and has no line terminator.
        let raw = "---\ntitle: X\n---";
        let parsed = parse_frontmatter(raw);
        assert_eq!(parsed.frontmatter["title"].as_str(), Some("X"));
        assert_eq!(parsed.body, "");
    }

    #[test]
    fn closing_fence_tolerates_trailing_whitespace() {
        // Trailing spaces/tabs after the closing `---` are ignored.
        let raw = "---\ntitle: X\n---  \t\nbody\n";
        let parsed = parse_frontmatter(raw);
        assert_eq!(parsed.frontmatter["title"].as_str(), Some("X"));
        assert_eq!(parsed.body, "body\n");
    }

    #[test]
    fn strips_bom_even_without_frontmatter() {
        // The byte-order mark is removed from the body even when nothing is parsed.
        let raw = "\u{feff}# Just body\n";
        let parsed = parse_frontmatter(raw);
        assert!(parsed.frontmatter.is_null());
        assert_eq!(parsed.body, "# Just body\n");
    }
}