// llmwiki_tooling/frontmatter.rs

use crate::error::FrontmatterError;

3/// Parsed YAML frontmatter from a markdown file. Schema-free.
4#[derive(Debug, Clone)]
5pub struct Frontmatter {
6    /// Raw YAML text between the `---` delimiters (excluding delimiters).
7    pub raw_yaml: String,
8    /// Byte range of the entire frontmatter block including `---` delimiters.
9    pub byte_range: std::ops::Range<usize>,
10    /// Parsed YAML value for arbitrary field access.
11    data: serde_yml::Value,
12}
13
14impl Frontmatter {
15    /// Get a frontmatter field by name.
16    pub fn get(&self, field: &str) -> Option<&serde_yml::Value> {
17        self.data.get(field)
18    }
19
20    /// Check if a field exists in the frontmatter.
21    pub fn has_field(&self, field: &str) -> bool {
22        self.get(field).is_some()
23    }
24
25    /// Get a field as a string, if it is one.
26    pub fn get_str(&self, field: &str) -> Option<&str> {
27        self.get(field).and_then(|v| v.as_str())
28    }
29
30    /// Get a field as a string list (handles both YAML sequences and single strings).
31    pub fn get_str_list(&self, field: &str) -> Vec<&str> {
32        match self.get(field) {
33            Some(serde_yml::Value::Sequence(seq)) => {
34                seq.iter().filter_map(|v| v.as_str()).collect()
35            }
36            Some(serde_yml::Value::String(s)) => vec![s.as_str()],
37            _ => Vec::new(),
38        }
39    }
40
41    /// Get the full parsed data.
42    pub fn data(&self) -> &serde_yml::Value {
43        &self.data
44    }
45}
46
/// Split frontmatter from markdown source.
///
/// Returns `(yaml_str, block_byte_range)` when `source` begins with a `---`
/// line and a closing `---` line follows; otherwise returns `None`.
///
/// * `yaml_str` is the slice between the opening `---` and the line break that
///   precedes the closing fence. It therefore starts with the line break that
///   terminates the opening fence (and, for CRLF input, ends with a `\r`).
/// * `block_byte_range` always starts at 0 and ends after the closing fence
///   line, including its line terminator when one is present.
///
/// The closing fence must be a line consisting of exactly `---`, optionally
/// followed by spaces or tabs. Lines that merely *begin* with three dashes
/// (for example `-----` or `---draft`) are not treated as the fence; the
/// search continues past them.
fn split_frontmatter(source: &str) -> Option<(&str, std::ops::Range<usize>)> {
    const FENCE: &str = "---";

    let after_opener = source.strip_prefix(FENCE)?;
    // The opening fence must be immediately followed by a line break.
    if !after_opener.starts_with('\n') && !after_opener.starts_with("\r\n") {
        return None;
    }
    let yaml_start = FENCE.len();

    // Offsets below are relative to `after_opener` unless noted otherwise.
    let mut search_from = 0;
    loop {
        // Position of the `\n` that precedes a candidate closing fence.
        let newline = search_from + after_opener[search_from..].find("\n---")?;
        let fence_end = newline + 1 + FENCE.len();
        let tail = &after_opener[fence_end..];

        // Tolerate trailing horizontal whitespace after the fence.
        let padding = tail.len() - tail.trim_start_matches(|c| c == ' ' || c == '\t').len();
        let after_padding = &tail[padding..];

        // A real fence line ends right here: at EOF or at a line terminator.
        let terminator_len = if after_padding.is_empty() {
            Some(0)
        } else if after_padding.starts_with('\n') {
            Some(1)
        } else if after_padding.starts_with("\r\n") {
            Some(2)
        } else {
            None
        };

        if let Some(eol) = terminator_len {
            let yaml_end = yaml_start + newline;
            let block_end = yaml_start + fence_end + padding + eol;
            return Some((&source[yaml_start..yaml_end], 0..block_end));
        }

        // Not a fence (e.g. `-----`); resume the search after this newline.
        search_from = newline + 1;
    }
}

71/// Parse frontmatter from a markdown source string.
72pub fn parse_frontmatter(source: &str) -> Result<Option<Frontmatter>, FrontmatterError> {
73    let Some((yaml_str, byte_range)) = split_frontmatter(source) else {
74        return Ok(None);
75    };
76    let data: serde_yml::Value =
77        serde_yml::from_str(yaml_str).map_err(|e| FrontmatterError::Yaml {
78            source: e,
79            context: yaml_str.chars().take(80).collect(),
80        })?;
81    Ok(Some(Frontmatter {
82        raw_yaml: yaml_str.to_owned(),
83        byte_range,
84        data,
85    }))
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// A typical block: scalar, flow-sequence and date-like fields.
    #[test]
    fn parses_standard_frontmatter() {
        let source = "---\ntitle: Test Page\ntags: [a, b]\ndate: 2026-01-01\nsources: [raw/papers/test.md]\n---\n\n# Content";
        let fm = parse_frontmatter(source).unwrap().unwrap();
        assert_eq!(fm.get_str("title"), Some("Test Page"));
        assert_eq!(fm.get_str_list("tags"), vec!["a", "b"]);
        assert_eq!(fm.get_str("date"), Some("2026-01-01"));
        assert_eq!(fm.get_str_list("sources"), vec!["raw/papers/test.md"]);
        assert_eq!(fm.byte_range.start, 0);
        assert!(source[fm.byte_range].ends_with('\n'));
    }

    /// A document that does not open with `---` has no frontmatter.
    #[test]
    fn returns_none_without_frontmatter() {
        let source = "# Just a heading\n\nSome content.";
        assert!(parse_frontmatter(source).unwrap().is_none());
    }

    /// Fields that are absent are reported as absent, not as errors.
    #[test]
    fn handles_empty_optional_fields() {
        let source = "---\ntitle: Minimal\n---\n\nContent";
        let fm = parse_frontmatter(source).unwrap().unwrap();
        assert_eq!(fm.get_str("title"), Some("Minimal"));
        assert!(!fm.has_field("tags"));
        assert!(!fm.has_field("date"));
    }

    /// Arbitrary and nested keys are accessible without a schema.
    #[test]
    fn schema_free_arbitrary_fields() {
        let source = "---\ncustom_field: hello\nnested:\n  key: value\n---\n\nContent";
        let fm = parse_frontmatter(source).unwrap().unwrap();
        assert_eq!(fm.get_str("custom_field"), Some("hello"));
        assert!(fm.has_field("nested"));
    }

    /// Non-string scalars keep their YAML type.
    #[test]
    fn autolink_field_check() {
        let source = "---\ntitle: Test\nautolink: false\n---\n\nContent";
        let fm = parse_frontmatter(source).unwrap().unwrap();
        assert_eq!(fm.get("autolink"), Some(&serde_yml::Value::Bool(false)));
    }

    /// `byte_range` spans both fences and the closing line break, while
    /// `raw_yaml` holds only what lies between the fences.
    #[test]
    fn byte_range_and_raw_yaml_cover_expected_slices() {
        let source = "---\ntitle: Minimal\n---\n\nContent";
        let fm = parse_frontmatter(source).unwrap().unwrap();
        assert_eq!(&source[fm.byte_range.clone()], "---\ntitle: Minimal\n---\n");
        assert_eq!(fm.raw_yaml, "\ntitle: Minimal");
    }
}