llmwiki_tooling/
frontmatter.rs1use crate::error::FrontmatterError;
2
3#[derive(Debug, Clone)]
5pub struct Frontmatter {
6 pub raw_yaml: String,
8 pub byte_range: std::ops::Range<usize>,
10 data: serde_yml::Value,
12}
13
14impl Frontmatter {
15 pub fn get(&self, field: &str) -> Option<&serde_yml::Value> {
17 self.data.get(field)
18 }
19
20 pub fn has_field(&self, field: &str) -> bool {
22 self.get(field).is_some()
23 }
24
25 pub fn get_str(&self, field: &str) -> Option<&str> {
27 self.get(field).and_then(|v| v.as_str())
28 }
29
30 pub fn get_str_list(&self, field: &str) -> Vec<&str> {
32 match self.get(field) {
33 Some(serde_yml::Value::Sequence(seq)) => {
34 seq.iter().filter_map(|v| v.as_str()).collect()
35 }
36 Some(serde_yml::Value::String(s)) => vec![s.as_str()],
37 _ => Vec::new(),
38 }
39 }
40
41 pub fn data(&self) -> &serde_yml::Value {
43 &self.data
44 }
45}
46
/// Locates the frontmatter block at the very start of `source`.
///
/// The block must open with a line that is exactly `---` (terminated by `\n`
/// or `\r\n`) and is closed by the first later line that consists of `---`
/// optionally followed by spaces or tabs. Lines that merely *start* with three
/// dashes, such as `----` or `---key: value`, are treated as part of the YAML
/// body rather than as the closing fence.
///
/// Returns `None` when `source` does not start with an opening fence or no
/// closing fence line exists. Otherwise returns:
///
/// * the text between the two fences (it begins with the line break that
///   follows the opening fence and excludes the line break preceding the
///   closing fence), and
/// * the byte range of the whole block, from offset 0 through the end of the
///   closing fence line, including its line terminator when present.
fn split_frontmatter(source: &str) -> Option<(&str, std::ops::Range<usize>)> {
    const FENCE: &str = "---";
    const CLOSER: &str = "\n---";

    let after_opener = source.strip_prefix(FENCE)?;
    if !after_opener.starts_with('\n') && !after_opener.starts_with("\r\n") {
        return None;
    }
    // Offsets below are computed relative to `after_opener`; adding
    // `yaml_start` converts them back into offsets within `source`.
    let yaml_start = FENCE.len();

    let mut search_from = 0;
    while let Some(found) = after_opener[search_from..].find(CLOSER) {
        let closing = search_from + found;
        let fence_end = closing + CLOSER.len();
        let tail = &after_opener[fence_end..];

        // Tolerate trailing spaces/tabs after the dashes, but nothing else.
        let padding =
            tail.len() - tail.trim_start_matches(|c: char| matches!(c, ' ' | '\t')).len();
        let after_padding = &tail[padding..];
        let terminator_len = if after_padding.is_empty() {
            Some(0)
        } else if after_padding.starts_with('\n') {
            Some(1)
        } else if after_padding.starts_with("\r\n") {
            Some(2)
        } else {
            // Something like `----` or `---key`: not a fence, keep scanning.
            None
        };

        if let Some(terminator_len) = terminator_len {
            let yaml_end = yaml_start + closing;
            let block_end = yaml_start + fence_end + padding + terminator_len;
            return Some((&source[yaml_start..yaml_end], 0..block_end));
        }
        // The candidate is pure ASCII, so resuming right after it stays on a
        // character boundary and cannot skip an overlapping match.
        search_from = fence_end;
    }
    None
}
70
71pub fn parse_frontmatter(source: &str) -> Result<Option<Frontmatter>, FrontmatterError> {
73 let Some((yaml_str, byte_range)) = split_frontmatter(source) else {
74 return Ok(None);
75 };
76 let data: serde_yml::Value =
77 serde_yml::from_str(yaml_str).map_err(|e| FrontmatterError::Yaml {
78 source: e,
79 context: yaml_str.chars().take(80).collect(),
80 })?;
81 Ok(Some(Frontmatter {
82 raw_yaml: yaml_str.to_owned(),
83 byte_range,
84 data,
85 }))
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source`, panicking if parsing fails or no frontmatter exists.
    fn parsed(source: &str) -> Frontmatter {
        parse_frontmatter(source).unwrap().unwrap()
    }

    #[test]
    fn parses_standard_frontmatter() {
        let source = "---\ntitle: Test Page\ntags: [a, b]\ndate: 2026-01-01\nsources: [raw/papers/test.md]\n---\n\n# Content";
        let frontmatter = parsed(source);

        assert_eq!(frontmatter.get_str("title"), Some("Test Page"));
        assert_eq!(frontmatter.get_str_list("tags"), vec!["a", "b"]);
        assert_eq!(frontmatter.get_str("date"), Some("2026-01-01"));
        assert_eq!(
            frontmatter.get_str_list("sources"),
            vec!["raw/papers/test.md"]
        );

        // The block starts at the top of the file and swallows the line
        // break after the closing fence.
        assert_eq!(frontmatter.byte_range.start, 0);
        let block = &source[frontmatter.byte_range.clone()];
        assert!(block.ends_with('\n'));
    }

    #[test]
    fn returns_none_without_frontmatter() {
        let outcome = parse_frontmatter("# Just a heading\n\nSome content.");
        assert!(outcome.unwrap().is_none());
    }

    #[test]
    fn handles_empty_optional_fields() {
        let frontmatter = parsed("---\ntitle: Minimal\n---\n\nContent");

        assert_eq!(frontmatter.get_str("title"), Some("Minimal"));
        for absent in ["tags", "date"] {
            assert!(!frontmatter.has_field(absent));
        }
    }

    #[test]
    fn schema_free_arbitrary_fields() {
        let frontmatter =
            parsed("---\ncustom_field: hello\nnested:\n key: value\n---\n\nContent");

        assert_eq!(frontmatter.get_str("custom_field"), Some("hello"));
        assert!(frontmatter.has_field("nested"));
    }

    #[test]
    fn autolink_field_check() {
        let frontmatter = parsed("---\ntitle: Test\nautolink: false\n---\n\nContent");

        let expected = serde_yml::Value::Bool(false);
        assert_eq!(frontmatter.get("autolink"), Some(&expected));
    }
}