Skip to main content

mdvault_core/frontmatter/
parser.rs

1//! Frontmatter parsing from markdown documents.
2
3use super::types::{Frontmatter, ParsedDocument, TemplateFrontmatter};
4use thiserror::Error;
5
/// Errors that can occur during frontmatter parsing.
#[derive(Debug, Error)]
pub enum FrontmatterParseError {
    /// The text between the `---` delimiters could not be deserialized as YAML.
    ///
    /// Wraps the underlying `serde_yaml::Error`; the `#[from]` attribute lets
    /// `?` convert a `serde_yaml::Error` into this variant.
    #[error("invalid YAML frontmatter: {0}")]
    InvalidYaml(#[from] serde_yaml::Error),
}
12
13/// Parse frontmatter from markdown content.
14///
15/// Frontmatter is delimited by `---` at the start of the document:
16/// ```markdown
17/// ---
18/// key: value
19/// ---
20/// # Document content
21/// ```
22pub fn parse(content: &str) -> Result<ParsedDocument, FrontmatterParseError> {
23    let trimmed = content.trim_start();
24
25    // Check if document starts with frontmatter delimiter
26    if !trimmed.starts_with("---") {
27        return Ok(ParsedDocument { frontmatter: None, body: content.to_string() });
28    }
29
30    // Find the closing ---
31    let after_first = &trimmed[3..];
32
33    // Skip the newline after opening ---
34    let after_newline = after_first
35        .strip_prefix('\n')
36        .or_else(|| after_first.strip_prefix("\r\n"))
37        .unwrap_or(after_first);
38
39    // Find closing delimiter
40    if let Some(end_pos) = find_closing_delimiter(after_newline) {
41        let yaml_content = &after_newline[..end_pos];
42
43        // Calculate body start (skip closing --- and following newline)
44        let after_closing = &after_newline[end_pos + 3..];
45        let body = after_closing
46            .strip_prefix('\n')
47            .or_else(|| after_closing.strip_prefix("\r\n"))
48            .unwrap_or(after_closing)
49            .to_string();
50
51        // Parse YAML
52        let frontmatter: Frontmatter = if yaml_content.trim().is_empty() {
53            Frontmatter::default()
54        } else {
55            serde_yaml::from_str(yaml_content.trim())?
56        };
57
58        Ok(ParsedDocument { frontmatter: Some(frontmatter), body })
59    } else {
60        // No closing ---, treat as no frontmatter
61        Ok(ParsedDocument { frontmatter: None, body: content.to_string() })
62    }
63}
64
/// Find the byte offset of the closing `---` delimiter within `content`.
///
/// A line counts as the closing delimiter when it equals `---` after trimming
/// surrounding whitespace. The returned offset points at the first `-` of that
/// line, so `&content[..pos]` is everything before the dashes and
/// `&content[pos + 3..]` is everything after them.
///
/// Offsets are exact for both `\n` and `\r\n` line endings.
///
/// Returns `None` when no line matches.
fn find_closing_delimiter(content: &str) -> Option<usize> {
    // Byte offset at which the current line starts.
    let mut line_start = 0;

    // `split_inclusive` keeps each line's terminator in the segment, so adding
    // up segment lengths yields true byte offsets regardless of whether lines
    // end in `\n` or `\r\n`.
    for line in content.split_inclusive('\n') {
        if line.trim() == "---" {
            // Skip any indentation so the offset lands on the dashes themselves.
            let indent = line.len() - line.trim_start().len();
            return Some(line_start + indent);
        }
        line_start += line.len();
    }
    None
}
81
82/// Parse template-specific frontmatter.
83///
84/// Returns the parsed template frontmatter (if present), raw frontmatter text, and the body content.
85///
86/// Unlike regular frontmatter parsing, this function is lenient about YAML parsing errors
87/// because template frontmatter may contain unrendered variables like `{{var}}` that are
88/// not valid YAML until after template rendering.
89///
90/// The raw frontmatter text is returned separately so it can be used for rendering
91/// after variable substitution.
92pub fn parse_template_frontmatter(
93    content: &str,
94) -> Result<(Option<TemplateFrontmatter>, Option<String>, String), FrontmatterParseError>
95{
96    let trimmed = content.trim_start();
97
98    // Check if document starts with frontmatter delimiter
99    if !trimmed.starts_with("---") {
100        return Ok((None, None, content.to_string()));
101    }
102
103    // Find the closing ---
104    let after_first = &trimmed[3..];
105
106    // Skip the newline after opening ---
107    let after_newline = after_first
108        .strip_prefix('\n')
109        .or_else(|| after_first.strip_prefix("\r\n"))
110        .unwrap_or(after_first);
111
112    // Find closing delimiter
113    if let Some(end_pos) = find_closing_delimiter(after_newline) {
114        let yaml_content = &after_newline[..end_pos];
115
116        // Calculate body start (skip closing --- and following newline)
117        let after_closing = &after_newline[end_pos + 3..];
118        let body = after_closing
119            .strip_prefix('\n')
120            .or_else(|| after_closing.strip_prefix("\r\n"))
121            .unwrap_or(after_closing)
122            .to_string();
123
124        // Store raw frontmatter text for rendering
125        let raw_fm = yaml_content.to_string();
126
127        // Try to parse template-specific fields (output, lua)
128        // Be lenient - if parsing fails due to template variables, that's OK
129        // We'll still have the raw text for rendering
130        let template_fm = if yaml_content.trim().is_empty() {
131            Some(TemplateFrontmatter::default())
132        } else {
133            // Try to parse, but ignore errors (template vars may make it invalid YAML)
134            match parse_lenient_template_frontmatter(yaml_content) {
135                Ok(fm) => Some(fm),
136                Err(_) => {
137                    // Parsing failed (likely due to template variables)
138                    // Create a minimal TemplateFrontmatter with just the raw content
139                    Some(TemplateFrontmatter::default())
140                }
141            }
142        };
143
144        Ok((template_fm, Some(raw_fm), body))
145    } else {
146        // No closing ---, treat as no frontmatter
147        Ok((None, None, content.to_string()))
148    }
149}
150
151/// Parse template frontmatter leniently, extracting only template-specific fields.
152///
153/// This attempts to extract `output:` and `lua:` fields via simple line parsing,
154/// which works even if other fields contain template variables that make the YAML invalid.
155///
156/// We intentionally don't try to parse other fields since they may contain template
157/// variables. The raw frontmatter text is stored separately for rendering.
158fn parse_lenient_template_frontmatter(
159    yaml_content: &str,
160) -> Result<TemplateFrontmatter, FrontmatterParseError> {
161    let mut output: Option<String> = None;
162    let mut lua: Option<String> = None;
163    let extra = std::collections::HashMap::new();
164
165    // Try simple line-by-line parsing for top-level string fields
166    for line in yaml_content.lines() {
167        let trimmed = line.trim();
168
169        // Extract output field
170        if let Some(rest) = trimmed.strip_prefix("output:") {
171            let value = rest.trim();
172            // Remove quotes if present
173            let value = value
174                .strip_prefix('"')
175                .and_then(|s| s.strip_suffix('"'))
176                .or_else(|| value.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')))
177                .unwrap_or(value);
178            output = Some(value.to_string());
179        }
180        // Extract lua field
181        else if let Some(rest) = trimmed.strip_prefix("lua:") {
182            let value = rest.trim();
183            let value = value
184                .strip_prefix('"')
185                .and_then(|s| s.strip_suffix('"'))
186                .or_else(|| value.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')))
187                .unwrap_or(value);
188            lua = Some(value.to_string());
189        }
190    }
191
192    // We intentionally don't parse other fields into `extra` because they may contain
193    // template variables. The raw frontmatter text will be used for rendering instead.
194    Ok(TemplateFrontmatter { lua, output, extra })
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// Input without a leading `---` is returned verbatim with no frontmatter.
    #[test]
    fn parse_no_frontmatter() {
        let input = "# Hello\n\nSome content";
        let doc = parse(input).unwrap();
        assert!(doc.frontmatter.is_none());
        assert_eq!(doc.body, input);
    }

    /// A single key/value pair is parsed and the body follows the closing delimiter.
    #[test]
    fn parse_simple_frontmatter() {
        let doc = parse("---\ntitle: Hello\n---\n# Content").unwrap();
        assert!(doc.frontmatter.is_some());
        let fields = doc.frontmatter.unwrap().fields;
        let title = fields.get("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("Hello"));
        assert_eq!(doc.body, "# Content");
    }

    /// Scalars and sequences coexist; only one newline after the delimiter is consumed.
    #[test]
    fn parse_frontmatter_with_multiple_fields() {
        let input = "---\ntitle: Test\ndate: 2024-01-15\ntags:\n  - rust\n  - cli\n---\n\nBody";
        let doc = parse(input).unwrap();
        assert!(doc.frontmatter.is_some());
        let fields = doc.frontmatter.unwrap().fields;
        let title = fields.get("title").and_then(|v| v.as_str());
        assert_eq!(title, Some("Test"));
        assert!(fields.contains_key("tags"));
        assert_eq!(doc.body, "\nBody");
    }

    /// Back-to-back delimiters yield a present-but-empty frontmatter.
    #[test]
    fn parse_empty_frontmatter() {
        let doc = parse("---\n---\n# Content").unwrap();
        assert!(doc.frontmatter.is_some());
        let fields = doc.frontmatter.unwrap().fields;
        assert!(fields.is_empty());
        assert_eq!(doc.body, "# Content");
    }

    /// `output` is extracted even though it contains an unrendered template variable.
    #[test]
    fn parse_template_frontmatter_with_output() {
        let input = "---\noutput: daily/{{date}}.md\ntags: [daily]\n---\n# Daily";
        let (template, raw, body) = parse_template_frontmatter(input).unwrap();

        assert!(template.is_some());
        let template = template.unwrap();
        assert_eq!(template.output, Some("daily/{{date}}.md".to_string()));

        // Extra fields are NOT parsed (they may contain template vars); the
        // raw frontmatter text carries every field for rendering.
        assert!(raw.is_some());
        let raw = raw.unwrap();
        assert!(raw.contains("tags: [daily]"), "raw frontmatter should contain tags");

        assert_eq!(body, "# Daily");
    }
}
254}