sara_core/parser/
frontmatter.rs

1//! YAML frontmatter extraction from Markdown files.
2
3use std::path::Path;
4
5use crate::error::ParseError;
6
/// Frontmatter split out of a Markdown document.
///
/// Produced by [`extract_frontmatter`]. Implements `PartialEq`/`Eq` so that
/// extraction results can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedFrontmatter {
    /// The raw YAML content between the `---` delimiters (delimiters excluded).
    pub yaml: String,
    /// 1-indexed line number of the closing `---` delimiter.
    pub end_line: usize,
    /// The remaining Markdown content after the closing delimiter.
    pub body: String,
}
17
18/// Extracts YAML frontmatter from Markdown content.
19///
20/// Frontmatter must be at the start of the file, enclosed by `---` delimiters.
21///
22/// # Example
23/// ```text
24/// ---
25/// id: "SOL-001"
26/// type: solution
27/// name: "My Solution"
28/// ---
29/// # Markdown content here
30/// ```
31pub fn extract_frontmatter(content: &str, file: &Path) -> Result<ExtractedFrontmatter, ParseError> {
32    let lines: Vec<&str> = content.lines().collect();
33
34    if lines.is_empty() {
35        return Err(ParseError::MissingFrontmatter {
36            file: file.to_path_buf(),
37        });
38    }
39
40    // Check for opening delimiter
41    if lines[0].trim() != "---" {
42        return Err(ParseError::MissingFrontmatter {
43            file: file.to_path_buf(),
44        });
45    }
46
47    // Find closing delimiter
48    let mut end_idx = None;
49    for (i, line) in lines.iter().enumerate().skip(1) {
50        if line.trim() == "---" {
51            end_idx = Some(i);
52            break;
53        }
54    }
55
56    let end_idx = end_idx.ok_or_else(|| ParseError::InvalidFrontmatter {
57        file: file.to_path_buf(),
58        reason: "Missing closing `---` delimiter".to_string(),
59    })?;
60
61    // Extract YAML content (lines between delimiters)
62    let yaml_lines: Vec<&str> = lines[1..end_idx].to_vec();
63    let yaml = yaml_lines.join("\n");
64
65    // Extract body (everything after closing delimiter)
66    let body_lines: Vec<&str> = if end_idx + 1 < lines.len() {
67        lines[end_idx + 1..].to_vec()
68    } else {
69        Vec::new()
70    };
71    let body = body_lines.join("\n");
72
73    Ok(ExtractedFrontmatter {
74        yaml,
75        end_line: end_idx + 1, // 1-indexed
76        body,
77    })
78}
79
/// Checks if content has frontmatter, i.e. its first line is `---`.
///
/// Uses the same rule as `extract_frontmatter` for the opening delimiter:
/// the first line, with surrounding whitespace trimmed, must be exactly
/// `---`. Content that merely begins with three dashes (`----`, `--- text`)
/// or that has blank lines before the delimiter is not considered to have
/// frontmatter. Only the opening delimiter is inspected; a closing delimiter
/// is not required for this check to return `true`.
pub fn has_frontmatter(content: &str) -> bool {
    content.lines().next().map(str::trim) == Some("---")
}
84
/// Extracts just the body content after the frontmatter (FR-064).
///
/// The body is returned verbatim: everything that follows the line closing
/// the frontmatter, with its original line endings and any trailing newline
/// intact, so that rewriting a file's frontmatter leaves the rest of the file
/// byte-for-byte unchanged.
///
/// If the first line is not `---`, or no closing `---` line is found, the
/// original content is returned unchanged.
pub fn extract_body(content: &str) -> String {
    // `split_inclusive` keeps each line's terminator, so summing the segment
    // lengths yields exact byte offsets into `content`.
    let mut segments = content.split_inclusive('\n');

    let mut body_start = match segments.next() {
        Some(first) if first.trim() == "---" => first.len(),
        // Empty content, or no opening delimiter on the first line.
        _ => return content.to_string(),
    };

    for segment in segments {
        body_start += segment.len();
        if segment.trim() == "---" {
            // `body_start` now points just past the closing delimiter line.
            return content[body_start..].to_string();
        }
    }

    // No closing delimiter found, return original
    content.to_string()
}
112
113/// Updates the YAML frontmatter in content while preserving the body (FR-064).
114///
115/// The new_yaml should NOT include the `---` delimiters.
116/// Returns the updated content with new frontmatter and preserved body.
117pub fn update_frontmatter(content: &str, new_yaml: &str) -> String {
118    let body = extract_body(content);
119
120    // Ensure trailing newline in YAML
121    let yaml_trimmed = new_yaml.trim_end();
122
123    if body.is_empty() {
124        format!("---\n{}\n---\n", yaml_trimmed)
125    } else {
126        format!("---\n{}\n---\n{}", yaml_trimmed, body)
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `extract_frontmatter` against a fixed dummy path.
    fn parse(content: &str) -> Result<ExtractedFrontmatter, ParseError> {
        extract_frontmatter(content, &PathBuf::from("test.md"))
    }

    #[test]
    fn test_extract_frontmatter_valid() {
        let content = "---\nid: \"SOL-001\"\ntype: solution\nname: \"Test\"\n---\n# Body content";

        let result = parse(content).unwrap();
        assert_eq!(
            result.yaml,
            "id: \"SOL-001\"\ntype: solution\nname: \"Test\""
        );
        // The closing delimiter is the fifth line of the input.
        assert_eq!(result.end_line, 5);
        assert_eq!(result.body, "# Body content");
    }

    #[test]
    fn test_extract_frontmatter_no_body() {
        let content = "---\nid: \"SOL-001\"\n---";

        let result = parse(content).unwrap();
        assert_eq!(result.yaml, "id: \"SOL-001\"");
        assert_eq!(result.end_line, 3);
        assert!(result.body.is_empty());
    }

    #[test]
    fn test_extract_frontmatter_missing() {
        let result = parse("# Just markdown");
        assert!(matches!(
            result,
            Err(ParseError::MissingFrontmatter { .. })
        ));
    }

    #[test]
    fn test_extract_frontmatter_unclosed() {
        let content = "---\nid: \"SOL-001\"\n# No closing delimiter";

        let result = parse(content);
        assert!(matches!(
            result,
            Err(ParseError::InvalidFrontmatter { .. })
        ));
    }

    #[test]
    fn test_has_frontmatter() {
        assert!(has_frontmatter("---\nid: test\n---"));
        assert!(has_frontmatter("  ---\nid: test\n---"));
        assert!(!has_frontmatter("# No frontmatter"));
    }

    #[test]
    fn test_extract_frontmatter_empty() {
        let result = parse("");
        assert!(matches!(
            result,
            Err(ParseError::MissingFrontmatter { .. })
        ));
    }

    #[test]
    fn test_extract_body_with_frontmatter() {
        let content =
            "---\nid: \"SOL-001\"\ntype: solution\n---\n# Body Content\n\nSome markdown here.";

        let body = extract_body(content);
        assert_eq!(body, "# Body Content\n\nSome markdown here.");
    }

    #[test]
    fn test_extract_body_no_frontmatter() {
        let content = "# Just markdown\n\nNo frontmatter here.";
        let body = extract_body(content);
        assert_eq!(body, content);
    }

    #[test]
    fn test_extract_body_empty_body() {
        let content = "---\nid: test\n---";
        let body = extract_body(content);
        assert!(body.is_empty());
    }

    #[test]
    fn test_extract_body_unclosed_frontmatter() {
        // Without a closing delimiter the input is handed back untouched.
        let content = "---\nid: test";
        assert_eq!(extract_body(content), content);
    }

    #[test]
    fn test_update_frontmatter() {
        let content = "---\nid: \"SOL-001\"\ntype: solution\nname: \"Old Name\"\n---\n# Body Content\n\nSome markdown here.";
        let new_yaml = "id: \"SOL-001\"\ntype: solution\nname: \"New Name\"";

        let updated = update_frontmatter(content, new_yaml);

        assert_eq!(
            updated,
            "---\nid: \"SOL-001\"\ntype: solution\nname: \"New Name\"\n---\n# Body Content\n\nSome markdown here."
        );
        assert!(!updated.contains("Old Name"));
    }

    #[test]
    fn test_update_frontmatter_no_body() {
        let content = "---\nid: test\n---";
        let new_yaml = "id: test\nname: Updated";

        let updated = update_frontmatter(content, new_yaml);

        assert_eq!(updated, "---\nid: test\nname: Updated\n---\n");
    }

    #[test]
    fn test_update_frontmatter_without_existing_frontmatter() {
        // The whole input is treated as body and placed under the new block.
        let updated = update_frontmatter("# Just markdown", "id: test\n");

        assert_eq!(updated, "---\nid: test\n---\n# Just markdown");
    }
}