Skip to main content

vaultdb_core/
frontmatter.rs

1//! YAML frontmatter parsing. Internal — the public surface is
2//! [`crate::Vault::load_records`] / [`crate::Vault::find_by_name`].
3
4use std::collections::BTreeMap;
5use std::path::Path;
6
7use crate::error::{Result, VaultdbError};
8use crate::record::{Record, Value};
9
/// Extract the raw frontmatter string from markdown content.
///
/// A leading UTF-8 byte-order mark is skipped. The frontmatter must open with
/// a `---` line at the very start of the content and be closed by a `---`
/// line terminated by `\n`, `\r\n`, or the end of the input.
///
/// Returns `(frontmatter_text, body_start_byte_offset)` or `None` if the
/// content has no valid frontmatter delimiters. `frontmatter_text` excludes
/// both delimiter lines and the newline that precedes the closing one. The
/// offset indexes into `content` exactly as it was passed in (BOM included),
/// so `&content[body_start_byte_offset..]` is always the body.
pub fn extract_frontmatter(content: &str) -> Option<(&str, usize)> {
    const BOM: &str = "\u{feff}";
    const DELIMITER: &str = "---";

    let text = content.strip_prefix(BOM).unwrap_or(content);

    // Opening delimiter: "---" immediately followed by a line break.
    let after_opening = text.strip_prefix(DELIMITER)?;
    let rest = after_opening
        .strip_prefix("\r\n")
        .or_else(|| after_opening.strip_prefix('\n'))?;

    // `rest` is a suffix of `content`, so this is its absolute byte offset in
    // the caller's string, including any BOM that was skipped above.
    let rest_start = content.len() - rest.len();

    // Empty frontmatter: the closing delimiter directly follows the opening one.
    let empty_closers = ["---\n", "---\r\n"];
    for closer in empty_closers.iter() {
        if rest.starts_with(*closer) {
            return Some(("", rest_start + closer.len()));
        }
    }
    if rest == DELIMITER {
        return Some(("", rest_start + DELIMITER.len()));
    }

    // Earliest closing "---" that sits on its own line. Stored as
    // (offset of the newline before it, offset just past the delimiter line),
    // both relative to `rest`.
    let mut best: Option<(usize, usize)> = None;
    let closers = ["\n---\n", "\n---\r\n"];
    for closer in closers.iter() {
        if let Some(pos) = rest.find(*closer) {
            if best.map_or(true, |(prev, _)| pos < prev) {
                best = Some((pos, pos + closer.len()));
            }
        }
    }

    // Closing delimiter at end of input with no trailing newline. This checks
    // the suffix rather than the first "\n---" occurrence, so an earlier line
    // that merely starts with "---" (e.g. "----") cannot hide it. Any
    // newline-terminated match necessarily starts earlier, so it only applies
    // when nothing else matched.
    if best.is_none() && rest.ends_with("\n---") {
        best = Some((rest.len() - 4, rest.len()));
    }

    let (fm_end, body_rel) = best?;
    Some((&rest[..fm_end], rest_start + body_rel))
}
82
83/// Parse a frontmatter YAML string into a field map.
84pub fn parse_frontmatter(yaml_text: &str) -> Result<BTreeMap<String, Value>> {
85    if yaml_text.trim().is_empty() {
86        return Ok(BTreeMap::new());
87    }
88
89    let value: serde_yaml::Value =
90        serde_yaml::from_str(yaml_text).map_err(|e| VaultdbError::InvalidFrontmatter {
91            file: "<unknown>".into(),
92            reason: e.to_string(),
93        })?;
94
95    match value {
96        serde_yaml::Value::Mapping(map) => {
97            let mut fields = BTreeMap::new();
98            for (k, v) in map {
99                if let serde_yaml::Value::String(key) = k {
100                    fields.insert(key, yaml_to_field_value(v));
101                }
102            }
103            Ok(fields)
104        }
105        serde_yaml::Value::Null => Ok(BTreeMap::new()),
106        _ => Err(VaultdbError::InvalidFrontmatter {
107            file: "<unknown>".into(),
108            reason: "frontmatter is not a YAML mapping".into(),
109        }),
110    }
111}
112
113/// Convert a serde_yaml::Value to our Value enum.
114fn yaml_to_field_value(value: serde_yaml::Value) -> Value {
115    match value {
116        serde_yaml::Value::Null => Value::Null,
117        serde_yaml::Value::Bool(b) => Value::Bool(b),
118        serde_yaml::Value::Number(n) => {
119            if let Some(i) = n.as_i64() {
120                Value::Integer(i)
121            } else if let Some(f) = n.as_f64() {
122                Value::Float(f)
123            } else {
124                Value::String(n.to_string())
125            }
126        }
127        serde_yaml::Value::String(s) => Value::String(s),
128        serde_yaml::Value::Sequence(seq) => {
129            Value::List(seq.into_iter().map(yaml_to_field_value).collect())
130        }
131        serde_yaml::Value::Mapping(map) => {
132            let mut fields = BTreeMap::new();
133            for (k, v) in map {
134                let key = match k {
135                    serde_yaml::Value::String(s) => s,
136                    other => other.as_str().unwrap_or("").to_string(),
137                };
138                fields.insert(key, yaml_to_field_value(v));
139            }
140            Value::Map(fields)
141        }
142        serde_yaml::Value::Tagged(tagged) => yaml_to_field_value(tagged.value),
143    }
144}
145
146/// Replace the placeholder "<unknown>" file in an `InvalidFrontmatter` error
147/// with an actual file path, preserving the parser's reason. Other variants
148/// pass through unchanged.
149fn attach_path(err: VaultdbError, path: &Path) -> VaultdbError {
150    match err {
151        VaultdbError::InvalidFrontmatter { reason, .. } => VaultdbError::InvalidFrontmatter {
152            file: path.display().to_string(),
153            reason,
154        },
155        other => other,
156    }
157}
158
159/// Load a Record from a file path (frontmatter only, no raw content).
160pub fn load_record(path: &Path) -> Result<Record> {
161    let content = std::fs::read_to_string(path)?;
162    let fields = match extract_frontmatter(&content) {
163        Some((fm_text, _)) => parse_frontmatter(fm_text).map_err(|e| attach_path(e, path))?,
164        None => {
165            return Err(VaultdbError::NoFrontmatter(path.display().to_string()));
166        }
167    };
168
169    Ok(Record {
170        path: path.to_path_buf(),
171        fields,
172        raw_content: None,
173    })
174}
175
176/// Load a Record with raw content preserved (for write operations).
177pub fn load_record_with_content(path: &Path) -> Result<Record> {
178    let content = std::fs::read_to_string(path)?;
179    let fields = match extract_frontmatter(&content) {
180        Some((fm_text, _)) => parse_frontmatter(fm_text).map_err(|e| attach_path(e, path))?,
181        None => {
182            return Err(VaultdbError::NoFrontmatter(path.display().to_string()));
183        }
184    };
185
186    Ok(Record {
187        path: path.to_path_buf(),
188        fields,
189        raw_content: Some(content),
190    })
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a string-valued field.
    fn text(s: &str) -> Value {
        Value::String(s.to_owned())
    }

    // ---- extract_frontmatter -------------------------------------------

    #[test]
    fn extract_simple_frontmatter() {
        let doc = "---\ntitle: hello\n---\nBody text here.\n";
        let (yaml, offset) = extract_frontmatter(doc).unwrap();
        assert_eq!(yaml, "title: hello");
        assert_eq!(&doc[offset..], "Body text here.\n");
    }

    #[test]
    fn extract_no_frontmatter() {
        let doc = "# Just a heading\n\nSome body.\n";
        assert!(extract_frontmatter(doc).is_none());
    }

    #[test]
    fn extract_empty_frontmatter() {
        let doc = "---\n---\nBody.\n";
        let (yaml, _) = extract_frontmatter(doc).unwrap();
        assert_eq!(yaml, "");
    }

    #[test]
    fn extract_task_file_no_frontmatter() {
        let doc = "## Today's Tasks\n- [ ] Study OS\n";
        assert!(extract_frontmatter(doc).is_none());
    }

    // ---- parse_frontmatter ---------------------------------------------

    #[test]
    fn parse_movie_frontmatter() {
        let yaml = r#"aliases:
tags:
  - type/leaf
  - topic/movies
  - source/video
  - genre/drama
  - genre/war
  - director/sam-mendes
status: to-watch
rating:
director: Sam Mendes
year: 2019
related-to:
"#;
        let fields = parse_frontmatter(yaml).unwrap();

        assert_eq!(fields.get("status"), Some(&text("to-watch")));
        assert_eq!(fields.get("rating"), Some(&Value::Null));
        assert_eq!(fields.get("director"), Some(&text("Sam Mendes")));
        assert_eq!(fields.get("year"), Some(&Value::Integer(2019)));

        // `tags` must come back as a list with every entry intact.
        let tags = match fields.get("tags") {
            Some(Value::List(tags)) => tags,
            other => panic!("expected List for tags, got {:?}", other),
        };
        assert_eq!(tags.len(), 6);
        assert_eq!(tags[0], text("type/leaf"));
        assert_eq!(tags[3], text("genre/drama"));
    }

    #[test]
    fn parse_chinese_vocab_frontmatter() {
        let yaml = r#"aliases:
- kuài
tags:
- type/concept
- topic/chinese
- source/self-study
pinyin: kuài
anlam: hızlı
tür: sifat
hsk: 1
kaliplar:
- kalip: 快乐
  pinyin: kuàilè
  anlam: mutlu, neşeli
- kalip: 快要
  pinyin: kuàiyào
  anlam: yakında, az kaldı
ornekler:
- cumle: 他跑得很快。
  pinyin: Tā pǎo de hěn kuài.
  anlam: O çok hızlı koşuyor.
related-to:
"#;
        let fields = parse_frontmatter(yaml).unwrap();

        assert_eq!(fields.get("pinyin"), Some(&text("kuài")));
        assert_eq!(fields.get("anlam"), Some(&text("hızlı")));
        assert_eq!(fields.get("hsk"), Some(&Value::Integer(1)));

        // `kaliplar` is a list whose elements are maps.
        let items = match fields.get("kaliplar") {
            Some(Value::List(items)) => items,
            other => panic!("expected List for kaliplar, got {:?}", other),
        };
        assert_eq!(items.len(), 2);

        let first = match &items[0] {
            Value::Map(m) => m,
            other => panic!("expected Map in kaliplar, got {:?}", other),
        };
        assert_eq!(first.get("kalip"), Some(&text("快乐")));
        assert_eq!(first.get("pinyin"), Some(&text("kuàilè")));
    }

    #[test]
    fn parse_wiki_links_in_frontmatter() {
        let yaml = r#"aliases:
tags:
  - type/leaf
related-to:
  - "[[2FA Setup - Yubi]]"
  - "[[Watchlist]]"
"#;
        let fields = parse_frontmatter(yaml).unwrap();

        let links = match fields.get("related-to") {
            Some(Value::List(links)) => links,
            other => panic!("expected List for related-to, got {:?}", other),
        };
        assert_eq!(links.len(), 2);
        assert_eq!(links[0], text("[[2FA Setup - Yubi]]"));
    }

    #[test]
    fn parse_null_aliases_and_related_to() {
        let fields = parse_frontmatter("aliases:\ntags:\n  - type/concept\nrelated-to:\n").unwrap();
        assert_eq!(fields.get("aliases"), Some(&Value::Null));
        assert_eq!(fields.get("related-to"), Some(&Value::Null));
    }

    #[test]
    fn parse_empty_frontmatter_string() {
        assert!(parse_frontmatter("").unwrap().is_empty());
    }

    #[test]
    fn parse_only_whitespace_frontmatter() {
        assert!(parse_frontmatter("   \n  \n").unwrap().is_empty());
    }

    // ---- load_record ---------------------------------------------------

    #[test]
    fn invalid_frontmatter_preserves_yaml_parser_reason_and_path() {
        use std::path::PathBuf;
        use tempfile::TempDir;

        // Loading a file with broken YAML must produce an error that names
        // the real file and carries the YAML parser's own explanation rather
        // than the generic "failed to parse YAML" text used previously.
        let dir = TempDir::new().unwrap();
        let path: PathBuf = dir.path().join("bad.md");
        std::fs::write(&path, "---\n: : : not valid yaml here\n---\nbody\n").unwrap();

        let (file, reason) = match load_record(&path) {
            Err(VaultdbError::InvalidFrontmatter { file, reason }) => (file, reason),
            other => panic!("expected InvalidFrontmatter, got {:?}", other),
        };

        assert!(file.contains("bad.md"), "expected file path, got {}", file);
        // Not the old placeholder...
        assert_ne!(reason, "failed to parse YAML");
        // ...and not blank either; the exact wording is the parser's business.
        assert!(
            !reason.is_empty(),
            "expected non-empty parser reason, got empty"
        );
    }

    // ---- extract + parse together --------------------------------------

    #[test]
    fn roundtrip_full_file_extraction() {
        let doc = "---\naliases:\ntags:\n- type/concept\n- topic/chinese\npinyin: kuài\n---\n\n# 快 (kuài)\n\nBody text.\n";
        let (yaml, offset) = extract_frontmatter(doc).unwrap();
        let fields = parse_frontmatter(yaml).unwrap();

        assert_eq!(fields.get("pinyin"), Some(&text("kuài")));
        assert!(doc[offset..].contains("Body text."));
    }
}