vaultdb-core 1.6.1

Library engine for vaultdb — markdown-as-database for Obsidian-style vaults
Documentation
//! YAML frontmatter parsing. Internal — the public surface is
//! [`crate::Vault::load_records`] / [`crate::Vault::find_by_name`].

use std::collections::BTreeMap;
use std::path::Path;

use crate::error::{Result, VaultdbError};
use crate::record::{Record, Value};

/// Extract the raw frontmatter string from markdown content.
///
/// The content must begin (optionally after a UTF-8 byte-order mark) with a
/// `---` line, and the frontmatter ends at the first later line that is
/// exactly `---`, terminated by `\n`, `\r\n`, or the end of the content.
///
/// Returns `(frontmatter_text, body_start_byte_offset)` or `None` if
/// the file has no valid frontmatter delimiters.
///
/// * `frontmatter_text` excludes both delimiter lines and the newline that
///   precedes the closing `---`. With CRLF line endings a trailing `\r`
///   remains on the last frontmatter line.
/// * `body_start_byte_offset` is a byte index into the `content` argument
///   exactly as passed in (a leading BOM is counted), so
///   `&content[body_start_byte_offset..]` is always the body.
pub fn extract_frontmatter(content: &str) -> Option<(&str, usize)> {
    // A BOM is skipped for delimiter detection, but its width has to be added
    // back to every offset we hand out so callers can index the original text.
    let text = content.strip_prefix('\u{feff}').unwrap_or(content);
    let bom_len = content.len() - text.len();

    // Must start with "---" followed by a newline.
    let after_opening = text.strip_prefix("---")?;
    let rest = after_opening
        .strip_prefix("\r\n")
        .or_else(|| after_opening.strip_prefix('\n'))?;
    // Offset of `rest` inside `text`: 4 for "---\n", 5 for "---\r\n".
    let search_start = text.len() - rest.len();

    // Empty frontmatter: closing --- immediately after the opening line.
    if rest.starts_with("---\n") {
        return Some(("", bom_len + search_start + 4));
    }
    if rest.starts_with("---\r\n") {
        return Some(("", bom_len + search_start + 5));
    }
    if rest == "---" {
        return Some(("", bom_len + search_start + 3));
    }

    // A closing fence with no trailing newline can only sit at the very end,
    // so test the suffix directly. Searching for the first "\n---" instead
    // would be derailed by any earlier line that merely starts with dashes.
    let at_eof = rest
        .strip_suffix("\n---")
        .map(|head| (head.len(), rest.len()));

    // Candidates are (offset of the newline before the fence, offset just past
    // the fence line), both relative to `rest`; the earliest one wins.
    let (newline_pos, closing_end) = ["\n---\n", "\n---\r\n"]
        .iter()
        .filter_map(|pattern| rest.find(pattern).map(|pos| (pos, pos + pattern.len())))
        .chain(at_eof)
        .min_by_key(|&(pos, _)| pos)?;

    // Include content up to (but not including) the \n before the closing ---.
    Some((&rest[..newline_pos], bom_len + search_start + closing_end))
}

/// Parse a frontmatter YAML string into a field map.
///
/// Blank input and a YAML document that evaluates to null both yield an
/// empty map. Top-level entries whose key is not a YAML string are skipped.
///
/// # Errors
///
/// Returns [`VaultdbError::InvalidFrontmatter`] (with the placeholder file
/// name `"<unknown>"`) when the text is not valid YAML or when the document
/// root is neither a mapping nor null.
pub fn parse_frontmatter(yaml_text: &str) -> Result<BTreeMap<String, Value>> {
    // Single place that builds the error, so both failure paths stay in sync.
    let invalid = |reason: String| VaultdbError::InvalidFrontmatter {
        file: "<unknown>".into(),
        reason,
    };

    if yaml_text.trim().is_empty() {
        return Ok(BTreeMap::new());
    }

    let document: serde_yaml::Value =
        serde_yaml::from_str(yaml_text).map_err(|err| invalid(err.to_string()))?;

    match document {
        serde_yaml::Value::Null => Ok(BTreeMap::new()),
        serde_yaml::Value::Mapping(entries) => Ok(entries
            .into_iter()
            .filter_map(|(key, val)| match key {
                serde_yaml::Value::String(name) => Some((name, yaml_to_field_value(val))),
                _ => None,
            })
            .collect()),
        _ => Err(invalid("frontmatter is not a YAML mapping".into())),
    }
}

/// Render a YAML mapping key as a field name.
///
/// String keys are used verbatim; numeric and boolean keys use their textual
/// form so that distinct keys such as `1` and `2` stay distinct. A tagged key
/// is rendered from its inner value. Null and composite (sequence / mapping)
/// keys fall back to the empty string.
fn yaml_key_to_field_name(key: serde_yaml::Value) -> String {
    match key {
        serde_yaml::Value::String(s) => s,
        serde_yaml::Value::Number(n) => n.to_string(),
        serde_yaml::Value::Bool(b) => b.to_string(),
        serde_yaml::Value::Tagged(tagged) => yaml_key_to_field_name(tagged.value),
        serde_yaml::Value::Null
        | serde_yaml::Value::Sequence(_)
        | serde_yaml::Value::Mapping(_) => String::new(),
    }
}

/// Convert a serde_yaml::Value to our Value enum.
///
/// Scalars map one-to-one. Numbers become `Value::Integer` when they fit in
/// an `i64`, otherwise `Value::Float` when `as_f64` yields a value, otherwise
/// their textual form as `Value::String`. Sequences and mappings are
/// converted recursively, and a tagged value is converted as its inner value
/// (the tag is discarded).
fn yaml_to_field_value(value: serde_yaml::Value) -> Value {
    match value {
        serde_yaml::Value::Null => Value::Null,
        serde_yaml::Value::Bool(b) => Value::Bool(b),
        serde_yaml::Value::Number(n) => {
            if let Some(i) = n.as_i64() {
                Value::Integer(i)
            } else if let Some(f) = n.as_f64() {
                Value::Float(f)
            } else {
                Value::String(n.to_string())
            }
        }
        serde_yaml::Value::String(s) => Value::String(s),
        serde_yaml::Value::Sequence(seq) => {
            Value::List(seq.into_iter().map(yaml_to_field_value).collect())
        }
        serde_yaml::Value::Mapping(map) => {
            // Non-string keys used to collapse to "" and overwrite each other;
            // they are now named after their scalar text.
            let fields: BTreeMap<String, Value> = map
                .into_iter()
                .map(|(k, v)| (yaml_key_to_field_name(k), yaml_to_field_value(v)))
                .collect();
            Value::Map(fields)
        }
        serde_yaml::Value::Tagged(tagged) => yaml_to_field_value(tagged.value),
    }
}

/// Re-issue an `InvalidFrontmatter` error with `path` as its file, keeping
/// the reason it already carried. Every other error variant is returned
/// untouched.
fn attach_path(err: VaultdbError, path: &Path) -> VaultdbError {
    match err {
        VaultdbError::InvalidFrontmatter { reason, .. } => {
            // Whatever file name the error held before is discarded.
            let file = path.display().to_string();
            VaultdbError::InvalidFrontmatter { file, reason }
        }
        unrelated => unrelated,
    }
}

/// Read the file at `path` and build a [`Record`] from its frontmatter.
///
/// The file text itself is not retained (`raw_content` is `None`).
///
/// # Errors
///
/// * the I/O error from reading the file, converted via `?`;
/// * `VaultdbError::NoFrontmatter` when no frontmatter delimiters are found;
/// * `VaultdbError::InvalidFrontmatter`, carrying this file's path, when the
///   frontmatter cannot be parsed into a field map.
pub fn load_record(path: &Path) -> Result<Record> {
    let text = std::fs::read_to_string(path)?;

    let (fm_text, _) = extract_frontmatter(&text)
        .ok_or_else(|| VaultdbError::NoFrontmatter(path.display().to_string()))?;
    let fields = parse_frontmatter(fm_text).map_err(|err| attach_path(err, path))?;

    let record = Record {
        path: path.to_path_buf(),
        fields,
        raw_content: None,
    };
    Ok(record)
}

/// Read the file at `path` and build a [`Record`] that also keeps the full
/// file text in `raw_content` (for write operations).
///
/// # Errors
///
/// * the I/O error from reading the file, converted via `?`;
/// * `VaultdbError::NoFrontmatter` when no frontmatter delimiters are found;
/// * `VaultdbError::InvalidFrontmatter`, carrying this file's path, when the
///   frontmatter cannot be parsed into a field map.
pub fn load_record_with_content(path: &Path) -> Result<Record> {
    let text = std::fs::read_to_string(path)?;

    let (fm_text, _) = extract_frontmatter(&text)
        .ok_or_else(|| VaultdbError::NoFrontmatter(path.display().to_string()))?;
    // `fields` is fully owned, so the borrow of `text` ends here and the
    // string can be moved into the record below.
    let fields = parse_frontmatter(fm_text).map_err(|err| attach_path(err, path))?;

    let record = Record {
        path: path.to_path_buf(),
        fields,
        raw_content: Some(text),
    };
    Ok(record)
}

// Unit tests for frontmatter extraction (delimiter handling) and YAML
// parsing (conversion into `Value`), plus one file-based error-path test.
#[cfg(test)]
mod tests {
    use super::*;

    // Happy path: LF line endings, body follows the closing delimiter.
    #[test]
    fn extract_simple_frontmatter() {
        let content = "---\ntitle: hello\n---\nBody text here.\n";
        let (fm, body_start) = extract_frontmatter(content).unwrap();
        assert_eq!(fm, "title: hello");
        assert_eq!(&content[body_start..], "Body text here.\n");
    }

    // Content that does not open with `---` has no frontmatter.
    #[test]
    fn extract_no_frontmatter() {
        let content = "# Just a heading\n\nSome body.\n";
        assert!(extract_frontmatter(content).is_none());
    }

    // Closing delimiter directly after the opening one yields empty text.
    #[test]
    fn extract_empty_frontmatter() {
        let content = "---\n---\nBody.\n";
        let (fm, _) = extract_frontmatter(content).unwrap();
        assert_eq!(fm, "");
    }

    // A plain task list (no delimiters at all) is not frontmatter.
    #[test]
    fn extract_task_file_no_frontmatter() {
        let content = "## Today's Tasks\n- [ ] Study OS\n";
        assert!(extract_frontmatter(content).is_none());
    }

    // Mixed scalars, an indented list, and keys with no value (parsed as Null).
    #[test]
    fn parse_movie_frontmatter() {
        let yaml = r#"aliases:
tags:
  - type/leaf
  - topic/movies
  - source/video
  - genre/drama
  - genre/war
  - director/sam-mendes
status: to-watch
rating:
director: Sam Mendes
year: 2019
related-to:
"#;
        let fields = parse_frontmatter(yaml).unwrap();

        assert_eq!(
            fields.get("status"),
            Some(&Value::String("to-watch".into()))
        );
        assert_eq!(fields.get("rating"), Some(&Value::Null));
        assert_eq!(
            fields.get("director"),
            Some(&Value::String("Sam Mendes".into()))
        );
        assert_eq!(fields.get("year"), Some(&Value::Integer(2019)));

        // Tags should be a list
        match fields.get("tags") {
            Some(Value::List(tags)) => {
                assert_eq!(tags.len(), 6);
                assert_eq!(tags[0], Value::String("type/leaf".into()));
                assert_eq!(tags[3], Value::String("genre/drama".into()));
            }
            other => panic!("expected List for tags, got {:?}", other),
        }
    }

    // Non-ASCII keys and values, plus a list whose items are nested maps.
    #[test]
    fn parse_chinese_vocab_frontmatter() {
        let yaml = r#"aliases:
- kuài
tags:
- type/concept
- topic/chinese
- source/self-study
pinyin: kuài
anlam: hızlı
tür: sifat
hsk: 1
kaliplar:
- kalip: 快乐
  pinyin: kuàilè
  anlam: mutlu, neşeli
- kalip: 快要
  pinyin: kuàiyào
  anlam: yakında, az kaldı
ornekler:
- cumle: 他跑得很快。
  pinyin: Tā pǎo de hěn kuài.
  anlam: O çok hızlı koşuyor.
related-to:
"#;
        let fields = parse_frontmatter(yaml).unwrap();

        assert_eq!(fields.get("pinyin"), Some(&Value::String("kuài".into())));
        assert_eq!(fields.get("anlam"), Some(&Value::String("hızlı".into())));
        assert_eq!(fields.get("hsk"), Some(&Value::Integer(1)));

        // kaliplar should be a list of maps
        match fields.get("kaliplar") {
            Some(Value::List(items)) => {
                assert_eq!(items.len(), 2);
                match &items[0] {
                    Value::Map(m) => {
                        assert_eq!(m.get("kalip"), Some(&Value::String("快乐".into())));
                        assert_eq!(m.get("pinyin"), Some(&Value::String("kuàilè".into())));
                    }
                    other => panic!("expected Map in kaliplar, got {:?}", other),
                }
            }
            other => panic!("expected List for kaliplar, got {:?}", other),
        }
    }

    // Quoted `[[wiki links]]` must come through as plain strings, not as
    // nested YAML flow sequences.
    #[test]
    fn parse_wiki_links_in_frontmatter() {
        let yaml = r#"aliases:
tags:
  - type/leaf
related-to:
  - "[[2FA Setup - Yubi]]"
  - "[[Watchlist]]"
"#;
        let fields = parse_frontmatter(yaml).unwrap();

        match fields.get("related-to") {
            Some(Value::List(items)) => {
                assert_eq!(items.len(), 2);
                assert_eq!(items[0], Value::String("[[2FA Setup - Yubi]]".into()));
            }
            other => panic!("expected List for related-to, got {:?}", other),
        }
    }

    // Keys with nothing after the colon are present in the map as Null.
    #[test]
    fn parse_null_aliases_and_related_to() {
        let yaml = "aliases:\ntags:\n  - type/concept\nrelated-to:\n";
        let fields = parse_frontmatter(yaml).unwrap();
        assert_eq!(fields.get("aliases"), Some(&Value::Null));
        assert_eq!(fields.get("related-to"), Some(&Value::Null));
    }

    // An empty string parses to an empty field map rather than an error.
    #[test]
    fn parse_empty_frontmatter_string() {
        let fields = parse_frontmatter("").unwrap();
        assert!(fields.is_empty());
    }

    // Whitespace-only input is treated the same as empty input.
    #[test]
    fn parse_only_whitespace_frontmatter() {
        let fields = parse_frontmatter("   \n  \n").unwrap();
        assert!(fields.is_empty());
    }

    // Error path through `load_record`: requires the `tempfile` dev-dependency
    // and touches the filesystem.
    #[test]
    fn invalid_frontmatter_preserves_yaml_parser_reason_and_path() {
        use std::path::PathBuf;
        use tempfile::TempDir;

        // Write a file with broken YAML and load it via load_record. The
        // returned error must (a) have the actual file path, and (b)
        // surface the YAML parser's reason — not the placeholder
        // "failed to parse YAML" string we used to throw away.
        let dir = TempDir::new().unwrap();
        let path: PathBuf = dir.path().join("bad.md");
        std::fs::write(&path, "---\n: : : not valid yaml here\n---\nbody\n").unwrap();

        match load_record(&path) {
            Err(VaultdbError::InvalidFrontmatter { file, reason }) => {
                assert!(file.contains("bad.md"), "expected file path, got {}", file);
                // The reason must NOT be the old placeholder string.
                assert_ne!(reason, "failed to parse YAML");
                // Only non-emptiness is checked; the exact wording belongs to
                // the YAML parser and is deliberately not pinned here.
                assert!(
                    !reason.is_empty(),
                    "expected non-empty parser reason, got empty"
                );
            }
            other => panic!("expected InvalidFrontmatter, got {:?}", other),
        }
    }

    // End-to-end: extract from a full document, then parse what was extracted.
    #[test]
    fn roundtrip_full_file_extraction() {
        let content = "---\naliases:\ntags:\n- type/concept\n- topic/chinese\npinyin: kuài\n---\n\n# 快 (kuài)\n\nBody text.\n";
        let (fm, body_start) = extract_frontmatter(content).unwrap();
        let fields = parse_frontmatter(fm).unwrap();

        assert_eq!(fields.get("pinyin"), Some(&Value::String("kuài".into())));
        assert!(content[body_start..].contains("Body text."));
    }
}