oxi-cli 0.6.19

Terminal-based AI coding assistant — multi-provider, streaming-first, extensible
//! YAML frontmatter parsing
//!
//! Parses a simple subset of YAML frontmatter (flat `key: value` lines) from
//! markdown and text files.
//! Frontmatter is enclosed between `---` marker lines at the start of the file.

use std::collections::HashMap;

/// Parsed frontmatter result
///
/// Bundles the parsed key/value fields with the remaining document body.
// NOTE(review): nothing in the visible part of this file constructs this
// struct; `parse_frontmatter` returns a `(fields, body)` tuple instead.
// Confirm whether callers elsewhere still use it.
#[derive(Debug, Clone)]
pub struct Frontmatter {
    /// Parsed fields as JSON values, keyed by the frontmatter key name
    pub fields: HashMap<String, serde_json::Value>,
    /// Content after frontmatter (without frontmatter markers)
    pub body: String,
}

/// Parse frontmatter from content
///
/// Leading whitespace in `content` is skipped. The remaining text must begin
/// with a `---` marker immediately followed by a line break. The frontmatter
/// block then runs up to the next line that consists solely of `---`
/// (trailing whitespace is tolerated). A `---` sequence that appears inside a
/// line (for example within a value) does not terminate the block.
///
/// On success returns the parsed fields together with the body, i.e. the text
/// after the closing marker line with any leading line breaks removed. The
/// body borrows from `content`.
///
/// Returns `None` if there is no opening marker line, if no closing marker
/// line is found, or if the field parser rejects the block.
pub fn parse_frontmatter(content: &str) -> Option<(HashMap<String, serde_json::Value>, &str)> {
    /// Both `\n` and `\r` count as line breaks so CRLF and CR files work.
    fn is_line_break(c: char) -> bool {
        c == '\n' || c == '\r'
    }

    // The opening marker must be exactly `---` followed by a line break.
    let after_open = content.trim_start().strip_prefix("---")?;
    if !after_open.starts_with(is_line_break) {
        return None;
    }
    let rest = after_open.trim_start_matches(is_line_break);

    // Walk the text line by line, tracking the byte offset of each line start,
    // until a line that is nothing but the closing marker is found.
    let mut offset = 0;
    for line in rest.split_inclusive(is_line_break) {
        if line.trim_end() == "---" {
            let fields = parse_yaml_frontmatter(&rest[..offset])?;
            // Skip any blank lines after the closing marker.
            let body = rest[offset + line.len()..].trim_start_matches(is_line_break);
            return Some((fields, body));
        }
        offset += line.len();
    }

    // No closing marker line: treat the content as having no frontmatter.
    None
}

/// Remove a leading frontmatter block from `content`.
///
/// When `parse_frontmatter` succeeds, the body it yields is returned;
/// in every other case the input is handed back untouched.
pub fn strip_frontmatter(content: &str) -> &str {
    parse_frontmatter(content).map_or(content, |(_, body)| body)
}

/// Turn the raw text of a frontmatter block into a key/value map.
///
/// Each line is trimmed, so indentation is ignored. Blank lines and lines
/// starting with `#` are skipped, as are lines without a `:` and lines whose
/// key is empty. The text before the first `:` is the key; the text after it
/// is converted with `parse_yaml_value`. When a key repeats, the last
/// occurrence wins.
///
/// The result is always `Some`; the `Option` is part of the signature only.
fn parse_yaml_frontmatter(text: &str) -> Option<HashMap<String, serde_json::Value>> {
    let fields = text
        .lines()
        .map(str::trim)
        // Drop blanks and comment lines.
        .filter(|entry| !entry.is_empty() && !entry.starts_with('#'))
        // Only `key: value` shaped lines contribute; split at the first colon.
        .filter_map(|entry| entry.split_once(':'))
        .map(|(raw_key, raw_value)| (raw_key.trim(), raw_value.trim()))
        .filter(|(name, _)| !name.is_empty())
        .map(|(name, raw_value)| (name.to_string(), parse_yaml_value(raw_value)))
        .collect();

    Some(fields)
}

/// Parse a YAML value into a serde_json::Value
///
/// The input is trimmed and then classified, in this order:
///
/// 1. empty text → `Null`
/// 2. text wrapped in matching double or single quotes → `String` holding
///    the text between the quotes (no escape processing is performed)
/// 3. `true`/`yes`/`on` and `false`/`no`/`off` (ASCII case-insensitive) → `Bool`
/// 4. `null` (ASCII case-insensitive) or `~` → `Null`
/// 5. text that parses as `i64`, or as a finite `f64` → `Number`
/// 6. `[...]` that is valid JSON → `Array`
/// 7. `{...}` that is valid JSON → `Object`
/// 8. anything else → `String` with the trimmed text
fn parse_yaml_value(value: &str) -> serde_json::Value {
    let value = value.trim();

    // Empty value
    if value.is_empty() {
        return serde_json::Value::Null;
    }

    // Quoted string (single or double quotes). Stripping prefix and suffix
    // separately means a lone quote character cannot match both ends, so it
    // falls through to the plain-string case instead of slicing out of range.
    for quote in &['"', '\''] {
        if let Some(inner) = value
            .strip_prefix(*quote)
            .and_then(|rest| rest.strip_suffix(*quote))
        {
            return serde_json::Value::String(inner.to_string());
        }
    }

    // Boolean and null keywords, compared without allocating a lowercase copy.
    let is_any_of = |words: &[&str]| words.iter().any(|word| value.eq_ignore_ascii_case(word));
    if is_any_of(&["true", "yes", "on"]) {
        return serde_json::Value::Bool(true);
    }
    if is_any_of(&["false", "no", "off"]) {
        return serde_json::Value::Bool(false);
    }
    // `null` and `~` denote an absent value, not a false boolean.
    if value == "~" || value.eq_ignore_ascii_case("null") {
        return serde_json::Value::Null;
    }

    // Number (integer or float)
    if let Ok(n) = value.parse::<i64>() {
        return serde_json::Value::Number(n.into());
    }
    if let Ok(n) = value.parse::<f64>() {
        // `from_f64` rejects NaN and infinities; those fall through to a string.
        if let Some(num) = serde_json::Number::from_f64(n) {
            return serde_json::Value::Number(num);
        }
    }

    // Array/list [item1, item2, ...] — accepted only when it is valid JSON
    if value.starts_with('[') && value.ends_with(']') {
        if let Ok(arr) = serde_json::from_str::<Vec<serde_json::Value>>(value) {
            return serde_json::Value::Array(arr);
        }
    }

    // Object/map {key: value, ...} — accepted only when it is valid JSON
    if value.starts_with('{') && value.ends_with('}') {
        if let Ok(obj) = serde_json::from_str(value) {
            return serde_json::Value::Object(obj);
        }
    }

    // Plain string
    serde_json::Value::String(value.to_string())
}

/// Look up a single frontmatter field by key.
///
/// Returns a reference to the stored value, or `None` when `key` is not
/// present in `fields`.
pub fn get_field<'a>(
    fields: &'a HashMap<String, serde_json::Value>,
    key: &str,
) -> Option<&'a serde_json::Value> {
    HashMap::get(fields, key)
}

/// Check if content has frontmatter
///
/// Leading whitespace is skipped. Returns `true` when the remaining text
/// starts with a `---` marker that is immediately followed by a line break
/// (`\n` or `\r`), which is the same opening-marker rule `parse_frontmatter`
/// applies. Text such as `---title` or `----` is therefore not reported as
/// frontmatter.
///
/// Only the opening marker is inspected; the presence of a closing marker is
/// not verified here.
pub fn has_frontmatter(content: &str) -> bool {
    content
        .trim_start()
        .strip_prefix("---")
        .map_or(false, |rest| rest.starts_with('\n') || rest.starts_with('\r'))
}

// Unit tests for the frontmatter helpers in the parent module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain string fields are parsed and the body follows the closing marker.
    #[test]
    fn test_parse_frontmatter_basic() {
        let content = r#"---
name: Test Skill
description: A test skill
---
# Content"#;

        let (fields, body) = parse_frontmatter(content).unwrap();
        assert_eq!(fields.get("name").unwrap().as_str().unwrap(), "Test Skill");
        assert_eq!(
            fields.get("description").unwrap().as_str().unwrap(),
            "A test skill"
        );
        assert!(body.starts_with("# Content"));
    }

    /// Blank lines inside the block and after the closing marker are ignored.
    #[test]
    fn test_parse_frontmatter_with_blank_lines() {
        let content = r#"---

name: Test

---

Content here"#;

        let (fields, body) = parse_frontmatter(content).unwrap();
        assert_eq!(fields.get("name").unwrap().as_str().unwrap(), "Test");
        assert!(body.starts_with("Content"));
    }

    /// Content without an opening marker yields `None`.
    #[test]
    fn test_parse_frontmatter_no_frontmatter() {
        let content = "# Just content\nNo frontmatter here";
        assert!(parse_frontmatter(content).is_none());
    }

    /// A key with nothing after the colon maps to JSON null.
    #[test]
    fn test_parse_frontmatter_empty_value() {
        let content = r#"---
empty_field:
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(fields.get("empty_field").unwrap(), &serde_json::Value::Null);
    }

    /// `true` and `false` become JSON booleans.
    #[test]
    fn test_parse_frontmatter_boolean() {
        let content = r#"---
enabled: true
disabled: false
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(
            fields.get("enabled").unwrap(),
            &serde_json::Value::Bool(true)
        );
        assert_eq!(
            fields.get("disabled").unwrap(),
            &serde_json::Value::Bool(false)
        );
    }

    /// An integer literal becomes a JSON number readable as `i64`.
    #[test]
    fn test_parse_frontmatter_integer() {
        let content = r#"---
count: 42
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(fields.get("count").unwrap().as_i64().unwrap(), 42);
    }

    /// A decimal literal becomes a JSON number readable as `f64`.
    #[test]
    fn test_parse_frontmatter_float() {
        let content = r#"---
ratio: 3.14
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        // Compare with a tolerance rather than exact float equality.
        assert!((fields.get("ratio").unwrap().as_f64().unwrap() - 3.14).abs() < 0.001);
    }

    /// Double quotes are stripped from a quoted string value.
    #[test]
    fn test_parse_frontmatter_quoted_string() {
        let content = r#"---
title: "Hello World"
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(
            fields.get("title").unwrap().as_str().unwrap(),
            "Hello World"
        );
    }

    /// Single quotes are stripped from a quoted string value.
    #[test]
    fn test_parse_frontmatter_single_quoted() {
        let content = r#"---
title: 'Single Quote'
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(
            fields.get("title").unwrap().as_str().unwrap(),
            "Single Quote"
        );
    }

    /// A JSON-style inline list becomes an array with the expected length.
    #[test]
    fn test_parse_frontmatter_list() {
        let content = r#"---
tags: ["one", "two", "three"]
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        let arr = fields.get("tags").unwrap().as_array().unwrap();
        assert_eq!(arr.len(), 3);
    }

    /// Same input as `test_parse_frontmatter_list`; checks the first element's value.
    #[test]
    fn test_parse_frontmatter_inline_list() {
        let content = r#"---
tags: ["one", "two", "three"]
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        let arr = fields.get("tags").unwrap().as_array().unwrap();
        assert_eq!(arr[0].as_str().unwrap(), "one");
    }

    /// Lines starting with `#` are skipped and never stored as fields.
    #[test]
    fn test_parse_frontmatter_comments() {
        let content = r#"---
# This is a comment
name: Test
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(fields.get("name").unwrap().as_str().unwrap(), "Test");
        // Should not have a comment field
        assert!(!fields.contains_key("# This is a comment"));
    }

    /// Stripping removes the frontmatter block and keeps the body.
    #[test]
    fn test_strip_frontmatter() {
        let content = r#"---
name: Test
---
# Content"#;

        let body = strip_frontmatter(content);
        assert!(!body.contains("name:"));
        assert!(body.starts_with("# Content"));
    }

    /// Content without frontmatter is returned unchanged.
    #[test]
    fn test_strip_frontmatter_no_frontmatter() {
        let content = "# Just content";
        assert_eq!(strip_frontmatter(content), content);
    }

    /// Detection succeeds for content opening with a marker line and fails otherwise.
    #[test]
    fn test_has_frontmatter() {
        assert!(has_frontmatter("---\n---\ncontent"));
        assert!(has_frontmatter("---\nname: test\n---\ncontent"));
        assert!(!has_frontmatter("# Just content"));
        assert!(!has_frontmatter("No frontmatter"));
    }

    /// `get_field` returns present keys and `None` for missing ones.
    #[test]
    fn test_get_field() {
        let content = r#"---
name: Test
---
Content"#;

        let (fields, _) = parse_frontmatter(content).unwrap();
        assert_eq!(
            get_field(&fields, "name").unwrap().as_str().unwrap(),
            "Test"
        );
        assert!(get_field(&fields, "nonexistent").is_none());
    }

    /// Mixed field types in one block: plain string, quoted string, boolean, list.
    #[test]
    fn test_parse_frontmatter_complex() {
        let content = r#"---
name: My Skill
description: A complex skill with various field types
version: "1.0"
enabled: true
tags: ["rust", "coding"]
---
# Skill Content"#;

        let (fields, body) = parse_frontmatter(content).unwrap();

        assert_eq!(fields.get("name").unwrap().as_str().unwrap(), "My Skill");
        // The quoted "1.0" stays a string rather than becoming a number.
        assert_eq!(fields.get("version").unwrap().as_str().unwrap(), "1.0");
        assert_eq!(fields.get("enabled").unwrap().as_bool().unwrap(), true);

        // Check body starts with content
        assert!(body.contains("# Skill Content"));
    }
}