rulemorph 0.3.4

YAML-based declarative data transformation engine for CSV/JSON to JSON
Documentation
use serde_json::{Map, Number as JsonNumber, Value as JsonValue};
use serde_yaml::Value as YamlValue;

use crate::error::{TransformError, TransformErrorKind};
use crate::model::MarkdownFrontmatter;
use crate::normalization::yaml::aliases::enforce_yaml_alias_limit;
use crate::serde_guard::parse_yaml_value_strict_with_limits;

use super::super::NormalizationOptions;

/// Result of separating a markdown document into its frontmatter and body.
///
/// The body is a borrowed slice of the original input, so no text is copied.
pub(super) struct SplitMarkdown<'a> {
    /// Parsed frontmatter fields; empty when no frontmatter was extracted.
    pub(super) frontmatter: Map<String, JsonValue>,
    /// Remaining document text: the whole input when no frontmatter was
    /// extracted, otherwise everything after the closing delimiter line.
    pub(super) body: &'a str,
}

/// Splits `input` into frontmatter and body according to `mode`.
///
/// * `None` never looks for frontmatter and returns the input as the body.
/// * `Yaml` / `Toml` look for a `---` / `+++` delimited block respectively.
/// * `Auto` tries the YAML delimiters first, then the TOML delimiters, and
///   falls back to "no frontmatter" when neither block is found.
///
/// # Errors
///
/// Propagates any error produced by the delimiter-specific splitter or by
/// the frontmatter parser it invokes.
pub(super) fn split_frontmatter<'a>(
    mode: MarkdownFrontmatter,
    input: &'a str,
    options: &NormalizationOptions,
) -> Result<SplitMarkdown<'a>, TransformError> {
    // Shared result for every path that extracts no frontmatter.
    let without_frontmatter = || SplitMarkdown {
        frontmatter: Map::new(),
        body: input,
    };

    match mode {
        MarkdownFrontmatter::None => Ok(without_frontmatter()),
        MarkdownFrontmatter::Yaml => {
            split_delimited_frontmatter(input, "---", parse_yaml_frontmatter, options)
        }
        MarkdownFrontmatter::Toml => {
            split_delimited_frontmatter(input, "+++", parse_toml_frontmatter, options)
        }
        MarkdownFrontmatter::Auto => {
            let yaml_attempt =
                split_auto_delimited_frontmatter(input, "---", parse_yaml_frontmatter, options)?;
            // Only probe for TOML when the YAML delimiters were not present.
            let detected = match yaml_attempt {
                Some(split) => Some(split),
                None => split_auto_delimited_frontmatter(
                    input,
                    "+++",
                    parse_toml_frontmatter,
                    options,
                )?,
            };
            Ok(detected.unwrap_or_else(without_frontmatter))
        }
    }
}

/// Splits a frontmatter block fenced by `delimiter` lines off the start of
/// `input` and parses it with `parser`.
///
/// When `input` does not begin with an opening delimiter line, the whole
/// input is returned as the body with an empty frontmatter map.
///
/// # Errors
///
/// Returns `InvalidInput` when the opening delimiter is present but no
/// closing delimiter line follows, and propagates any error from `parser`.
fn split_delimited_frontmatter<'a>(
    input: &'a str,
    delimiter: &str,
    parser: fn(&str, &NormalizationOptions) -> Result<Map<String, JsonValue>, TransformError>,
    options: &NormalizationOptions,
) -> Result<SplitMarkdown<'a>, TransformError> {
    let after_opening = match strip_opening_delimiter(input, delimiter) {
        Some(after_opening) => after_opening,
        None => {
            return Ok(SplitMarkdown {
                frontmatter: Map::new(),
                body: input,
            });
        }
    };

    // An explicitly requested format with an unterminated block is an error.
    let (raw_end, body_offset) =
        find_closing_delimiter(after_opening, delimiter).ok_or_else(|| {
            TransformError::new(
                TransformErrorKind::InvalidInput,
                "markdown frontmatter closing delimiter is missing",
            )
        })?;

    let frontmatter = parser(&after_opening[..raw_end], options)?;
    Ok(SplitMarkdown {
        frontmatter,
        body: &after_opening[body_offset..],
    })
}

/// Detection-mode variant of the delimited splitter.
///
/// Returns `Ok(None)` when `input` lacks either the opening delimiter line
/// or a matching closing delimiter line, so the caller can try another
/// format. Once both delimiters are found the enclosed text is handed to
/// `parser`, and any parser error is propagated.
fn split_auto_delimited_frontmatter<'a>(
    input: &'a str,
    delimiter: &str,
    parser: fn(&str, &NormalizationOptions) -> Result<Map<String, JsonValue>, TransformError>,
    options: &NormalizationOptions,
) -> Result<Option<SplitMarkdown<'a>>, TransformError> {
    // Locate the block first; a missing delimiter on either side means
    // "not this format" rather than an error.
    let located = strip_opening_delimiter(input, delimiter).and_then(|after_opening| {
        find_closing_delimiter(after_opening, delimiter).map(|bounds| (after_opening, bounds))
    });

    match located {
        None => Ok(None),
        Some((after_opening, (raw_end, body_offset))) => {
            let frontmatter = parser(&after_opening[..raw_end], options)?;
            Ok(Some(SplitMarkdown {
                frontmatter,
                body: &after_opening[body_offset..],
            }))
        }
    }
}

fn strip_opening_delimiter<'a>(input: &'a str, delimiter: &str) -> Option<&'a str> {
    input
        .strip_prefix(delimiter)
        .and_then(strip_required_line_ending)
}

/// Strips one leading line ending (`\r\n` or `\n`) from `input`.
///
/// Returns `None` when `input` does not begin with a line ending.
fn strip_required_line_ending(input: &str) -> Option<&str> {
    // CRLF is tried first, matching the order of preference for a Windows
    // line ending over a bare LF.
    ["\r\n", "\n"]
        .iter()
        .find_map(|line_ending| input.strip_prefix(*line_ending))
}

/// Finds the first line of `input` that consists solely of `delimiter`.
///
/// Returns `(frontmatter_end, body_start)` as byte offsets into `input`:
/// the offset where the delimiter line begins, and the offset just past its
/// terminating `\n` (or `input.len()` when the delimiter is the final,
/// unterminated line). Returns `None` when no line matches.
fn find_closing_delimiter(input: &str, delimiter: &str) -> Option<(usize, usize)> {
    let mut line_start = 0usize;

    // Every `\n` terminates one line; inspect each terminated line in turn.
    for (newline_at, _) in input.match_indices('\n') {
        if delimiter_line_matches(&input[line_start..newline_at], delimiter) {
            return Some((line_start, newline_at + 1));
        }
        line_start = newline_at + 1;
    }

    // Whatever follows the last newline is a final line without terminator.
    delimiter_line_matches(&input[line_start..], delimiter).then_some((line_start, input.len()))
}

/// Reports whether `line` is exactly `delimiter`, ignoring one trailing `\r`
/// left over from a CRLF line ending.
fn delimiter_line_matches(line: &str, delimiter: &str) -> bool {
    let without_carriage_return = match line.strip_suffix('\r') {
        Some(stripped) => stripped,
        None => line,
    };
    without_carriage_return == delimiter
}

fn parse_yaml_frontmatter(
    input: &str,
    options: &NormalizationOptions,
) -> Result<Map<String, JsonValue>, TransformError> {
    enforce_yaml_alias_limit(input, options)?;
    let value = parse_yaml_value_strict_with_limits(
        input,
        options.max_depth,
        options.max_yaml_expanded_nodes,
        options.max_array_len,
        options.max_text_bytes,
    )
    .map_err(|err| {
        TransformError::new(
            TransformErrorKind::InvalidInput,
            format!("failed to parse YAML frontmatter: {}", err),
        )
    })?;
    let mut node_count = 0usize;
    let value = yaml_frontmatter_to_json(&value, options, 0, &mut node_count)?;
    object_frontmatter(value)
}

/// Parses TOML frontmatter text into a JSON object map.
///
/// # Errors
///
/// Returns `InvalidInput` when the TOML parser fails (its message is
/// included) or when the parsed value is not an object.
fn parse_toml_frontmatter(
    input: &str,
    options: &NormalizationOptions,
) -> Result<Map<String, JsonValue>, TransformError> {
    match super::super::toml::parse_toml_json_with_limits(input, options) {
        Ok(parsed) => object_frontmatter(parsed),
        Err(err) => Err(TransformError::new(
            TransformErrorKind::InvalidInput,
            format!("failed to parse TOML frontmatter: {}", err),
        )),
    }
}

/// Unwraps a parsed frontmatter value into its object map.
///
/// # Errors
///
/// Returns `InvalidInput` when `value` is anything other than a JSON object.
fn object_frontmatter(value: JsonValue) -> Result<Map<String, JsonValue>, TransformError> {
    if let JsonValue::Object(fields) = value {
        return Ok(fields);
    }
    Err(TransformError::new(
        TransformErrorKind::InvalidInput,
        "markdown frontmatter must be an object",
    ))
}

/// Recursively converts a YAML value into its JSON equivalent while
/// enforcing the limits in `options`.
///
/// `depth` is the nesting level of `value` (the caller passes `0` for the
/// root) and `node_count` is a running total of nodes visited so far, shared
/// across the whole traversal.
///
/// # Errors
///
/// Returns `InvalidInput` when:
/// * `depth` exceeds `options.max_depth`,
/// * the running node count exceeds `options.max_yaml_expanded_nodes`,
/// * a sequence is longer than `options.max_array_len`,
/// * a mapping key is not a string,
/// * a tagged value is encountered, or
/// * a number cannot be represented in JSON.
fn yaml_frontmatter_to_json(
    value: &YamlValue,
    options: &NormalizationOptions,
    depth: usize,
    node_count: &mut usize,
) -> Result<JsonValue, TransformError> {
    // Every failure raised directly by this function is an `InvalidInput`.
    let invalid =
        |message: &'static str| TransformError::new(TransformErrorKind::InvalidInput, message);

    if depth > options.max_depth {
        return Err(invalid("input exceeds max_depth"));
    }

    // Count this node before descending so the budget covers containers too.
    *node_count = node_count.saturating_add(1);
    if *node_count > options.max_yaml_expanded_nodes {
        return Err(invalid("input exceeds max_yaml_expanded_nodes"));
    }

    let converted = match value {
        YamlValue::Null => JsonValue::Null,
        YamlValue::Bool(flag) => JsonValue::Bool(*flag),
        YamlValue::Number(number) => yaml_frontmatter_number_to_json(number)?,
        YamlValue::String(text) => JsonValue::String(text.clone()),
        YamlValue::Sequence(items) => {
            if items.len() > options.max_array_len {
                return Err(invalid("input exceeds max_array_len"));
            }
            // Collecting into `Result` stops at the first failing element.
            let elements = items
                .iter()
                .map(|item| yaml_frontmatter_to_json(item, options, depth + 1, &mut *node_count))
                .collect::<Result<Vec<_>, _>>()?;
            JsonValue::Array(elements)
        }
        YamlValue::Mapping(entries) => {
            let mut object = Map::new();
            for (raw_key, raw_value) in entries {
                // JSON object keys are strings, so other key kinds are rejected.
                let YamlValue::String(name) = raw_key else {
                    return Err(invalid("YAML frontmatter mapping keys must be strings"));
                };
                let child = yaml_frontmatter_to_json(raw_value, options, depth + 1, node_count)?;
                object.insert(name.clone(), child);
            }
            JsonValue::Object(object)
        }
        YamlValue::Tagged(_) => {
            return Err(invalid("YAML frontmatter custom tags are not supported"));
        }
    };

    Ok(converted)
}

/// Converts a YAML number into a JSON number value.
///
/// Representations are tried in order: signed 64-bit integer, unsigned
/// 64-bit integer, then 64-bit float.
///
/// # Errors
///
/// Returns `InvalidInput` when none of the representations yields a JSON
/// number, i.e. when `JsonNumber::from_f64` rejects the float value.
fn yaml_frontmatter_number_to_json(
    value: &serde_yaml::Number,
) -> Result<JsonValue, TransformError> {
    let as_json_number = value
        .as_i64()
        .map(JsonNumber::from)
        .or_else(|| value.as_u64().map(JsonNumber::from))
        .or_else(|| value.as_f64().and_then(JsonNumber::from_f64));

    as_json_number.map(JsonValue::Number).ok_or_else(|| {
        TransformError::new(
            TransformErrorKind::InvalidInput,
            "YAML frontmatter number is not JSON-compatible",
        )
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A TOML frontmatter string longer than `max_text_bytes` must be
    /// rejected while splitting, with the limit named in the error message.
    #[test]
    fn toml_frontmatter_split_applies_text_limits() {
        let document = "+++\nowner = \"docs-team\"\n+++\n# Guide";
        let options = NormalizationOptions {
            max_text_bytes: 4,
            ..NormalizationOptions::default()
        };

        let Err(err) = split_frontmatter(MarkdownFrontmatter::Toml, document, &options) else {
            panic!("toml frontmatter should enforce text limits during conversion");
        };

        assert_eq!(err.kind, TransformErrorKind::InvalidInput);
        assert!(err.message.contains("max_text_bytes"));
    }
}