rulemorph 0.3.1

YAML-based declarative data transformation engine for CSV/JSON to JSON
Documentation
use crate::error::{TransformError, TransformErrorKind};

use super::NormalizationOptions;

/// Rejects `input` when it contains more YAML alias tokens than permitted.
///
/// The alias count comes from `count_yaml_alias_tokens`, which scans the raw
/// text; the input is not parsed as YAML here.
///
/// # Errors
///
/// Returns a [`TransformError`] of kind [`TransformErrorKind::InvalidInput`]
/// when the count is strictly greater than `options.max_yaml_aliases`.
pub(super) fn enforce_yaml_alias_limit(
    input: &str,
    options: &NormalizationOptions,
) -> Result<(), TransformError> {
    // A count equal to the limit is still accepted.
    if count_yaml_alias_tokens(input) <= options.max_yaml_aliases {
        return Ok(());
    }

    Err(TransformError::new(
        TransformErrorKind::InvalidInput,
        "input exceeds max_yaml_aliases",
    ))
}

/// Counts alias-looking tokens (`*name`) across every line of `input`.
///
/// Lines that belong to the body of a block scalar are skipped: once a line
/// is recognised by `starts_block_scalar`, every following line that is blank
/// or indented (with spaces) deeper than that header line is ignored. The
/// header line itself is still scanned. The total saturates instead of
/// overflowing.
fn count_yaml_alias_tokens(input: &str) -> usize {
    let mut total = 0usize;
    // Indentation of the header line of the block scalar we are inside, if any.
    let mut open_block: Option<usize> = None;

    for line in input.lines() {
        // Number of leading space characters (a space is one byte).
        let depth = line.len() - line.trim_start_matches(' ').len();

        match open_block {
            Some(header_depth) if depth > header_depth || line.trim().is_empty() => continue,
            _ => {}
        }

        // Reaching this point means no block scalar is open any more; this
        // line may open a new one.
        open_block = starts_block_scalar(line).then_some(depth);
        total = total.saturating_add(count_yaml_alias_tokens_in_line(line));
    }

    total
}

/// Reports whether `line` ends in a block scalar header (`|` or `>`).
///
/// The line is scanned left to right. Text inside single or double quotes is
/// ignored (a backslash inside double quotes escapes the next character), and
/// an unquoted `#` ends the scan. At the first unquoted `|` or `>`, the rest
/// of the line decides the result: after dropping an optional trailing
/// `# comment` and surrounding whitespace, it must be empty or consist only
/// of chomping/indentation indicators (`+`, `-`, ASCII digits).
///
/// Accepting a trailing comment matters: YAML allows `key: | # note`, and
/// treating such a line as a non-header would cause the literal scalar body
/// to be scanned for aliases.
fn starts_block_scalar(line: &str) -> bool {
    let mut in_single = false;
    let mut chars = line.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\'' => in_single = !in_single,
            '"' if !in_single => {
                // Consume the whole double-quoted span, honouring `\` escapes.
                // An unterminated quote simply exhausts the line.
                loop {
                    match chars.next() {
                        Some('\\') => {
                            chars.next();
                        }
                        Some('"') | None => break,
                        Some(_) => {}
                    }
                }
            }
            '#' if !in_single => break,
            '|' | '>' if !in_single => {
                let rest = chars.as_str();
                // Drop a trailing comment before validating the indicators.
                let header = match rest.split_once('#') {
                    Some((before_comment, _)) => before_comment,
                    None => rest,
                };
                // `all` is vacuously true for an empty header.
                return header
                    .trim()
                    .chars()
                    .all(|value| matches!(value, '+' | '-' | '0'..='9'));
            }
            _ => {}
        }
    }

    false
}

/// Counts alias-looking `*` tokens on a single line.
///
/// The scan works on bytes and tracks one piece of state: which quote
/// character (`'` or `"`), if any, is currently open. Outside quotes, `#`
/// stops the scan and each `*` accepted by `is_alias_token_boundary` adds one
/// to the (saturating) count. Inside double quotes a backslash skips the
/// byte that follows it. Quote state does not carry over between calls.
fn count_yaml_alias_tokens_in_line(line: &str) -> usize {
    let bytes = line.as_bytes();
    let mut aliases = 0usize;
    // `Some(q)` while inside a quoted span opened by the byte `q`.
    let mut open_quote: Option<u8> = None;
    let mut cursor = 0usize;

    while let Some(&current) = bytes.get(cursor) {
        let mut step = 1usize;

        match (open_quote, current) {
            (None, b'#') => break,
            (None, b'\'' | b'"') => open_quote = Some(current),
            (None, b'*') => {
                if is_alias_token_boundary(bytes, cursor) {
                    aliases = aliases.saturating_add(1);
                }
            }
            (Some(quote), _) if quote == current => open_quote = None,
            // Escaped character inside double quotes: jump over both bytes.
            // Stepping past the end just terminates the loop.
            (Some(b'"'), b'\\') => step = 2,
            _ => {}
        }

        cursor += step;
    }

    aliases
}

fn is_alias_token_boundary(bytes: &[u8], index: usize) -> bool {
    let previous = index
        .checked_sub(1)
        .and_then(|previous| bytes.get(previous))
        .copied();
    let next = bytes.get(index + 1).copied();
    previous.is_none_or(|value| {
        value.is_ascii_whitespace() || matches!(value, b'[' | b'{' | b',' | b':' | b'-')
    }) && next.is_some_and(is_yaml_anchor_char)
}

/// Returns `true` for bytes this module accepts in an anchor/alias name:
/// ASCII letters, ASCII digits, `_` and `-`.
fn is_yaml_anchor_char(value: u8) -> bool {
    matches!(
        value,
        b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-'
    )
}