rulemorph 0.3.1

YAML-based declarative data transformation engine for CSV/JSON to JSON
Documentation
use std::collections::HashSet;

use crate::error::ErrorCode;
use crate::model::ExprRef;
use crate::path::{PathToken, parse_path};

use super::ValidationCtx;
use super::scope::LocalScope;

/// Validates the `source` string of a mapping.
///
/// Three checks run in order, and the first failure is recorded on `ctx`
/// (at `<base_path>.source`) before returning:
///
/// 1. the namespace must be accepted by `parse_source`;
/// 2. the remainder must be accepted by `parse_path`;
/// 3. an `out` reference must resolve against `produced_targets`, unless
///    `ctx.allow_any_out_ref` is set.
pub(super) fn validate_source(
    source: &str,
    base_path: &str,
    produced_targets: &HashSet<Vec<PathToken>>,
    ctx: &mut ValidationCtx<'_>,
) {
    let error_path = format!("{base_path}.source");

    let Some((namespace, raw_path)) = parse_source(source) else {
        ctx.push(
            ErrorCode::InvalidRefNamespace,
            "ref namespace must be input|context|out",
            error_path,
        );
        return;
    };

    let Ok(tokens) = parse_path(raw_path) else {
        ctx.push(ErrorCode::InvalidPath, "path is invalid", error_path);
        return;
    };

    // Only `out` references are order-sensitive; everything else is done here.
    if namespace != Namespace::Out
        || ctx.allow_any_out_ref
        || out_ref_resolves(&tokens, produced_targets)
    {
        return;
    }

    ctx.push(
        ErrorCode::ForwardOutReference,
        "out reference must point to previous mappings",
        error_path,
    );
}

/// Validates an expression reference (`expr_ref.ref_path`).
///
/// Checks run in this order, and at most one error is pushed to `ctx`
/// (always at `base_path`):
///
/// 1. the namespace must be accepted by `parse_ref`;
/// 2. `item` / `acc` namespaces must be permitted by `scope`;
/// 3. the remainder must be accepted by `parse_path`;
/// 4. a namespace-specific rule: `out` must resolve against
///    `produced_targets` (unless `ctx.allow_any_out_ref` is set), `item`
///    paths must begin with the key `value` or `index`, and `acc` paths must
///    begin with the key `value`.
pub(super) fn validate_ref(
    expr_ref: &ExprRef,
    base_path: &str,
    produced_targets: &HashSet<Vec<PathToken>>,
    ctx: &mut ValidationCtx<'_>,
    scope: LocalScope,
) {
    let Some((namespace, raw_path)) = parse_ref(&expr_ref.ref_path) else {
        ctx.push(
            ErrorCode::InvalidRefNamespace,
            "ref namespace must be input|context|out|item|acc",
            base_path,
        );
        return;
    };

    // Scope restrictions are reported before the path itself is parsed.
    let scope_violation = match namespace {
        Namespace::Item => {
            (!scope.allows_item()).then_some("item refs are only allowed inside array ops")
        }
        Namespace::Acc => {
            (!scope.allows_acc()).then_some("acc refs are only allowed inside reduce/fold ops")
        }
        Namespace::Input | Namespace::Context | Namespace::Out => None,
    };
    if let Some(message) = scope_violation {
        ctx.push(ErrorCode::InvalidRefNamespace, message, base_path);
        return;
    }

    let Ok(tokens) = parse_path(raw_path) else {
        ctx.push(ErrorCode::InvalidPath, "path is invalid", base_path);
        return;
    };

    // Namespace-specific rule; yields the error to report, if any.
    let failure = match namespace {
        Namespace::Input | Namespace::Context => None,
        Namespace::Out => {
            let resolved = ctx.allow_any_out_ref || out_ref_resolves(&tokens, produced_targets);
            (!resolved).then_some((
                ErrorCode::ForwardOutReference,
                "out reference must point to previous mappings",
            ))
        }
        Namespace::Item => {
            let head_ok = match tokens.first() {
                Some(PathToken::Key(key)) => key == "value" || key == "index",
                _ => false,
            };
            (!head_ok).then_some((
                ErrorCode::InvalidPath,
                "item ref must start with value or index",
            ))
        }
        Namespace::Acc => {
            let head_ok = match tokens.first() {
                Some(PathToken::Key(key)) => key == "value",
                _ => false,
            };
            (!head_ok).then_some((ErrorCode::InvalidPath, "acc ref must start with value"))
        }
    };

    if let Some((code, message)) = failure {
        ctx.push(code, message, base_path);
    }
}

/// Reports whether an `out` reference path is covered by an entry of
/// `produced_targets`.
///
/// Index tokens are dropped from `tokens`, leaving only the key tokens. The
/// reference resolves when any non-empty prefix of that key sequence (tried
/// longest first) is present in `produced_targets`. A path without any key
/// token never resolves.
fn out_ref_resolves(tokens: &[PathToken], produced_targets: &HashSet<Vec<PathToken>>) -> bool {
    let key_tokens: Vec<PathToken> = tokens
        .iter()
        .filter_map(|token| match token {
            PathToken::Key(_) => Some(token.clone()),
            PathToken::Index(_) => None,
        })
        .collect();

    // `Vec<T>: Borrow<[T]>`, so the set can be probed with a borrowed slice;
    // no temporary `Vec` has to be built for each candidate prefix. An empty
    // `key_tokens` yields an empty range and therefore `false`.
    (1..=key_tokens.len())
        .rev()
        .any(|end| produced_targets.contains(&key_tokens[..end]))
}

/// Splits an expression reference into its namespace and the path after it.
///
/// The text before the first `.` must be one of `input`, `context`, `out`,
/// `item` or `acc`, and the text after it must be non-empty. Returns `None`
/// when there is no `.`, when the path part is empty, or when the prefix is
/// not a known namespace.
fn parse_ref(value: &str) -> Option<(Namespace, &str)> {
    let (prefix, path) = value.split_once('.')?;
    if path.is_empty() {
        return None;
    }

    let namespace = match prefix {
        "input" => Namespace::Input,
        "context" => Namespace::Context,
        "out" => Namespace::Out,
        "item" => Namespace::Item,
        "acc" => Namespace::Acc,
        _ => return None,
    };

    Some((namespace, path))
}

/// Splits a mapping `source` string into its namespace and path.
///
/// With a `.` present, the text before the first `.` must be `input`,
/// `context` or `out` and the text after it must be non-empty. Without any
/// `.`, a non-empty string is taken as a path in the `input` namespace.
/// Everything else yields `None`.
fn parse_source(value: &str) -> Option<(Namespace, &str)> {
    match value.split_once('.') {
        // No separator: the bare value is an input path, unless it is empty.
        None if value.is_empty() => None,
        None => Some((Namespace::Input, value)),
        // Separator present but nothing after it.
        Some((_, "")) => None,
        Some(("input", rest)) => Some((Namespace::Input, rest)),
        Some(("context", rest)) => Some((Namespace::Context, rest)),
        Some(("out", rest)) => Some((Namespace::Out, rest)),
        Some(_) => None,
    }
}

/// Namespace of a reference: the segment that precedes the first `.` in a
/// `source` or `ref` string, as recognised by `parse_source` and `parse_ref`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Namespace {
    /// Written as the `input` prefix. `parse_source` also assigns this
    /// namespace to a non-empty source that contains no `.` at all.
    Input,
    /// Written as the `context` prefix.
    Context,
    /// Written as the `out` prefix. The validators check these references
    /// against the set of produced targets unless `allow_any_out_ref` is set.
    Out,
    /// Written as the `item` prefix. Accepted by `parse_ref` only (not by
    /// `parse_source`); `validate_ref` additionally requires
    /// `scope.allows_item()`.
    Item,
    /// Written as the `acc` prefix. Accepted by `parse_ref` only (not by
    /// `parse_source`); `validate_ref` additionally requires
    /// `scope.allows_acc()`.
    Acc,
}