mdqy 0.1.3

jq for markdown: query and transform Markdown with a hybrid selector and jq DSL
Documentation
//! Write path. Attribute-level mutation.
//!
//! Shapes:
//! ```text
//!   <SELECTOR> | .<attr> |= <f>    // or <SELECTOR>.<attr> |= <f>
//!   del(<SELECTOR>.<attr>)
//!   <mut1> | <mut2> | <mut3>
//! ```
//!
//! `emit::md` byte-copies clean subtrees and regenerates dirty ones.
//! Each mutation only rewrites its own span.

use std::collections::HashSet;
use std::sync::Arc;

use crate::ast::{attr, Node};
use crate::error::RunError;
use crate::eval::{self, Env};
use crate::events::build_tree_from_source;
use crate::expr::{AssignOp, Expr};
use crate::value::Value;

/// Build the tree for `source`, apply the mutation `expr`, and serialise
/// the result. Top-level entry for `--output md` and `-U`.
///
/// The serializer is handed the original `source` bytes together with
/// the mutated tree.
///
/// # Errors
///
/// Returns [`RunError::Io`] when `source` is not valid UTF-8. Errors
/// raised while applying `expr` or while serialising are propagated.
pub fn transform_bytes(expr: &Expr, source: &[u8]) -> Result<Vec<u8>, RunError> {
    let text = match std::str::from_utf8(source) {
        Ok(text) => text,
        Err(e) => return Err(RunError::Io(format!("source not utf-8: {e}"))),
    };
    let tree = Arc::new(build_tree_from_source(text));
    let rewritten = apply_expr(expr, tree)?;

    // The output is usually about as large as the input, so size the
    // buffer up front.
    let mut buf = Vec::with_capacity(source.len());
    crate::emit::md::serialize(&mut buf, source, &rewritten)?;
    Ok(buf)
}

/// Apply a top-level mutation expression to `root` and return the
/// resulting tree.
///
/// Recognised shapes:
/// - `a | b` and `a, b`: apply `a`, then apply `b` to the tree `a` produced.
/// - `<target> |= <f>`: attribute update via [`apply_attr_op`].
/// - `del(<target>)`: attribute removal via [`apply_attr_op`].
/// - `walk(<f>)`: post-order walk via [`walk_tree`].
///
/// Plain `=` is rejected with [`RunError::NotImplemented`]. Every other
/// expression, `.` included, leaves the tree untouched and hands `root`
/// back unchanged.
fn apply_expr(expr: &Expr, root: Arc<Node>) -> Result<Arc<Node>, RunError> {
    match expr {
        // Both separators chain mutations left to right.
        Expr::Pipe(first, second) | Expr::Comma(first, second) => {
            apply_expr(first, root).and_then(|tree| apply_expr(second, tree))
        }
        Expr::Assign(target, AssignOp::Update, update) => {
            apply_attr_op(target, root, Op::Update(update))
        }
        Expr::Assign(_, AssignOp::Set, _) => Err(RunError::NotImplemented {
            feature: "`=` assignment (use `|=`)",
        }),
        Expr::Call { name, args } if args.len() == 1 && name.as_ref() == "del" => {
            apply_attr_op(&args[0], root, Op::Delete)
        }
        Expr::Call { name, args } if args.len() == 1 && name.as_ref() == "walk" => {
            walk_tree(&args[0], root)
        }
        // Identity and any shape that is not a recognised mutation:
        // nothing to rewrite.
        _ => Ok(root),
    }
}

/// Post-order walk: every Node child is walked first, then `f` is run
/// on the node itself. `f` may combine mutation operators (`|=`, `del`)
/// with control flow; [`apply_walk_f`] lists the shapes it accepts.
///
/// A node is returned as the very same `Arc` when neither it nor
/// anything below it changed. Otherwise a fresh copy flagged `dirty` is
/// returned, so every ancestor of a changed node ends up dirty as well.
fn walk_tree(f: &Expr, node: Arc<Node>) -> Result<Arc<Node>, RunError> {
    /// Copy `n` into a new `Arc` with the `dirty` flag set.
    fn dirty_copy(n: &Node) -> Arc<Node> {
        let mut copy = n.clone();
        copy.dirty = true;
        Arc::new(copy)
    }

    let mut rebuilt = Vec::with_capacity(node.children.len());
    let mut subtree_changed = false;
    for entry in &node.children {
        match entry {
            Value::Node(child) => {
                let walked = walk_tree(f, Arc::clone(child))?;
                if !Arc::ptr_eq(&walked, child) {
                    subtree_changed = true;
                }
                rebuilt.push(Value::Node(walked));
            }
            other => rebuilt.push(other.clone()),
        }
    }

    // Keep the original `Arc` when no child changed. Handing a fresh
    // clone to `f` here would defeat the serializer's clean-subtree
    // fast path, and `walk(.)` would regenerate every node.
    let target = if subtree_changed {
        let mut copy = (*node).clone();
        copy.children = rebuilt;
        Arc::new(copy)
    } else {
        node
    };

    let updated = apply_walk_f(f, Arc::clone(&target))?;
    if !Arc::ptr_eq(&updated, &target) {
        // `f` produced a different node: it always counts as dirty.
        return Ok(dirty_copy(&updated));
    }
    if subtree_changed {
        // `f` left this node alone, but something below it changed.
        Ok(dirty_copy(&target))
    } else {
        Ok(target)
    }
}

/// Mini-interpreter for the body of `walk(f)`, run against one node.
///
/// `crate::eval` rejects mutation operators outright, so the mutating
/// shapes are handled here:
/// - `.` returns the node unchanged;
/// - `a | b` runs `a`, then `b` on the node `a` produced;
/// - `if … then … else … end` takes the first branch whose condition's
///   first output is truthy, then the `else` branch if present, and
///   otherwise keeps the node;
/// - `.<attr> |= f` and `del(.<attr>)` go through [`walk_self_attr`];
/// - a nested `walk(g)` goes through [`walk_tree`].
///
/// Anything else is treated as read-only and delegated to the normal
/// evaluator. Its first output must be a Node (which replaces the
/// current one) or null / no output (which keeps the current one); any
/// other value is a [`RunError::Type`].
fn apply_walk_f(f: &Expr, node: Arc<Node>) -> Result<Arc<Node>, RunError> {
    match f {
        Expr::Identity => Ok(node),
        Expr::Pipe(first, second) => {
            apply_walk_f(first, node).and_then(|mid| apply_walk_f(second, mid))
        }
        Expr::If {
            branches,
            else_branch,
        } => {
            let env = Env::default();
            for (cond, then_branch) in branches {
                // Only the first output of the condition decides.
                let verdict = eval::eval(cond, Value::Node(Arc::clone(&node)), &env).next();
                match verdict {
                    Some(Err(e)) => return Err(e),
                    Some(Ok(v)) if v.truthy() => return apply_walk_f(then_branch, node),
                    // Falsy or empty: try the next branch.
                    _ => {}
                }
            }
            if let Some(otherwise) = else_branch.as_deref() {
                apply_walk_f(otherwise, node)
            } else {
                Ok(node)
            }
        }
        Expr::Assign(target, AssignOp::Update, update) => {
            walk_self_attr(node, target, Op::Update(update))
        }
        Expr::Call { name, args } if args.len() == 1 && name.as_ref() == "del" => {
            walk_self_attr(node, &args[0], Op::Delete)
        }
        Expr::Call { name, args } if args.len() == 1 && name.as_ref() == "walk" => {
            walk_tree(&args[0], node)
        }
        _ => {
            // Read-only path: evaluate against the node and demand a
            // Node back. `.` and chained field access land here.
            let outcome = eval::eval(f, Value::Node(Arc::clone(&node)), &Env::default()).next();
            match outcome {
                None | Some(Ok(Value::Null)) => Ok(node),
                Some(Ok(Value::Node(replacement))) => Ok(replacement),
                Some(Ok(other)) => Err(RunError::Type {
                    expected: "node".into(),
                    got: other.type_name().into(),
                }),
                Some(Err(e)) => Err(e),
            }
        }
    }
}

/// Attribute-level operation applied to every targeted node.
enum Op<'a> {
    /// `|=`: evaluate the borrowed expression against the attribute's
    /// current value (null when absent) and store the first result.
    Update(&'a Expr),
    /// `del(...)`: remove the attribute from the node.
    Delete,
}

fn walk_self_attr(node: Arc<Node>, lhs: &Expr, op: Op<'_>) -> Result<Arc<Node>, RunError> {
    let (_, attr_name) = split_attr_lhs(lhs)?;
    let targets: HashSet<usize> = [Arc::as_ptr(&node) as usize].into_iter().collect();
    walk_and_update(node, &targets, &attr_name, &op)
}

/// Apply `op` to the attribute named by `path` on every node that the
/// selector part of `path` yields when evaluated against `root`.
///
/// Returns `root` itself, untouched, when the selector matches nothing.
/// Errors from splitting `path`, evaluating the selector, or performing
/// the update are propagated.
fn apply_attr_op(path: &Expr, root: Arc<Node>, op: Op<'_>) -> Result<Arc<Node>, RunError> {
    let (selector, attr_name) = split_attr_lhs(path)?;
    let hits = collect_target_ptrs(&selector, &root)?;
    if hits.is_empty() {
        // Nothing selected: skip the tree walk entirely.
        Ok(root)
    } else {
        walk_and_update(root, &hits, &attr_name, &op)
    }
}

/// Break a mutation target into its `(selector, attribute)` parts.
///
/// Two shapes are accepted:
/// - a bare `.<attr>`, which yields the identity selector;
/// - `<SELECTOR>.<attr>`, which the parser produces as
///   `Pipe(SELECTOR, Field(attr))`.
///
/// # Errors
///
/// [`RunError::NotImplemented`] for a pipe that does not end in a field,
/// and for every other expression shape.
fn split_attr_lhs(expr: &Expr) -> Result<(Expr, String), RunError> {
    if let Expr::Field(name) = expr {
        return Ok((Expr::Identity, name.to_string()));
    }

    let Expr::Pipe(sel, tail) = expr else {
        return Err(RunError::NotImplemented {
            feature: "unsupported mutation target shape",
        });
    };

    if let Expr::Field(name) = tail.as_ref() {
        Ok((sel.as_ref().clone(), name.to_string()))
    } else {
        Err(RunError::NotImplemented {
            feature: "mutation target must end in `.<attr>`",
        })
    }
}

/// Evaluate `selector` against `root` and gather the `Arc::as_ptr`
/// address of every Node it yields.
///
/// Only the addresses are stored; the yielded `Arc`s are dropped again,
/// so the set is meaningful purely as identity keys for nodes of the
/// tree it was computed from.
///
/// # Errors
///
/// Propagates the first evaluation error. A non-Node output is reported
/// as [`RunError::NotImplemented`].
fn collect_target_ptrs(selector: &Expr, root: &Arc<Node>) -> Result<HashSet<usize>, RunError> {
    let env = Env::default();
    eval::eval(selector, Value::Node(Arc::clone(root)), &env)
        .map(|item| -> Result<usize, RunError> {
            match item? {
                Value::Node(hit) => Ok(Arc::as_ptr(&hit) as usize),
                _ => Err(RunError::NotImplemented {
                    feature: "mutation target must resolve to Node values",
                }),
            }
        })
        // Collecting into `Result` stops at the first error.
        .collect()
}

/// Rebuild the subtree rooted at `node`, applying `op` to `attr_name`
/// on every node whose address is in `targets`.
///
/// - A subtree without any target comes back as the very same `Arc`.
/// - Ancestors of a target are copied to carry their new children but
///   are not marked dirty. The serializer picks the minimal dirty
///   subtrees to regenerate, so clean ancestors keep unrelated output
///   bytes untouched.
/// - A target is copied, mutated and marked dirty; its own children are
///   still traversed, so nested targets are handled too.
///
/// # Errors
///
/// - [`RunError::Other`] when `attr_name` is not a known attribute
///   (only checked once a target is reached).
/// - [`RunError::Type`] when an update yields a value that is neither
///   null nor of the attribute's expected type.
/// - Any error raised while evaluating the update expression.
fn walk_and_update(
    node: Arc<Node>,
    targets: &HashSet<usize>,
    attr_name: &str,
    op: &Op<'_>,
) -> Result<Arc<Node>, RunError> {
    let hit = targets.contains(&(Arc::as_ptr(&node) as usize));

    let mut rebuilt = Vec::with_capacity(node.children.len());
    let mut child_changed = false;
    for entry in &node.children {
        match entry {
            Value::Node(child) => {
                let next = walk_and_update(Arc::clone(child), targets, attr_name, op)?;
                if !Arc::ptr_eq(&next, child) {
                    child_changed = true;
                }
                rebuilt.push(Value::Node(next));
            }
            other => rebuilt.push(other.clone()),
        }
    }

    // Untouched subtree: hand back the original allocation.
    if !(hit || child_changed) {
        return Ok(node);
    }

    let mut copy = (*node).clone();
    copy.children = rebuilt;

    // Plain ancestor: new children, but deliberately not dirty.
    if !hit {
        return Ok(Arc::new(copy));
    }

    let key = match attr::by_name(attr_name) {
        Some(key) => key,
        None => {
            return Err(RunError::Other(format!(
                "unknown attribute `{attr_name}`; only canonical attrs are supported"
            )));
        }
    };

    match op {
        Op::Delete => {
            copy.attrs.remove(key);
        }
        Op::Update(rhs) => {
            // A missing attribute is presented to the update as null.
            let current = copy.attrs.get(key).cloned().unwrap_or(Value::Null);
            // Only the first output counts; no output means null.
            let replacement = match eval::eval(rhs, current, &Env::default()).next() {
                Some(result) => result?,
                None => Value::Null,
            };
            let expected = attr::expected_type(key);
            let got = replacement.type_name();
            // Null is always accepted; "any" accepts every type.
            if expected != "any" && got != "null" && got != expected {
                return Err(RunError::Type {
                    expected: format!("{expected} for `.{attr_name}`"),
                    got: got.into(),
                });
            }
            copy.attrs.insert(key, replacement);
        }
    }

    copy.dirty = true;
    Ok(Arc::new(copy))
}