// celq 0.5.0
//
// A CEL command-line query tool for JSON data.
// Documentation
use quick_xml::Reader;
use quick_xml::XmlVersion;
use quick_xml::encoding::EncodingError;
use quick_xml::escape::{EscapeError, resolve_predefined_entity};
use quick_xml::events::attributes::AttrError;
use quick_xml::events::{BytesRef, BytesStart, BytesText, Event};
use serde_json::{Map, Value};
use std::error::Error;
use std::fmt;
use std::str::Utf8Error;

/// Error returned when XML input cannot be converted to JSON.
///
/// Wraps a single human-readable message; every underlying error kind is
/// flattened into that message.
#[derive(Debug)]
pub struct XmlParseError(String);

impl fmt::Display for XmlParseError {
    /// Writes the stored message verbatim (formatter width/padding flags are
    /// not applied).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}

impl Error for XmlParseError {}

impl From<quick_xml::Error> for XmlParseError {
    fn from(err: quick_xml::Error) -> Self {
        Self(err.to_string())
    }
}

/// Converts an attribute-iteration error by keeping only its display text.
impl From<AttrError> for XmlParseError {
    fn from(cause: AttrError) -> Self {
        let message = cause.to_string();
        Self(message)
    }
}

/// Converts a decoding error by keeping only its display text.
impl From<EncodingError> for XmlParseError {
    fn from(cause: EncodingError) -> Self {
        let message = cause.to_string();
        Self(message)
    }
}

/// Converts an escape/unescape error by keeping only its display text.
impl From<EscapeError> for XmlParseError {
    fn from(cause: EscapeError) -> Self {
        let message = cause.to_string();
        Self(message)
    }
}

/// Converts a UTF-8 validation error by keeping only its display text.
impl From<Utf8Error> for XmlParseError {
    fn from(cause: Utf8Error) -> Self {
        let message = cause.to_string();
        Self(message)
    }
}

/// An element that has been opened but not yet closed during parsing.
#[derive(Debug)]
struct Node {
    /// Tag name; checked against the closing tag and used as the JSON key.
    name: String,
    /// Attributes (under `"$"`) and child elements gathered so far.
    value: Map<String, Value>,
    /// Character data accumulated from text, CDATA and entity references.
    text: String,
}

impl Node {
    /// Creates a node with the given name and initial members and no text.
    fn new(name: String, value: Map<String, Value>) -> Self {
        let text = String::new();
        Self { name, value, text }
    }
}

pub fn parse_xml(xml: &str) -> Result<Value, XmlParseError> {
    let mut reader = Reader::from_str(xml);
    let mut stack = Vec::new();
    let mut output = Value::Null;
    let mut root_seen = false;

    loop {
        match reader.read_event() {
            Ok(Event::Start(event)) => {
                if root_seen && stack.is_empty() {
                    return Err(XmlParseError("multiple root elements".to_string()));
                }
                stack.push(node_from_start(&reader, &event)?);
            }
            Ok(Event::Empty(event)) => {
                if root_seen && stack.is_empty() {
                    return Err(XmlParseError("multiple root elements".to_string()));
                }
                let node = node_from_start(&reader, &event)?;
                if let Some(root) = close_node(node, &mut stack)? {
                    root_seen = true;
                    output = root;
                }
            }
            Ok(Event::Text(event)) => append_text(&mut stack, event)?,
            Ok(Event::GeneralRef(event)) => append_ref(&mut stack, event)?,
            Ok(Event::CData(event)) => {
                if let Some(node) = stack.last_mut() {
                    node.text.push_str(&event.decode()?);
                } else {
                    return Err(XmlParseError("CDATA outside root element".to_string()));
                }
            }
            Ok(Event::End(event)) => {
                let node = stack.pop().ok_or_else(|| {
                    XmlParseError("unexpected closing tag without an open element".to_string())
                })?;
                let close_name = std::str::from_utf8(event.name().as_ref())?.to_string();
                if node.name != close_name {
                    return Err(XmlParseError(format!(
                        "mismatched closing tag: expected {}, got {}",
                        node.name, close_name
                    )));
                }
                if let Some(root) = close_node(node, &mut stack)? {
                    root_seen = true;
                    output = root;
                }
            }
            Ok(Event::Eof) => break,
            Ok(_) => {}
            Err(err) => {
                return Err(XmlParseError(format!(
                    "error at position {}: {}",
                    reader.buffer_position(),
                    err
                )));
            }
        }
    }

    if !stack.is_empty() {
        return Err(XmlParseError("unexpected end of input".to_string()));
    }

    Ok(output)
}

/// Builds a [`Node`] for an opening (or self-closing) tag.
///
/// The node is named after the tag. When the tag carries attributes they are
/// stored as an object of strings under the `"$"` key; otherwise the node
/// starts with no members.
///
/// # Errors
///
/// Fails if the tag name or an attribute key is not valid UTF-8, if the
/// attribute iterator reports an error, or if an attribute value cannot be
/// decoded.
fn node_from_start(reader: &Reader<&[u8]>, event: &BytesStart<'_>) -> Result<Node, XmlParseError> {
    let tag = event.name();
    let name = String::from(std::str::from_utf8(tag.as_ref())?);

    let mut attributes = Map::new();
    for attribute in event.attributes() {
        let attribute = attribute?;
        let key = String::from(std::str::from_utf8(attribute.key.as_ref())?);
        let text =
            attribute.decoded_and_normalized_value(XmlVersion::Implicit1_0, reader.decoder())?;
        attributes.insert(key, Value::String(text.into_owned()));
    }

    let mut members = Map::new();
    if !attributes.is_empty() {
        members.insert("$".to_string(), Value::Object(attributes));
    }

    Ok(Node::new(name, members))
}

/// Appends decoded character data to the innermost open element.
///
/// With no open element, whitespace-only text is silently accepted and any
/// other text is rejected.
///
/// # Errors
///
/// Fails if the text cannot be decoded, or if non-whitespace text appears
/// while no element is open.
fn append_text(stack: &mut [Node], event: BytesText<'_>) -> Result<(), XmlParseError> {
    let decoded = event.decode()?;
    match stack.last_mut() {
        Some(current) => current.text.push_str(&decoded),
        None if decoded.chars().all(char::is_whitespace) => {}
        None => return Err(XmlParseError("text outside root element".to_string())),
    }
    Ok(())
}

/// Resolves an entity or character reference and appends the result to the
/// innermost open element's text.
///
/// Character references are resolved to their character; anything else must
/// be one of the predefined XML entities.
///
/// # Errors
///
/// Fails if no element is open, if the reference cannot be resolved or
/// decoded, or if it names an entity that is not predefined.
fn append_ref(stack: &mut [Node], event: BytesRef<'_>) -> Result<(), XmlParseError> {
    let current = match stack.last_mut() {
        Some(node) => node,
        None => {
            return Err(XmlParseError(
                "entity reference outside root element".to_string(),
            ));
        }
    };

    match event.resolve_char_ref()? {
        Some(ch) => current.text.push(ch),
        None => {
            let entity = event.decode()?;
            match resolve_predefined_entity(&entity) {
                Some(replacement) => current.text.push_str(replacement),
                None => {
                    return Err(XmlParseError(format!(
                        "unknown XML entity reference: &{entity};"
                    )));
                }
            }
        }
    }
    Ok(())
}

fn close_node(mut node: Node, stack: &mut [Node]) -> Result<Option<Value>, XmlParseError> {
    let text = if node.text.chars().all(char::is_whitespace) {
        String::new()
    } else {
        node.text
    };

    let value = if node.value.is_empty() {
        Value::String(text)
    } else {
        if !text.is_empty() {
            node.value.insert("_".to_string(), Value::String(text));
        }
        Value::Object(node.value)
    };

    if let Some(parent) = stack.last_mut() {
        assign_or_push(&mut parent.value, node.name, value);
        Ok(None)
    } else {
        let mut root = Map::new();
        root.insert(node.name, value);
        Ok(Some(Value::Object(root)))
    }
}

/// Stores `value` under `key`, collecting repeated keys into an array.
///
/// * Key absent: the value is inserted as-is.
/// * Key holds an array: the value is appended to it.
/// * Key holds anything else: the entry becomes a two-element array of the
///   previous value followed by the new one.
fn assign_or_push(object: &mut Map<String, Value>, key: String, value: Value) {
    match object.get_mut(&key) {
        Some(Value::Array(items)) => items.push(value),
        Some(slot) => {
            // `take` leaves `Null` behind momentarily while the array is built.
            let first = slot.take();
            *slot = Value::Array(vec![first, value]);
        }
        None => {
            object.insert(key, value);
        }
    }
}

// Unit tests are kept in `xml2json_test.rs` and compiled only in test builds.
#[cfg(test)]
#[path = "xml2json_test.rs"]
mod tests;