use crate::compiler::prelude::*;
use regex::{Regex, RegexBuilder};
use roxmltree::NodeType;
pub use roxmltree::{Document, Node};
use rust_decimal::prelude::Zero;
use std::sync::LazyLock;
use std::{
borrow::Cow,
collections::{BTreeMap, btree_map::Entry},
};
/// Lazily compiled pattern matching a `>` followed by one or more whitespace
/// characters and then a `<`.
///
/// `trim_xml` uses it to replace every such match with `><`.
///
/// # Panics
///
/// First access panics with "trim regex failed" if the pattern cannot be built.
pub static XML_RE: LazyLock<Regex> = LazyLock::new(|| {
    let mut builder = RegexBuilder::new(r">\s+?<");
    builder.multi_line(true);
    builder.build().expect("trim regex failed")
});
/// Fallback for `ParseOptions::trim`: whitespace between tags is stripped before parsing.
pub static DEFAULT_TRIM: LazyLock<Value> = LazyLock::new(|| Value::from(true));

/// Fallback for `ParseOptions::include_attr`: attributes are included in the output.
pub static DEFAULT_INCLUDE_ATTR: LazyLock<Value> = LazyLock::new(|| Value::from(true));

/// Fallback for `ParseOptions::attr_prefix`: attribute keys are prefixed with `@`.
pub static DEFAULT_ATTR_PREFIX: LazyLock<Value> =
    LazyLock::new(|| Value::Bytes(Bytes::from_static(b"@")));

/// Fallback for `ParseOptions::text_key`: text content is stored under the key `text`.
pub static DEFAULT_TEXT_KEY: LazyLock<Value> =
    LazyLock::new(|| Value::Bytes(Bytes::from_static(b"text")));

/// Fallback for `ParseOptions::always_use_text_key`: disabled.
pub static DEFAULT_ALWAYS_USE_TEXT_KEY: LazyLock<Value> = LazyLock::new(|| Value::from(false));

/// Fallback for `ParseOptions::parse_bool`: `true`/`false` text becomes a boolean.
pub static DEFAULT_PARSE_BOOL: LazyLock<Value> = LazyLock::new(|| Value::from(true));

/// Fallback for `ParseOptions::parse_null`: empty or `null` text becomes null.
pub static DEFAULT_PARSE_NULL: LazyLock<Value> = LazyLock::new(|| Value::from(true));

/// Fallback for `ParseOptions::parse_number`: numeric text becomes a number.
pub static DEFAULT_PARSE_NUMBER: LazyLock<Value> = LazyLock::new(|| Value::from(true));
/// Resolved, strongly typed settings that drive `process_node` and `process_text`.
///
/// `parse_xml` builds this from a `ParseOptions` after applying the defaults.
#[derive(Debug, Clone)]
pub struct ParseXmlConfig<'a> {
/// When `true`, attributes are added to an element's object, and any element
/// that has attributes is always expanded into an object.
pub include_attr: bool,
/// String prepended to each attribute name to form its key.
pub attr_prefix: Cow<'a, str>,
/// Key under which text children are stored when an element is expanded
/// into an object.
pub text_key: Cow<'a, str>,
/// When `true`, an element without attributes is always expanded into an
/// object instead of being flattened to its single child.
pub always_use_text_key: bool,
/// When `true`, the text `true` / `false` is converted to a boolean.
pub parse_bool: bool,
/// When `true`, empty text and the text `null` are converted to null.
pub parse_null: bool,
/// When `true`, text that parses as `i64` or `f64` is converted to a number.
pub parse_number: bool,
}
/// Caller-supplied, still untyped options for `parse_xml`.
///
/// Every field is optional; `parse_xml` substitutes the matching `DEFAULT_*`
/// static for a `None` and returns an error if a supplied value has the wrong type.
#[derive(Debug, Default)]
pub struct ParseOptions {
/// Boolean: strip whitespace between tags (via `trim_xml`) before parsing.
pub trim: Option<Value>,
/// Boolean: see `ParseXmlConfig::include_attr`.
pub include_attr: Option<Value>,
/// String: see `ParseXmlConfig::attr_prefix`.
pub attr_prefix: Option<Value>,
/// String: see `ParseXmlConfig::text_key`.
pub text_key: Option<Value>,
/// Boolean: see `ParseXmlConfig::always_use_text_key`.
pub always_use_text_key: Option<Value>,
/// Boolean: see `ParseXmlConfig::parse_bool`.
pub parse_bool: Option<Value>,
/// Boolean: see `ParseXmlConfig::parse_null`.
pub parse_null: Option<Value>,
/// Boolean: see `ParseXmlConfig::parse_number`.
pub parse_number: Option<Value>,
}
pub fn parse_xml(value: Value, options: ParseOptions) -> Resolved {
let string = value.try_bytes_utf8_lossy()?;
let trim = options
.trim
.unwrap_or_else(|| DEFAULT_TRIM.clone())
.try_boolean()?;
let include_attr = options
.include_attr
.unwrap_or_else(|| DEFAULT_INCLUDE_ATTR.clone())
.try_boolean()?;
let attr_prefix = Cow::from(
options
.attr_prefix
.unwrap_or_else(|| DEFAULT_ATTR_PREFIX.clone())
.try_bytes_utf8_lossy()?
.into_owned(),
);
let text_key = Cow::from(
options
.text_key
.unwrap_or_else(|| DEFAULT_TEXT_KEY.clone())
.try_bytes_utf8_lossy()?
.into_owned(),
);
let always_use_text_key = options
.always_use_text_key
.unwrap_or_else(|| DEFAULT_ALWAYS_USE_TEXT_KEY.clone())
.try_boolean()?;
let parse_bool = options
.parse_bool
.unwrap_or_else(|| DEFAULT_PARSE_BOOL.clone())
.try_boolean()?;
let parse_null = options
.parse_null
.unwrap_or_else(|| DEFAULT_PARSE_NULL.clone())
.try_boolean()?;
let parse_number = options
.parse_number
.unwrap_or_else(|| DEFAULT_PARSE_NUMBER.clone())
.try_boolean()?;
let config = ParseXmlConfig {
include_attr,
attr_prefix,
text_key,
always_use_text_key,
parse_bool,
parse_null,
parse_number,
};
let parse = if trim { trim_xml(&string) } else { string };
let doc = Document::parse(&parse).map_err(|e| format!("unable to parse xml: {e}"))?;
let value = process_node(doc.root(), &config);
Ok(value)
}
/// Recursively converts an XML node into a [`Value`].
///
/// * The root node becomes an object keyed by its element/text children.
/// * An element becomes an object when it has attributes (and `include_attr`
///   is set), when `always_use_text_key` is set, or when it does not have
///   exactly one child. Repeated child names are collected into an array.
/// * An element with exactly one child is flattened: a lone element child
///   yields a single-key object, a lone text child yields the converted text.
///   A lone child of any other kind (comment, processing instruction) is
///   ignored, giving the same result as an empty element.
/// * A text node is converted by `process_text`.
///
/// # Panics
///
/// Panics if `node` itself is neither the root, an element, nor a text node
/// (for example a comment passed in directly by the caller).
pub fn process_node(node: Node, config: &ParseXmlConfig) -> Value {
    // Expands a node into an object: optional attributes first, then every
    // element/text child keyed by tag name or by the configured text key.
    let recurse = |node: Node| -> ObjectMap {
        let mut map = BTreeMap::new();

        if config.include_attr {
            for attr in node.attributes() {
                map.insert(
                    format!("{}{}", config.attr_prefix, attr.name()).into(),
                    attr.value().into(),
                );
            }
        }

        // Comments and processing instructions are skipped here.
        for n in node.children().filter(|n| n.is_element() || n.is_text()) {
            let name = match n.node_type() {
                NodeType::Element => n.tag_name().name().to_string().into(),
                NodeType::Text => config.text_key.to_string().into(),
                _ => unreachable!("shouldn't be other XML nodes"),
            };
            let value = process_node(n, config);

            match map.entry(name) {
                Entry::Occupied(mut entry) => {
                    let slot = entry.get_mut();
                    match slot {
                        // Third or later occurrence: the array already exists.
                        Value::Array(items) => items.push(value),
                        // Second occurrence: promote the existing value to an
                        // array, moving both values instead of cloning them.
                        single => {
                            let prev = std::mem::replace(single, Value::Null);
                            *single = Value::Array(vec![prev, value]);
                        }
                    };
                }
                Entry::Vacant(entry) => {
                    entry.insert(value);
                }
            }
        }

        map
    };

    match node.node_type() {
        NodeType::Root => Value::Object(recurse(node)),
        NodeType::Element => {
            match (
                config.always_use_text_key,
                node.attributes().len().is_zero(),
            ) {
                // Attributes present and wanted: always expand so they get their keys.
                (_, false) if config.include_attr => Value::Object(recurse(node)),
                // Text key requested: always expand.
                (true, true) => Value::Object(recurse(node)),
                // Otherwise flatten when there is exactly one usable child.
                _ => {
                    let mut children = node.children();
                    match (children.next(), children.next()) {
                        (Some(child), None) if child.is_element() => {
                            let mut map = BTreeMap::new();
                            map.insert(
                                child.tag_name().name().to_string().into(),
                                process_node(child, config),
                            );
                            Value::Object(map)
                        }
                        (Some(child), None) if child.is_text() => process_node(child, config),
                        // Zero children, several children, or a single
                        // comment/processing-instruction child. The last case
                        // must not be recursed into: it would hit the
                        // `unreachable!` arm below.
                        _ => Value::Object(recurse(node)),
                    }
                }
            }
        }
        NodeType::Text => process_text(node.text().expect("expected XML text node"), config),
        _ => unreachable!("shouldn't be other XML nodes"),
    }
}
/// Converts the text content of an XML node into a typed [`Value`].
///
/// Conversions are attempted in this order, each gated by its config flag:
/// 1. `parse_null`: empty text or `null` becomes null.
/// 2. `parse_bool`: `true` / `false` becomes a boolean.
/// 3. `parse_number`: text that parses as `i64`, then as `f64`, becomes a number.
///
/// Anything else is returned unchanged as a string.
fn process_text(text: &str, config: &ParseXmlConfig<'_>) -> Value {
    match text {
        "" | "null" if config.parse_null => Value::Null,
        "true" if config.parse_bool => true.into(),
        "false" if config.parse_bool => false.into(),
        _ if !config.parse_number => text.into(),
        _ => {
            if let Ok(int) = text.parse::<i64>() {
                return int.into();
            }

            match text.parse::<f64>() {
                // `f64::from_str` accepts "nan"/"NaN"; handing that to
                // `from_f64_or_zero` would (per its name) turn the text into a
                // numeric zero, so NaN is kept as the original string instead.
                Ok(float) if !float.is_nan() => Value::from_f64_or_zero(float),
                _ => text.into(),
            }
        }
    }
}
/// Replaces every match of `XML_RE` (a `>`, whitespace, then a `<`) in `xml`
/// with `><`.
///
/// The result comes straight from `Regex::replace_all`.
///
/// NOTE(review): the pattern runs over the raw text, so it presumably also
/// rewrites matching sequences inside CDATA sections or comments; confirm
/// whether that matters for callers.
#[inline]
fn trim_xml(xml: &str) -> Cow<'_, str> {
    let replacement = "><";
    XML_RE.replace_all(xml, replacement)
}