#![allow(clippy::needless_return)]
use sxd_document::dom::{Element, ChildOfElement, Attribute};
/// Pretty-prints the MathML tree rooted at `e`.
///
/// Convenience entry point: it delegates to [`format_element`] with a starting
/// nesting depth of `0`.
pub fn mml_to_string(e: Element) -> String {
    format_element(e, 0)
}
/// Renders `e` and its descendants as indented markup, one element per line.
///
/// * `e` - the element to render.
/// * `indent` - nesting depth; the padding width is `2 * indent`.
///
/// The padding is produced by formatting a single space with a minimum width,
/// so at depth `0` the line still begins with one space.
///
/// An element without element children is written on one line, with the text
/// of its text children concatenated and escaped. An element that has at least
/// one element child is written as an opening line, its element children
/// (one depth deeper), and a closing line; its non-element children (such as
/// text) are not emitted.
pub fn format_element(e: Element, indent: usize) -> String {
    let namespace = "";
    let name = e.name();
    let width = 2 * indent;
    let children = e.children();

    let open_tag = format!(
        "{:width$}<{}{}{}>",
        " ",
        namespace,
        name.local_part(),
        format_attrs(&e.attributes()),
        width = width
    );

    let is_leaf = !children
        .iter()
        .any(|child| matches!(child, ChildOfElement::Element(_)));
    if is_leaf {
        // Only text nodes contribute content; every other child kind adds nothing.
        let mut content = String::new();
        for child in &children {
            if let ChildOfElement::Text(t) = child {
                content.push_str(t.text());
            }
        }
        return format!(
            "{}{}</{}{}>\n",
            open_tag,
            handle_special_chars(&content),
            namespace,
            name.local_part()
        );
    }

    let mut answer = open_tag;
    answer.push('\n');
    for child in children {
        if let ChildOfElement::Element(child_element) = child {
            answer.push_str(&format_element(child_element, indent + 1));
        }
    }
    answer.push_str(&format!(
        "{:width$}</{}{}>\n",
        " ",
        namespace,
        name.local_part(),
        width = width
    ));
    answer
}
/// Renders attributes as a run of ` name='value'` pairs.
///
/// Each pair is preceded by a single space, uses the attribute's local name,
/// and has its value passed through `handle_special_chars`. An empty slice
/// yields an empty string.
pub fn format_attrs(attrs: &[Attribute]) -> String {
    attrs
        .iter()
        .map(|attr| {
            format!(
                " {}='{}'",
                attr.name().local_part(),
                handle_special_chars(attr.value())
            )
        })
        .collect()
}
/// Escapes `text` so it can be embedded in element content or in a
/// single-quoted attribute value.
///
/// * `"`, `&`, `'`, `<` and `>` become the predefined XML entities
///   (`&quot;`, `&amp;`, `&apos;`, `&lt;`, `&gt;`).
/// * The characters U+2061 through U+2064 have no visible glyph, so they are
///   written as hexadecimal character references (`&#x2061;` … `&#x2064;`) to
///   keep them visible in the printed output.
/// * Every other character is copied unchanged.
fn handle_special_chars(text: &str) -> String {
    // The input length is only a lower bound; escapes make the result longer.
    let mut s = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '"' => s.push_str("&quot;"),
            '&' => s.push_str("&amp;"),
            '\'' => s.push_str("&apos;"),
            '<' => s.push_str("&lt;"),
            '>' => s.push_str("&gt;"),
            '\u{2061}' => s.push_str("&#x2061;"),
            '\u{2062}' => s.push_str("&#x2062;"),
            '\u{2063}' => s.push_str("&#x2063;"),
            '\u{2064}' => s.push_str("&#x2064;"),
            _ => s.push(ch),
        }
    }
    s
}
/// Renders `yaml` as text with the compact emitter, shifted right by
/// `2 * indent` spaces.
///
/// With `indent == 0` the emitter output is returned untouched. Otherwise the
/// padding is placed before the first line and after every newline, and
/// trailing whitespace is trimmed from the result.
///
/// # Panics
///
/// Panics if the emitter reports an error.
pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
    let mut rendered = String::new();
    {
        // Scoped so the emitter's mutable borrow of `rendered` ends here.
        let mut emitter = YamlEmitter::new(&mut rendered);
        emitter.compact(true);
        emitter.emit_node(yaml).unwrap();
    }

    if indent == 0 {
        return rendered;
    }

    let pad = " ".repeat(2 * indent);
    let continuation = format!("\n{pad}");
    let shifted = rendered.replace('\n', &continuation);
    format!("{pad}{}", shifted.trim_end())
}
/// Returns `true` for every YAML value that is neither a hash nor an array.
fn is_scalar(v: &Yaml) -> bool {
    match v {
        Yaml::Hash(_) | Yaml::Array(_) => false,
        _ => true,
    }
}
/// Decides whether `v` is too large to be written inline.
///
/// * A hash is complex when it has more than one entry, or when its single
///   entry has a non-scalar key or value. An empty hash is not complex.
/// * An array is complex when it has more than one element. A single-element
///   array is judged by that element: a hash element follows the hash rule
///   above, and any other element is complex only if it is not a scalar.
///   An empty array is not complex.
/// * Every other value is not complex.
fn is_complex(v: &Yaml) -> bool {
    match v {
        Yaml::Hash(h) => {
            // With at most one entry, `any` inspects that entry (or nothing).
            h.len() > 1 || h.iter().any(|(key, val)| !(is_scalar(key) && is_scalar(val)))
        }
        Yaml::Array(items) => match items.as_slice() {
            [] => false,
            [only] => match only {
                // The hash rule is exactly what this function computes for a hash.
                Yaml::Hash(_) => is_complex(only),
                other => !is_scalar(other),
            },
            _ => true,
        },
        _ => false,
    }
}
use std::error::Error;
use std::fmt::{self, Display};
use yaml_rust::{Yaml, yaml::Hash};
/// Errors that can be reported while emitting YAML text.
#[derive(Copy, Clone, Debug)]
#[allow(dead_code)] // `BadHashmapKey` is not constructed by the code visible in this file.
enum EmitError {
    /// The underlying writer reported a formatting failure.
    FmtError(fmt::Error),
    /// A hash key could not be emitted.
    BadHashmapKey,
}

impl Error for EmitError {
    /// Exposes the wrapped `fmt::Error`, if any, as the underlying cause.
    ///
    /// This replaces an override of the deprecated `Error::cause`; the
    /// default `cause` forwards to `source`, so both report the same value.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            EmitError::FmtError(err) => Some(err),
            EmitError::BadHashmapKey => None,
        }
    }
}

impl Display for EmitError {
    /// Writes the wrapped error's message, or a fixed message for a bad key.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        match self {
            EmitError::FmtError(err) => Display::fmt(err, formatter),
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
        }
    }
}

impl From<fmt::Error> for EmitError {
    /// Lets `?` convert writer failures into `EmitError::FmtError`.
    fn from(f: fmt::Error) -> Self {
        EmitError::FmtError(f)
    }
}
/// Writes `Yaml` values as text into a borrowed `fmt::Write` sink.
struct YamlEmitter<'a> {
/// Destination that receives every piece of emitted text.
writer: &'a mut dyn fmt::Write,
/// Number of spaces `write_indent` writes per nesting level.
best_indent: usize,
/// When `true`, values emitted with the inline flag stay on the current line;
/// when `false`, every non-empty array or hash value starts on a new line.
compact: bool,
/// Current nesting depth. It starts at `-1` and is incremented while a
/// multi-line array or a non-empty hash is being emitted.
level: isize,
}
/// Result type shared by the emitter methods: nothing on success, an `EmitError` on failure.
type EmitResult = Result<(), EmitError>;
/// Writes `v` to `wr` as a double-quoted string with escapes.
///
/// * `"` and `\` are backslash-escaped.
/// * Backspace, tab, newline, form feed and carriage return use their short
///   escapes (`\b`, `\t`, `\n`, `\f`, `\r`).
/// * Every other byte below `0x20`, and `0x7f`, is written as `\u00XX` with
///   lowercase hexadecimal digits.
/// * All remaining bytes are copied through unchanged, in the longest runs
///   possible.
///
/// Returns the first error reported by `wr`.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    wr.write_str("\"")?;

    // Start of the run of bytes that has been scanned but not yet written.
    let mut pending_from = 0;
    for (pos, byte) in v.bytes().enumerate() {
        let short_escape = match byte {
            b'"' => Some("\\\""),
            b'\\' => Some("\\\\"),
            b'\x08' => Some("\\b"),
            b'\t' => Some("\\t"),
            b'\n' => Some("\\n"),
            b'\x0c' => Some("\\f"),
            b'\r' => Some("\\r"),
            // The other control bytes and DEL get the numeric form below.
            0x00..=0x1f | 0x7f => None,
            // Anything else, including bytes of multi-byte characters, is kept.
            _ => continue,
        };

        // Every escaped byte is ASCII, so `pos` is always a char boundary.
        if pending_from < pos {
            wr.write_str(&v[pending_from..pos])?;
        }
        match short_escape {
            Some(text) => wr.write_str(text)?,
            None => wr.write_str(&format!("\\u{byte:04x}"))?,
        }
        pending_from = pos + 1;
    }

    if pending_from != v.len() {
        wr.write_str(&v[pending_from..])?;
    }
    wr.write_str("\"")
}
impl<'a> YamlEmitter<'a> {
    /// Creates an emitter that writes to `writer`.
    ///
    /// Defaults: two spaces per nesting level, compact mode on, and a nesting
    /// level of `-1` (nothing entered yet).
    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
        YamlEmitter {
            writer,
            best_indent: 2,
            compact: true,
            level: -1,
        }
    }

    /// Turns compact mode on or off (see `emit_val` for its effect).
    pub fn compact(&mut self, compact: bool) {
        self.compact = compact;
    }

    /// Reports whether compact mode is on.
    #[allow(dead_code)]
    pub fn is_compact(&self) -> bool {
        self.compact
    }

    /// Writes `level * best_indent` spaces; writes nothing when `level <= 0`.
    fn write_indent(&mut self) -> EmitResult {
        if self.level <= 0 {
            return Ok(());
        }
        for _ in 0..self.level {
            for _ in 0..self.best_indent {
                write!(self.writer, " ")?;
            }
        }
        Ok(())
    }

    /// Emits a single node.
    ///
    /// Arrays and hashes are delegated to `emit_array` / `emit_hash`. Strings
    /// are quoted and escaped only when `need_quotes` says so. Booleans,
    /// integers and reals are written as-is, and null / bad values as `~`.
    /// Any remaining variant produces no output.
    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
        match *node {
            Yaml::Array(ref v) => self.emit_array(v),
            Yaml::Hash(ref h) => self.emit_hash(h),
            Yaml::String(ref v) => {
                if need_quotes(v) {
                    escape_str(self.writer, v)?;
                } else {
                    write!(self.writer, "{v}")?;
                }
                Ok(())
            }
            Yaml::Boolean(v) => {
                self.writer.write_str(if v { "true" } else { "false" })?;
                Ok(())
            }
            Yaml::Integer(v) => {
                write!(self.writer, "{v}")?;
                Ok(())
            }
            Yaml::Real(ref v) => {
                write!(self.writer, "{v}")?;
                Ok(())
            }
            Yaml::Null | Yaml::BadValue => {
                write!(self.writer, "~")?;
                Ok(())
            }
            _ => Ok(()),
        }
    }

    /// Emits an array.
    ///
    /// * Empty: `[]`.
    /// * One element that is not complex (per `is_complex`): `[elem]`.
    /// * Otherwise: one `- item` entry per element, each after the first on
    ///   its own indented line.
    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
        if v.is_empty() {
            write!(self.writer, "[]")?;
        } else if v.len() == 1 && !is_complex(&v[0]) {
            write!(self.writer, "[")?;
            self.emit_val(true, &v[0])?;
            write!(self.writer, "]")?;
        } else {
            self.level += 1;
            for (cnt, x) in v.iter().enumerate() {
                if cnt > 0 {
                    writeln!(self.writer)?;
                    self.write_indent()?;
                }
                write!(self.writer, "- ")?;
                self.emit_val(true, x)?;
            }
            self.level -= 1;
        }
        Ok(())
    }

    /// Emits a hash.
    ///
    /// * Empty: `{}`.
    /// * Otherwise one entry per line. A non-scalar key is written in the
    ///   explicit `? key` / `: value` form. A scalar key is written as
    ///   `key: value`; a value that is a non-empty, non-complex hash is
    ///   wrapped in braces so it reads as `key: {k: v}`.
    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
        if h.is_empty() {
            self.writer.write_str("{}")?;
        } else {
            self.level += 1;
            for (cnt, (k, v)) in h.iter().enumerate() {
                if cnt > 0 {
                    writeln!(self.writer)?;
                    self.write_indent()?;
                }
                if !is_scalar(k) {
                    write!(self.writer, "? ")?;
                    self.emit_val(true, k)?;
                    writeln!(self.writer)?;
                    self.write_indent()?;
                    write!(self.writer, ": ")?;
                    self.emit_val(true, v)?;
                } else {
                    self.emit_node(k)?;
                    write!(self.writer, ": ")?;
                    let complex_value = is_complex(v);
                    // An empty hash already renders itself as `{}`; adding
                    // braces around it would produce `{{}}`, so only wrap
                    // hashes that actually have an entry.
                    let wrap_in_braces = !complex_value
                        && matches!(v.as_hash(), Some(inner) if !inner.is_empty());
                    if wrap_in_braces {
                        write!(self.writer, "{{")?;
                    }
                    self.emit_val(!complex_value, v)?;
                    if wrap_in_braces {
                        write!(self.writer, "}}")?;
                    }
                }
            }
            self.level -= 1;
        }
        Ok(())
    }

    /// Emits `val` in value position (after `- `, `? ` or `: `).
    ///
    /// A non-empty array or hash is moved to a fresh line, indented one level
    /// deeper, unless it is both requested `inline` and compact mode is on.
    /// Everything else goes straight to `emit_node`.
    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
        match *val {
            Yaml::Array(ref v) => {
                if !((inline && self.compact) || v.is_empty()) {
                    writeln!(self.writer)?;
                    // Indent for the nested level without entering it yet.
                    self.level += 1;
                    self.write_indent()?;
                    self.level -= 1;
                }
                self.emit_array(v)
            }
            Yaml::Hash(ref h) => {
                if !((inline && self.compact) || h.is_empty()) {
                    writeln!(self.writer)?;
                    // Indent for the nested level without entering it yet.
                    self.level += 1;
                    self.write_indent()?;
                    self.level -= 1;
                }
                self.emit_hash(h)
            }
            _ => self.emit_node(val),
        }
    }
}
/// Decides whether `string` has to be written as a quoted YAML string.
///
/// Quoting is required when the string
/// * is empty, or starts or ends with a space;
/// * starts with one of `& * ? | - < > = ! % @ .` or with `0x`;
/// * contains any of `: { } [ ] , # ` " ' \`, a tab, newline or carriage
///   return, or a control character in `\0..=\x06`, `\x0e..=\x1a` or
///   `\x1c..=\x1f`;
/// * is one of the listed boolean/null-like words or `~`;
/// * parses as an `i64` or an `f64`.
fn need_quotes(string: &str) -> bool {
    // Bare words that a YAML reader may take for a boolean or null.
    const RESERVED_WORDS: [&str; 22] = [
        "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
        "false", "on", "On", "ON", "off", "Off", "OFF", "null", "Null", "NULL", "~",
    ];

    let first = match string.chars().next() {
        Some(ch) => ch,
        // The empty string must always be quoted.
        None => return true,
    };

    if first == ' ' || string.ends_with(' ') {
        return true;
    }
    if matches!(
        first,
        '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '.'
    ) {
        return true;
    }

    let has_reserved_char = string.chars().any(|ch| {
        matches!(
            ch,
            ':' | '{'
                | '}'
                | '['
                | ']'
                | ','
                | '#'
                | '`'
                | '"'
                | '\''
                | '\\'
                | '\0'..='\x06'
                | '\t'
                | '\n'
                | '\r'
                | '\x0e'..='\x1a'
                | '\x1c'..='\x1f'
        )
    });
    if has_reserved_char {
        return true;
    }

    RESERVED_WORDS.contains(&string)
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
#[cfg(test)]
mod tests {
    use super::*;
    use sxd_document::dom::{ChildOfElement, ChildOfRoot};
    use sxd_document::parser;

    /// Returns the root element of `package`, panicking if there is none.
    fn first_element(package: &sxd_document::Package) -> Element<'_> {
        let doc = package.as_document();
        for child in doc.root().children() {
            if let ChildOfRoot::Element(e) = child {
                return e;
            }
        }
        panic!("No root element found");
    }

    // The escaped forms are spelled out as entity / numeric references so the
    // expected strings are unambiguous (U+2061..U+2064 have no visible glyph).
    #[test]
    fn handle_special_chars_escapes() {
        let input = "& < > \" ' \u{2061} \u{2062} \u{2063} \u{2064} x";
        let expected =
            "&amp; &lt; &gt; &quot; &apos; &#x2061; &#x2062; &#x2063; &#x2064; x";
        assert_eq!(handle_special_chars(input), expected);
    }

    #[test]
    fn format_element_leaf_text() {
        // `&amp;` is parsed to `&` and must be re-escaped on output.
        let package = parser::parse("<math><mi>&amp;</mi></math>").unwrap();
        let math = first_element(&package);
        let mi = math
            .children()
            .iter()
            .find_map(|c| match c {
                ChildOfElement::Element(e) => Some(*e),
                _ => None,
            })
            .unwrap();
        // Depth 0 still yields one leading space (minimum-width padding).
        assert_eq!(format_element(mi, 0), " <mi>&amp;</mi>\n");
    }

    #[test]
    fn format_element_nested() {
        let package = parser::parse("<math><mi>x</mi><mo>+</mo></math>").unwrap();
        let math = first_element(&package);
        let rendered = format_element(math, 0);
        assert!(rendered.starts_with(" <math>\n"));
        // Children are at depth 1, i.e. padded with two spaces.
        assert!(rendered.contains("\n  <mi>x</mi>\n"));
        assert!(rendered.contains("\n  <mo>+</mo>\n"));
        assert!(rendered.ends_with("</math>\n"));
    }

    #[test]
    fn format_attrs_escapes() {
        let package = parser::parse("<math a=\"&amp;\" b=\"&lt;\"></math>").unwrap();
        let math = first_element(&package);
        let rendered = format_attrs(&math.attributes());
        assert!(rendered.contains(" a='&amp;'"));
        assert!(rendered.contains(" b='&lt;'"));
    }

    #[test]
    fn format_element_non_bmp_character_literal() {
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
        let math = first_element(&package);
        let mi = math
            .children()
            .iter()
            .find_map(|c| match c {
                ChildOfElement::Element(e) => Some(*e),
                _ => None,
            })
            .unwrap();
        let rendered = format_element(mi, 0);
        assert!(rendered.contains("𝞪"));
    }

    #[test]
    fn format_element_non_bmp_character_numeric() {
        // Same character as the literal test, written as a numeric reference.
        let package = parser::parse("<math><mi>&#x1D7AA;</mi></math>").unwrap();
        let math = first_element(&package);
        let mi = math
            .children()
            .iter()
            .find_map(|c| match c {
                ChildOfElement::Element(e) => Some(*e),
                _ => None,
            })
            .unwrap();
        let rendered = format_element(mi, 0);
        assert!(rendered.contains("\u{1D7AA}"));
    }

    #[test]
    fn xpath_non_bmp_literal() {
        use sxd_xpath::{Factory, Value};
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
        let context = sxd_xpath::Context::new();
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "𝞪"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_numeric() {
        use sxd_xpath::{Factory, Value};
        let package = parser::parse("<math><mi>&#x1D7AA;</mi></math>").unwrap();
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
        let context = sxd_xpath::Context::new();
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "\u{1D7AA}"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_namespace_literal() {
        use sxd_xpath::{Factory, Value};
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
        let package = parser::parse(xml).unwrap();
        let xpath = Factory::new()
            .build("string(/m:math/m:mi)")
            .unwrap()
            .unwrap();
        let mut context = sxd_xpath::Context::new();
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "𝞪"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_namespace_numeric() {
        use sxd_xpath::{Factory, Value};
        let xml =
            "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>&#x1D7AA;</mi></math>";
        let package = parser::parse(xml).unwrap();
        let xpath = Factory::new()
            .build("string(/m:math/m:mi)")
            .unwrap()
            .unwrap();
        let mut context = sxd_xpath::Context::new();
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "\u{1D7AA}"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_text_nodeset() {
        use sxd_xpath::{Factory, Value};
        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
        let package = parser::parse(xml).unwrap();
        let xpath = Factory::new().build("/m:math/m:mi/text()").unwrap().unwrap();
        let mut context = sxd_xpath::Context::new();
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::Nodeset(nodes) => {
                let ordered = nodes.document_order();
                let node = ordered.first().expect("Expected one text node");
                let text = node.text().expect("Expected text node");
                assert_eq!(text.text(), "𝞪");
                assert_eq!(ordered.len(), 1);
            }
            _ => panic!("Expected nodeset value from xpath"),
        }
    }
}