1#![allow(clippy::needless_return)]
3
4use sxd_document::dom::{Element, ChildOfElement, Attribute};
5
6pub fn mml_to_string(e: Element) -> String {
18 return format_element(e, 0);
19}
20
21pub fn format_element(e: Element, indent: usize) -> String {
24 let namespace = "";
30 let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31 let children = e.children();
32 let has_element = children.iter().find(|&&c| matches!(c, ChildOfElement::Element(_x)));
33 if has_element.is_none() {
34 let content = children.iter()
36 .map(|c| if let ChildOfElement::Text(t) = c {t.text()} else {""})
37 .collect::<Vec<&str>>()
38 .join("");
39 return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40 } else {
46 answer += "\n"; for c in e.children() {
49 if let ChildOfElement::Element(e) = c {
50 answer += &format_element(e, indent+1);
51 }
52 }
53 }
54 return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56 }
58
59pub fn format_attrs(attrs: &[Attribute]) -> String {
61 let mut result = String::new();
62 for attr in attrs {
63 result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64 }
65 result
66}
67
/// Escapes characters that must not appear literally in the printed markup.
///
/// * The five XML-reserved characters (`"`, `&`, `'`, `<`, `>`) become their
///   predefined entity references.
/// * The invisible operators U+2061..=U+2064 become hexadecimal character
///   references so they remain visible in the printed output.
/// * Every other character is copied through unchanged.
fn handle_special_chars(text: &str) -> String {
    // The result is at least as long as the input; escapes only make it grow.
    let mut s = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '"' => s.push_str("&quot;"),
            '&' => s.push_str("&amp;"),
            '\'' => s.push_str("&apos;"),
            '<' => s.push_str("&lt;"),
            '>' => s.push_str("&gt;"),
            '\u{2061}' => s.push_str("&#x2061;"),
            '\u{2062}' => s.push_str("&#x2062;"),
            '\u{2063}' => s.push_str("&#x2063;"),
            '\u{2064}' => s.push_str("&#x2064;"),
            _ => s.push(ch),
        }
    }
    s
}
87
88
89pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
108 let mut result = String::new();
109 {
110 let mut emitter = YamlEmitter::new(&mut result);
111 emitter.compact(true);
112 emitter.emit_node(yaml).unwrap(); }
114 if indent == 0 {
115 return result;
116 }
117 let indent_str = format!("{:in$}", " ", in=2*indent);
118 result = result.replace('\n',&("\n".to_string() + &indent_str)); return indent_str + result.trim_end(); }
121
122fn is_scalar(v: &Yaml) -> bool {
128 return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
129}
130
131fn is_complex(v: &Yaml) -> bool {
132 return match v {
133 Yaml::Hash(h) => {
134 return match h.len() {
135 0 => false,
136 1 => {
137 let (key,val) = h.iter().next().unwrap();
138 return !(is_scalar(key) && is_scalar(val))
139 },
140 _ => true,
141 }
142 },
143 Yaml::Array(v) => {
144 return match v.len() {
145 0 => false,
146 1 => {
147 let hash = v[0].as_hash();
148 if let Some(hash) = hash {
149 return match hash.len() {
150 0 => false,
151 1 => {
152 let (key, val) = hash.iter().next().unwrap();
153 return !(is_scalar(key) && is_scalar(val));
154 },
155 _ => true,
156 }
157 } else {
158 return !is_scalar(&v[0]);
159 }
160 },
161 _ => true,
162 }
163 },
164 _ => false,
165 }
166}
167
168use std::error::Error;
169use std::fmt::{self, Display};
170use yaml_rust::{Yaml, yaml::Hash};
171
/// Errors that emitting YAML text can produce.
#[derive(Copy, Clone, Debug)]
// NOTE(review): `BadHashmapKey` is not constructed anywhere in the code visible here,
// which is presumably why dead-code warnings are silenced.
#[allow(dead_code)] enum EmitError {
    /// The underlying `fmt::Write` sink reported a failure.
    FmtError(fmt::Error),
    /// Displayed as "bad hashmap key".
    BadHashmapKey,
}
180
181impl Error for EmitError {
182 fn cause(&self) -> Option<&dyn Error> {
183 None
184 }
185}
186
187impl Display for EmitError {
188 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
189 match *self {
190 EmitError::FmtError(ref err) => Display::fmt(err, formatter),
191 EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
192 }
193 }
194}
195
196impl From<fmt::Error> for EmitError {
197 fn from(f: fmt::Error) -> Self {
198 EmitError::FmtError(f)
199 }
200}
201
/// Writes `Yaml` values as text into a `fmt::Write` sink.
struct YamlEmitter<'a> {
    /// Destination that receives the emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces `write_indent` writes per nesting level.
    best_indent: usize,
    /// When set, values that `emit_val` is asked to emit inline stay on the current line.
    compact: bool,

    /// Current nesting depth; `new` starts it at -1 and `emit_array`/`emit_hash`
    /// raise it while emitting their entries.
    level: isize,
}
209
/// Result type shared by the emitter's methods: nothing on success, `EmitError` on failure.
type EmitResult = Result<(), EmitError>;
211
/// Writes `v` to `wr` as a double-quoted string with escapes.
///
/// `"` and `\` are backslash-escaped; backspace, tab, newline, form feed and
/// carriage return use their short escapes (`\b`, `\t`, `\n`, `\f`, `\r`);
/// the remaining control characters U+0000..=U+001F and U+007F are written
/// as `\u00XX` with lowercase hex digits. Everything else is copied verbatim.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    wr.write_str("\"")?;

    // Byte offset of the first character that has not been copied to `wr` yet.
    let mut unwritten_from = 0;

    for (pos, ch) in v.char_indices() {
        // `Some(text)` for characters with a dedicated escape, `None` for
        // control characters that fall back to the `\u00XX` form.
        let short_escape = match ch {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\x08' => Some("\\b"),
            '\t' => Some("\\t"),
            '\n' => Some("\\n"),
            '\x0c' => Some("\\f"),
            '\r' => Some("\\r"),
            '\x00'..='\x1f' | '\x7f' => None,
            _ => continue,
        };

        // Flush the literal run that precedes this character.
        if unwritten_from < pos {
            wr.write_str(&v[unwritten_from..pos])?;
        }

        match short_escape {
            Some(text) => wr.write_str(text)?,
            None => write!(wr, "\\u{:04x}", u32::from(ch))?,
        }

        // Every escaped character is ASCII, hence exactly one byte wide.
        unwritten_from = pos + 1;
    }

    if unwritten_from != v.len() {
        wr.write_str(&v[unwritten_from..])?;
    }

    wr.write_str("\"")
}
274
275impl<'a> YamlEmitter<'a> {
276 pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
277 YamlEmitter {
278 writer,
279 best_indent: 2,
280 compact: true,
281 level: -1,
282 }
283 }
284
285 pub fn compact(&mut self, compact: bool) {
294 self.compact = compact;
295 }
296
297 #[allow(dead_code)] pub fn is_compact(&self) -> bool {
300 self.compact
301 }
302
303 fn write_indent(&mut self) -> EmitResult {
311 if self.level <= 0 {
312 return Ok(());
313 }
314 for _ in 0..self.level {
315 for _ in 0..self.best_indent {
316 write!(self.writer, " ")?;
317 }
318 }
319 Ok(())
320 }
321
322 fn emit_node(&mut self, node: &Yaml) -> EmitResult {
323 match *node {
324 Yaml::Array(ref v) => self.emit_array(v),
325 Yaml::Hash(ref h) => self.emit_hash(h),
326 Yaml::String(ref v) => {
327 if need_quotes(v) {
328 escape_str(self.writer, v)?;
329 } else {
330 write!(self.writer, "{v}")?;
331 }
332 Ok(())
333 }
334 Yaml::Boolean(v) => {
335 if v {
336 self.writer.write_str("true")?;
337 } else {
338 self.writer.write_str("false")?;
339 }
340 Ok(())
341 }
342 Yaml::Integer(v) => {
343 write!(self.writer, "{v}")?;
344 Ok(())
345 }
346 Yaml::Real(ref v) => {
347 write!(self.writer, "{v}")?;
348 Ok(())
349 }
350 Yaml::Null | Yaml::BadValue => {
351 write!(self.writer, "~")?;
352 Ok(())
353 }
354 _ => Ok(()),
356 }
357 }
358
359 fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
360 if v.is_empty() {
361 write!(self.writer, "[]")?;
362 } else if v.len() == 1 && !is_complex(&v[0]) {
363 write!(self.writer, "[")?;
365 self.emit_val(true, &v[0])?;
366 write!(self.writer, "]")?;
367 } else {
368 self.level += 1;
369
370 for (cnt, x) in v.iter().enumerate() {
371 if cnt > 0 {
372 writeln!(self.writer)?;
373 self.write_indent()?;
374 }
375 write!(self.writer, "- ")?;
376 self.emit_val(true, x)?;
377 }
378 self.level -= 1;
379 }
380 return Ok(());
381 }
382
383 fn emit_hash(&mut self, h: &Hash) -> EmitResult {
384 if h.is_empty() {
385 self.writer.write_str("{}")?;
386 } else {
387 self.level += 1;
389 for (cnt, (k, v)) in h.iter().enumerate() {
390 if cnt > 0 {
396 writeln!(self.writer)?;
397 self.write_indent()?;
398 }
399 if !is_scalar(k) {
400 write!(self.writer, "? ")?;
401 self.emit_val(true, k)?;
402 writeln!(self.writer)?;
403 self.write_indent()?;
404 write!(self.writer, ": ")?;
405 self.emit_val(true, v)?;
406 } else {
407 self.emit_node(k)?;
408 write!(self.writer, ": ")?;
409
410 let complex_value = is_complex(v);
412 if !complex_value && v.as_hash().is_some() {
413 write!(self.writer, "{{")?;
414 }
415 self.emit_val(!complex_value, v)?;
417 if !complex_value && v.as_hash().is_some() {
418 write!(self.writer, "}}")?;
419 }
420 }
421 }
422 self.level -= 1;
423 }
424 Ok(())
425 }
426
427 fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
433 match *val {
434 Yaml::Array(ref v) => {
435 if !((inline && self.compact) || v.is_empty()) {
436 writeln!(self.writer)?;
437 self.level += 1;
438 self.write_indent()?;
439 self.level -= 1;
440 }
441 self.emit_array(v)
442 }
443 Yaml::Hash(ref h) => {
444 if !((inline && self.compact) || h.is_empty()) {
445 writeln!(self.writer)?;
446 self.level += 1;
447 self.write_indent()?;
448 self.level -= 1;
449 }
450 self.emit_hash(h)
451 }
452 _ => {
453 self.emit_node(val)
455 }
456 }
457 }
458}
459
/// Decides whether `string` must be written in double quotes.
///
/// Quotes are required when the string
/// * is empty, or starts or ends with a space;
/// * starts with one of `& * ? | - < > = ! % @`, with `.`, or with `0x`;
/// * contains one of the characters accepted by `is_unsafe_char` below;
/// * equals one of the boolean/null-like words in `RESERVED_WORDS`;
/// * parses as an `i64` or an `f64`.
fn need_quotes(string: &str) -> bool {
    /// Characters that force quoting when they appear first.
    const LEADING_INDICATORS: &[char] =
        &['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@'];

    /// Words that force quoting when they make up the whole string.
    const RESERVED_WORDS: &[&str] = &[
        "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
        "false", "on", "On", "ON", "off", "Off", "OFF",
        "null", "Null", "NULL", "~",
    ];

    /// Characters that force quoting wherever they appear.
    fn is_unsafe_char(character: char) -> bool {
        matches!(
            character,
            ':' | '{' | '}' | '[' | ']' | ',' | '#' | '`' | '\"' | '\'' | '\\'
                | '\0'..='\x06'
                | '\t'
                | '\n'
                | '\r'
                | '\x0e'..='\x1a'
                | '\x1c'..='\x1f'
        )
    }

    if string.is_empty() || string.starts_with(' ') || string.ends_with(' ') {
        return true;
    }
    if string.starts_with(LEADING_INDICATORS)
        || string.starts_with('.')
        || string.starts_with("0x")
    {
        return true;
    }
    if string.chars().any(is_unsafe_char) {
        return true;
    }

    RESERVED_WORDS.contains(&string)
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
516
#[cfg(test)]
mod tests {
    use super::*;
    use sxd_document::dom::{ChildOfElement, ChildOfRoot};
    use sxd_document::parser;

    /// Returns the document's root element; panics if there is none.
    fn first_element(package: &sxd_document::Package) -> Element<'_> {
        let doc = package.as_document();
        for child in doc.root().children() {
            if let ChildOfRoot::Element(e) = child {
                return e;
            }
        }
        panic!("No root element found");
    }

    /// Returns the first element child of `parent`; panics if there is none.
    fn first_child_element(parent: Element<'_>) -> Element<'_> {
        parent
            .children()
            .into_iter()
            .find_map(|c| match c {
                ChildOfElement::Element(e) => Some(e),
                _ => None,
            })
            .expect("Expected an element child")
    }

    #[test]
    fn handle_special_chars_escapes() {
        // Reserved XML characters and the invisible operators must come out
        // as entity / character references; ordinary text is untouched.
        let input = "& < > \" ' \u{2061} \u{2062} \u{2063} \u{2064} x";
        let expected = "&amp; &lt; &gt; &quot; &apos; &#x2061; &#x2062; &#x2063; &#x2064; x";
        assert_eq!(handle_special_chars(input), expected);
    }

    #[test]
    fn format_element_leaf_text() {
        // The parser decodes `&amp;` to `&`; the printer must re-escape it.
        let package = parser::parse("<math><mi>&amp;</mi></math>").unwrap();
        let mi = first_child_element(first_element(&package));
        // Depth 0 still carries the single-space pad produced by `{:width$}`.
        assert_eq!(format_element(mi, 0), " <mi>&amp;</mi>\n");
    }

    #[test]
    fn format_element_nested() {
        let package = parser::parse("<math><mi>x</mi><mo>+</mo></math>").unwrap();
        let math = first_element(&package);
        let rendered = format_element(math, 0);
        assert!(rendered.starts_with(" <math>\n"));
        // Children are one level deeper: two spaces of indentation.
        assert!(rendered.contains("\n  <mi>x</mi>\n"));
        assert!(rendered.contains("\n  <mo>+</mo>\n"));
        assert!(rendered.ends_with("</math>\n"));
    }

    #[test]
    fn format_attrs_escapes() {
        let package = parser::parse("<math a=\"&amp;\" b=\"&lt;\"></math>").unwrap();
        let math = first_element(&package);
        let rendered = format_attrs(&math.attributes());
        assert!(rendered.contains(" a='&amp;'"));
        assert!(rendered.contains(" b='&lt;'"));
    }

    #[test]
    fn format_element_non_bmp_character_literal() {
        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
        let mi = first_child_element(first_element(&package));
        let rendered = format_element(mi, 0);
        assert!(rendered.contains("𝞪"));
    }

    #[test]
    fn format_element_non_bmp_character_numeric() {
        // Same character as the literal test, written as a numeric reference.
        let package = parser::parse("<math><mi>&#x1D7AA;</mi></math>").unwrap();
        let mi = first_child_element(first_element(&package));
        let rendered = format_element(mi, 0);
        assert!(rendered.contains("𝞪"));
    }

    #[test]
    fn xpath_non_bmp_literal() {
        use sxd_xpath::{Factory, Value};

        let package = parser::parse("<math><mi>𝞪</mi></math>").unwrap();
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
        let context = sxd_xpath::Context::new();

        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "𝞪"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_numeric() {
        use sxd_xpath::{Factory, Value};

        let package = parser::parse("<math><mi>&#x1D7AA;</mi></math>").unwrap();
        let xpath = Factory::new().build("string(/math/mi)").unwrap().unwrap();
        let context = sxd_xpath::Context::new();

        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "𝞪"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_namespace_literal() {
        use sxd_xpath::{Factory, Value};

        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
        let package = parser::parse(xml).unwrap();
        let xpath = Factory::new()
            .build("string(/m:math/m:mi)")
            .unwrap()
            .unwrap();
        let mut context = sxd_xpath::Context::new();
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");

        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "𝞪"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_namespace_numeric() {
        use sxd_xpath::{Factory, Value};

        let xml =
            "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>&#x1D7AA;</mi></math>";
        let package = parser::parse(xml).unwrap();
        let xpath = Factory::new()
            .build("string(/m:math/m:mi)")
            .unwrap()
            .unwrap();
        let mut context = sxd_xpath::Context::new();
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");

        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::String(s) => assert_eq!(s, "𝞪"),
            _ => panic!("Expected string value from xpath"),
        }
    }

    #[test]
    fn xpath_non_bmp_text_nodeset() {
        use sxd_xpath::{Factory, Value};

        let xml = "<math xmlns=\"http://www.w3.org/1998/Math/MathML\"><mi>𝞪</mi></math>";
        let package = parser::parse(xml).unwrap();
        let xpath = Factory::new().build("/m:math/m:mi/text()").unwrap().unwrap();
        let mut context = sxd_xpath::Context::new();
        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");

        let value = xpath.evaluate(&context, first_element(&package)).unwrap();
        match value {
            Value::Nodeset(nodes) => {
                let ordered = nodes.document_order();
                let node = ordered.first().expect("Expected one text node");
                let text = node.text().expect("Expected text node");
                assert_eq!(text.text(), "𝞪");
                assert_eq!(ordered.len(), 1);
            }
            _ => panic!("Expected nodeset value from xpath"),
        }
    }
}