libmathcat/pretty_print.rs
1//! Useful functions for debugging and error messages.
2#![allow(clippy::needless_return)]
3
4use sxd_document::dom::*;
5
6// #[allow(dead_code)]
7// pub fn pp_doc(doc: &Document) {
8// for root_child in doc.root().children() {
9// if let ChildOfRoot::Element(e) = root_child {
10// format_element(&e, 0);
11// break;
12// }
13// };
14// }
15
16/// Pretty-print the MathML represented by `element`.
17pub fn mml_to_string(e: Element) -> String {
18 return format_element(e, 0);
19}
20
21/// Pretty-print the MathML represented by `element`.
22/// * `indent` -- the amount of indentation to start with
23pub fn format_element(e: Element, indent: usize) -> String {
24 // let namespace = match e.name().namespace_uri() {
25 // None => "".to_string(),
26 // Some(prefix) => prefix.to_string() + ":",
27 // };
28 // let namespace = namespace.as_str();
29 let namespace = "";
30 let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31 let children = e.children();
32 let has_element = children.iter().find(|&&c| matches!(c, ChildOfElement::Element(_x)));
33 if has_element.is_none() {
34 // print text content
35 let content = children.iter()
36 .map(|c| if let ChildOfElement::Text(t) = c {t.text()} else {""})
37 .collect::<Vec<&str>>()
38 .join("");
39 return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40 // for child in children {
41 // if let ChildOfElement::Text(t) = child {
42 // return format!("{}{}</{}{}>\n", answer, &make_invisible_chars_visible(t.text()), namespace, e.name().local_part());
43 // }
44 // };
45 } else {
46 answer += "\n"; // tag with children should start on new line
47 // recurse on each Element child
48 for c in e.children() {
49 if let ChildOfElement::Element(e) = c {
50 answer += &format_element(e, indent+1);
51 }
52 }
53 }
54 return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56 // Use the &#x....; representation for invisible chars when printing
57}
58
59/// Format a vector of attributes as a string with a leading space
60pub fn format_attrs(attrs: &[Attribute]) -> String {
61 let mut result = String::new();
62 for attr in attrs {
63 result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64 }
65 result
66}
67
/// Escape `text` so it can be shown inside pretty-printed MathML.
///
/// * The five XML special characters (`"`, `&`, `'`, `<`, `>`) are replaced by
///   their predefined entities.
/// * The invisible operators U+2061..=U+2064 are replaced by `&#x....;`
///   character references so they are visible in the output.
/// * Every other character is copied unchanged.
fn handle_special_chars(text: &str) -> String {
    // Most characters are copied verbatim, so the input length is a good lower bound.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '"' => escaped.push_str("&quot;"),
            '&' => escaped.push_str("&amp;"),
            '\'' => escaped.push_str("&apos;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '\u{2061}' => escaped.push_str("&#x2061;"),
            '\u{2062}' => escaped.push_str("&#x2062;"),
            '\u{2063}' => escaped.push_str("&#x2063;"),
            '\u{2064}' => escaped.push_str("&#x2064;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
84
85
86// /// Pretty print an xpath value.
87// /// If the value is a `NodeSet`, the MathML for the node/element is returned.
88// pub fn pp_xpath_value(value: Value) {
89// use sxd_xpath::Value;
90// use sxd_xpath::nodeset::Node;
91// debug!("XPath value:");
92// if let Value::Nodeset(nodeset) = &value {
93// for node in nodeset.document_order() {
94// match node {
95// Node::Element(el) => {debug!("{}", crate::pretty_print::format_element(&el, 1))},
96// Node::Text(t) => {debug!("found Text value: {}", t.text())},
97// _ => {debug!("found unexpected node type")}
98// }
99// }
100// }
101// }
102
103/// Convert YAML to a string using with `indent` amount of space.
104pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
105 let mut result = String::new();
106 {
107 let mut emitter = YamlEmitter::new(&mut result);
108 emitter.compact(true);
109 emitter.emit_node(yaml).unwrap(); // dump the YAML object to a String
110 }
111 if indent == 0 {
112 return result;
113 }
114 let indent_str = format!("{:in$}", " ", in=2*indent);
115 result = result.replace('\n',&("\n".to_string() + &indent_str)); // add indentation to all but first line
116 return indent_str + result.trim_end(); // add indent to first line and remove an extra indent at end
117}
118
119/* --------------------- Tweaked pretty printer for YAML (from YAML code) --------------------- */
120
121// Changed: new function to determine if more compact notation can be used (when child is a one entry simple array/hash). Writes
122// -foo [bar: bletch]
123// -foo {bar: bletch}
124fn is_scalar(v: &Yaml) -> bool {
125 return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
126}
127
128fn is_complex(v: &Yaml) -> bool {
129 return match v {
130 Yaml::Hash(h) => {
131 return match h.len() {
132 0 => false,
133 1 => {
134 let (key,val) = h.iter().next().unwrap();
135 return !(is_scalar(key) && is_scalar(val))
136 },
137 _ => true,
138 }
139 },
140 Yaml::Array(v) => {
141 return match v.len() {
142 0 => false,
143 1 => {
144 let hash = v[0].as_hash();
145 if let Some(hash) = hash {
146 return match hash.len() {
147 0 => false,
148 1 => {
149 let (key, val) = hash.iter().next().unwrap();
150 return !(is_scalar(key) && is_scalar(val));
151 },
152 _ => true,
153 }
154 } else {
155 return !is_scalar(&v[0]);
156 }
157 },
158 _ => true,
159 }
160 },
161 _ => false,
162 }
163}
164
165use std::error::Error;
166use std::fmt::{self, Display};
167extern crate yaml_rust;
168use yaml_rust::{Yaml, yaml::Hash};
169
170//use crate::yaml::{Hash, Yaml};
171
/// Errors that can be reported while emitting YAML.
#[derive(Copy, Clone, Debug)]
#[allow(dead_code)] // from original YAML code (isn't used here)
enum EmitError {
    /// The underlying writer reported a formatting error.
    FmtError(fmt::Error),
    /// Kept from the original YAML code; displayed as "bad hashmap key".
    BadHashmapKey,
}

// No underlying source is exposed: the default `Error::source` returns `None`,
// which is what the deprecated `cause` override used to return.
impl Error for EmitError {}

impl Display for EmitError {
    /// A wrapped `fmt::Error` is displayed as itself; `BadHashmapKey` has a fixed message.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        match self {
            EmitError::FmtError(err) => Display::fmt(err, formatter),
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
        }
    }
}

/// Lets `?` turn a failed `write!` into an `EmitError`.
impl From<fmt::Error> for EmitError {
    fn from(f: fmt::Error) -> Self {
        EmitError::FmtError(f)
    }
}
199
/// State of the tweaked YAML pretty printer.
struct YamlEmitter<'a> {
    /// Destination that receives the emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces written per nesting level.
    best_indent: usize,
    /// Whether the 'compact inline notation' is used.
    compact: bool,

    /// Current nesting level; no indentation is written while it is `<= 0`.
    level: isize,
}
207
208type EmitResult = Result<(), EmitError>;
209
// from serialize::json
/// Write `v` to `wr` as a double-quoted string.
///
/// `"` and `\` are backslash-escaped, the bytes 0x00..=0x1f and 0x7f are written
/// as `\b`, `\t`, `\n`, `\f`, `\r` or `\uXXXX`, and everything else is copied as is.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    // Escape sequence for each of the bytes 0x00..=0x1f, indexed by the byte value.
    const CONTROL_ESCAPES: [&str; 32] = [
        "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f",
    ];

    wr.write_str("\"")?;

    // Start of the run of ordinary bytes that has not been written yet.
    let mut unwritten_from = 0;

    for (pos, byte) in v.bytes().enumerate() {
        let replacement = match byte {
            b'"' => "\\\"",
            b'\\' => "\\\\",
            0x00..=0x1f => CONTROL_ESCAPES[usize::from(byte)],
            0x7f => "\\u007f",
            _ => continue,
        };

        // Every escaped byte is ASCII, so both slice ends are char boundaries.
        if unwritten_from < pos {
            wr.write_str(&v[unwritten_from..pos])?;
        }
        wr.write_str(replacement)?;
        unwritten_from = pos + 1;
    }

    if unwritten_from != v.len() {
        wr.write_str(&v[unwritten_from..])?;
    }

    wr.write_str("\"")
}
272
273impl<'a> YamlEmitter<'a> {
274 pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
275 YamlEmitter {
276 writer,
277 best_indent: 2,
278 compact: true,
279 level: -1,
280 }
281 }
282
283 /// Set 'compact inline notation' on or off, as described for block
284 /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
285 /// and
286 /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
287 ///
288 /// In this form, blocks cannot have any properties (such as anchors
289 /// or tags), which should be OK, because this emitter doesn't
290 /// (currently) emit those anyways.
291 pub fn compact(&mut self, compact: bool) {
292 self.compact = compact;
293 }
294
295 /// Determine if this emitter is using 'compact inline notation'.
296 #[allow(dead_code)] // not all fields are used in this program
297 pub fn is_compact(&self) -> bool {
298 self.compact
299 }
300
301 // fn dump(&mut self, doc: &Yaml) -> EmitResult {
302 // // write DocumentStart
303 // writeln!(self.writer, "---")?;
304 // self.level = -1;
305 // self.emit_node(doc)
306 // }
307
308 fn write_indent(&mut self) -> EmitResult {
309 if self.level <= 0 {
310 return Ok(());
311 }
312 for _ in 0..self.level {
313 for _ in 0..self.best_indent {
314 write!(self.writer, " ")?;
315 }
316 }
317 Ok(())
318 }
319
320 fn emit_node(&mut self, node: &Yaml) -> EmitResult {
321 match *node {
322 Yaml::Array(ref v) => self.emit_array(v),
323 Yaml::Hash(ref h) => self.emit_hash(h),
324 Yaml::String(ref v) => {
325 if need_quotes(v) {
326 escape_str(self.writer, v)?;
327 } else {
328 write!(self.writer, "{v}")?;
329 }
330 Ok(())
331 }
332 Yaml::Boolean(v) => {
333 if v {
334 self.writer.write_str("true")?;
335 } else {
336 self.writer.write_str("false")?;
337 }
338 Ok(())
339 }
340 Yaml::Integer(v) => {
341 write!(self.writer, "{v}")?;
342 Ok(())
343 }
344 Yaml::Real(ref v) => {
345 write!(self.writer, "{v}")?;
346 Ok(())
347 }
348 Yaml::Null | Yaml::BadValue => {
349 write!(self.writer, "~")?;
350 Ok(())
351 }
352 // XXX(chenyh) Alias
353 _ => Ok(()),
354 }
355 }
356
357 fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
358 if v.is_empty() {
359 write!(self.writer, "[]")?;
360 } else if v.len() == 1 && !is_complex(&v[0]) {
361 // changed -- for arrays that have only one simple element, make them more compact by using [...] notation
362 write!(self.writer, "[")?;
363 self.emit_val(true, &v[0])?;
364 write!(self.writer, "]")?;
365 } else {
366 self.level += 1;
367
368 for (cnt, x) in v.iter().enumerate() {
369 if cnt > 0 {
370 writeln!(self.writer)?;
371 self.write_indent()?;
372 }
373 write!(self.writer, "- ")?;
374 self.emit_val(true, x)?;
375 }
376 self.level -= 1;
377 }
378 return Ok(());
379 }
380
381 fn emit_hash(&mut self, h: &Hash) -> EmitResult {
382 if h.is_empty() {
383 self.writer.write_str("{}")?;
384 } else {
385 // changed -- for hashmaps that have only one simple element, make them more compact by using {...}} notation
386 self.level += 1;
387 for (cnt, (k, v)) in h.iter().enumerate() {
388 // changed: use new function is_scalar()
389 // let complex_key = match *k {
390 // Yaml::Hash(_) | Yaml::Array(_) => true,
391 // _ => false,
392 // };
393 if cnt > 0 {
394 writeln!(self.writer)?;
395 self.write_indent()?;
396 }
397 if !is_scalar(k) {
398 write!(self.writer, "? ")?;
399 self.emit_val(true, k)?;
400 writeln!(self.writer)?;
401 self.write_indent()?;
402 write!(self.writer, ": ")?;
403 self.emit_val(true, v)?;
404 } else {
405 self.emit_node(k)?;
406 write!(self.writer, ": ")?;
407
408 // changed to use braces in some cases
409 let complex_value = is_complex(v);
410 if !complex_value && v.as_hash().is_some() {
411 write!(self.writer, "{{")?;
412 }
413 // changed to use complex_value from 'false'
414 self.emit_val(!complex_value, v)?;
415 if !complex_value && v.as_hash().is_some() {
416 write!(self.writer, "}}")?;
417 }
418 }
419 }
420 self.level -= 1;
421 }
422 Ok(())
423 }
424
425 /// Emit a yaml as a hash or array value: i.e., which should appear
426 /// following a ":" or "-", either after a space, or on a new line.
427 /// If `inline` is true, then the preceding characters are distinct
428 /// and short enough to respect the compact flag.
429 // changed: use to always emit ' ' for inline -- that is now handled elsewhere
430 fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
431 match *val {
432 Yaml::Array(ref v) => {
433 if !((inline && self.compact) || v.is_empty()) {
434 writeln!(self.writer)?;
435 self.level += 1;
436 self.write_indent()?;
437 self.level -= 1;
438 }
439 self.emit_array(v)
440 }
441 Yaml::Hash(ref h) => {
442 if !((inline && self.compact) || h.is_empty()) {
443 writeln!(self.writer)?;
444 self.level += 1;
445 self.write_indent()?;
446 self.level -= 1;
447 }
448 self.emit_hash(h)
449 }
450 _ => {
451 // write!(self.writer, " ")?;
452 self.emit_node(val)
453 }
454 }
455 }
456}
457
/// Check if the string requires quoting.
///
/// Strings starting with any of the following characters must be quoted.
/// &, *, ?, |, -, <, >, =, !, %, @
/// Strings containing any of the following characters must be quoted.
/// :, {, }, [, ], ,, #, `, ", ', \
///
/// If the string contains any control character (`\0`..=`\x1f`, which includes
/// \a, \b, \t, \n, \v, \f, \r and \e, or `\x7f`), it must be escaped with double
/// quotes. These are the characters `escape_str` writes as escape sequences.
///
/// Finally, there are other cases when the strings must be quoted, no matter if you're using single or double quotes:
/// * When the string is empty, or starts or ends with a space;
/// * When the string is true or false (otherwise, it would be treated as a boolean value);
/// * When the string is null or ~ (otherwise, it would be considered as a null value);
/// * When the string looks like a number, such as integers (e.g. 2, 14, etc.), floats (e.g. 2.6, 14.9) and exponential numbers (e.g. 12e7, etc.) (otherwise, it would be treated as a numeric value);
/// * When the string starts with `.` or `0x`.
fn need_quotes(string: &str) -> bool {
    // Words that would otherwise be read back as booleans or null.
    const RESERVED_WORDS: &[&str] = &[
        // http://yaml.org/type/bool.html
        // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
        // them as string, not booleans, although it is violating the YAML 1.1 specification.
        // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
        "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
        "false", "on", "On", "ON", "off", "Off", "OFF",
        // http://yaml.org/type/null.html
        "null", "Null", "NULL", "~",
    ];

    fn is_special(character: char) -> bool {
        matches!(
            character,
            ':' | '{' | '}' | '[' | ']' | ',' | '#' | '`' | '"' | '\'' | '\\'
            // every C0 control character plus DEL, not just a subset of them
            | '\0'..='\x1f'
            | '\x7f'
        )
    }

    string.is_empty()
        || string.starts_with(' ')
        || string.ends_with(' ')
        || string.starts_with(['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@'])
        || string.contains(is_special)
        || RESERVED_WORDS.contains(&string)
        || string.starts_with('.')
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
513}