libmathcat/pretty_print.rs
1//! Useful functions for debugging and error messages.
2#![allow(clippy::needless_return)]
3
4use sxd_document::dom::*;
5
6// #[allow(dead_code)]
7// pub fn pp_doc(doc: &Document) {
8// for root_child in doc.root().children() {
9// if let ChildOfRoot::Element(e) = root_child {
10// format_element(&e, 0);
11// break;
12// }
13// };
14// }
15
16/// Pretty-print the MathML represented by `element`.
17pub fn mml_to_string(e: Element) -> String {
18 return format_element(e, 0);
19}
20
21/// Pretty-print the MathML represented by `element`.
22/// * `indent` -- the amount of indentation to start with
23pub fn format_element(e: Element, indent: usize) -> String {
24 // let namespace = match e.name().namespace_uri() {
25 // None => "".to_string(),
26 // Some(prefix) => prefix.to_string() + ":",
27 // };
28 // let namespace = namespace.as_str();
29 let namespace = "";
30 let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31 let children = e.children();
32 let has_element = children.iter().find(|&&c| matches!(c, ChildOfElement::Element(_x)));
33 if has_element.is_none() {
34 // print text content
35 let content = children.iter()
36 .map(|c| if let ChildOfElement::Text(t) = c {t.text()} else {""})
37 .collect::<Vec<&str>>()
38 .join("");
39 return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40 // for child in children {
41 // if let ChildOfElement::Text(t) = child {
42 // return format!("{}{}</{}{}>\n", answer, &make_invisible_chars_visible(t.text()), namespace, e.name().local_part());
43 // }
44 // };
45 } else {
46 answer += "\n"; // tag with children should start on new line
47 // recurse on each Element child
48 for c in e.children() {
49 if let ChildOfElement::Element(e) = c {
50 answer += &format_element(e, indent+1);
51 }
52 }
53 }
54 return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56 // Use the &#x....; representation for invisible chars when printing
57}
58
59/// Format a vector of attributes as a string with a leading space
60pub fn format_attrs(attrs: &[Attribute]) -> String {
61 let mut result = String::new();
62 for attr in attrs {
63 result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64 }
65 result
66}
67
/// Escape `text` for inclusion in printed MathML.
///
/// * The five XML-special characters (`"`, `&`, `'`, `<`, `>`) are replaced by
///   their named entities.
/// * The invisible operators U+2061 through U+2064 are replaced by their
///   `&#x....;` numeric character references so they are visible when printed.
/// * Every other character is copied unchanged.
fn handle_special_chars(text: &str) -> String {
    // The output is at least as long as the input; start with that much capacity.
    let mut s = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '"' => s.push_str("&quot;"),
            '&' => s.push_str("&amp;"),
            '\'' => s.push_str("&apos;"),
            '<' => s.push_str("&lt;"),
            '>' => s.push_str("&gt;"),
            '\u{2061}' => s.push_str("&#x2061;"),
            '\u{2062}' => s.push_str("&#x2062;"),
            '\u{2063}' => s.push_str("&#x2063;"),
            '\u{2064}' => s.push_str("&#x2064;"),
            _ => s.push(ch),
        }
    }
    s
}
87
88
89// /// Pretty print an xpath value.
90// /// If the value is a `NodeSet`, the MathML for the node/element is returned.
91// pub fn pp_xpath_value(value: Value) {
92// use sxd_xpath::Value;
93// use sxd_xpath::nodeset::Node;
94// debug!("XPath value:");
95// if let Value::Nodeset(nodeset) = &value {
96// for node in nodeset.document_order() {
97// match node {
98// Node::Element(el) => {debug!("{}", crate::pretty_print::format_element(&el, 1))},
99// Node::Text(t) => {debug!("found Text value: {}", t.text())},
100// _ => {debug!("found unexpected node type")}
101// }
102// }
103// }
104// }
105
106/// Convert YAML to a string using with `indent` amount of space.
107pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
108 let mut result = String::new();
109 {
110 let mut emitter = YamlEmitter::new(&mut result);
111 emitter.compact(true);
112 emitter.emit_node(yaml).unwrap(); // dump the YAML object to a String
113 }
114 if indent == 0 {
115 return result;
116 }
117 let indent_str = format!("{:in$}", " ", in=2*indent);
118 result = result.replace('\n',&("\n".to_string() + &indent_str)); // add indentation to all but first line
119 return indent_str + result.trim_end(); // add indent to first line and remove an extra indent at end
120}
121
122/* --------------------- Tweaked pretty printer for YAML (from YAML code) --------------------- */
123
124// Changed: new function to determine if more compact notation can be used (when child is a one entry simple array/hash). Writes
125// -foo [bar: bletch]
126// -foo {bar: bletch}
127fn is_scalar(v: &Yaml) -> bool {
128 return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
129}
130
131fn is_complex(v: &Yaml) -> bool {
132 return match v {
133 Yaml::Hash(h) => {
134 return match h.len() {
135 0 => false,
136 1 => {
137 let (key,val) = h.iter().next().unwrap();
138 return !(is_scalar(key) && is_scalar(val))
139 },
140 _ => true,
141 }
142 },
143 Yaml::Array(v) => {
144 return match v.len() {
145 0 => false,
146 1 => {
147 let hash = v[0].as_hash();
148 if let Some(hash) = hash {
149 return match hash.len() {
150 0 => false,
151 1 => {
152 let (key, val) = hash.iter().next().unwrap();
153 return !(is_scalar(key) && is_scalar(val));
154 },
155 _ => true,
156 }
157 } else {
158 return !is_scalar(&v[0]);
159 }
160 },
161 _ => true,
162 }
163 },
164 _ => false,
165 }
166}
167
168use std::error::Error;
169use std::fmt::{self, Display};
170use yaml_rust::{Yaml, yaml::Hash};
171
172//use crate::yaml::{Hash, Yaml};
173
/// Errors that can be reported while emitting YAML.
#[derive(Copy, Clone, Debug)]
#[allow(dead_code)] // from original YAML code (isn't used here)
enum EmitError {
    /// The underlying writer reported a formatting error.
    FmtError(fmt::Error),
    /// Displays as "bad hashmap key"; kept from the original YAML code.
    BadHashmapKey,
}

impl Error for EmitError {
    // `Error::cause` is deprecated in favor of `source`. No source is reported
    // because `Display` already forwards to the wrapped `fmt::Error`.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        None
    }
}

impl Display for EmitError {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        match self {
            EmitError::FmtError(err) => Display::fmt(err, formatter),
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
        }
    }
}

impl From<fmt::Error> for EmitError {
    /// Wrap a formatting error so `?` can be used on `write!` results.
    fn from(f: fmt::Error) -> Self {
        EmitError::FmtError(f)
    }
}
201
/// Emitter that writes a YAML value as text to a `fmt::Write` sink.
struct YamlEmitter<'a> {
    /// Destination for the emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces written per indentation level.
    best_indent: usize,
    /// Whether 'compact inline notation' is in use.
    compact: bool,
    /// Current nesting level; signed so it can start below zero.
    level: isize,
}
209
210type EmitResult = Result<(), EmitError>;
211
// from serialize::json
/// Write `v` to `wr` as a double-quoted string with JSON-style escapes.
///
/// `"` and `\` are backslash-escaped; backspace, tab, newline, form feed and
/// carriage return use their short forms (`\b`, `\t`, `\n`, `\f`, `\r`); every
/// other byte in `0x00..=0x1f`, and `0x7f`, is written as `\u00xx` with
/// lowercase hex digits. All other bytes are copied through unchanged.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    wr.write_str("\"")?;

    // Start of the run of bytes that still has to be copied verbatim.
    let mut pending = 0;

    for (idx, byte) in v.bytes().enumerate() {
        let short_form = match byte {
            b'"' => Some("\\\""),
            b'\\' => Some("\\\\"),
            b'\x08' => Some("\\b"),
            b'\t' => Some("\\t"),
            b'\n' => Some("\\n"),
            b'\x0c' => Some("\\f"),
            b'\r' => Some("\\r"),
            // remaining control bytes have no short form
            0x00..=0x1f | 0x7f => None,
            _ => continue,
        };

        // Flush the unescaped run in front of this byte. Every escaped byte is
        // ASCII, so `idx` is always a valid char boundary.
        if pending < idx {
            wr.write_str(&v[pending..idx])?;
        }

        match short_form {
            Some(text) => wr.write_str(text)?,
            None => write!(wr, "\\u{:04x}", byte)?,
        }

        pending = idx + 1;
    }

    if pending != v.len() {
        wr.write_str(&v[pending..])?;
    }

    wr.write_str("\"")
}
274
275impl<'a> YamlEmitter<'a> {
276 pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
277 YamlEmitter {
278 writer,
279 best_indent: 2,
280 compact: true,
281 level: -1,
282 }
283 }
284
285 /// Set 'compact inline notation' on or off, as described for block
286 /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
287 /// and
288 /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
289 ///
290 /// In this form, blocks cannot have any properties (such as anchors
291 /// or tags), which should be OK, because this emitter doesn't
292 /// (currently) emit those anyways.
293 pub fn compact(&mut self, compact: bool) {
294 self.compact = compact;
295 }
296
297 /// Determine if this emitter is using 'compact inline notation'.
298 #[allow(dead_code)] // not all fields are used in this program
299 pub fn is_compact(&self) -> bool {
300 self.compact
301 }
302
303 // fn dump(&mut self, doc: &Yaml) -> EmitResult {
304 // // write DocumentStart
305 // writeln!(self.writer, "---")?;
306 // self.level = -1;
307 // self.emit_node(doc)
308 // }
309
310 fn write_indent(&mut self) -> EmitResult {
311 if self.level <= 0 {
312 return Ok(());
313 }
314 for _ in 0..self.level {
315 for _ in 0..self.best_indent {
316 write!(self.writer, " ")?;
317 }
318 }
319 Ok(())
320 }
321
322 fn emit_node(&mut self, node: &Yaml) -> EmitResult {
323 match *node {
324 Yaml::Array(ref v) => self.emit_array(v),
325 Yaml::Hash(ref h) => self.emit_hash(h),
326 Yaml::String(ref v) => {
327 if need_quotes(v) {
328 escape_str(self.writer, v)?;
329 } else {
330 write!(self.writer, "{v}")?;
331 }
332 Ok(())
333 }
334 Yaml::Boolean(v) => {
335 if v {
336 self.writer.write_str("true")?;
337 } else {
338 self.writer.write_str("false")?;
339 }
340 Ok(())
341 }
342 Yaml::Integer(v) => {
343 write!(self.writer, "{v}")?;
344 Ok(())
345 }
346 Yaml::Real(ref v) => {
347 write!(self.writer, "{v}")?;
348 Ok(())
349 }
350 Yaml::Null | Yaml::BadValue => {
351 write!(self.writer, "~")?;
352 Ok(())
353 }
354 // XXX(chenyh) Alias
355 _ => Ok(()),
356 }
357 }
358
359 fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
360 if v.is_empty() {
361 write!(self.writer, "[]")?;
362 } else if v.len() == 1 && !is_complex(&v[0]) {
363 // changed -- for arrays that have only one simple element, make them more compact by using [...] notation
364 write!(self.writer, "[")?;
365 self.emit_val(true, &v[0])?;
366 write!(self.writer, "]")?;
367 } else {
368 self.level += 1;
369
370 for (cnt, x) in v.iter().enumerate() {
371 if cnt > 0 {
372 writeln!(self.writer)?;
373 self.write_indent()?;
374 }
375 write!(self.writer, "- ")?;
376 self.emit_val(true, x)?;
377 }
378 self.level -= 1;
379 }
380 return Ok(());
381 }
382
383 fn emit_hash(&mut self, h: &Hash) -> EmitResult {
384 if h.is_empty() {
385 self.writer.write_str("{}")?;
386 } else {
387 // changed -- for hashmaps that have only one simple element, make them more compact by using {...}} notation
388 self.level += 1;
389 for (cnt, (k, v)) in h.iter().enumerate() {
390 // changed: use new function is_scalar()
391 // let complex_key = match *k {
392 // Yaml::Hash(_) | Yaml::Array(_) => true,
393 // _ => false,
394 // };
395 if cnt > 0 {
396 writeln!(self.writer)?;
397 self.write_indent()?;
398 }
399 if !is_scalar(k) {
400 write!(self.writer, "? ")?;
401 self.emit_val(true, k)?;
402 writeln!(self.writer)?;
403 self.write_indent()?;
404 write!(self.writer, ": ")?;
405 self.emit_val(true, v)?;
406 } else {
407 self.emit_node(k)?;
408 write!(self.writer, ": ")?;
409
410 // changed to use braces in some cases
411 let complex_value = is_complex(v);
412 if !complex_value && v.as_hash().is_some() {
413 write!(self.writer, "{{")?;
414 }
415 // changed to use complex_value from 'false'
416 self.emit_val(!complex_value, v)?;
417 if !complex_value && v.as_hash().is_some() {
418 write!(self.writer, "}}")?;
419 }
420 }
421 }
422 self.level -= 1;
423 }
424 Ok(())
425 }
426
427 /// Emit a yaml as a hash or array value: i.e., which should appear
428 /// following a ":" or "-", either after a space, or on a new line.
429 /// If `inline` is true, then the preceding characters are distinct
430 /// and short enough to respect the compact flag.
431 // changed: use to always emit ' ' for inline -- that is now handled elsewhere
432 fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
433 match *val {
434 Yaml::Array(ref v) => {
435 if !((inline && self.compact) || v.is_empty()) {
436 writeln!(self.writer)?;
437 self.level += 1;
438 self.write_indent()?;
439 self.level -= 1;
440 }
441 self.emit_array(v)
442 }
443 Yaml::Hash(ref h) => {
444 if !((inline && self.compact) || h.is_empty()) {
445 writeln!(self.writer)?;
446 self.level += 1;
447 self.write_indent()?;
448 self.level -= 1;
449 }
450 self.emit_hash(h)
451 }
452 _ => {
453 // write!(self.writer, " ")?;
454 self.emit_node(val)
455 }
456 }
457 }
458}
459
/// Check if the string requires quoting.
///
/// A string is quoted when any of the following holds:
/// * it is empty, or starts or ends with a space;
/// * it starts with one of `&`, `*`, `?`, `|`, `-`, `<`, `>`, `=`, `!`, `%`, `@`;
/// * it contains any of `:`, `{`, `}`, `[`, `]`, `,`, `#`, `` ` ``, `"`, `'`, `\`;
/// * it contains a control character (`\0` through `\x1f`, or `\x7f`) --
///   these must be escaped inside double quotes, which `escape_str` does;
/// * it is one of the boolean or null keywords (otherwise it would be read
///   back as a boolean or null value);
/// * it starts with `.` or `0x`, or parses as an integer or float (otherwise
///   it would be read back as a numeric value).
///
/// Note: date-like strings (e.g. `2014-12-31`) are not detected by this check.
fn need_quotes(string: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        // http://yaml.org/type/bool.html
        // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
        // them as string, not booleans, although it is violating the YAML 1.1 specification.
        // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
        "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
        "false", "on", "On", "ON", "off", "Off", "OFF",
        // http://yaml.org/type/null.html
        "null", "Null", "NULL", "~",
    ];

    fn need_quotes_spaces(string: &str) -> bool {
        string.starts_with(' ') || string.ends_with(' ')
    }

    string.is_empty()
        || need_quotes_spaces(string)
        || string.starts_with(['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@'])
        || string.contains(|character: char| {
            matches!(
                character,
                ':' | '{'
                    | '}'
                    | '['
                    | ']'
                    | ','
                    | '#'
                    | '`'
                    | '\"'
                    | '\''
                    | '\\'
                    // every control character, including \a, \b, \v, \f and \e
                    | '\0'..='\x1f'
                    | '\x7f'
            )
        })
        || KEYWORDS.contains(&string)
        || string.starts_with('.')
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}
515}