libmathcat/
pretty_print.rs

1//! Useful functions for debugging and error messages.
2#![allow(clippy::needless_return)]
3
4use sxd_document::dom::*;
5
6// #[allow(dead_code)]
7// pub fn pp_doc(doc: &Document) {
8//     for root_child in doc.root().children() {
9//         if let ChildOfRoot::Element(e) = root_child {
10//             format_element(&e, 0);
11//             break;
12//         }
13//     };
14// }
15
16/// Pretty-print the MathML represented by `element`.
17pub fn mml_to_string(e: Element) -> String {
18    return format_element(e, 0);
19}
20
21/// Pretty-print the MathML represented by `element`.
22/// * `indent` -- the amount of indentation to start with
23pub fn format_element(e: Element, indent: usize) -> String {
24    // let namespace = match e.name().namespace_uri() {
25    //     None => "".to_string(),
26    //     Some(prefix) => prefix.to_string() + ":",
27    // };
28    // let namespace = namespace.as_str();
29    let namespace = "";
30    let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31    let children = e.children();
32    let has_element = children.iter().find(|&&c| matches!(c, ChildOfElement::Element(_x)));
33    if has_element.is_none() {
34        // print text content
35        let content = children.iter()
36                .map(|c| if let ChildOfElement::Text(t) = c {t.text()} else {""})
37                .collect::<Vec<&str>>()
38                .join("");
39        return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40        // for child in children {
41        //     if let ChildOfElement::Text(t) = child {
42        //         return format!("{}{}</{}{}>\n", answer, &make_invisible_chars_visible(t.text()), namespace, e.name().local_part());
43        //     }
44        // };
45    } else {
46       answer += "\n";        // tag with children should start on new line
47        // recurse on each Element child
48        for c in e.children() {
49            if let ChildOfElement::Element(e) = c {
50                answer += &format_element(e, indent+1);
51            }
52        }
53    }
54    return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56    // Use the &#x....; representation for invisible chars when printing
57}
58
59/// Format a vector of attributes as a string with a leading space
60pub fn format_attrs(attrs: &[Attribute]) -> String {
61    let mut result = String::new();
62    for attr in attrs {
63        result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64    }
65    result
66}
67
/// Return a copy of `text` in which XML-special characters and the invisible
/// operators U+2061..=U+2064 are replaced by entities / character references.
///
/// All other characters are copied through unchanged.
fn handle_special_chars(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '"' => escaped.push_str("&quot;"),
            '&' => escaped.push_str("&amp;"),
            '\'' => escaped.push_str("&apos;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            // invisible operators: make them visible in the output
            '\u{2061}' => escaped.push_str("&#x2061;"),
            '\u{2062}' => escaped.push_str("&#x2062;"),
            '\u{2063}' => escaped.push_str("&#x2063;"),
            '\u{2064}' => escaped.push_str("&#x2064;"),
            other => escaped.push(other),
        }
    }
    escaped
}
84
85
86// /// Pretty print an xpath value.
87// /// If the value is a `NodeSet`, the MathML for the node/element is returned.
88// pub fn pp_xpath_value(value: Value) {
89//     use sxd_xpath::Value;
90//     use sxd_xpath::nodeset::Node;
91//     debug!("XPath value:");
92//     if let Value::Nodeset(nodeset) = &value {
93//         for node in nodeset.document_order() {
94//             match node {
95//                 Node::Element(el) => {debug!("{}", crate::pretty_print::format_element(&el, 1))},
96//                 Node::Text(t) =>  {debug!("found Text value: {}", t.text())},
97//                 _ => {debug!("found unexpected node type")}
98//             }
99//         }
100//     }
101// }
102
103/// Convert YAML to a string using with `indent` amount of space.
104pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
105    let mut result = String::new();
106    {
107        let mut emitter = YamlEmitter::new(&mut result);
108        emitter.compact(true);
109        emitter.emit_node(yaml).unwrap(); // dump the YAML object to a String
110    }
111    if indent == 0 {
112        return result;
113    }
114    let indent_str = format!("{:in$}", " ", in=2*indent);
115    result = result.replace('\n',&("\n".to_string() + &indent_str)); // add indentation to all but first line
116    return indent_str + result.trim_end();  // add indent to first line and remove an extra indent at end
117}
118
119/* --------------------- Tweaked pretty printer for YAML (from YAML code) --------------------- */
120
121// Changed: new function to determine if more compact notation can be used (when child is a one entry simple array/hash). Writes
122// -foo [bar: bletch]
123// -foo {bar: bletch}
124fn is_scalar(v: &Yaml) -> bool {
125    return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
126}
127
128fn is_complex(v: &Yaml) -> bool {
129    return match v {
130        Yaml::Hash(h) => {
131            return match h.len() {
132                0 => false,
133                1 => {
134                    let (key,val) = h.iter().next().unwrap();
135                    return !(is_scalar(key) && is_scalar(val))
136                },
137                _ => true,
138            }
139        },
140        Yaml::Array(v) => {
141            return match v.len() {
142                0 => false,
143                1 => {
144                    let hash = v[0].as_hash();
145                    if let Some(hash) = hash {
146                        return match hash.len() {
147                            0 => false,
148                            1 => {
149                                let (key, val) = hash.iter().next().unwrap();
150                                return !(is_scalar(key) && is_scalar(val));
151                            },
152                            _ => true,
153                        }
154                    } else {
155                        return !is_scalar(&v[0]);
156                    }    
157                },
158                _ => true,
159            }
160        },
161        _ => false,
162    }
163}
164
165use std::error::Error;
166use std::fmt::{self, Display};
167extern crate yaml_rust;
168use yaml_rust::{Yaml, yaml::Hash};
169
170//use crate::yaml::{Hash, Yaml};
171
/// Errors that can be reported while emitting YAML.
#[derive(Copy, Clone, Debug)]
#[allow(dead_code)] // from original YAML code (isn't used here)
enum EmitError {
    /// The underlying writer reported a formatting error.
    FmtError(fmt::Error),
    /// A hash key could not be emitted.
    BadHashmapKey,
}

impl Error for EmitError {
    /// No underlying cause is reported for any variant.
    fn cause(&self) -> Option<&dyn Error> {
        None
    }
}

impl Display for EmitError {
    /// `FmtError` shows the wrapped error's message; `BadHashmapKey` shows a fixed text.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        match self {
            EmitError::FmtError(err) => Display::fmt(err, formatter),
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
        }
    }
}

impl From<fmt::Error> for EmitError {
    /// Wrap a formatting error so that `?` can be used on `write!` results.
    fn from(f: fmt::Error) -> Self {
        Self::FmtError(f)
    }
}
199
/// Writes a `Yaml` value as text into a borrowed `fmt::Write` sink.
struct YamlEmitter<'a> {
    /// Destination for all emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces `write_indent` writes per nesting level.
    best_indent: usize,
    /// Whether compact inline notation is enabled (consulted by `emit_val`).
    compact: bool,

    /// Current nesting level; `new` starts it at -1 and it is raised while the
    /// entries of a multi-line array or hash are being written.
    level: isize,
}

/// Result type shared by the emitter's methods.
type EmitResult = Result<(), EmitError>;
209
// from serialize::json
/// Write `v` to `wr` as a double-quoted string with JSON-style escapes.
///
/// `"` and `\` are backslash-escaped; backspace, tab, newline, form feed and carriage
/// return use their short escapes (`\b`, `\t`, `\n`, `\f`, `\r`); every other byte in
/// `0x00..=0x1f`, and `0x7f`, is written as `\u00XX` with lowercase hex digits.
/// All remaining text (including multi-byte UTF-8) is copied through in unescaped runs.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    wr.write_str("\"")?;

    // Start of the run of bytes that has been scanned but not yet written.
    let mut pending_from = 0;

    for (pos, byte) in v.bytes().enumerate() {
        let generic_escape: String;
        let escape: &str = match byte {
            b'"' => "\\\"",
            b'\\' => "\\\\",
            b'\x08' => "\\b",
            b'\t' => "\\t",
            b'\n' => "\\n",
            b'\x0c' => "\\f",
            b'\r' => "\\r",
            // remaining control bytes (and DEL) have no short form
            0x00..=0x1f | 0x7f => {
                generic_escape = format!("\\u{byte:04x}");
                generic_escape.as_str()
            }
            _ => continue,
        };

        // Escaped bytes are all ASCII, so both slice ends fall on char boundaries.
        if pending_from < pos {
            wr.write_str(&v[pending_from..pos])?;
        }
        wr.write_str(escape)?;
        pending_from = pos + 1;
    }

    if pending_from != v.len() {
        wr.write_str(&v[pending_from..])?;
    }

    wr.write_str("\"")
}
272
273impl<'a> YamlEmitter<'a> {
274    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
275        YamlEmitter {
276            writer,
277            best_indent: 2,
278            compact: true,
279            level: -1,
280        }
281    }
282
283    /// Set 'compact inline notation' on or off, as described for block
284    /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
285    /// and
286    /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
287    ///
288    /// In this form, blocks cannot have any properties (such as anchors
289    /// or tags), which should be OK, because this emitter doesn't
290    /// (currently) emit those anyways.
291    pub fn compact(&mut self, compact: bool) {
292        self.compact = compact;
293    }
294
295    /// Determine if this emitter is using 'compact inline notation'.
296    #[allow(dead_code)]   // not all fields are used in this program
297    pub fn is_compact(&self) -> bool {
298        self.compact
299    }
300
301    // fn dump(&mut self, doc: &Yaml) -> EmitResult {
302    //     // write DocumentStart
303    //     writeln!(self.writer, "---")?;
304    //     self.level = -1;
305    //     self.emit_node(doc)
306    // }
307
308    fn write_indent(&mut self) -> EmitResult {
309        if self.level <= 0 {
310            return Ok(());
311        }
312        for _ in 0..self.level {
313            for _ in 0..self.best_indent {
314                write!(self.writer, " ")?;
315            }
316        }
317        Ok(())
318    }
319
320    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
321        match *node {
322            Yaml::Array(ref v) => self.emit_array(v),
323            Yaml::Hash(ref h) => self.emit_hash(h),
324            Yaml::String(ref v) => {
325                if need_quotes(v) {
326                    escape_str(self.writer, v)?;
327                } else {
328                    write!(self.writer, "{v}")?;
329                }
330                Ok(())
331            }
332            Yaml::Boolean(v) => {
333                if v {
334                    self.writer.write_str("true")?;
335                } else {
336                    self.writer.write_str("false")?;
337                }
338                Ok(())
339            }
340            Yaml::Integer(v) => {
341                write!(self.writer, "{v}")?;
342                Ok(())
343            }
344            Yaml::Real(ref v) => {
345                write!(self.writer, "{v}")?;
346                Ok(())
347            }
348            Yaml::Null | Yaml::BadValue => {
349                write!(self.writer, "~")?;
350                Ok(())
351            }
352            // XXX(chenyh) Alias
353            _ => Ok(()),
354        }
355    }
356
357    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
358        if v.is_empty() {
359            write!(self.writer, "[]")?;
360        } else if v.len() == 1 && !is_complex(&v[0]) {
361            // changed -- for arrays that have only one simple element, make them more compact by using [...] notation
362            write!(self.writer, "[")?;
363            self.emit_val(true, &v[0])?;
364            write!(self.writer, "]")?;
365        } else {
366            self.level += 1;
367            
368            for (cnt, x) in v.iter().enumerate() {
369                if cnt > 0 {
370                    writeln!(self.writer)?;
371                    self.write_indent()?;
372                }
373                write!(self.writer, "- ")?;
374                self.emit_val(true, x)?;
375            }
376            self.level -= 1;
377        }
378        return Ok(());
379    }
380
381    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
382        if h.is_empty() {
383            self.writer.write_str("{}")?;
384        } else {
385          // changed -- for hashmaps that have only one simple element, make them more compact by using {...}} notation
386            self.level += 1;
387            for (cnt, (k, v)) in h.iter().enumerate() {
388                // changed: use new function is_scalar()
389                // let complex_key = match *k {
390                //     Yaml::Hash(_) | Yaml::Array(_) => true,
391                //     _ => false,
392                // };
393                if cnt > 0 {
394                    writeln!(self.writer)?;
395                    self.write_indent()?;
396                }
397                if !is_scalar(k) {
398                    write!(self.writer, "? ")?;
399                    self.emit_val(true, k)?;
400                    writeln!(self.writer)?;
401                    self.write_indent()?;
402                    write!(self.writer, ": ")?;
403                    self.emit_val(true, v)?;
404                } else {
405                    self.emit_node(k)?;
406                    write!(self.writer, ": ")?;
407
408                    // changed to use braces in some cases
409                    let complex_value = is_complex(v);
410                    if !complex_value && v.as_hash().is_some() {
411                        write!(self.writer, "{{")?;
412                    }
413                    // changed to use complex_value from 'false'
414                    self.emit_val(!complex_value, v)?;
415                    if !complex_value && v.as_hash().is_some() {
416                        write!(self.writer, "}}")?;
417                    }
418                }
419            }
420            self.level -= 1;
421        }   
422        Ok(())
423    }
424
425    /// Emit a yaml as a hash or array value: i.e., which should appear
426    /// following a ":" or "-", either after a space, or on a new line.
427    /// If `inline` is true, then the preceding characters are distinct
428    /// and short enough to respect the compact flag.
429    // changed: use to always emit ' ' for inline -- that is now handled elsewhere
430    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
431        match *val {
432            Yaml::Array(ref v) => {
433                if !((inline && self.compact) || v.is_empty()) {
434                    writeln!(self.writer)?;
435                    self.level += 1;
436                    self.write_indent()?;
437                    self.level -= 1;
438                }
439                self.emit_array(v)
440            }
441            Yaml::Hash(ref h) => {
442                if !((inline && self.compact) || h.is_empty()) {
443                    writeln!(self.writer)?;
444                    self.level += 1;
445                    self.write_indent()?;
446                    self.level -= 1;
447                }
448                self.emit_hash(h)
449            }
450            _ => {
451           //     write!(self.writer, " ")?;
452                self.emit_node(val)
453            }
454        }
455    }
456}
457
/// Check if the string requires quoting.
///
/// A string is quoted when any of the following holds:
/// * it is empty, or starts or ends with a space;
/// * it starts with one of `&`, `*`, `?`, `|`, `-`, `<`, `>`, `=`, `!`, `%`, `@`, or with `.` or `0x`;
/// * it contains one of `:`, `{`, `}`, `[`, `]`, `,`, `#`, `` ` ``, `"`, `'`, `\`;
/// * it contains a control character in `\0..=\x1f` (this includes `\a`, `\b`, `\t`, `\n`,
///   `\v`, `\f`, `\r` and `\e`) or `\x7f`; these are exactly the characters `escape_str`
///   writes as escapes, so such a string must go through the double-quoted path;
/// * it is one of the YAML boolean or null spellings (`true`, `No`, `ON`, `null`, `~`, ...);
///   otherwise it would be read back as a boolean or null value;
/// * it parses as an integer or floating point number (e.g. `2`, `2.6`, `12e7`);
///   otherwise it would be read back as a numeric value.
///
/// Not covered by this check: the non-ASCII break/space characters that YAML can escape
/// as `\N`, `\_`, `\L`, `\P`, and date-like strings (e.g. `2014-12-31`).
fn need_quotes(string: &str) -> bool {
    fn need_quotes_spaces(string: &str) -> bool {
        string.starts_with(' ') || string.ends_with(' ')
    }

    string.is_empty()
        || need_quotes_spaces(string)
        || string.starts_with(['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@'])
        || string.contains(|character: char| matches!(character,
            ':'
            | '{'
            | '}'
            | '['
            | ']'
            | ','
            | '#'
            | '`'
            | '\"'
            | '\''
            | '\\'
            // every C0 control character and DEL: none of them may appear raw in a
            // plain scalar, and escape_str knows how to escape each one
            | '\0'..='\x1f'
            | '\x7f') )
        || [
            // http://yaml.org/type/bool.html
            // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
            // them as string, not booleans, although it is violating the YAML 1.1 specification.
            // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
            "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
            "false", "on", "On", "ON", "off", "Off", "OFF",
            // http://yaml.org/type/null.html
            "null", "Null", "NULL", "~",
        ]
        .contains(&string)
        || string.starts_with('.')
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}