libmathcat/
pretty_print.rs

1//! Useful functions for debugging and error messages.
2#![allow(clippy::needless_return)]
3
4use sxd_document::dom::*;
5
6// #[allow(dead_code)]
7// pub fn pp_doc(doc: &Document) {
8//     for root_child in doc.root().children() {
9//         if let ChildOfRoot::Element(e) = root_child {
10//             format_element(&e, 0);
11//             break;
12//         }
13//     };
14// }
15
16/// Pretty-print the MathML represented by `element`.
17pub fn mml_to_string(e: Element) -> String {
18    return format_element(e, 0);
19}
20
21/// Pretty-print the MathML represented by `element`.
22/// * `indent` -- the amount of indentation to start with
23pub fn format_element(e: Element, indent: usize) -> String {
24    // let namespace = match e.name().namespace_uri() {
25    //     None => "".to_string(),
26    //     Some(prefix) => prefix.to_string() + ":",
27    // };
28    // let namespace = namespace.as_str();
29    let namespace = "";
30    let mut answer = format!("{:in$}<{ns}{name}{attrs}>", " ", in=2*indent, ns=namespace, name=e.name().local_part(), attrs=format_attrs(&e.attributes()));
31    let children = e.children();
32    let has_element = children.iter().find(|&&c| matches!(c, ChildOfElement::Element(_x)));
33    if has_element.is_none() {
34        // print text content
35        let content = children.iter()
36                .map(|c| if let ChildOfElement::Text(t) = c {t.text()} else {""})
37                .collect::<Vec<&str>>()
38                .join("");
39        return format!("{}{}</{}{}>\n", answer, &handle_special_chars(&content), namespace, e.name().local_part());
40        // for child in children {
41        //     if let ChildOfElement::Text(t) = child {
42        //         return format!("{}{}</{}{}>\n", answer, &make_invisible_chars_visible(t.text()), namespace, e.name().local_part());
43        //     }
44        // };
45    } else {
46       answer += "\n";        // tag with children should start on new line
47        // recurse on each Element child
48        for c in e.children() {
49            if let ChildOfElement::Element(e) = c {
50                answer += &format_element(e, indent+1);
51            }
52        }
53    }
54    return answer + &format!("{:in$}</{ns}{name}>\n", " ", in=2*indent, ns=namespace, name=e.name().local_part());
55
56    // Use the &#x....; representation for invisible chars when printing
57}
58
59/// Format a vector of attributes as a string with a leading space
60pub fn format_attrs(attrs: &[Attribute]) -> String {
61    let mut result = String::new();
62    for attr in attrs {
63        result += format!(" {}='{}'", attr.name().local_part(), &handle_special_chars(attr.value())).as_str();
64    }
65    result
66}
67
/// Escape `text` for display inside XML markup.
///
/// The five XML special characters are replaced by their named entities, and the
/// invisible operators U+2061..=U+2064 are replaced by numeric character references
/// so that they are visible in the output. All other characters are copied unchanged.
fn handle_special_chars(text: &str) -> String {
    /// The replacement for `ch`, or `None` when `ch` can be copied as is.
    fn entity_for(ch: char) -> Option<&'static str> {
        let entity = match ch {
            '"' => "&quot;",
            '&' => "&amp;",
            '\'' => "&apos;",
            '<' => "&lt;",
            '>' => "&gt;",
            '\u{2061}' => "&#x2061;",
            '\u{2062}' => "&#x2062;",
            '\u{2063}' => "&#x2063;",
            '\u{2064}' => "&#x2064;",
            _ => return None,
        };
        Some(entity)
    }

    // The input length is a lower bound for the output length.
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, ch| {
            match entity_for(ch) {
                Some(entity) => escaped.push_str(entity),
                None => escaped.push(ch),
            }
            escaped
        })
}
87
88
89// /// Pretty print an xpath value.
90// /// If the value is a `NodeSet`, the MathML for the node/element is returned.
91// pub fn pp_xpath_value(value: Value) {
92//     use sxd_xpath::Value;
93//     use sxd_xpath::nodeset::Node;
94//     debug!("XPath value:");
95//     if let Value::Nodeset(nodeset) = &value {
96//         for node in nodeset.document_order() {
97//             match node {
98//                 Node::Element(el) => {debug!("{}", crate::pretty_print::format_element(&el, 1))},
99//                 Node::Text(t) =>  {debug!("found Text value: {}", t.text())},
100//                 _ => {debug!("found unexpected node type")}
101//             }
102//         }
103//     }
104// }
105
106/// Convert YAML to a string using with `indent` amount of space.
107pub fn yaml_to_string(yaml: &Yaml, indent: usize) -> String {
108    let mut result = String::new();
109    {
110        let mut emitter = YamlEmitter::new(&mut result);
111        emitter.compact(true);
112        emitter.emit_node(yaml).unwrap(); // dump the YAML object to a String
113    }
114    if indent == 0 {
115        return result;
116    }
117    let indent_str = format!("{:in$}", " ", in=2*indent);
118    result = result.replace('\n',&("\n".to_string() + &indent_str)); // add indentation to all but first line
119    return indent_str + result.trim_end();  // add indent to first line and remove an extra indent at end
120}
121
122/* --------------------- Tweaked pretty printer for YAML (from YAML code) --------------------- */
123
124// Changed: new function to determine if more compact notation can be used (when child is a one entry simple array/hash). Writes
125// -foo [bar: bletch]
126// -foo {bar: bletch}
127fn is_scalar(v: &Yaml) -> bool {
128    return !matches!(v, Yaml::Hash(_) | Yaml::Array(_));
129}
130
131fn is_complex(v: &Yaml) -> bool {
132    return match v {
133        Yaml::Hash(h) => {
134            return match h.len() {
135                0 => false,
136                1 => {
137                    let (key,val) = h.iter().next().unwrap();
138                    return !(is_scalar(key) && is_scalar(val))
139                },
140                _ => true,
141            }
142        },
143        Yaml::Array(v) => {
144            return match v.len() {
145                0 => false,
146                1 => {
147                    let hash = v[0].as_hash();
148                    if let Some(hash) = hash {
149                        return match hash.len() {
150                            0 => false,
151                            1 => {
152                                let (key, val) = hash.iter().next().unwrap();
153                                return !(is_scalar(key) && is_scalar(val));
154                            },
155                            _ => true,
156                        }
157                    } else {
158                        return !is_scalar(&v[0]);
159                    }    
160                },
161                _ => true,
162            }
163        },
164        _ => false,
165    }
166}
167
168use std::error::Error;
169use std::fmt::{self, Display};
170use yaml_rust::{Yaml, yaml::Hash};
171
172//use crate::yaml::{Hash, Yaml};
173
/// Errors that can be reported while emitting YAML.
#[derive(Copy, Clone, Debug)]
#[allow(dead_code)] // from original YAML code (isn't used here)
enum EmitError {
    /// Writing to the underlying `fmt::Write` sink failed.
    FmtError(fmt::Error),
    /// Carried over from the original YAML emitter; displayed as "bad hashmap key".
    BadHashmapKey,
}

impl Error for EmitError {
    /// Expose the wrapped formatting error, if any, as the underlying cause.
    ///
    /// Implemented via `source` rather than the deprecated `cause`, whose default
    /// implementation forwards to `source`.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        match self {
            EmitError::FmtError(err) => Some(err),
            EmitError::BadHashmapKey => None,
        }
    }
}

impl Display for EmitError {
    /// A formatting failure is displayed as the wrapped error's own message.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            EmitError::FmtError(err) => Display::fmt(err, formatter),
            EmitError::BadHashmapKey => formatter.write_str("bad hashmap key"),
        }
    }
}

impl From<fmt::Error> for EmitError {
    /// Lets `?` convert a `fmt::Error` into an `EmitError`.
    fn from(f: fmt::Error) -> Self {
        EmitError::FmtError(f)
    }
}
201
/// Writes a `Yaml` value to a `fmt::Write` sink using a compact block notation.
struct YamlEmitter<'a> {
    /// Sink that receives the emitted text.
    writer: &'a mut dyn fmt::Write,
    /// Number of spaces written per nesting level.
    best_indent: usize,
    /// Whether nested collections may start on the same line as their "- " or "key: ".
    compact: bool,
    /// Current nesting level; indentation is only written when it is positive.
    level: isize,
}
209
210type EmitResult = Result<(), EmitError>;
211
// from serialize::json
/// Write `v` to `wr` as a double-quoted string with JSON-style escapes.
///
/// `"` and `\` are backslash-escaped. The control characters U+0000..=U+001F and
/// U+007F are written either as their short escape (`\b`, `\t`, `\n`, `\f`, `\r`) or
/// as `\u00XX`. Everything else, including non-ASCII text, is copied unchanged.
fn escape_str(wr: &mut dyn fmt::Write, v: &str) -> Result<(), fmt::Error> {
    // Escapes for the C0 control characters, indexed by byte value.
    const CONTROL_ESCAPES: [&str; 32] = [
        "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007",
        "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f",
        "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
        "\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f",
    ];

    wr.write_str("\"")?;

    // Start of the run of bytes that needs no escaping and has not been written yet.
    let mut pending = 0;
    for (index, byte) in v.bytes().enumerate() {
        let replacement = match byte {
            b'"' => "\\\"",
            b'\\' => "\\\\",
            0x7f => "\\u007f",
            0x00..=0x1f => CONTROL_ESCAPES[usize::from(byte)],
            _ => continue,
        };

        // Every escaped byte is ASCII, so `pending..index` always falls on char boundaries.
        if pending < index {
            wr.write_str(&v[pending..index])?;
        }
        wr.write_str(replacement)?;
        pending = index + 1;
    }

    if pending != v.len() {
        wr.write_str(&v[pending..])?;
    }

    wr.write_str("\"")?;
    Ok(())
}
274
275impl<'a> YamlEmitter<'a> {
276    pub fn new(writer: &'a mut dyn fmt::Write) -> YamlEmitter<'a> {
277        YamlEmitter {
278            writer,
279            best_indent: 2,
280            compact: true,
281            level: -1,
282        }
283    }
284
285    /// Set 'compact inline notation' on or off, as described for block
286    /// [sequences](http://www.yaml.org/spec/1.2/spec.html#id2797382)
287    /// and
288    /// [mappings](http://www.yaml.org/spec/1.2/spec.html#id2798057).
289    ///
290    /// In this form, blocks cannot have any properties (such as anchors
291    /// or tags), which should be OK, because this emitter doesn't
292    /// (currently) emit those anyways.
293    pub fn compact(&mut self, compact: bool) {
294        self.compact = compact;
295    }
296
297    /// Determine if this emitter is using 'compact inline notation'.
298    #[allow(dead_code)]   // not all fields are used in this program
299    pub fn is_compact(&self) -> bool {
300        self.compact
301    }
302
303    // fn dump(&mut self, doc: &Yaml) -> EmitResult {
304    //     // write DocumentStart
305    //     writeln!(self.writer, "---")?;
306    //     self.level = -1;
307    //     self.emit_node(doc)
308    // }
309
310    fn write_indent(&mut self) -> EmitResult {
311        if self.level <= 0 {
312            return Ok(());
313        }
314        for _ in 0..self.level {
315            for _ in 0..self.best_indent {
316                write!(self.writer, " ")?;
317            }
318        }
319        Ok(())
320    }
321
322    fn emit_node(&mut self, node: &Yaml) -> EmitResult {
323        match *node {
324            Yaml::Array(ref v) => self.emit_array(v),
325            Yaml::Hash(ref h) => self.emit_hash(h),
326            Yaml::String(ref v) => {
327                if need_quotes(v) {
328                    escape_str(self.writer, v)?;
329                } else {
330                    write!(self.writer, "{v}")?;
331                }
332                Ok(())
333            }
334            Yaml::Boolean(v) => {
335                if v {
336                    self.writer.write_str("true")?;
337                } else {
338                    self.writer.write_str("false")?;
339                }
340                Ok(())
341            }
342            Yaml::Integer(v) => {
343                write!(self.writer, "{v}")?;
344                Ok(())
345            }
346            Yaml::Real(ref v) => {
347                write!(self.writer, "{v}")?;
348                Ok(())
349            }
350            Yaml::Null | Yaml::BadValue => {
351                write!(self.writer, "~")?;
352                Ok(())
353            }
354            // XXX(chenyh) Alias
355            _ => Ok(()),
356        }
357    }
358
359    fn emit_array(&mut self, v: &[Yaml]) -> EmitResult {
360        if v.is_empty() {
361            write!(self.writer, "[]")?;
362        } else if v.len() == 1 && !is_complex(&v[0]) {
363            // changed -- for arrays that have only one simple element, make them more compact by using [...] notation
364            write!(self.writer, "[")?;
365            self.emit_val(true, &v[0])?;
366            write!(self.writer, "]")?;
367        } else {
368            self.level += 1;
369            
370            for (cnt, x) in v.iter().enumerate() {
371                if cnt > 0 {
372                    writeln!(self.writer)?;
373                    self.write_indent()?;
374                }
375                write!(self.writer, "- ")?;
376                self.emit_val(true, x)?;
377            }
378            self.level -= 1;
379        }
380        return Ok(());
381    }
382
383    fn emit_hash(&mut self, h: &Hash) -> EmitResult {
384        if h.is_empty() {
385            self.writer.write_str("{}")?;
386        } else {
387          // changed -- for hashmaps that have only one simple element, make them more compact by using {...}} notation
388            self.level += 1;
389            for (cnt, (k, v)) in h.iter().enumerate() {
390                // changed: use new function is_scalar()
391                // let complex_key = match *k {
392                //     Yaml::Hash(_) | Yaml::Array(_) => true,
393                //     _ => false,
394                // };
395                if cnt > 0 {
396                    writeln!(self.writer)?;
397                    self.write_indent()?;
398                }
399                if !is_scalar(k) {
400                    write!(self.writer, "? ")?;
401                    self.emit_val(true, k)?;
402                    writeln!(self.writer)?;
403                    self.write_indent()?;
404                    write!(self.writer, ": ")?;
405                    self.emit_val(true, v)?;
406                } else {
407                    self.emit_node(k)?;
408                    write!(self.writer, ": ")?;
409
410                    // changed to use braces in some cases
411                    let complex_value = is_complex(v);
412                    if !complex_value && v.as_hash().is_some() {
413                        write!(self.writer, "{{")?;
414                    }
415                    // changed to use complex_value from 'false'
416                    self.emit_val(!complex_value, v)?;
417                    if !complex_value && v.as_hash().is_some() {
418                        write!(self.writer, "}}")?;
419                    }
420                }
421            }
422            self.level -= 1;
423        }   
424        Ok(())
425    }
426
427    /// Emit a yaml as a hash or array value: i.e., which should appear
428    /// following a ":" or "-", either after a space, or on a new line.
429    /// If `inline` is true, then the preceding characters are distinct
430    /// and short enough to respect the compact flag.
431    // changed: use to always emit ' ' for inline -- that is now handled elsewhere
432    fn emit_val(&mut self, inline: bool, val: &Yaml) -> EmitResult {
433        match *val {
434            Yaml::Array(ref v) => {
435                if !((inline && self.compact) || v.is_empty()) {
436                    writeln!(self.writer)?;
437                    self.level += 1;
438                    self.write_indent()?;
439                    self.level -= 1;
440                }
441                self.emit_array(v)
442            }
443            Yaml::Hash(ref h) => {
444                if !((inline && self.compact) || h.is_empty()) {
445                    writeln!(self.writer)?;
446                    self.level += 1;
447                    self.write_indent()?;
448                    self.level -= 1;
449                }
450                self.emit_hash(h)
451            }
452            _ => {
453           //     write!(self.writer, " ")?;
454                self.emit_node(val)
455            }
456        }
457    }
458}
459
/// Check if the string requires quoting.
///
/// A string must be quoted when any of the following holds:
/// * it is empty, or starts or ends with a space;
/// * it starts with one of `&`, `*`, `?`, `|`, `-`, `<`, `>`, `=`, `!`, `%`, `@`, `.`, or with `0x`;
/// * it contains one of `:`, `{`, `}`, `[`, `]`, `,`, `#`, `` ` ``, `"`, `'`, `\`;
/// * it contains a control character (U+0000..=U+001F or U+007F), which can only be
///   written with double-quoted escapes;
/// * it is one of the words that would otherwise be read as a boolean
///   (`yes`, `no`, `true`, `false`, `on`, `off` in the listed capitalizations) or as
///   null (`null`, `Null`, `NULL`, `~`);
/// * it parses as an integer or a float (e.g. `2`, `2.6`, `12e7`), since it would
///   otherwise be treated as a numeric value.
///
/// Note: strings that merely look like dates (e.g. `2014-12-31`) are not detected.
fn need_quotes(string: &str) -> bool {
    /// Characters that force quoting when they appear first in the string.
    const LEADING_INDICATORS: &[char] =
        &['&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@', '.'];

    /// Words that a YAML reader would interpret as a boolean or null.
    const RESERVED_WORDS: &[&str] = &[
        // http://yaml.org/type/bool.html
        // Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse
        // them as string, not booleans, although it is violating the YAML 1.1 specification.
        // See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088.
        "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE",
        "false", "on", "On", "ON", "off", "Off", "OFF",
        // http://yaml.org/type/null.html
        "null", "Null", "NULL", "~",
    ];

    /// Characters that force quoting wherever they appear.
    fn forces_quotes(character: char) -> bool {
        matches!(
            character,
            ':' | '{' | '}' | '[' | ']' | ',' | '#' | '`' | '"' | '\'' | '\\'
            // every C0 control character and DEL
            | '\0'..='\x1f'
            | '\x7f'
        )
    }

    string.is_empty()
        || string.starts_with(' ')
        || string.ends_with(' ')
        || string.starts_with(LEADING_INDICATORS)
        || string.contains(forces_quotes)
        || RESERVED_WORDS.contains(&string)
        || string.starts_with("0x")
        || string.parse::<i64>().is_ok()
        || string.parse::<f64>().is_ok()
}