mathml/
lib.rs

1pub use numbers::NumType;
2pub use regexes::sanitize_xml;
3use roxmltree;
4use roxmltree::Node;
5use roxmltree::NodeType;
6use serde_derive::{Deserialize, Serialize};
7use serde_plain;
8use std::collections::HashMap;
9mod numbers;
10mod regexes;
11
12#[derive(Deserialize, Debug, Serialize, Eq, PartialEq)]
13#[allow(non_camel_case_types)]
14pub enum BuiltinOp {
15    factorial,
16    minus,
17    abs,
18    conjugate,
19    arg,
20    real,
21    imaginary,
22    floor,
23    ceiling,
24    not,
25    inverse,
26    ident,
27    domain,
28    codomain,
29    image,
30    sin,
31    cos,
32    tan,
33    sec,
34    csc,
35    cot,
36    sinh,
37    cosh,
38    tanh,
39    sech,
40    csch,
41    coth,
42    arcsin,
43    arccos,
44    arctan,
45    arccosh,
46    arccot,
47    arccoth,
48    arccsc,
49    arccsch,
50    arcsec,
51    arcsech,
52    arcsinh,
53    arctanh,
54    exp,
55    ln,
56    log,
57    determinant,
58    transpose,
59    divergence,
60    grad,
61    curl,
62    laplacian,
63    card,
64    quotient,
65    divide,
66    power,
67    rem,
68    implies,
69    equivalent,
70    approx,
71    setdiff,
72    vectorproduct,
73    scalarproduct,
74    outerproduct,
75    plus,
76    times,
77    max,
78    min,
79    gcd,
80    lcm,
81    mean,
82    sdev,
83    variance,
84    median,
85    mode,
86    and,
87    or,
88    xor,
89    selector,
90    union,
91    intersect,
92    cartesianproduct,
93    compose,
94    r#fn,
95    int,
96    sum,
97    product,
98    diff,
99    partialdiff,
100    forall,
101    exists,
102    eq,
103    neq,
104    gt,
105    lt,
106    geq,
107    leq,
108    root,
109}
110
111#[derive(Debug, Serialize, Eq, PartialEq)]
112pub enum MathNode {
113    Apply(Vec<MathNode>),
114    Op(BuiltinOp),
115    Text(String),
116    Root(Vec<MathNode>),
117    Ci(Vec<MathNode>),
118    Csymbol {
119        definition_url: String,
120        encoding: Option<String>,
121        children: Vec<MathNode>,
122    },
123    Cn {
124        num_type: numbers::NumType,
125        base: u32,
126        definition_url: Option<String>,
127        encoding: Option<String>,
128        attributes: Option<HashMap<String, String>>,
129    },
130    Comment(String),
131    PI(String, Option<String>),
132}
133
134fn has_text(math_node: &MathNode) -> bool {
135    match math_node {
136        MathNode::Text(e) if e.is_empty() => false,
137        _ => true,
138    }
139}
140fn map_children(node: Node) -> Vec<MathNode> {
141    node.children().map(parse_node).filter(has_text).collect()
142}
143fn parse_element_type(node: Node) -> MathNode {
144    let tag_name = node.tag_name().name();
145    // Is this a defined op?
146    let maybe_op: Result<BuiltinOp, serde_plain::Error> = serde_plain::from_str(tag_name);
147    if let Ok(op) = maybe_op {
148        return MathNode::Op(op);
149    }
150    match tag_name {
151        "apply" => MathNode::Apply(map_children(node)),
152        "ci" => MathNode::Ci(map_children(node)),
153        "cn" => numbers::node_to_cn(node),
154        "csymbol" => MathNode::Csymbol {
155            definition_url: node.attribute("definitionUrl").unwrap().to_owned(),
156            encoding: node.attribute("encoding").map(|e| e.to_owned()),
157            children: map_children(node),
158        },
159        _ => {
160            dbg!(node);
161            panic!()
162        }
163    }
164}
165/// Parse a single xml node into a MathML node
166
167pub fn parse_node(node: Node) -> MathNode {
168    match node.node_type() {
169        NodeType::Text => MathNode::Text(node.text().unwrap().trim().to_owned()),
170        NodeType::Element if node.tag_name().name() == "math" => MathNode::Root(map_children(node)),
171        NodeType::Root => parse_node(node.first_child().unwrap()),
172        NodeType::Element => parse_element_type(node),
173        NodeType::PI => MathNode::PI(
174            node.pi().unwrap().target.to_owned(),
175            node.pi().unwrap().value.map(|m| m.to_owned()),
176        ),
177        NodeType::Comment => MathNode::Comment(node.text().unwrap().to_owned()),
178    }
179}
180/// Parse a string into a MathML node
181pub fn parse_document(text: &str) -> Result<MathNode, roxmltree::Error> {
182    let sanitized = regexes::sanitize_xml(text);
183    let xml = roxmltree::Document::parse(&sanitized)?;
184
185    let parsed: MathNode = parse_node(xml.root());
186    Ok(parsed)
187}
188
#[cfg(test)]
mod test {
    use super::MathNode::*;
    use super::*;
    use crate::numbers::NumType;

    /// Expected node for a `<ci>` element containing just `name`.
    fn ci(name: &str) -> MathNode {
        Ci(vec![Text(name.to_owned())])
    }

    /// Expected node for a base-10 `<cn>` with no definition URL or encoding,
    /// carrying the given extra attributes.
    fn cn_with_attributes(num_type: NumType, attributes: &HashMap<String, String>) -> MathNode {
        Cn {
            num_type,
            base: 10,
            definition_url: None,
            encoding: None,
            attributes: Some(attributes.clone()),
        }
    }

    /// A `<math>` wrapper around a single `<apply>` is parsed into `Root`.
    #[test]
    fn test_simple_parsing() {
        let input = r#"<math xmlns="http://www.w3.org/1998/Math/MathML">
                            <apply>
                          <plus/>
                      <ci> x </ci>
                      <ci> y </ci>
                    </apply></math>"#;
        let actual = parse_document(input).unwrap();
        let expected = Root(vec![Apply(vec![Op(BuiltinOp::plus), ci("x"), ci("y")])]);
        assert_eq!(actual, expected);
    }

    /// Nested `<apply>` elements are parsed recursively.
    #[test]
    fn test_recursion() {
        let input = r#"<apply>
                      <plus/>
                      <apply>
                        <times/>
                        <ci> a </ci>
                        <ci> x </ci>
                      </apply>
                      <ci> b </ci>
                    </apply>"#;
        let actual = parse_document(input).unwrap();
        let inner = Apply(vec![Op(BuiltinOp::times), ci("a"), ci("x")]);
        let expected = Apply(vec![Op(BuiltinOp::plus), inner, ci("b")]);
        assert_eq!(actual, expected)
    }

    /// Every supported `<cn>` flavour parses without error or panic.
    #[test]
    fn test_numbers() {
        let input = r#"
        <math xmlns="http://www.w3.org/1998/Math/MathML">
        <cn type="real"> 12345.7 </cn>
                    <cn type="integer"> 12345 </cn>
                    <cn type="integer" base="16"> AB3 </cn>
                    <cn type="rational"> 12342 <sep/> 2342342 </cn>
                    <cn type="complex-cartesian"> 12.3 <sep/> 5 </cn>
                    <cn type="complex-polar"> 2 <sep/> 3.1415 </cn>
                    <cn type="constant">  &tau; </cn>
                    </math>
                    "#;
        parse_document(input).unwrap();
    }

    /// A constant given as an entity reference survives sanitizing.
    #[test]
    fn test_tau() {
        let input = r#"<cn type="constant">  &tau; </cn>"#;
        let actual = parse_document(input).unwrap();
        let expected = Cn {
            num_type: NumType::Constant("$FIXED_tau".to_string()),
            base: 10,
            definition_url: None,
            encoding: None,
            attributes: None,
        };
        assert_eq!(actual, expected)
    }

    /// Namespaced SBML attributes on `<cn>` are collected into `attributes`.
    #[test]
    fn test_sbml_attrs() {
        let input = r#"                <math xmlns="http://www.w3.org/1998/Math/MathML" 
                    xmlns:sbml="http://www.sbml.org/sbml/level3/version2/core">
                    <apply>
                        <and/>
                        <apply>
                            <lt/>
                            <cn sbml:units="mole"> 1 </cn>
                            <ci> S1 </ci>
                        </apply>
                        <apply>
                            <lt/>
                            <ci> S1 </ci>
                            <cn sbml:units="mole"> 100 </cn>
                        </apply>
                    </apply>
                </math>"#;
        // This test parses the raw text directly instead of going through
        // `parse_document`, so no sanitizing step is involved.
        let document = roxmltree::Document::parse(input).unwrap();
        let parsed: MathNode = parse_node(document.root());

        let mut units: HashMap<String, String> = HashMap::new();
        units.insert(
            "http://www.sbml.org/sbml/level3/version2/core:units".to_owned(),
            "mole".to_owned(),
        );

        let lower_bound = Apply(vec![
            Op(BuiltinOp::lt),
            cn_with_attributes(NumType::Real(1.0), &units),
            ci("S1"),
        ]);
        let upper_bound = Apply(vec![
            Op(BuiltinOp::lt),
            ci("S1"),
            cn_with_attributes(NumType::Real(100.0), &units),
        ]);
        let expected = Root(vec![Apply(vec![
            Op(BuiltinOp::and),
            lower_bound,
            upper_bound,
        ])]);
        assert_eq!(expected, parsed);
    }
}
319}