1pub use numbers::NumType;
2pub use regexes::sanitize_xml;
3use roxmltree;
4use roxmltree::Node;
5use roxmltree::NodeType;
6use serde_derive::{Deserialize, Serialize};
7use serde_plain;
8use std::collections::HashMap;
9mod numbers;
10mod regexes;
11
12#[derive(Deserialize, Debug, Serialize, Eq, PartialEq)]
13#[allow(non_camel_case_types)]
14pub enum BuiltinOp {
15 factorial,
16 minus,
17 abs,
18 conjugate,
19 arg,
20 real,
21 imaginary,
22 floor,
23 ceiling,
24 not,
25 inverse,
26 ident,
27 domain,
28 codomain,
29 image,
30 sin,
31 cos,
32 tan,
33 sec,
34 csc,
35 cot,
36 sinh,
37 cosh,
38 tanh,
39 sech,
40 csch,
41 coth,
42 arcsin,
43 arccos,
44 arctan,
45 arccosh,
46 arccot,
47 arccoth,
48 arccsc,
49 arccsch,
50 arcsec,
51 arcsech,
52 arcsinh,
53 arctanh,
54 exp,
55 ln,
56 log,
57 determinant,
58 transpose,
59 divergence,
60 grad,
61 curl,
62 laplacian,
63 card,
64 quotient,
65 divide,
66 power,
67 rem,
68 implies,
69 equivalent,
70 approx,
71 setdiff,
72 vectorproduct,
73 scalarproduct,
74 outerproduct,
75 plus,
76 times,
77 max,
78 min,
79 gcd,
80 lcm,
81 mean,
82 sdev,
83 variance,
84 median,
85 mode,
86 and,
87 or,
88 xor,
89 selector,
90 union,
91 intersect,
92 cartesianproduct,
93 compose,
94 r#fn,
95 int,
96 sum,
97 product,
98 diff,
99 partialdiff,
100 forall,
101 exists,
102 eq,
103 neq,
104 gt,
105 lt,
106 geq,
107 leq,
108 root,
109}
110
111#[derive(Debug, Serialize, Eq, PartialEq)]
112pub enum MathNode {
113 Apply(Vec<MathNode>),
114 Op(BuiltinOp),
115 Text(String),
116 Root(Vec<MathNode>),
117 Ci(Vec<MathNode>),
118 Csymbol {
119 definition_url: String,
120 encoding: Option<String>,
121 children: Vec<MathNode>,
122 },
123 Cn {
124 num_type: numbers::NumType,
125 base: u32,
126 definition_url: Option<String>,
127 encoding: Option<String>,
128 attributes: Option<HashMap<String, String>>,
129 },
130 Comment(String),
131 PI(String, Option<String>),
132}
133
134fn has_text(math_node: &MathNode) -> bool {
135 match math_node {
136 MathNode::Text(e) if e.is_empty() => false,
137 _ => true,
138 }
139}
140fn map_children(node: Node) -> Vec<MathNode> {
141 node.children().map(parse_node).filter(has_text).collect()
142}
143fn parse_element_type(node: Node) -> MathNode {
144 let tag_name = node.tag_name().name();
145 let maybe_op: Result<BuiltinOp, serde_plain::Error> = serde_plain::from_str(tag_name);
147 if let Ok(op) = maybe_op {
148 return MathNode::Op(op);
149 }
150 match tag_name {
151 "apply" => MathNode::Apply(map_children(node)),
152 "ci" => MathNode::Ci(map_children(node)),
153 "cn" => numbers::node_to_cn(node),
154 "csymbol" => MathNode::Csymbol {
155 definition_url: node.attribute("definitionUrl").unwrap().to_owned(),
156 encoding: node.attribute("encoding").map(|e| e.to_owned()),
157 children: map_children(node),
158 },
159 _ => {
160 dbg!(node);
161 panic!()
162 }
163 }
164}
165pub fn parse_node(node: Node) -> MathNode {
168 match node.node_type() {
169 NodeType::Text => MathNode::Text(node.text().unwrap().trim().to_owned()),
170 NodeType::Element if node.tag_name().name() == "math" => MathNode::Root(map_children(node)),
171 NodeType::Root => parse_node(node.first_child().unwrap()),
172 NodeType::Element => parse_element_type(node),
173 NodeType::PI => MathNode::PI(
174 node.pi().unwrap().target.to_owned(),
175 node.pi().unwrap().value.map(|m| m.to_owned()),
176 ),
177 NodeType::Comment => MathNode::Comment(node.text().unwrap().to_owned()),
178 }
179}
180pub fn parse_document(text: &str) -> Result<MathNode, roxmltree::Error> {
182 let sanitized = regexes::sanitize_xml(text);
183 let xml = roxmltree::Document::parse(&sanitized)?;
184
185 let parsed: MathNode = parse_node(xml.root());
186 Ok(parsed)
187}
188
#[cfg(test)]
mod test {
    use super::MathNode::*;
    use super::*;
    use crate::numbers::NumType;

    /// A `<math>` wrapper becomes `Root`, and whitespace-only text is dropped.
    #[test]
    fn test_simple_parsing() {
        let test = r#"<math xmlns="http://www.w3.org/1998/Math/MathML">
            <apply>
                <plus/>
                <ci> x </ci>
                <ci> y </ci>
            </apply></math>"#;
        let parsed = parse_document(test).unwrap();
        let expected = Root(vec![Apply(vec![
            Op(BuiltinOp::plus),
            Ci(vec![Text("x".to_owned())]),
            Ci(vec![Text("y".to_owned())]),
        ])]);
        assert_eq!(parsed, expected);
    }

    /// Nested `<apply>` elements are parsed recursively, even without `<math>`.
    #[test]
    fn test_recursion() {
        let test = r#"<apply>
            <plus/>
            <apply>
                <times/>
                <ci> a </ci>
                <ci> x </ci>
            </apply>
            <ci> b </ci>
        </apply>"#;
        let parsed = parse_document(test).unwrap();
        let expected = Apply(vec![
            Op(BuiltinOp::plus),
            Apply(vec![
                Op(BuiltinOp::times),
                Ci(vec![Text("a".to_owned())]),
                Ci(vec![Text("x".to_owned())]),
            ]),
            Ci(vec![Text("b".to_owned())]),
        ]);
        assert_eq!(parsed, expected);
    }

    /// Every supported `<cn>` flavour parses, and each yields one child of the root.
    #[test]
    fn test_numbers() {
        let test = r#"
        <math xmlns="http://www.w3.org/1998/Math/MathML">
            <cn type="real"> 12345.7 </cn>
            <cn type="integer"> 12345 </cn>
            <cn type="integer" base="16"> AB3 </cn>
            <cn type="rational"> 12342 <sep/> 2342342 </cn>
            <cn type="complex-cartesian"> 12.3 <sep/> 5 </cn>
            <cn type="complex-polar"> 2 <sep/> 3.1415 </cn>
            <cn type="constant"> τ </cn>
        </math>
        "#;
        match parse_document(test).unwrap() {
            Root(children) => assert_eq!(children.len(), 7),
            other => panic!("expected a Root node, got {:?}", other),
        }
    }

    /// The constant `τ` comes out of `parse_document` as `$FIXED_tau`.
    #[test]
    fn test_tau() {
        let test = r#"<cn type="constant"> τ </cn>"#;
        let parsed = parse_document(test).unwrap();
        let expected = Cn {
            num_type: NumType::Constant("$FIXED_tau".to_string()),
            base: 10,
            definition_url: None,
            encoding: None,
            attributes: None,
        };
        assert_eq!(parsed, expected);
    }

    /// Namespaced attributes on `<cn>` are collected under a `namespace:name` key.
    #[test]
    fn test_sbml_attrs() {
        use BuiltinOp::*;
        use NumType::*;
        let test = r#" <math xmlns="http://www.w3.org/1998/Math/MathML"
            xmlns:sbml="http://www.sbml.org/sbml/level3/version2/core">
            <apply>
                <and/>
                <apply>
                    <lt/>
                    <cn sbml:units="mole"> 1 </cn>
                    <ci> S1 </ci>
                </apply>
                <apply>
                    <lt/>
                    <ci> S1 </ci>
                    <cn sbml:units="mole"> 100 </cn>
                </apply>
            </apply>
        </math>"#;
        // Parsed directly with roxmltree, i.e. without the sanitizing step of
        // `parse_document`.
        let xml = roxmltree::Document::parse(test).unwrap();
        let parsed: MathNode = parse_node(xml.root());
        let units: HashMap<String, String> = vec![(
            "http://www.sbml.org/sbml/level3/version2/core:units".to_owned(),
            "mole".to_owned(),
        )]
        .into_iter()
        .collect();
        let expected = Root(vec![Apply(vec![
            Op(and),
            Apply(vec![
                Op(lt),
                Cn {
                    num_type: Real(1.0),
                    base: 10,
                    definition_url: None,
                    encoding: None,
                    attributes: Some(units.clone()),
                },
                Ci(vec![Text("S1".to_owned())]),
            ]),
            Apply(vec![
                Op(lt),
                Ci(vec![Text("S1".to_owned())]),
                Cn {
                    num_type: Real(100.0),
                    base: 10,
                    definition_url: None,
                    encoding: None,
                    attributes: Some(units),
                },
            ]),
        ])]);
        assert_eq!(expected, parsed);
    }
}