#![allow(clippy::needless_return)]
use crate::errors::*;
use sxd_document::dom::*;
use sxd_document::QName;
use phf::{phf_map, phf_set};
use crate::xpath_functions::{IsBracketed, is_leaf};
use std::{ptr::eq as ptr_eq};
use crate::pretty_print::*;
use regex::Regex;
use std::fmt;
use crate::chemistry::*;
/// The decimal separator string.
const DECIMAL_SEPARATOR: &str = ".";
/// Name of the attribute this module sets on elements it adds or rewrites.
pub const CHANGED_ATTR: &str = "data-changed";
/// Value of [`CHANGED_ATTR`] placed on elements created here (e.g. wrapper `mrow`/`math` elements).
pub const ADDED_ATTR_VALUE: &str = "added";
/// Value passed to `create_mo` for the `mo` elements generated while converting `mfenced` to `mrow`.
const MFENCED_ATTR_VALUE: &str = "from_mfenced";
/// Attribute name for chemical bonds (not referenced in the part of the file visible here).
pub const CHEMICAL_BOND: &str ="data-chemical-bond";
/// Attribute set to "true" on the empty element that replaces a script recognized by `is_from_mhchem_hack`.
const MHCHEM_MMULTISCRIPTS_HACK: &str = "MHCHEM_SCRIPT_HACK";
/// Operator table keyed by the operator's text; its contents are included from `operator-info.in`.
static OPERATORS: phf::Map<&str, OperatorInfo> = include!("operator-info.in");
/// Operators treated as ambiguous: `|`, `∥` and U+2016 (double vertical line).
static AMBIGUOUS_OPERATORS: phf::Set<&str> = phf_set! {
"|", "∥", "\u{2016}"
};
// Lazily-initialized operator entries used throughout canonicalization.
// The entries obtained with `OPERATORS.get(..).unwrap()` panic on first use if the
// corresponding key is missing from the table included from `operator-info.in`.
lazy_static!{
// Sentinel operator stored in a freshly created `StackInfo` (see `StackInfo::new`); priority 0.
static ref LEFT_FENCEPOST: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::LEFT_FENCE, priority: 0, next: &None };
// Table entries for the invisible operators U+2061..U+2064.
static ref INVISIBLE_FUNCTION_APPLICATION: &'static OperatorInfo = OPERATORS.get("\u{2061}").unwrap();
static ref IMPLIED_TIMES: &'static OperatorInfo = OPERATORS.get("\u{2062}").unwrap();
static ref IMPLIED_INVISIBLE_COMMA: &'static OperatorInfo = OPERATORS.get("\u{2063}").unwrap();
static ref IMPLIED_INVISIBLE_PLUS: &'static OperatorInfo = OPERATORS.get("\u{2064}").unwrap();
static ref PLUS: &'static OperatorInfo = OPERATORS.get("+").unwrap();
static ref MINUS: &'static OperatorInfo = OPERATORS.get("-").unwrap();
// The alternate form chained (via `next`) from the "-" entry; panics if "-" has no alternate form.
static ref PREFIX_MINUS: &'static OperatorInfo = MINUS.next.as_ref().unwrap();
static ref TIMES_SIGN: &'static OperatorInfo = OPERATORS.get("×").unwrap();
// Synthetic infix operators that are not in the table; each has a hard-coded priority.
static ref IMPLIED_TIMES_HIGH_PRIORITY: OperatorInfo = OperatorInfo{
op_type: OperatorTypes::INFIX, priority: 851, next: &None
};
static ref IMPLIED_SEPARATOR_HIGH_PRIORITY: OperatorInfo = OperatorInfo{
op_type: OperatorTypes::INFIX, priority: 901, next: &None
};
static ref IMPLIED_CHEMICAL_BOND: OperatorInfo = OperatorInfo{
op_type: OperatorTypes::INFIX, priority: 905, next: &None
};
static ref IMPLIED_PLUS_SLASH_HIGH_PRIORITY: OperatorInfo = OperatorInfo{ op_type: OperatorTypes::INFIX, priority: 881, next: &None
};
// Default prefix/infix/postfix entries, all with priority 260.
// NOTE(review): presumably used for operators missing from the table -- confirm against `find_operator`.
static ref DEFAULT_OPERATOR_INFO_PREFIX: &'static OperatorInfo = &OperatorInfo{
op_type: OperatorTypes::PREFIX, priority: 260, next: &None
};
static ref DEFAULT_OPERATOR_INFO_INFIX: &'static OperatorInfo = &OperatorInfo{
op_type: OperatorTypes::INFIX, priority: 260, next:& None
};
static ref DEFAULT_OPERATOR_INFO_POSTFIX: &'static OperatorInfo = &OperatorInfo{
op_type: OperatorTypes::POSTFIX, priority: 260, next: &None
};
// Marker entry: `OperatorPair::new` uses it and `StackInfo::add_child_to_mrow` compares against it
// by pointer to tell an operand from an operator.
static ref ILLEGAL_OPERATOR_INFO: &'static OperatorInfo = &OperatorInfo{
op_type: OperatorTypes::INFIX, priority: 999, next: &None
};
// Priority of "="; `is_relational_op` compares an operator's priority with this value.
static ref EQUAL_PRIORITY: usize = OPERATORS.get("=").unwrap().priority;
// Matches text that consists only of (one or more) whitespace characters.
static ref IS_WHITESPACE: Regex = Regex::new(r"^\s+$").unwrap(); }
// Bit flags describing the syntactic role(s) of an operator.
bitflags! {
struct OperatorTypes: u32 {
const NONE = 0x0;
const PREFIX = 0x1;
const INFIX = 0x2;
const POSTFIX = 0x4;
const FENCE = 0x8;
// 0x9 == FENCE | PREFIX
const LEFT_FENCE= 0x9;
// 0xc == FENCE | POSTFIX
const RIGHT_FENCE=0xc;
// 0xf == all four bits set, so a bitwise "and" with any non-NONE flag is non-zero
const UNSPECIFIED=0xf; }
}
/// Properties of one form of an operator.
///
/// `op_type` holds the role flags, `priority` the precedence value, and `next` points to
/// another form of the same operator (or to `None`); `OperatorVersions::new` walks this chain.
#[derive(Clone, Debug)]
struct OperatorInfo {
op_type: OperatorTypes, priority: usize, next: &'static Option<OperatorInfo>, }
/// An operator's text together with the table entry describing it.
#[derive(Clone, Debug)]
struct OperatorPair<'op> {
    /// The operator's characters.
    ch: &'op str,
    /// The properties of the operator.
    op: &'static OperatorInfo,
}

impl<'op> OperatorPair<'op> {
    /// Builds the placeholder pair: the text "illegal" paired with `ILLEGAL_OPERATOR_INFO`.
    fn new() -> OperatorPair<'op> {
        OperatorPair { ch: "illegal", op: &ILLEGAL_OPERATOR_INFO }
    }
}
/// The prefix, infix and postfix forms (when they exist) of a single operator.
#[derive(Debug)]
struct OperatorVersions {
    prefix: Option<&'static OperatorInfo>,
    infix: Option<&'static OperatorInfo>,
    postfix: Option<&'static OperatorInfo>,
}

impl OperatorVersions {
    /// Walks the `next` chain starting at `op` and files every entry under the first
    /// role it matches (prefix, then infix, then postfix).
    ///
    /// A later entry with the same role overwrites an earlier one.
    ///
    /// # Panics
    /// Panics if an entry in the chain is none of prefix, infix or postfix.
    fn new(op: &'static OperatorInfo) -> OperatorVersions {
        let mut versions = OperatorVersions { prefix: None, infix: None, postfix: None };
        let mut current = Some(op);
        while let Some(info) = current {
            if info.is_prefix() {
                versions.prefix = Some(info);
            } else if info.is_infix() {
                versions.infix = Some(info);
            } else if info.is_postfix() {
                versions.postfix = Some(info);
            } else {
                panic!("OperatorVersions::new: operator is not prefix, infix, or postfix")
            }
            current = info.next.as_ref();
        }
        return versions;
    }
}
impl OperatorInfo {
    /// True if the PREFIX bit is set.
    fn is_prefix(&self) -> bool {
        self.op_type.intersects(OperatorTypes::PREFIX)
    }

    /// True if the INFIX bit is set.
    fn is_infix(&self) -> bool {
        self.op_type.intersects(OperatorTypes::INFIX)
    }

    /// True if the POSTFIX bit is set.
    fn is_postfix(&self) -> bool {
        self.op_type.intersects(OperatorTypes::POSTFIX)
    }

    /// True if every bit of LEFT_FENCE (FENCE and PREFIX) is set.
    fn is_left_fence(&self) -> bool {
        self.op_type.contains(OperatorTypes::LEFT_FENCE)
    }

    /// True if every bit of RIGHT_FENCE (FENCE and POSTFIX) is set.
    fn is_right_fence(&self) -> bool {
        self.op_type.contains(OperatorTypes::RIGHT_FENCE)
    }

    /// True if any bit of LEFT_FENCE or RIGHT_FENCE is set.
    ///
    /// NOTE(review): the combined mask is 0xd (FENCE | PREFIX | POSTFIX), so this also returns
    /// true for a plain prefix or postfix operator that has no FENCE bit -- confirm that is intended.
    fn is_fence(&self) -> bool {
        self.op_type.intersects(OperatorTypes::LEFT_FENCE | OperatorTypes::RIGHT_FENCE)
    }

    /// True if this operator shares at least one flag bit with `op_type`.
    fn is_operator_type(&self, op_type: OperatorTypes) -> bool {
        self.op_type.intersects(op_type)
    }

    /// True if this is the very same table entry as `PLUS` or `MINUS` (pointer comparison).
    fn is_plus_or_minus(&self) -> bool {
        let is_plus = ptr_eq(self, *PLUS);
        let is_minus = ptr_eq(self, *MINUS);
        is_plus || is_minus
    }

    /// True if this is the very same table entry as `IMPLIED_TIMES` or `TIMES_SIGN` (pointer comparison).
    fn is_times(&self) -> bool {
        let is_implied = ptr_eq(self, *IMPLIED_TIMES);
        let is_explicit = ptr_eq(self, *TIMES_SIGN);
        is_implied || is_explicit
    }

    /// True if this operator can join `previous_op` in one n-ary group: it is the same entry,
    /// or both are plus/minus, or both are some form of times.
    fn is_nary(&self, previous_op: &OperatorInfo) -> bool {
        if ptr_eq(previous_op, self) {
            return true;
        }
        if previous_op.is_plus_or_minus() && self.is_plus_or_minus() {
            return true;
        }
        return previous_op.is_times() && self.is_times();
    }
}
/// One entry of the stack used while building the `mrow` structure.
///
/// `mrow` is the element being filled, `op_pair` is the most recent operator recorded for it,
/// and `is_operand` is true when the last child appended was an operand
/// (see `add_child_to_mrow` / `remove_last_operand_from_mrow`).
struct StackInfo<'a, 'op>{
mrow: Element<'a>, op_pair: OperatorPair<'op>, is_operand: bool, }
impl<'a, 'op> fmt::Display for StackInfo<'a, 'op> {
    /// Writes `StackInfo(op=<char>/<priority>, is_operand=<bool>, mrow(<child names>)`,
    /// with the child element names separated by commas.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let children = self.mrow.children();
        // with no children the loop below never runs, so close the "mrow(" here
        let closer = if children.is_empty() { ")" } else { "" };
        write!(
            f,
            "StackInfo(op={}/{}, is_operand={}, mrow({}",
            show_invisible_op_char(self.op_pair.ch),
            self.op_pair.op.priority,
            self.is_operand,
            closer
        )?;
        for child in children {
            let child = as_element(child);
            // the last child closes the list; every other child is followed by a comma
            let terminator = if child.following_siblings().is_empty() { ")" } else { "," };
            write!(f, "{}{}", name(&child), terminator)?;
        }
        Ok(())
    }
}
impl<'a, 'op:'a> StackInfo<'a, 'op> {
    /// Creates the bottom-of-stack entry: an empty, marked-as-added `mrow` whose operator is
    /// the `LEFT_FENCEPOST` sentinel (with the private-use character U+E000 as its text).
    fn new(doc: Document<'a>) -> StackInfo<'a, 'op> {
        let mrow = create_mathml_element(&doc, "mrow");
        mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
        let fencepost = OperatorPair { ch: "\u{E000}", op: &LEFT_FENCEPOST };
        StackInfo { mrow, op_pair: fencepost, is_operand: false }
    }

    /// Creates an entry whose new, marked-as-added `mrow` already contains `node`
    /// and whose current operator is `op_pair`.
    fn with_op<'d>(doc: &'d Document<'a>, node: Element<'a>, op_pair: OperatorPair<'op>) -> StackInfo<'a, 'op> {
        let mrow = create_mathml_element(doc, "mrow");
        mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
        mrow.append_child(node);
        StackInfo { mrow, op_pair, is_operand: false }
    }

    /// Priority of the operator currently recorded for this entry.
    fn priority(&self) -> usize {
        self.op_pair.op.priority
    }

    /// The last child of the `mrow`, or `None` when the `mrow` is empty.
    fn last_child_in_mrow(&self) -> Option<Element<'a>> {
        let children = self.mrow.children();
        match children.last() {
            Some(&last) => Some(as_element(last)),
            None => None,
        }
    }

    /// Appends `child` to the `mrow`.
    ///
    /// When `child_op` is the `ILLEGAL_OPERATOR_INFO` marker the child is an operand (two operands
    /// in a row trip the assertion); otherwise `child_op` becomes the entry's current operator.
    fn add_child_to_mrow(&mut self, child: Element<'a>, child_op: OperatorPair<'op>) {
        self.mrow.append_child(child);
        let child_is_operand = ptr_eq(child_op.op, *ILLEGAL_OPERATOR_INFO);
        if child_is_operand {
            assert!(!self.is_operand);
            self.is_operand = true;
        } else {
            self.op_pair = child_op;
            self.is_operand = false;
        }
    }

    /// Detaches and returns the last child of the `mrow`.
    ///
    /// # Panics
    /// Panics if the `mrow` is empty, or if the last child is not an operand and the `mrow`
    /// holds more than one child.
    fn remove_last_operand_from_mrow(&mut self) -> Element<'a> {
        let children = self.mrow.children();
        let n_children = children.len();
        assert!( !children.is_empty() );
        assert!( self.is_operand || n_children==1 );
        self.is_operand = false;
        let last_operand = as_element(children[n_children-1]);
        last_operand.remove_from_parent();
        last_operand
    }
}
/// Creates a new element called `name` in the MathML namespace, owned by `doc`.
pub fn create_mathml_element<'a>(doc: &Document<'a>, name: &str) -> Element<'a> {
    let qualified_name = sxd_document::QName::with_namespace_uri(
        Some("http://www.w3.org/1998/Math/MathML"),
        name,
    );
    doc.create_element(qualified_name)
}
/// Looks up the operator for `mo` (with no surrounding context supplied) and reports
/// whether `OperatorInfo::is_fence` holds for it.
pub fn is_fence(mo: Element) -> bool {
    let context = CanonicalizeContext::new();
    let operator = context.find_operator(mo, None, None, None);
    operator.is_fence()
}
/// Looks up the operator for `mo` (with no surrounding context supplied) and reports
/// whether its priority equals the priority of "=".
pub fn is_relational_op(mo: Element) -> bool {
    let context = CanonicalizeContext::new();
    let operator = context.find_operator(mo, None, None, None);
    operator.priority == *EQUAL_PRIORITY
}
/// Renames `element` to `new_name`, keeping it in the MathML namespace.
pub fn set_mathml_name(element: Element, new_name: &str) {
    let qualified_name = QName::with_namespace_uri(Some("http://www.w3.org/1998/Math/MathML"), new_name);
    element.set_name(qualified_name);
}
pub fn replace_children<'a>(mathml: Element<'a>, replacements: Vec<Element<'a>>) -> Element<'a> {
if replacements.len() == 1 {
add_attrs(mathml, replacements[0].attributes());
return mathml;
}
let parent = mathml.parent().unwrap().element().unwrap();
if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(name(&parent)) {
add_attrs(mathml, replacements[0].attributes());
let mrow = create_mathml_element(&mathml.document(), "mrow");
mrow.append_children(replacements);
return mathml;
} else {
let mut new_children = mathml.preceding_siblings();
let i_first_new_child = new_children.len();
let mut replacements = replacements.iter().map(|&el| ChildOfElement::Element(el)).collect::<Vec<ChildOfElement>>();
new_children.append(&mut replacements);
new_children.append(&mut mathml.following_siblings());
let parent = mathml.parent().unwrap().element().unwrap();
parent.replace_children(new_children);
return as_element(parent.children()[i_first_new_child]);
}
}
/// Finds the presentation MathML inside a `semantics` element.
///
/// Returns `(i, element)`: if child `i` carries `encoding="MathML-Presentation"`, `element` is
/// that child's only child; otherwise the result is `(0, first child)`.
///
/// # Panics
/// Panics if `element` is not named `semantics`, if the matching annotation does not have
/// exactly one child, or if there is no match and `element` has no children.
pub fn get_presentation_element(element: Element) -> (usize, Element) {
    assert_eq!(name(&element), "semantics");
    let children = element.children();
    let annotation_index = children.iter().position(|&child| {
        as_element(child).attribute_value("encoding") == Some("MathML-Presentation")
    });
    match annotation_index {
        Some(i) => {
            let presentation_annotation = as_element(children[i]);
            assert_eq!(presentation_annotation.children().len(), 1);
            return (i, as_element(presentation_annotation.children()[0]));
        },
        None => return (0, as_element(children[0])),
    }
}
/// Canonicalizes `mathml` using a fresh `CanonicalizeContext` and returns the result.
pub fn canonicalize(mathml: Element) -> Result<Element> {
    CanonicalizeContext::new().canonicalize(mathml)
}
/// Receiver type for the canonicalization methods; it currently has no fields.
struct CanonicalizeContext {
}
/// Classification of an `mn` element's text as returned by `is_digit_block`.
#[derive(PartialEq)]
#[allow(non_camel_case_types)]
enum DigitBlockType {
/// Not an `mn`, or its text is not one of the blocks below.
None,
/// Exactly three digits.
DecimalBlock_3,
/// Exactly four digits.
DecimalBlock_4,
/// Exactly five digits.
DecimalBlock_5,
/// Exactly four characters, each `0` or `1`.
BinaryBlock_4,
}
/// Three-valued answer (yes / maybe / no).
// NOTE(review): not used in the visible part of the file; by its name it presumably grades
// how likely a token is to be a function name -- confirm at the use site.
#[derive(Debug, PartialEq)]
enum FunctionNameCertainty {
True,
Maybe,
False
}
/// Elements that get a single `mrow` child when they have more than one child
/// (see `assure_nary_tag_has_mrow`).
static ELEMENTS_WITH_ONE_CHILD: phf::Set<&str> = phf_set! {
"math", "msqrt", "merror", "mpadded", "mphantom", "menclose", "mtd", "mscarry"
};
/// Elements whose child count is validated in `assure_mathml`; a child of one of these is
/// replaced by an empty placeholder rather than removed.
static ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN: phf::Set<&str> = phf_set! {
"mfrac", "mroot", "msub", "msup", "msubsup","munder", "mover", "munderover", "mmultiscripts", "mlongdiv"
};
/// Elements that must not have any children (checked in `assure_mathml`).
static EMPTY_ELEMENTS: phf::Set<&str> = phf_set! {
"mspace", "none", "mprescripts", "mglyph", "malignmark", "maligngroup", "msline",
};
lazy_static! {
// Matches text containing an apostrophe or one of the prime characters ′ ″ ‴ ⁗.
static ref IS_PRIME: Regex = Regex::new(r"['′″‴⁗]").unwrap();
// Upper/lower-case Roman numerals, optionally surrounded by whitespace.
// Every group is optional, so these also match empty or whitespace-only text;
// `clean_chemistry_leaf` checks the text length before calling `is_roman_number_match`.
static ref UPPER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\s*$").unwrap();
static ref LOWER_ROMAN_NUMERAL: Regex = Regex::new(r"^\s*^m{0,3}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})\s*$").unwrap();
}
impl CanonicalizeContext {
/// Creates a context; the struct carries no state.
fn new() -> CanonicalizeContext {
    CanonicalizeContext {}
}
/// Validates, cleans and restructures `mathml`, returning the canonical tree.
///
/// Steps, in order: wrap the input in a `math` element if it is not one already (the new
/// `math` becomes the document root's only child), validate with `assure_mathml`, clean with
/// `clean_mathml`, then run `canonicalize_mrows`. If `scan_and_mark_chemistry` returns false,
/// `canonicalize_mrows` is run a second time.
///
/// # Errors
/// Returns an error if validation fails or if `canonicalize_mrows` fails (the latter is
/// annotated with the MathML being processed).
fn canonicalize<'a>(&self, mut mathml: Element<'a>) -> Result<Element<'a>> {
    if name(&mathml) != "math" {
        let document = mathml.document();
        let math_element = create_mathml_element(&document, "math");
        math_element.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
        math_element.append_child(mathml);

        // make the new 'math' element the only child of the document root
        let root = document.root();
        root.clear_children();
        root.append_child(math_element);
        mathml = root.children()[0].element().unwrap();
    }

    CanonicalizeContext::assure_mathml(mathml)?;
    let mathml = self.clean_mathml(mathml).unwrap();
    self.assure_nary_tag_has_mrow(mathml);

    let mut converted_mathml = self.canonicalize_mrows(mathml)
        .chain_err(|| format!("while processing\n{}", mml_to_string(&mathml)))?;

    let is_chemistry = crate::chemistry::scan_and_mark_chemistry(converted_mathml);
    if !is_chemistry {
        debug!("Not chemistry -- retry:\n{}", mml_to_string(&converted_mathml));
        self.assure_nary_tag_has_mrow(converted_mathml);
        converted_mathml = self.canonicalize_mrows(mathml)
            .chain_err(|| format!("while processing\n{}", mml_to_string(&mathml)))?;
    }

    debug!("\nMathML after canonicalize:\n{}", mml_to_string(&converted_mathml));
    Ok(converted_mathml)
}
/// If `mathml` is listed in `ELEMENTS_WITH_ONE_CHILD` but has more than one child, moves all
/// of its children into a new `mrow` (marked as added) that becomes its only child.
fn assure_nary_tag_has_mrow(&self, mathml: Element) {
    let children = mathml.children();
    let needs_wrapper = children.len() > 1 && ELEMENTS_WITH_ONE_CHILD.contains(name(&mathml));
    if !needs_wrapper {
        return;
    }

    let wrapper = create_mathml_element(&mathml.document(), "mrow");
    wrapper.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
    wrapper.append_children(children);
    mathml.replace_children(vec![ChildOfElement::Element(wrapper)]);
}
/// Recursively checks that `mathml` is structurally valid MathML.
///
/// Checks performed:
/// * leaf elements: an element in `EMPTY_ELEMENTS` must have no children; any other leaf must
///   have no children or a single text child;
/// * elements in `ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN` must have the expected child count;
/// * for `semantics`, only the presentation child is checked (an empty `semantics` is accepted);
/// * every other element name must be in `ALL_MATHML_ELEMENTS`, and all children are checked.
///
/// Fix: the error for an empty element that has children used to say
/// "should only have one child", which contradicts the check (`n_children != 0`).
///
/// # Errors
/// Returns an error describing the first violation found.
fn assure_mathml(mathml: Element) -> Result<()> {
    static ALL_MATHML_ELEMENTS: phf::Set<&str> = phf_set!{
        "mi", "mo", "mn", "mtext", "ms", "mspace", "mglyph",
        "mfrac", "mroot", "msub", "msup", "msubsup","munder", "mover", "munderover", "mmultiscripts",
        "mstack", "mlongdiv", "msgroup", "msrow", "mscarries", "mscarry", "msline",
        "none", "mprescripts", "malignmark", "maligngroup",
        "math", "msqrt", "merror", "mpadded", "mphantom", "menclose", "mtd", "mstyle",
        "mrow", "mfenced", "mtable", "mtr", "mlabeledtr",
    };

    let n_children = mathml.children().len();
    let element_name = name(&mathml);
    if is_leaf(mathml) {
        if EMPTY_ELEMENTS.contains(element_name) {
            if n_children != 0 {
                bail!("{} should not have any children:\n{}", element_name, mml_to_string(&mathml));
            }
        } else if (n_children == 1 && mathml.children()[0].text().is_some()) || n_children == 0 {
            // an ordinary leaf: either a single text child or nothing at all
            return Ok( () );
        } else {
            bail!("Not a valid MathML leaf element:\n{}", mml_to_string(&mathml));
        };
    }

    if ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(element_name) {
        match element_name {
            "munderover" | "msubsup" => if n_children != 3 {
                bail!("{} should have 3 children:\n{}", element_name, mml_to_string(&mathml));
            },
            "mmultiscripts" => {
                // base + script pairs is an odd count; an 'mprescripts' child makes it even
                let has_prescripts = mathml.children().iter()
                    .any(|&child| name(&as_element(child)) == "mprescripts");
                if has_prescripts ^ (n_children % 2 == 0) {
                    bail!("{} has the wrong number of children:\n{}", element_name, mml_to_string(&mathml));
                }
            },
            "mlongdiv" => if n_children < 3 {
                bail!("{} should have at least 3 children:\n{}", element_name, mml_to_string(&mathml));
            },
            _ => if n_children != 2 {
                bail!("{} should have 2 children:\n{}", element_name, mml_to_string(&mathml));
            },
        }
    }

    let children = mathml.children();
    if element_name == "semantics" {
        if children.is_empty() {
            return Ok( () );
        } else {
            // only the presentation part is validated
            return CanonicalizeContext::assure_mathml(get_presentation_element(mathml).1);
        }
    }

    if !ALL_MATHML_ELEMENTS.contains(element_name) {
        bail!("'{}' is not a valid MathML element", element_name);
    }

    for child in children {
        CanonicalizeContext::assure_mathml( as_element(child) )?;
    }
    return Ok( () );
}
fn clean_mathml<'a>(&self, mathml: Element<'a>) -> Option<Element<'a>> {
lazy_static! {
static ref IS_PRIME: Regex = Regex::new(r"['′″‴⁗]").unwrap();
}
static CURRENCY_SYMBOLS: phf::Set<&str> = phf_set! {
"$", "¢", "€", "£", "₡", "₤", "₨", "₩", "₪", "₱", "₹", "₺", "₿" };
let element_name = name(&mathml);
let parent_requires_child =
if element_name == "math" {
false
} else if let Some(parent) = mathml.parent() { let parent = parent.element().unwrap();
let parent_name = name(&parent).to_string();
ELEMENTS_WITH_FIXED_NUMBER_OF_CHILDREN.contains(parent_name.as_str())
} else { false
};
if is_leaf(mathml) && !EMPTY_ELEMENTS.contains(element_name) && as_text(mathml).is_empty() {
if !parent_requires_child {
return None;
}
make_empty_element(mathml);
};
if mathml.children().is_empty() && !EMPTY_ELEMENTS.contains(element_name) {
if element_name == "mrow" {
if parent_requires_child {
return Some( make_empty_element(mathml) );
} else {
return None;
}
} else {
let mtext = create_empty_element(&mathml.document());
mathml.append_child(mtext);
}
};
match element_name {
"mn" => {
return Some(mathml);
},
"ms" | "mglyph" => {
return Some(mathml);
},
"mi" => {
let text = as_text(mathml);
if let Some(dash) = canonicalize_dash(text) { mathml.set_text(dash);
return Some(mathml);
} else if OPERATORS.get(text).is_some() {
set_mathml_name(mathml, "mo");
return Some(mathml);
} else if let Some(result) = merge_arc_trig(mathml) {
return Some(result);
} else if IS_PRIME.is_match(text) {
let new_text = merge_prime_text(text);
mathml.set_text(&new_text);
return Some(mathml);
} else if let Some(result) = split_points(mathml) {
return Some(result);
} else {
return Some(mathml);
};
},
"mtext" => {
if let Some(result) = merge_arc_trig(mathml) {
return Some(result);
};
if let Some(result) = split_points(mathml) {
return Some(result);
}
let text = as_text(mathml);
let mathml = mathml;
if IS_WHITESPACE.is_match(text) {
make_empty_element(mathml);
} else if let Some(dash) = canonicalize_dash(text) {
mathml.set_text(dash);
} else if OPERATORS.get(text).is_some() {
set_mathml_name(mathml, "mo");
return Some(mathml);
}
return if parent_requires_child || !text.is_empty() {Some(mathml)} else {None};
},
"mo" => {
let text = as_text(mathml);
if !text.is_empty() && IS_WHITESPACE.is_match(text) {
set_mathml_name(mathml, "mtext");
}
if let Some(result) = merge_arc_trig(mathml) {
return Some(result);
};
return crate::definitions::DEFINITIONS.with(|definitions| {
if text == "…" ||
definitions.borrow().get_hashset("FunctionNames").unwrap().contains(text) ||
definitions.borrow().get_hashset("GeometryShapes").unwrap().contains(text) {
set_mathml_name(mathml, "mi");
return Some(mathml);
}
if IS_PRIME.is_match(text) {
let new_text = merge_prime_text(text);
mathml.set_text(&new_text);
return Some(mathml);
}
if CURRENCY_SYMBOLS.contains(text) {
set_mathml_name(mathml, "mi");
return Some(mathml);
}
return Some(mathml);
});
},
"mfenced" => {return self.clean_mathml( convert_mfenced_to_mrow(mathml) )},
"mstyle" | "mpadded" => {
let children = mathml.children();
if children.is_empty() {
if parent_requires_child {
return Some( make_empty_element(mathml));
} else {
return None;
}
} else if children.len() == 1 {
if let Some(new_mathml) = self.clean_mathml( as_element(children[0]) ) {
mathml.replace_children(new_mathml.children());
set_mathml_name(mathml, name(&new_mathml));
add_attrs(mathml, new_mathml.attributes());
return Some(mathml);
} else if parent_requires_child {
return Some( make_empty_element(mathml));
} else {
return None;
}
} else {
set_mathml_name(mathml, "mrow");
mathml.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
return self.clean_mathml(mathml); }
},
"mphantom" | "malignmark" | "maligngroup"=> {
if parent_requires_child {
return Some( make_empty_element(mathml));
} else {
return None;
}
},
"mspace" => {
let width = mathml.attribute_value("width").unwrap_or("0");
if is_width_ignorable(width) { return None;
}
return Some( make_empty_element(mathml));
},
"semantics" => {
let mut children = mathml.children();
let (i, presentation) = get_presentation_element(mathml);
let new_presentation = if let Some(presentation) = self.clean_mathml(presentation) {
presentation
} else {
create_empty_element(&mathml.document())
};
if i==0 {
children[0] = ChildOfElement::Element(new_presentation);
} else {
children.remove(i);
children.insert(0, ChildOfElement::Element(presentation));
}
mathml.replace_children(children);
return Some(mathml);
},
_ => {
let children = mathml.children();
if element_name == "mrow" {
if children.is_empty() {
return if parent_requires_child {Some(mathml)} else {None};
} else if children.len() == 1 {
let is_from_mhchem = is_from_mhchem_hack(mathml);
if let Some(new_mathml) = self.clean_mathml(as_element(children[0])) {
mathml.replace_children(new_mathml.children());
set_mathml_name(mathml, name(&new_mathml));
add_attrs(mathml, new_mathml.attributes());
return Some(mathml);
} else if parent_requires_child {
let empty = make_empty_element(mathml);
if is_from_mhchem {
empty.set_attribute_value(MHCHEM_MMULTISCRIPTS_HACK, "true");
}
return Some(empty);
} else {
return None;
}
}
}
let mathml = if element_name == "mrow" || ELEMENTS_WITH_ONE_CHILD.contains(element_name) {
let merged = merge_dots(mathml); let merged = merge_primes(merged);
handle_pseudo_scripts(merged)
} else {
mathml
};
let mut children = mathml.children();
let mut i = 0;
while i < children.len() {
if let Some(child) = children[i].element() {
match self.clean_mathml(child) {
None => {
mathml.remove_child(child);
},
Some(new_child) => {
let new_child_name = name(&new_child);
children = mathml.children(); children[i] = ChildOfElement::Element(new_child);
mathml.replace_children(children);
if new_child_name == "mi" || new_child_name == "mtext" {
clean_chemistry_leaf(as_element(mathml.children()[i]));
}
i += 1;
}
}
children = mathml.children(); } else {
i += 1;
}
}
if element_name == "mrow" && children.len() == 1 {
let child = as_element(children[0]);
mathml.replace_children(child.children());
set_mathml_name(mathml, name(&child));
add_attrs(mathml, child.attributes());
return Some(mathml); }
if element_name == "mrow" || ELEMENTS_WITH_ONE_CHILD.contains(element_name) {
merge_number_blocks(mathml, &mut children);
merge_whitespace(&mut children);
handle_convert_to_mmultiscripts(&mut children);
} else if element_name == "msub" || element_name == "msup" ||
element_name == "msubsup" || element_name == "mmultiscripts"{
if element_name != "mmultiscripts" {
let mut is_empty_script = is_empty_element(as_element(children[0])) &&
is_empty_element(as_element(children[1]));
if element_name == "msubsup" {
is_empty_script = is_empty_element(as_element(children[2]));
}
if is_empty_script {
if parent_requires_child {
return Some( as_element(children[0]) ); } else {
return None;
}
}
}
let mathml = if element_name == "mmultiscripts" {clean_mmultiscripts(mathml).unwrap()} else {mathml};
if !is_chemistry_off() {
let likely_chemistry = likely_adorned_chem_formula(mathml);
if likely_chemistry >= 0 {
mathml.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
}
}
if element_name == "msubsup" {
return Some( clean_msubsup(mathml) );
} else {
return Some(mathml);
}
}
mathml.replace_children(children);
if element_name == "mrow" || ELEMENTS_WITH_ONE_CHILD.contains(element_name) {
clean_chemistry_mrow(mathml);
}
self.assure_nary_tag_has_mrow(mathml);
return Some(mathml);
}
}
/// Maps a run of ASCII hyphens to a single dash character:
/// `"--"` becomes "—" and `"---"` or `"----"` becomes "―".
/// Any other text yields `None`.
fn canonicalize_dash(text: &str) -> Option<&str> {
    match text {
        "--" => Some("—"),
        "---" | "----" => Some("―"),
        _ => None,
    }
}
/// Returns true if `mrow` matches this exact structure:
/// it is the child of an `msub`/`msup`, and contains
/// `mrow > mpadded[width="0"] > mphantom > mi` with the `mi` text being "A".
///
/// Fix: an `mpadded` with no children used to panic on `children()[0]`; it now yields `false`.
///
/// # Panics
/// Panics if `mrow` is not an `mrow` with exactly one child, or if it has no parent element.
fn is_from_mhchem_hack(mrow: Element) -> bool {
    assert_eq!(name(&mrow), "mrow");
    assert_eq!(mrow.children().len(), 1);
    let parent = mrow.parent().unwrap().element().unwrap();
    let parent_name = name(&parent);
    if !(parent_name == "msub" || parent_name == "msup") {
        return false;
    }

    let inner_mrow = as_element(mrow.children()[0]);
    if !(name(&inner_mrow) == "mrow" && inner_mrow.children().len() == 1) {
        return false;
    }

    let mpadded = as_element(inner_mrow.children()[0]);
    if name(&mpadded) != "mpadded" || mpadded.attribute_value("width") != Some("0") {
        return false;
    }

    // the number of 'mpadded' children is not checked anywhere before this point
    let mphantom = match mpadded.children().first() {
        Some(&child) => as_element(child),
        None => return false,
    };
    if !(name(&mphantom) == "mphantom" && mphantom.children().len() == 1) {
        return false;
    }

    let mi = as_element(mphantom.children()[0]);
    return name(&mi) == "mi" && as_text(mi) == "A";
}
/// Decides whether a `width` attribute value is small enough to ignore.
///
/// `"0"` and any value starting with `-` are ignorable. Otherwise the value is split at its
/// first ASCII letter into an amount and a unit, and it is ignorable when the amount is below
/// 0.25 (`em`, `rem`), 0.5 (`ex`) or 6.1 (`px`). Any other unit, a missing unit, or an amount
/// that does not parse as a number is not ignorable.
fn is_width_ignorable(width: &str) -> bool {
    if width == "0" || width.starts_with('-') {
        return true;
    }

    let unit_start = match width.find(|ch: char| ch.is_ascii_alphabetic()) {
        Some(index) => index,
        None => return false,
    };
    let (amount, unit) = width.split_at(unit_start);
    let threshold = match unit {
        "em" | "rem" => 0.25,
        "ex" => 0.5,
        "px" => 6.1,
        _ => return false,
    };
    match amount.parse::<f64>() {
        Ok(value) => value < threshold,
        Err(_) => false,
    }
}
/// Turns `mathml` (in place) into an `mtext` whose only content is a no-break space, tags it
/// with `data-changed="empty_content"`, and returns it.
fn make_empty_element(mathml: Element) -> Element {
    set_mathml_name(mathml, "mtext");
    // drop whatever was inside before setting the placeholder text
    mathml.clear_children();
    mathml.set_text("\u{A0}");
    mathml.set_attribute_value("data-changed", "empty_content");
    mathml
}
/// Creates a new `mtext` in `doc` whose only content is a no-break space, tagged with
/// `data-added="missing-content"`.
fn create_empty_element<'a>(doc: &Document<'a>) -> Element<'a> {
    let placeholder = create_mathml_element(doc, "mtext");
    placeholder.set_text("\u{A0}");
    placeholder.set_attribute_value("data-added", "missing-content");
    placeholder
}
/// True if `el` is a leaf whose text is empty after trimming, or an `mrow` with no children.
fn is_empty_element(el: Element) -> bool {
    if is_leaf(el) && as_text(el).trim().is_empty() {
        return true;
    }
    return name(&el) == "mrow" && el.children().is_empty();
}
/// Examines an `mi`/`mtext` leaf for chemistry.
///
/// Nothing is done when chemistry is turned off, when the leaf already carries the
/// `MAYBE_CHEMISTRY` attribute, or when its text is longer than two bytes and matches a Roman
/// numeral. Otherwise, if `convert_leaves_to_chem_elements` produces elements, the leaf is
/// replaced via `replace_children`; if not, the leaf is tagged with `MAYBE_CHEMISTRY` when
/// `likely_chem_element` returns a non-negative value.
///
/// # Panics
/// Panics if the leaf is processed and is neither `mi` nor `mtext`.
fn clean_chemistry_leaf(mathml: Element) -> Element {
    if is_chemistry_off() || mathml.attribute(MAYBE_CHEMISTRY).is_some() {
        return mathml;
    }

    assert!(name(&mathml)=="mi" || name(&mathml)=="mtext");
    let text = as_text(mathml);
    if text.len() > 2 && is_roman_number_match(text) {
        return mathml;
    }

    match convert_leaves_to_chem_elements(mathml) {
        Some(elements) => return replace_children(mathml, elements),
        None => {
            let likely_chemistry = likely_chem_element(mathml);
            if likely_chemistry >= 0 {
                mathml.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
            }
            return mathml;
        },
    }
}
/// Normalizes the children of an `mmultiscripts` element.
///
/// When the rebuild is triggered, script pairs that are both `none` are dropped and an empty
/// placeholder is inserted before an `mprescripts` met at a pair boundary while the first
/// `mprescripts` sits at an even index. If only the base remains, the base itself is returned;
/// otherwise the rebuilt children replace the old ones and `mathml` is returned.
///
/// Fixes: an element with no children (possible after cleaning removed them) is returned
/// unchanged instead of panicking on `children[0]`, and a trailing script without a partner is
/// padded with an empty placeholder instead of panicking on `children[i+1]`.
fn clean_mmultiscripts(mathml: Element) -> Option<Element> {
    let mut mathml = mathml;
    let children = mathml.children();
    let n = children.len();
    if n == 0 {
        // nothing to normalize (and no base to fall back on)
        return Some(mathml);
    }

    let i_mprescripts = children.iter()
        .position(|&el| name(&as_element(el)) == "mprescripts")
        .unwrap_or(n);
    let has_misplaced_mprescripts = i_mprescripts & 1 == 0;
    // NOTE(review): this is true for an even count without 'mprescripts' and an odd count with it,
    // which is the opposite of what `assure_mathml` accepts. As written it sends well-formed input
    // through the rebuild below, which is what unwraps a base-only mmultiscripts, so it is left as is.
    let has_proper_number_of_children = if i_mprescripts == n { n & 1 == 0 } else { n & 1 != 0 };

    if has_misplaced_mprescripts || !has_proper_number_of_children || has_none_none_script_pair(&children) {
        let mut new_children = Vec::with_capacity(n+2);
        new_children.push(children[0]);
        let mut i = 1;
        while i < n {
            let child = as_element(children[i]);
            let child_name = name(&child);
            if child_name == "mprescripts" {
                if has_misplaced_mprescripts {
                    let mtext = create_empty_element(&mathml.document());
                    new_children.push(ChildOfElement::Element(mtext));
                }
                new_children.push(children[i]);
                i += 1;
            } else if i+1 < n && child_name == "none" && name(&as_element(children[i+1])) == "none" {
                // drop an empty script pair
                i += 2;
            } else {
                new_children.push(children[i]);
                if i+1 < n {
                    new_children.push(children[i+1]);
                } else {
                    // dangling script: complete the pair with an empty placeholder
                    let mtext = create_empty_element(&mathml.document());
                    new_children.push(ChildOfElement::Element(mtext));
                }
                i += 2;
            }
        }

        if new_children.len() == 1 {
            // only the base is left
            mathml = as_element(new_children[0]);
        } else {
            mathml.replace_children(new_children);
        }
    }
    return Some(mathml);

    /// True if some script pair (scanned pairwise after the base, stepping over `mprescripts`)
    /// consists of two `none` elements.
    fn has_none_none_script_pair(children: &[ChildOfElement]) -> bool {
        let mut i = 1;
        let n = children.len();
        while i < n {
            let child = as_element(children[i]);
            let child_name = name(&child);
            if child_name == "mprescripts" {
                i += 1;
            } else if i+1 < n && child_name == "none" && name(&as_element(children[i+1])) == "none" {
                return true;
            } else {
                i += 2;
            }
        }
        return false;
    }
}
/// Simplifies an `msubsup` whose subscript and/or superscript is a blank `mtext`.
///
/// Both scripts present: returned unchanged. Only one present: `mathml` is renamed to
/// `msub`/`msup` and keeps the base plus that script. Neither present: the base (first child)
/// is returned.
fn clean_msubsup(mathml: Element) -> Element {
    /// True if `script` is an `mtext` whose text is empty after trimming.
    fn is_blank_mtext(script: Element) -> bool {
        name(&script) == "mtext" && as_text(script).trim().is_empty()
    }

    let children = mathml.children();
    let has_subscript = !is_blank_mtext(as_element(children[1]));
    let has_superscript = !is_blank_mtext(as_element(children[2]));

    match (has_subscript, has_superscript) {
        (true, true) => mathml,
        (true, false) => {
            set_mathml_name(mathml, "msub");
            mathml.replace_children(vec![children[0], children[1]]);
            mathml
        },
        (false, true) => {
            set_mathml_name(mathml, "msup");
            mathml.replace_children(vec![children[0], children[2]]);
            mathml
        },
        (false, false) => as_element(children[0]),
    }
}
/// Merges a leaf whose text is "arc" (optionally followed by one space or one no-break space)
/// with a following `mi`/`mo`/`mtext` sibling that names a trig function.
///
/// On a match, `leaf` becomes an `mi` with the text "arc" + the sibling's text, the sibling is
/// removed, and `Some(leaf)` is returned. Otherwise nothing changes and `None` is returned.
///
/// Fix: the two trailing-whitespace comparisons were visually identical in the source; the
/// no-break-space variant is now written as `\u{A0}` so the two cases are distinguishable.
///
/// # Panics
/// Panics if `leaf` is not a leaf element.
fn merge_arc_trig(leaf: Element) -> Option<Element> {
    assert!(is_leaf(leaf));
    let leaf_text = as_text(leaf);
    if !(leaf_text == "arc" || leaf_text == "arc " || leaf_text == "arc\u{A0}") {
        return None;
    }

    let following_siblings = leaf.following_siblings();
    if following_siblings.is_empty() {
        return None;
    }

    let following_sibling = as_element(following_siblings[0]);
    let following_sibling_name = name(&following_sibling);
    if !(following_sibling_name == "mi" || following_sibling_name == "mo" || following_sibling_name == "mtext") {
        return None;
    }

    return crate::definitions::DEFINITIONS.with(|definitions| {
        let following_text = as_text(following_sibling);
        if definitions.borrow().get_hashset("TrigFunctionNames").unwrap().contains(following_text) {
            let new_text = "arc".to_string() + following_text;
            set_mathml_name(leaf, "mi");
            leaf.set_text(&new_text);
            following_sibling.remove_from_parent();
            return Some(leaf);
        }
        return None;
    })
}
/// Converts an `mfenced` element (in place) into the equivalent `mrow`:
/// an opening `mo`, the children interleaved with separator `mo`s, and a closing `mo`.
/// The generated `mo`s are created with `MFENCED_ATTR_VALUE`.
///
/// `open`, `close` and `separators` default to `(`, `)` and `,`. An empty `open`/`close`
/// produces no fence.
///
/// Fix: `separators` now follows the MathML rule: whitespace in the list is ignored, the last
/// separator is repeated when there are more gaps than separators, and an empty list produces
/// no separators. Previously a "," was inserted whenever the list ran out, and whitespace
/// characters were emitted as separators.
fn convert_mfenced_to_mrow(mfenced: Element) -> Element {
    let open = mfenced.attribute_value("open").unwrap_or("(");
    let close = mfenced.attribute_value("close").unwrap_or(")");
    let separators: Vec<char> = mfenced.attribute_value("separators").unwrap_or(",")
        .chars()
        .filter(|ch| !ch.is_whitespace())
        .collect();

    set_mathml_name(mfenced, "mrow");
    mfenced.remove_attribute("open");
    mfenced.remove_attribute("close");
    mfenced.remove_attribute("separators");

    let children = mfenced.children();
    let mut new_children = Vec::with_capacity(2*children.len() + 1);
    if !open.is_empty() {
        new_children.push(ChildOfElement::Element( create_mo(mfenced.document(), open, MFENCED_ATTR_VALUE)) );
    }
    if !children.is_empty() {
        new_children.push(children[0]);
        for (i_gap, child) in children[1..].iter().enumerate() {
            // use the i-th separator, falling back to the last one; none at all if the list is empty
            if let Some(sep) = separators.get(i_gap).or_else(|| separators.last()) {
                let sep = sep.to_string();
                new_children.push( ChildOfElement::Element( create_mo(mfenced.document(), &sep, MFENCED_ATTR_VALUE)) );
            }
            new_children.push(*child);
        }
    }
    if !close.is_empty() {
        new_children.push(ChildOfElement::Element( create_mo(mfenced.document(), close, MFENCED_ATTR_VALUE)) );
    }
    mfenced.replace_children(new_children);
    return mfenced;
}
/// True if `text` matches the upper-case or the lower-case Roman numeral pattern.
fn is_roman_number_match(text: &str) -> bool {
    if UPPER_ROMAN_NUMERAL.is_match(text) {
        return true;
    }
    return LOWER_ROMAN_NUMERAL.is_match(text);
}
/// Classifies an `mn` by its text: exactly 3, 4 or 5 digits, or 4 binary digits.
/// Anything else (including a non-`mn` element) is `DigitBlockType::None`.
///
/// The four-digit decimal pattern is tried before the binary one, so text made of four
/// `0`/`1` characters is reported as `DecimalBlock_4`.
fn is_digit_block(mathml: Element) -> DigitBlockType {
    lazy_static! {
        static ref IS_DIGIT_BLOCK_3: Regex = Regex::new(r"^\d\d\d$").unwrap();
        static ref IS_DIGIT_BLOCK_4: Regex = Regex::new(r"^\d\d\d\d$").unwrap();
        static ref IS_DIGIT_BLOCK_5: Regex = Regex::new(r"^\d\d\d\d\d$").unwrap();
        static ref IS_BINARY_DIGIT_BLOCK: Regex = Regex::new(r"^[01]{4}$").unwrap();
    }

    if name(&mathml) != "mn" {
        return DigitBlockType::None;
    }

    let text = as_text(mathml);
    // the byte length selects which pattern(s) can apply
    match text.len() {
        3 if IS_DIGIT_BLOCK_3.is_match(text) => DigitBlockType::DecimalBlock_3,
        4 if IS_DIGIT_BLOCK_4.is_match(text) => DigitBlockType::DecimalBlock_4,
        4 if IS_BINARY_DIGIT_BLOCK.is_match(text) => DigitBlockType::BinaryBlock_4,
        5 if IS_DIGIT_BLOCK_5.is_match(text) => DigitBlockType::DecimalBlock_5,
        _ => DigitBlockType::None,
    }
}
/// Folds each `mtext` consisting of a single no-break space into a neighbour and removes it
/// from `children`.
///
/// Order of preference for each such `mtext`:
/// 1. a following `mtext` (the space is prepended unless that `mtext` is itself a lone space);
/// 2. a preceding `mi`, `mn` or `mtext` (the space is appended);
/// 3. a following `mi` or `mn` (the space is prepended).
/// If none applies, the `mtext` is left in place.
fn merge_whitespace(children: &mut Vec<ChildOfElement>) {
    let mut i = 0;
    while i < children.len() {
        let current = as_element(children[i]);
        if !(name(&current) == "mtext" && as_text(current) == "\u{A0}") {
            i += 1;
            continue;
        }

        let has_next = i + 1 < children.len();

        // 1. merge into a following mtext
        if has_next {
            let next = as_element(children[i+1]);
            if name(&next) == "mtext" {
                if as_text(next) != "\u{A0}" {
                    let merged = "\u{A0}".to_string() + as_text(next);
                    next.set_text(&merged);
                }
                children.remove(i);
                continue;       // the element now at 'i' has not been examined yet
            }
        }

        // 2. merge into a preceding leaf
        if i > 0 {
            let previous = as_element(children[i-1]);
            let previous_name = name(&previous);
            if previous_name == "mi" || previous_name == "mn" || previous_name == "mtext" {
                let merged = as_text(previous).to_string() + "\u{A0}";
                previous.set_text(&merged);
                children.remove(i);
                continue;       // the element now at 'i' has not been examined yet
            }
        }

        // 3. merge into a following mi/mn
        if has_next {
            let next = as_element(children[i+1]);
            let next_name = name(&next);
            if next_name == "mi" || next_name == "mn" {
                let merged = "\u{A0}".to_string() + as_text(next);
                next.set_text(&merged);
                children.remove(i);
                i += 1;         // step over the mi/mn that just absorbed the space
                continue;
            }
        }

        i += 1;
    }
}
/// Scans `children` for runs of `mn` elements joined by ".", "," or blank `mo`/`mtext`
/// separators and, when `is_likely_a_number` accepts the run, merges it with `merge_block`.
///
/// `parent_mrow` is only passed through to `is_likely_a_number`.
fn merge_number_blocks(parent_mrow: Element, children: &mut Vec<ChildOfElement>) {
lazy_static!{
// Unanchored: matches any text containing one of the characters in the class.
// NOTE(review): presumably the leading `]` is a literal member of the class, so the class is
// `]`, `,`, `.`, space and U+00A0 -- confirm against the regex crate's syntax rules.
static ref SEPARATORS: Regex = Regex::new(r"[],. \u{00A0}]").unwrap();
}
let mut i = 0;
while i < children.len() {
let child = as_element(children[i]);
let mut is_comma = false;
let mut is_decimal_pt = false;
let mut has_decimal_pt = false;
if name(&child) == "mn" {
// an 'mn' that already contains a separator character is not used as the start of a run
if SEPARATORS.is_match(as_text(child)) {
i += 1;
continue;
}
// [start, end) is the candidate range of children to merge
let mut start = i;
let mut looking_for_separator = true;
if i > 0 && name(&as_element(children[i-1])) == "mo" {
let leaf_text = as_text(as_element(children[i-1]));
is_comma = leaf_text == ",";
is_decimal_pt = leaf_text == ".";
has_decimal_pt = is_decimal_pt;
if is_decimal_pt {
// a leading "." becomes part of the run
start = i - 1;
}
}
let mut end = children.len();
// 'j' is relative to children[i+1..], so the sibling's absolute index is i+1+j
// NOTE(review): `start + j+1` equals that index only when start == i; after a leading "."
// (start == i-1) it is one less -- confirm whether that is intended.
for (j, sibling) in children[i+1..].iter().enumerate() {
let sibling = as_element(*sibling);
let sibling_name = name(&sibling);
if sibling_name != "mn" {
if sibling_name=="mo" || sibling_name=="mtext" {
let leaf_text = as_text(sibling);
// stop at anything that is not ".", "," or blank, and at a second decimal point
if !(leaf_text=="." || leaf_text=="," || leaf_text.trim().is_empty()) ||
(leaf_text=="." && has_decimal_pt) {
end = start + j+1;
break;
} else if looking_for_separator {
is_comma = leaf_text == ",";
is_decimal_pt = leaf_text == ".";
} else {
is_comma = false;
is_decimal_pt = false;
}
} else {
// any other element ends the run
end = start + j+1;
break;
}
}
// the run continues only if this sibling is what was expected: a separator when looking for
// one, otherwise something following a decimal point or a digit block
if !(looking_for_separator &&
(sibling_name == "mtext" || is_comma || is_decimal_pt)) &&
( looking_for_separator ||
!(is_decimal_pt || is_digit_block(sibling) != DigitBlockType::None)) {
end = start + if is_decimal_pt {j+2} else {j+1};
break;
}
// separators and digit blocks are expected to alternate
looking_for_separator = !looking_for_separator;
}
if is_likely_a_number(parent_mrow, children, start, end) {
merge_block(children, start, end);
} else {
// not a number: resume scanning at 'end' (the `i += 1` below completes the step)
i = end-1; }
}
i += 1;
}
}
/// Decide whether an all-uppercase (A-Z) leaf should be broken into one `mi` per letter.
///
/// The leaf is split (and returned, now renamed to `mrow`) when either
/// * its parent is an `mover` whose second child is a leaf holding a single
///   non-alphanumeric character, or
/// * its immediately preceding sibling is an `mi`, `mo`, or `mtext` whose text is listed in
///   the "GeometryPrefixOperators" or "GeometryShapes" definitions.
/// Otherwise `None` is returned and the leaf is left untouched.
fn split_points(leaf: Element) -> Option<Element> {
    lazy_static!{
        static ref IS_UPPERCASE: Regex = Regex::new(r"^[A-Z]+$").unwrap();
    }

    if !IS_UPPERCASE.is_match(as_text(leaf)) {
        return None;
    }

    // case 1: an accent-like single character sits over the leaf
    let parent = leaf.parent().unwrap().element().unwrap();
    if name(&parent) == "mover" {
        let over = as_element(parent.children()[1]);
        if is_leaf(over) {
            let mut over_chars = as_text(over).chars();
            if let (Some(only_char), None) = (over_chars.next(), over_chars.next()) {
                if !only_char.is_alphanumeric() {
                    return Some( split_element(leaf) );
                }
            }
        }
    }

    // case 2: the sibling just before the leaf names a geometry operator or shape
    let preceding_siblings = leaf.preceding_siblings();
    let preceding_sibling = match preceding_siblings.last() {
        None => return None,
        Some(&sibling) => as_element(sibling),
    };
    if !matches!(name(&preceding_sibling), "mi" | "mo" | "mtext") {
        return None;
    }
    let preceding_text = as_text(preceding_sibling);
    return crate::definitions::DEFINITIONS.with(|definitions| {
        let defs = definitions.borrow();
        let prefix_ops = defs.get_hashset("GeometryPrefixOperators").unwrap();
        let shapes = defs.get_hashset("GeometryShapes").unwrap();
        let in_geometry_context = prefix_ops.contains(preceding_text) || shapes.contains(preceding_text);
        if in_geometry_context {
            Some( split_element(leaf) )
        } else {
            None
        }
    });

    /// Turn `leaf` into an `mrow` containing one new `mi` per character of its text.
    fn split_element(leaf: Element) -> Element {
        let letters = as_text(leaf)
            .chars()
            .map(|ch| {
                let new_leaf = create_mathml_element(&leaf.document(), "mi");
                new_leaf.set_text(&ch.to_string());
                new_leaf
            })
            .collect::<Vec<_>>();
        set_mathml_name(leaf, "mrow");
        leaf.replace_children(letters);
        return leaf;
    }
}
/// Heuristic check of whether `children[start..end]` looks like the pieces of a single number.
///
/// * `mrow` -- the parent of `children`; its siblings are consulted when the run spans all of `children`.
/// * `children` -- the child list containing the candidate run.
/// * `start`, `end` -- half-open range of the candidate run.
///
/// Returns `false` when the run looks more like a list/point (e.g., it is wrapped in fences,
/// has more than one decimal point, or mixes digit block styles), `true` otherwise.
fn is_likely_a_number(mrow: Element, children: &[ChildOfElement], mut start: usize, mut end: usize) -> bool {
// more than one child containing a '.' can't be a single number
if count_decimal_pts(children, start, end) > 1 {
return false;
}
// ignore trailing leaves whose text is blank, but keep at least two children in the run
while end >= start+3 {
let child = as_element(children[end-1]); if !is_leaf(child) || !as_text(child).trim().is_empty() {
break;
}
end -= 1;
}
let decimal_at_start = count_decimal_pts(children, start, start+1) == 1;
let decimal_at_end = !(decimal_at_start || count_decimal_pts(children, end-1, end) == 0);
// with fewer than three pieces, only a leading or trailing decimal point makes this a number
if end - start < 3 {
return decimal_at_start || decimal_at_end;
}
// step over the leading/trailing decimal point so the checks below see the digit blocks
if decimal_at_start {
start += 1;
} else if decimal_at_end {
end -= 1;
}
if name(&as_element(children[end-1])) != "mn" {
return false; }
// the first separator is whitespace: accept only if every 'mn' has the same digit block type
if name(&as_element(children[start+1])) == "mtext" ||
IS_WHITESPACE.is_match(as_text(as_element(children[start+1]))) {
// NOTE(review): this loop looks at every entry of 'children', not just start..end -- confirm intended
let mut digit_block = DigitBlockType::None; for &child in children {
let child = as_element(child);
if name(&child) == "mn" {
if digit_block == DigitBlockType::None {
digit_block = is_digit_block(child);
} else if is_digit_block(child) != digit_block {
return false; }
}
}
return true; }
// the list starts with "mn ," before the run begins: treat the run as part of a comma-separated list
if start > 1 && name(&as_element(children[0])) == "mn" {
let potential_comma = as_element(children[1]);
if name(&potential_comma) == "mo" && as_text(potential_comma) == "," {
return false;
}
}
// find the elements on either side of the run
let first_child;
let last_child;
if start == 0 && end == children.len() {
// the run is the whole mrow, so look at the mrow's own neighbors
let preceding_children = mrow.preceding_siblings();
let following_children = mrow.following_siblings();
if preceding_children.is_empty() || following_children.is_empty() {
return true; }
first_child = preceding_children[preceding_children.len()-1];
last_child = following_children[0];
} else if start > 0 && end < children.len() {
first_child = children[start-1];
last_child = children[end];
} else {
// the run touches only one edge of the mrow, so it can't be fenced on both sides here
return true; }
let first_child = as_element(first_child);
let last_child = as_element(last_child);
// a run surrounded by fences on both sides is not treated as a number
return !(name(&first_child) == "mo" && is_fence(first_child) &&
name(&last_child) == "mo" && is_fence(last_child) );
}
/// Count how many of `children[start..end]` have text containing a '.'.
///
/// Indices past the end of `children` are silently ignored.
fn count_decimal_pts(children: &[ChildOfElement], start: usize, end: usize) -> usize {
    return children.iter()
        .take(end)
        .skip(start)
        .filter(|&&child| as_text(as_element(child)).contains('.'))
        .count();
}
/// Collapse `children[start..end]` into a single `mn`.
///
/// The text of every child in the range is concatenated, `children[start]` is renamed to
/// `mn` and given that text, and the remaining entries of the range are removed from `children`.
fn merge_block(children: &mut Vec<ChildOfElement>, start: usize, end: usize) {
    let mut merged_text = String::with_capacity(4*(end-start)-1);
    children.iter()
        .take(end)
        .skip(start)
        .for_each(|&piece| merged_text.push_str(as_text(as_element(piece))));

    let number = as_element(children[start]);
    set_mathml_name(number, "mn");
    number.set_text(&merged_text);

    children.drain(start+1..end);
}
/// Replace every run of three consecutive `mo` children whose text is "." with one "…".
///
/// The first `mo` of the triple receives the "…" text and the other two are removed
/// from their parent. Any other child resets the run. Returns `mrow`.
fn merge_dots(mrow: Element) -> Element {
    // snapshot of the children: removals below don't shift these indices
    let children = mrow.children();
    let mut run_length = 0;
    for (index, &node) in children.iter().enumerate() {
        let element = as_element(node);
        let is_dot = name(&element) == "mo" && as_text(element) == ".";
        if !is_dot {
            run_length = 0;
            continue;
        }

        run_length += 1;
        if run_length == 3 {
            as_element(children[index-2]).set_text("…");
            as_element(children[index-1]).remove_from_parent();
            element.remove_from_parent();
            run_length = 0;
        }
    }
    return mrow;
}
/// Merge each run of consecutive `mo` children matching `IS_PRIME` into a single `mo`.
///
/// The merging itself is done by `merge_prime_elements`; this function only finds the runs
/// (including one that extends to the last child). Returns `mrow`.
fn merge_primes(mrow: Element) -> Element {
    // snapshot of the children: removals done while merging don't shift these indices
    let mut children = mrow.children();
    let n_children = children.len();
    let mut run_length = 0;
    for index in 0..n_children {
        let element = as_element(children[index]);
        if name(&element) == "mo" && IS_PRIME.is_match(as_text(element)) {
            run_length += 1;
        } else if run_length > 0 {
            // the run ended just before this child
            merge_prime_elements(&mut children, index - run_length, index);
            run_length = 0;
        }
    }
    if run_length > 0 {
        // the run goes all the way to the last child
        merge_prime_elements(&mut children, n_children - run_length, n_children);
    }
    return mrow;
}
/// Combine the prime characters of `children[start..end]` into `children[start]`.
///
/// The text of all the elements is concatenated, every element after the first is removed
/// from its parent, and the first gets the result of `merge_prime_text` on the combined text.
fn merge_prime_elements(children: &mut [ChildOfElement], start: usize, end: usize) {
    let head = as_element(children[start]);
    let mut combined = String::with_capacity(end+3-start);
    combined.push_str(as_text(head));
    children.iter()
        .take(end)
        .skip(start+1)
        .for_each(|&node| {
            let prime = as_element(node);
            combined.push_str(as_text(prime));
            prime.remove_from_parent();
        });
    head.set_text(&merge_prime_text(&combined));
}
/// Re-express a string of prime characters using the fewest prime characters.
///
/// `'` and `′` count as one prime, `″` as two, `‴` as three, and `⁗` as four. The result is
/// as many `⁗` as fit, followed by one character for the remainder (if any).
/// If `text` contains any other character, a message is written to stderr and `text` is
/// returned unchanged.
fn merge_prime_text(text: &str) -> String {
    let mut total_primes = 0;
    for ch in text.chars() {
        total_primes += match ch {
            '\'' | '′' => 1,
            '″' => 2,
            '‴' => 3,
            '⁗' => 4,
            _ => {
                eprint!("merge_prime_text: unexpected char '{}' found", ch);
                return text.to_string();
            }
        };
    }

    let mut merged = "⁗".repeat(total_primes / 4);
    let remainder = match total_primes % 4 {
        1 => Some('′'),
        2 => Some('″'),
        3 => Some('‴'),
        _ => None,
    };
    if let Some(ch) = remainder {
        merged.push(ch);
    }
    return merged;
}
/// Wrap each pseudo-script `mo` (quotes, primes, degree sign, superscript digits, ...) and
/// the child before it in a new `msup` marked with `CHANGED_ATTR`.
///
/// The first child is never treated as a pseudo-script because it has nothing to attach to.
/// Consecutive pseudo-scripts nest: the `msup` just built becomes the base of the next one.
/// The children of `mrow` are replaced only if at least one `msup` was created. Returns `mrow`.
fn handle_pseudo_scripts(mrow: Element) -> Element {
    static PSEUDO_SCRIPTS: phf::Set<&str> = phf_set! {
        "\"", "'", "*", "`", "ª", "°", "²", "³", "´", "¹", "º",
        "‘", "’", "“", "”", "„", "‟",
        "′", "″", "‴", "‵", "‶", "‷", "⁗",
    };

    let mut children = mrow.children();
    let mut changed = false;
    let mut index = 1;
    while index < children.len() {
        let candidate = as_element(children[index]);
        let is_pseudo_script = name(&candidate) == "mo" && PSEUDO_SCRIPTS.contains(as_text(candidate));
        if !is_pseudo_script {
            index += 1;
            continue;
        }

        let msup = create_mathml_element(&candidate.document(), "msup");
        msup.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
        msup.append_child(children[index-1]);
        msup.append_child(candidate);
        children[index-1] = ChildOfElement::Element(msup);
        // 'index' is not advanced: after the removal it already refers to the next child
        children.remove(index);
        changed = true;
    }

    if changed {
        mrow.replace_children(children);
    }
    return mrow;
}
/// Scan `children` for `msub`/`msup`/`msubsup` elements whose base is empty and hand each
/// one to `convert_to_mmultiscripts`, continuing the scan from the index that call returns.
fn handle_convert_to_mmultiscripts(children: &mut Vec<ChildOfElement>) {
    let mut index = 0;
    while index < children.len() {
        let candidate = as_element(children[index]);
        let is_script = matches!(name(&candidate), "msub" | "msup" | "msubsup");
        if is_script && is_empty_element(as_element(candidate.children()[0])) {
            index = convert_to_mmultiscripts(children, index);
        } else {
            index += 1;
        }
    }
}
/// Build an `mmultiscripts` element from the empty-based script at `mrow_children[i]`,
/// a base chosen by `choose_base_of_mmultiscripts`, and the neighboring empty-based scripts.
///
/// * `mrow_children` -- the child list; the consumed entries are replaced by the new element.
/// * `i` -- index of an `msub`/`msup`/`msubsup` whose base is empty.
///
/// Returns the index in `mrow_children` where the new `mmultiscripts` was stored.
fn convert_to_mmultiscripts(mrow_children: &mut Vec<ChildOfElement>, i: usize) -> usize {
// 'parent' is only used for the debug output
let parent = as_element(mrow_children[i]).parent().unwrap().element().unwrap();
debug!("convert_to_mmultiscripts (i={}) -- PARENT:\n{}", i, mml_to_string(&parent));
let i_base = choose_base_of_mmultiscripts(mrow_children, i);
let mut base = as_element(mrow_children[i_base]);
debug!("convert_to_mmultiscripts -- base\n{}", mml_to_string(&base));
let base_name = name(&base);
let mut prescripts = vec![];
let mut postscripts = vec![];
let mut i_postscript = i_base + 1;
// the chosen base is itself a script element with a real base: pull out the real base,
// substitute an empty element for it, and treat what is left as the first postscript(s)
if (base_name == "msub" || base_name == "msup" || base_name == "msubsup") &&
!is_empty_element(as_element(base.children()[0])) {
let mut base_children = base.children();
let script_base = as_element(base.children()[0]);
base_children[0] = ChildOfElement::Element(create_empty_element(&base.document()));
base.replace_children(base_children);
add_to_scripts(base, &mut postscripts);
base = script_base;
}
// the base comes after 'i', so the scripts from 'i' up to the base are prescripts
if i_base > i {
let mut i_prescript = i;
while i_prescript < i_base {
let script = as_element(mrow_children[i_prescript]);
if !add_to_scripts(script, &mut prescripts) {
break;
}
i_prescript += 1;
}
}
// gather the empty-based scripts that follow the base
while i_postscript < mrow_children.len() {
let script = as_element(mrow_children[i_postscript]);
// an 'msub' immediately followed by an empty-based 'msup' is combined into one 'msubsup'
if name(&script) == "msub" && i_postscript+1 < mrow_children.len() {
let superscript = as_element(mrow_children[i_postscript+1]);
if name(&superscript) == "msup" && is_empty_element(as_element(superscript.children()[0])) {
set_mathml_name(script, "msubsup");
script.append_child(superscript.children()[1]);
// extra step so the absorbed 'msup' is skipped
i_postscript += 1;
}
}
debug!("adding script\n{}", mml_to_string(&script));
if !add_to_scripts(script, &mut postscripts) {
break;
}
i_postscript += 1;
}
// the new element goes at whichever of 'i' and 'i_base' comes first
let i_returned = if i_base < i {i_base} else {i};
let script = create_mathml_element(&base.document(), "mmultiscripts");
let mut num_children = 1 + postscripts.len();
if !prescripts.is_empty() {
num_children += 1 + prescripts.len();
}
// child order: base, postscripts, then (only if there are prescripts) 'mprescripts' and the prescripts
let mut new_children = Vec::with_capacity(num_children);
new_children.push(ChildOfElement::Element(base));
new_children.append(&mut postscripts);
if !prescripts.is_empty() {
new_children.push( ChildOfElement::Element( create_mathml_element(&script.document(), "mprescripts") ) );
new_children.append(&mut prescripts);
}
script.replace_children(new_children);
// replace everything that was consumed with the single new element
mrow_children[i_returned] = ChildOfElement::Element(script);
mrow_children.drain(i_returned+1..i_postscript);
// record the chemistry likelihood on the new element when it is non-negative
let likely_chemistry = likely_adorned_chem_formula(script);
if likely_chemistry >= 0 {
script.set_attribute_value(MAYBE_CHEMISTRY, likely_chemistry.to_string().as_str());
}
return i_returned;
}
/// If `el` is an `msub`, `msup`, or `msubsup` with an empty base, append its
/// (subscript, superscript) pair to `scripts` via `add_pair` and return `true`.
/// Otherwise leave `scripts` alone and return `false`.
fn add_to_scripts<'a>(el: Element<'a>, scripts: &mut Vec<ChildOfElement<'a>>) -> bool {
    let script_name = name(&el);
    let is_script = matches!(script_name, "msub" | "msup" | "msubsup");
    if !is_script || !is_empty_element(as_element(el.children()[0])) {
        return false;
    }

    let parts = el.children();
    match script_name {
        "msub" => add_pair(scripts, Some(parts[1]), None),
        "msup" => add_pair(scripts, None, Some(parts[1])),
        _ => add_pair(scripts, Some(parts[1]), Some(parts[2])),     // msubsup
    }
    return true;
}
/// Push a subscript followed by a superscript onto `script_vec`.
///
/// A script that is missing (`None`) or is an empty element is replaced by a newly created
/// `none` element. At least one of `subscript` and `superscript` must be `Some`; otherwise
/// this panics.
fn add_pair<'v, 'a:'v>(script_vec: &'v mut Vec<ChildOfElement<'a>>, subscript: Option<ChildOfElement<'a>>, superscript: Option<ChildOfElement<'a>>) {
    // either script will do for getting at the document
    let anchor = match subscript {
        Some(sub) => sub,
        None => superscript.unwrap(),
    };
    let doc = as_element(anchor).document();

    let or_none = |script: Option<ChildOfElement<'a>>| -> ChildOfElement<'a> {
        match script {
            Some(given) if !is_empty_element(as_element(given)) => given,
            _ => ChildOfElement::Element(create_mathml_element(&doc, "none")),
        }
    };

    let subscript = or_none(subscript);
    let superscript = or_none(superscript);
    script_vec.push(subscript);
    script_vec.push(superscript);
}
/// Pick the index of the child that should serve as the base for the empty-based script
/// at `mrow_children[i]`.
///
/// Candidates are tried in this order: the next sibling, a fenced group ending right before
/// `i` (which gets wrapped in a new `mrow`), the previous sibling, and then the first simple
/// base found by skipping forward over script elements. If nothing fits, `i` is returned.
/// Note that `mrow_children` is modified when a fenced group is wrapped.
fn choose_base_of_mmultiscripts(mrow_children: &mut Vec<ChildOfElement>, i: usize) -> usize {
let script_element_base = as_element(as_element(mrow_children[i]).children()[0]);
// the (empty) base carries this attribute when the hack named by MHCHEM_MMULTISCRIPTS_HACK was applied
let from_mchem = script_element_base.attribute(MHCHEM_MMULTISCRIPTS_HACK).is_some();
// prefer the following sibling, except for a marked script that has something before it
if mrow_children.len() > i+1 && !(from_mchem && i > 0) && is_child_simple_base(mrow_children[i+1]) {
return i+1;
}
if i > 0 {
// "( ... )" right before the script: group it into a new mrow and use that as the base
if let Some(i_start) = is_grouped_base(&mrow_children[..i]) {
assert!(i_start < i-1); let new_mrow = create_mathml_element(&as_element(mrow_children[0]).document(), "mrow");
new_mrow.set_attribute_value(CHANGED_ATTR, ADDED_ATTR_VALUE);
for &child in &mrow_children[i_start..i] {
new_mrow.append_child(child);
}
// collapse i_start..i down to the single new mrow
mrow_children.drain(i_start+1..i);
mrow_children[i_start] = ChildOfElement::Element(new_mrow);
return i_start;
}
if is_child_simple_base(mrow_children[i-1]) {
return i-1;
}
}
// look further right, skipping over script elements, for something usable as a base
for i_base in i+1..mrow_children.len() {
if is_child_simple_base(mrow_children[i_base]) {
return i_base;
} else {
let child = as_element(mrow_children[i_base]);
let child_name = name(&child);
if !(child_name == "msub" || child_name == "msup" || child_name == "msubsup") {
break;
}
}
}
// nothing suitable was found: fall back to the script itself
assert!(mrow_children.len() > i);
return i;
/// True if `child` (or, for `msub`/`msup`/`msubsup`, its base) is a non-empty leaf.
fn is_child_simple_base(child: ChildOfElement) -> bool {
let mut child = as_element(child);
let child_name = name(&child);
if child_name == "msub" || child_name == "msup" || child_name == "msubsup" {
child = as_element(child.children()[0]);
}
return is_leaf(child) && !is_empty_element(child); }
/// If the last entry of `mrow_children` is an `mo` that is a right fence, return the index
/// of the closest earlier `mo` that is a left fence (if any); otherwise `None`.
/// `mrow_children` must not be empty.
fn is_grouped_base(mrow_children: &[ChildOfElement]) -> Option<usize> {
let i_last = mrow_children.len()-1;
let last_child = as_element(mrow_children[i_last]);
if name(&last_child) == "mo" &&
CanonicalizeContext::new().find_operator(last_child, None, None, None).is_right_fence() {
// scan backwards so the nearest left fence is the one found
for i_child in (0..i_last).rev() {
let child = as_element(mrow_children[i_child]);
if name(&child) == "mo" &&
CanonicalizeContext::new().find_operator(child, None, None, None).is_left_fence() {
return Some(i_child);
}
}
}
return None;
}
}
}
/// Recursively canonicalize `mathml`, dispatching on its element name.
///
/// * Leaves have their text canonicalized (`canonicalize_plane1`, plus
///   `canonicalize_mo_text` for `mo`).
/// * `mrow` is handled by `canonicalize_mrows_in_mrow`.
/// * `semantics` has only its presentation child canonicalized.
/// * Every other element has each of its children canonicalized in place.
///
/// Returns the canonicalized element or the first error hit while recursing.
/// Panics if a non-leaf element has a child that is not an element.
fn canonicalize_mrows<'a>(&self, mathml: Element<'a>) -> Result<Element<'a>> {
    let tag_name = name(&mathml);
    set_mathml_name(mathml, tag_name);
    match tag_name {
        "mi" | "ms" | "mtext" | "mspace" | "mn" => {
            self.canonicalize_plane1(mathml);
        },
        "mo" => {
            self.canonicalize_plane1(mathml);
            self.canonicalize_mo_text(mathml);
        },
        "mrow" => {
            return self.canonicalize_mrows_in_mrow(mathml);
        },
        "semantics" => {
            let mut children = mathml.children();
            let (i_presentation, presentation) = get_presentation_element(mathml);
            let canonical = self.canonicalize_mrows(presentation)?;
            children[i_presentation] = ChildOfElement::Element(canonical);
            mathml.replace_children(children);
        },
        _ => {
            let canonical_children = mathml.children()
                .into_iter()
                .map(|child| match child {
                    ChildOfElement::Element(e) => self.canonicalize_mrows(e).map(ChildOfElement::Element),
                    _ => panic!("Should have been an element or text"),
                })
                .collect::<Result<Vec<_>>>()?;
            mathml.replace_children(canonical_children);
        },
    }
    return Ok( mathml );
}
/// If `mrow` has the shape `fence ... script(fence, ...)`, i.e. it starts with a fence `mo`
/// and ends with an `msub`/`msup`/`msubsup` whose base is a fence `mo`, move the script
/// outward so that it applies to the whole fenced `mrow`.
///
/// On a match, the closing fence replaces the script as the last child of `mrow`, `mrow`
/// becomes the base of the script, and the script element is returned. In every other case
/// (including a non-`mrow` or an `mrow` without children) `mrow` is returned unchanged.
fn potentially_lift_script<'a>(&self, mrow: Element<'a>) -> Element<'a> {
    if name(&mrow) != "mrow" {
        return mrow;
    }
    let mut mrow_children = mrow.children();
    if mrow_children.is_empty() {
        // nothing to lift; also avoids indexing into an empty child list below
        return mrow;
    }

    let i_last = mrow_children.len() - 1;
    let first_child = as_element(mrow_children[0]);
    let script = as_element(mrow_children[i_last]);

    if !(name(&first_child) == "mo" && is_fence(first_child)) {
        return mrow;
    }
    if !matches!(name(&script), "msub" | "msup" | "msubsup") {
        return mrow;
    }

    let mut script_children = script.children();
    let close_fence = script_children[0];
    let base = as_element(close_fence);
    if !(name(&base) == "mo" && is_fence(base)) {
        return mrow;
    }

    // swap: the closing fence goes back into the mrow, and the mrow becomes the script's base
    mrow_children[i_last] = close_fence;
    mrow.replace_children(mrow_children);
    script_children[0] = ChildOfElement::Element(mrow);
    script.replace_children(script_children);
    return script;
}
/// Replace the text of a leaf that has a `mathvariant` attribute with the corresponding
/// styled characters (e.g., bold or double-struck versions of the letters and digits).
///
/// If there is no `mathvariant` attribute, nothing is changed. If the attribute's value is
/// not in `MATH_VARIANTS`, the text is written back unchanged. The attribute itself is not
/// removed. Returns the element that was passed in.
fn canonicalize_plane1<'a>(&self, mi: Element<'a>) -> Element<'a> {
// For each mathvariant: the code point the styled alphabet starts at for
// [Latin letters, digits, Greek letters] (the 'table' index used in SHIFT_AMOUNTS).
// A value of 0 means characters of that kind are left as they are.
static MATH_VARIANTS: phf::Map<&str, [u32; 3]> = phf_map! {
"italic" => [0, 0, 0x1D6E2],
"bold" => [0x1D400, 0x1D7CE, 0x1D6A8],
"bold-italic" => [0x1D468, 0x1D7CE, 0x1D71C],
"double-struck" => [0x1D538, 0x1D7D8, 0],
"bold-fraktur" => [0x1D56C, 0, 0x1D6A8],
"script" => [0x1D49C, 0, 0],
"bold-script" => [0x1D4D0, 0, 0x1D6A8],
"fraktur" => [0x1D504, 0, 0],
"sans-serif" => [0x1D5A0, 0x1D7E2, 0],
"bold-sans-serif" => [0x1D5D4, 0x1D7EC, 0x1D756],
"sans-serif-italic" => [0x1D608, 0x1D7E2, 0],
"sans-serif-bold-italic" => [0x1D63C, 0x1D7EC, 0x1D790],
"monospace" => [0x1D670, 0x1D7F6, 0],
};
let variant = mi.attribute_value("mathvariant");
if variant.is_none() {
return mi;
}
let mi_text = as_text(mi);
let new_text = match MATH_VARIANTS.get(variant.unwrap()) {
None => mi_text.to_string(),
Some(start) => shift_text(mi_text, start),
};
mi.set_text(&new_text);
return mi;
/// Map each character of `old_text` into the styled alphabets whose starting code points
/// are given by `char_mapping`; characters with no mapping are copied unchanged.
fn shift_text(old_text: &str, char_mapping: &[u32; 3]) -> String {
struct Offsets {
ch: u32, // offset of the character from the start of its styled alphabet
table: usize, // which entry of 'char_mapping' holds that start (0, 1, or 2)
}
static SHIFT_AMOUNTS: phf::Map<char, Offsets> = phf_map! {
// Latin letters: 'A'-'Z' are 0-25 and 'a'-'z' are 26-51 in table 0
'A' => Offsets{ ch: 0, table: 0},
'B' => Offsets{ ch: 1, table: 0},
'C' => Offsets{ ch: 2, table: 0},
'D' => Offsets{ ch: 3, table: 0},
'E' => Offsets{ ch: 4, table: 0},
'F' => Offsets{ ch: 5, table: 0},
'G' => Offsets{ ch: 6, table: 0},
'H' => Offsets{ ch: 7, table: 0},
'I' => Offsets{ ch: 8, table: 0},
'J' => Offsets{ ch: 9, table: 0},
'K' => Offsets{ ch: 10, table: 0},
'L' => Offsets{ ch: 11, table: 0},
'M' => Offsets{ ch: 12, table: 0},
'N' => Offsets{ ch: 13, table: 0},
'O' => Offsets{ ch: 14, table: 0},
'P' => Offsets{ ch: 15, table: 0},
'Q' => Offsets{ ch: 16, table: 0},
'R' => Offsets{ ch: 17, table: 0},
'S' => Offsets{ ch: 18, table: 0},
'T' => Offsets{ ch: 19, table: 0},
'U' => Offsets{ ch: 20, table: 0},
'V' => Offsets{ ch: 21, table: 0},
'W' => Offsets{ ch: 22, table: 0},
'X' => Offsets{ ch: 23, table: 0},
'Y' => Offsets{ ch: 24, table: 0},
'Z' => Offsets{ ch: 25, table: 0},
'a' => Offsets{ ch: 26, table: 0},
'b' => Offsets{ ch: 27, table: 0},
'c' => Offsets{ ch: 28, table: 0},
'd' => Offsets{ ch: 29, table: 0},
'e' => Offsets{ ch: 30, table: 0},
'f' => Offsets{ ch: 31, table: 0},
'g' => Offsets{ ch: 32, table: 0},
'h' => Offsets{ ch: 33, table: 0},
'i' => Offsets{ ch: 34, table: 0},
'j' => Offsets{ ch: 35, table: 0},
'k' => Offsets{ ch: 36, table: 0},
'l' => Offsets{ ch: 37, table: 0},
'm' => Offsets{ ch: 38, table: 0},
'n' => Offsets{ ch: 39, table: 0},
'o' => Offsets{ ch: 40, table: 0},
'p' => Offsets{ ch: 41, table: 0},
'q' => Offsets{ ch: 42, table: 0},
'r' => Offsets{ ch: 43, table: 0},
's' => Offsets{ ch: 44, table: 0},
't' => Offsets{ ch: 45, table: 0},
'u' => Offsets{ ch: 46, table: 0},
'v' => Offsets{ ch: 47, table: 0},
'w' => Offsets{ ch: 48, table: 0},
'x' => Offsets{ ch: 49, table: 0},
'y' => Offsets{ ch: 50, table: 0},
'z' => Offsets{ ch: 51, table: 0},
// digits: table 1
'0' => Offsets{ ch: 0, table: 1},
'1' => Offsets{ ch: 1, table: 1},
'2' => Offsets{ ch: 2, table: 1},
'3' => Offsets{ ch: 3, table: 1},
'4' => Offsets{ ch: 4, table: 1},
'5' => Offsets{ ch: 5, table: 1},
'6' => Offsets{ ch: 6, table: 1},
'7' => Offsets{ ch: 7, table: 1},
'8' => Offsets{ ch: 8, table: 1},
'9' => Offsets{ ch: 9, table: 1},
// Greek letters and a few related symbols: table 2
'Α' => Offsets{ ch: 0, table: 2},
'Β' => Offsets{ ch: 1, table: 2},
'Γ' => Offsets{ ch: 2, table: 2},
'Δ' => Offsets{ ch: 3, table: 2},
'Ε' => Offsets{ ch: 4, table: 2},
'Ζ' => Offsets{ ch: 5, table: 2},
'Η' => Offsets{ ch: 6, table: 2},
'Θ' => Offsets{ ch: 7, table: 2},
'Ι' => Offsets{ ch: 8, table: 2},
'Κ' => Offsets{ ch: 9, table: 2},
'Λ' => Offsets{ ch: 10, table: 2},
'Μ' => Offsets{ ch: 11, table: 2},
'Ν' => Offsets{ ch: 12, table: 2},
'Ξ' => Offsets{ ch: 13, table: 2},
'Ο' => Offsets{ ch: 14, table: 2},
'Π' => Offsets{ ch: 15, table: 2},
'Ρ' => Offsets{ ch: 16, table: 2},
'ϴ' => Offsets{ ch: 17, table: 2},
'Σ' => Offsets{ ch: 18, table: 2},
'Τ' => Offsets{ ch: 19, table: 2},
'Υ' => Offsets{ ch: 20, table: 2},
'Φ' => Offsets{ ch: 21, table: 2},
'Χ' => Offsets{ ch: 22, table: 2},
'Ψ' => Offsets{ ch: 23, table: 2},
'Ω' => Offsets{ ch: 24, table: 2},
'∇' => Offsets{ ch: 25, table: 2},
'α' => Offsets{ ch: 26, table: 2},
'β' => Offsets{ ch: 27, table: 2},
'γ' => Offsets{ ch: 28, table: 2},
'δ' => Offsets{ ch: 29, table: 2},
'ε' => Offsets{ ch: 30, table: 2},
'ζ' => Offsets{ ch: 31, table: 2},
'η' => Offsets{ ch: 32, table: 2},
'θ' => Offsets{ ch: 33, table: 2},
'ι' => Offsets{ ch: 34, table: 2},
'κ' => Offsets{ ch: 35, table: 2},
'λ' => Offsets{ ch: 36, table: 2},
'μ' => Offsets{ ch: 37, table: 2},
'ν' => Offsets{ ch: 38, table: 2},
'ξ' => Offsets{ ch: 39, table: 2},
'ο' => Offsets{ ch: 40, table: 2},
'π' => Offsets{ ch: 41, table: 2},
'ρ' => Offsets{ ch: 42, table: 2},
'ς' => Offsets{ ch: 43, table: 2},
'σ' => Offsets{ ch: 44, table: 2},
'τ' => Offsets{ ch: 45, table: 2},
'υ' => Offsets{ ch: 46, table: 2},
'φ' => Offsets{ ch: 47, table: 2},
'χ' => Offsets{ ch: 48, table: 2},
'ψ' => Offsets{ ch: 49, table: 2},
'ω' => Offsets{ ch: 50, table: 2},
'∂' => Offsets{ ch: 51, table: 2},
'ϵ' => Offsets{ ch: 52, table: 2},
'ϑ' => Offsets{ ch: 53, table: 2},
'ϰ' => Offsets{ ch: 54, table: 2},
'ϕ' => Offsets{ ch: 55, table: 2},
'ϱ' => Offsets{ ch: 56, table: 2},
'ϖ' => Offsets{ ch: 57, table: 2},
};
let mut new_text = String::new();
for ch in old_text.chars() {
new_text.push(
match SHIFT_AMOUNTS.get(&ch) {
None => {
// digamma is not in the table; it only has a styled form when the
// Greek start is 0x1D6A8 (the value used by the bold variants above)
if char_mapping[2] == 0x1D6A8 {
match ch {
'Ϝ' => '𝟊',
'ϝ' => '𝟋',
_ => ch,
}
} else {
ch
}
},
Some(offsets) => {
let start_of_mapping = char_mapping[offsets.table];
// a start of 0 means this variant has no styled form for this kind of character
if start_of_mapping == 0 {ch} else {shift_char(start_of_mapping + offsets.ch)}
}
}
)
}
return new_text;
/// Convert the computed code point to a `char`, substituting the replacement listed in
/// `EXCEPTIONS` when there is one.
fn shift_char(ch: u32) -> char {
// Computed code points that must be replaced by a different code point.
// NOTE(review): presumably these are the reserved positions in the styled alphabets
// whose characters live in the Letterlike Symbols block (0x21xx) -- confirm
static EXCEPTIONS: phf::Map<u32, u32> = phf_map! {
0x1D455u32 => 0x210Eu32,
0x1D49Du32 => 0x212Cu32,
0x1D4A0u32 => 0x2130u32,
0x1D4A1u32 => 0x2131u32,
0x1D4A3u32 => 0x210Bu32,
0x1D4A4u32 => 0x2110u32,
0x1D4A7u32 => 0x2112u32,
0x1D4A8u32 => 0x2133u32,
0x1D4ADu32 => 0x211Bu32,
0x1D4BAu32 => 0x212Fu32,
0x1D4BCu32 => 0x210Au32,
0x1D4C4u32 => 0x2134u32,
0x1D506u32 => 0x212Du32,
0x1D50Bu32 => 0x210Cu32,
0x1D50Cu32 => 0x2111u32,
0x1D515u32 => 0x211Cu32,
0x1D51Du32 => 0x2128u32,
0x1D53Au32 => 0x2102u32,
0x1D53Fu32 => 0x210Du32,
0x1D545u32 => 0x2115u32,
0x1D547u32 => 0x2119u32,
0x1D548u32 => 0x211Au32,
0x1D549u32 => 0x211Du32,
0x1D551u32 => 0x2124u32,
};
// SAFETY: the only caller passes a non-zero start from MATH_VARIANTS plus an offset of at
// most 57 from SHIFT_AMOUNTS, so 'ch' lies in 0x1D400..=0x1D7FF, and every replacement in
// EXCEPTIONS is in 0x2102..=0x2134. None of these are surrogates or above 0x10FFFF.
return unsafe { char::from_u32_unchecked(
match EXCEPTIONS.get(&ch) {
None => ch,
Some(exception_value) => *exception_value,
}
) }
}
}
}
/// Normalize look-alike operator characters in an `mo`.
///
/// Inside `mover`/`munder`/`munderover`, the various bars, dashes, and combining accents are
/// mapped to their spacing accent forms; elsewhere, the macron-like characters become "_".
/// Afterwards a remaining U+2212 (minus sign) becomes "-". The `mo` must have a parent element.
fn canonicalize_mo_text(&self, mo: Element) {
    let original = as_text(mo);
    let parent = mo.parent().unwrap().element().unwrap();
    let in_accent_position = matches!(name(&parent), "mover" | "munder" | "munderover");

    let positional = if in_accent_position {
        match original {
            "_" | "\u{02C9}"| "\u{0304}"| "\u{0305}"| "\u{2212}" |
            "\u{2010}" | "\u{2011}" | "\u{2012}" | "\u{2013}" | "\u{2014}" | "\u{2015}" => "\u{00AF}",
            "\u{02BC}" => "`",
            "\u{02DC}" => "~",
            "\u{02C6}"| "\u{0302}" => "^",
            "\u{0307}" => "\u{02D9}",
            "\u{0308}" => "¨",
            _ => original,
        }
    } else {
        match original {
            "\u{00AF}"| "\u{02C9}"| "\u{0304}"| "\u{0305}" => "_",
            _ => original,
        }
    };

    let canonical = if positional == "\u{2212}" { "-" } else { positional };
    mo.set_text(canonical);
}
/// Find the operator dictionary entry to use for `mo_node`.
///
/// * `mo_node` -- must be an `mo` (asserted).
/// * `previous_operator`, `previous_node`, `next_node` -- context used to guess whether the
///   operator is prefix, infix, or postfix when `mo_node` has no `form` attribute.
///
/// Returns the entry from `OPERATORS` (or `IMPLIED_CHEMICAL_BOND` when the `CHEMICAL_BOND`
/// attribute is present) matching the operator type, or a default entry for that type when
/// the operator is unknown or an explicit `form` has no matching entry.
fn find_operator<'a>(&self, mo_node: Element<'a>, previous_operator: Option<&'static OperatorInfo>,
previous_node: Option<Element<'a>>, next_node: Option<Element<'a>>) -> &'static OperatorInfo {
assert!( name(&mo_node) == "mo");
let form = mo_node.attribute_value("form");
// an explicit 'form' wins (any value other than prefix/postfix counts as infix); otherwise guess from position
let op_type = match form {
None => compute_type_from_position(self, previous_operator, previous_node, next_node),
Some(form) => match form.to_lowercase().as_str() {
"prefix" => OperatorTypes::PREFIX,
"postfix" => OperatorTypes::POSTFIX,
_ => OperatorTypes::INFIX,
}
};
let found_op_info = if mo_node.attribute_value(CHEMICAL_BOND).is_some() {
Some(&*IMPLIED_CHEMICAL_BOND)
} else {
OPERATORS.get(as_text(mo_node))
};
if found_op_info.is_none() {
return op_not_in_operator_dictionary(op_type);
}
let found_op_info = found_op_info.unwrap();
let matching_op_info = find_operator_info(found_op_info, op_type, form.is_some());
// ILLEGAL_OPERATOR_INFO is the marker 'find_operator_info' returns when an explicit 'form' had no match
if ptr_eq(matching_op_info, *ILLEGAL_OPERATOR_INFO) {
return op_not_in_operator_dictionary(op_type);
} else {
return matching_op_info;
}
/// Guess prefix/infix/postfix from what is on either side of the operator.
fn compute_type_from_position<'a>(context: &CanonicalizeContext, previous_operator: Option<&'static OperatorInfo>, previous_node: Option<Element<'a>>, next_node: Option<Element<'a>>) -> OperatorTypes {
// an operator right before a function name is treated as infix
if next_node.is_some() &&
context.is_function_name(get_possible_embellished_node(next_node.unwrap()), None) == FunctionNameCertainty::True {
return OperatorTypes::INFIX;
}
// an operator right after a function name is treated as prefix
if previous_node.is_some() &&
context.is_function_name(get_possible_embellished_node(previous_node.unwrap()), None) == FunctionNameCertainty::True {
return OperatorTypes::PREFIX;
}
// there is an operand on the left if no operator precedes or the preceding operator is postfix;
// there is an operand on the right if the next node exists and (ignoring embellishments) is not an 'mo'
let operand_on_left = previous_operator.is_none() || previous_operator.unwrap().is_postfix(); let operand_on_right = next_node.is_some() && name(&get_possible_embellished_node(next_node.unwrap())) !="mo";
if operand_on_left && operand_on_right {
return OperatorTypes::INFIX; } else if !operand_on_left && operand_on_right {
return OperatorTypes::PREFIX; } else if operand_on_left && !operand_on_right {
return OperatorTypes::POSTFIX; } else {
// no operand on either side: fall back to infix
return OperatorTypes::INFIX;
}
}
/// Walk `op_info` and up to two entries chained through `next`, returning the first whose
/// type matches `op_type`. With no match, returns `ILLEGAL_OPERATOR_INFO` when the type came
/// from a `form` attribute and `op_info` itself otherwise.
fn find_operator_info(op_info: &OperatorInfo, op_type: OperatorTypes, from_form_attr: bool) -> &OperatorInfo {
if op_info.is_operator_type(op_type) {
return op_info;
} else if let Some(next_op_info) = op_info.next {
if next_op_info.is_operator_type(op_type) {
return next_op_info;
} else if let Some(last_op_info) = next_op_info.next {
if last_op_info.is_operator_type(op_type) {
return last_op_info;
}
}
}
return if from_form_attr {&ILLEGAL_OPERATOR_INFO} else {op_info};
}
/// Default operator info for an operator that has no usable dictionary entry.
fn op_not_in_operator_dictionary(op_type: OperatorTypes) -> &'static OperatorInfo {
return match op_type {
OperatorTypes::PREFIX => &DEFAULT_OPERATOR_INFO_PREFIX,
OperatorTypes::POSTFIX => &DEFAULT_OPERATOR_INFO_POSTFIX,
_ => &DEFAULT_OPERATOR_INFO_INFIX, };
}
}
/// Count the `mo` elements in `remaining_children` whose text equals `vert_bar_ch`.
fn n_vertical_bars_on_right<'a>(&self, remaining_children: &[ChildOfElement], vert_bar_ch: &'a str) -> usize {
    return remaining_children.iter()
        .map(|&child_of_element| as_element(child_of_element))
        .filter(|child| name(child) == "mo" && as_text(*child) == vert_bar_ch)
        .count();
}
/// Decide whether an ambiguous vertical-bar-like operator (see `AMBIGUOUS_OPERATORS`) should
/// be treated as a prefix (open), postfix (close), or infix operator.
///
/// * `original_op` -- returned unchanged when `mo_node` is not an ambiguous operator.
/// * `mo_node` -- the operator being classified.
/// * `next_child` -- the node following `mo_node`, if any.
/// * `parse_stack` -- the current parse stack; it is inspected (and temporarily popped) but
///   left as it was found.
/// * `n_vertical_bars_on_right` -- count supplied by the caller; only its parity is used.
fn determine_vertical_bar_op<'a>(&self, original_op: &'static OperatorInfo, mo_node: Element<'a>,
next_child: Option<Element<'a>>,
parse_stack: &'a mut Vec<StackInfo>,
n_vertical_bars_on_right: usize) -> &'static OperatorInfo {
let operator_str = as_text(mo_node);
let found_op_info = OPERATORS.get(operator_str);
if found_op_info.is_none() {
return original_op;
}
let op = found_op_info.unwrap();
if !AMBIGUOUS_OPERATORS.contains(operator_str) {
return original_op;
};
let operator_versions = OperatorVersions::new(op);
// nothing to the left in this mrow, or the top of the stack isn't an operand: must be an opening bar
if operator_versions.prefix.is_some() &&
(top(parse_stack).last_child_in_mrow().is_none() || !top(parse_stack).is_operand) {
return operator_versions.prefix.unwrap();
}
// is the prefix version of this operator on the top of the stack, or one entry below the top?
let has_left_match = if let Some(op_prefix) = operator_versions.prefix {
if ptr_eq(top(parse_stack).op_pair.op, op_prefix) { true
} else if parse_stack.len() > 2 {
// peek one entry below the top, then restore the stack
let old_top = parse_stack.pop().unwrap();
let top_op = top(parse_stack).op_pair.op; parse_stack.push(old_top);
ptr_eq(top_op, op_prefix)
} else {
false
}
} else {
false
};
// a bar at the end, or one with a matching open bar on the stack, closes
if operator_versions.postfix.is_some() && (next_child.is_none() || has_left_match) {
return operator_versions.postfix.unwrap();
} else if next_child.is_none() {
return if operator_versions.infix.is_none() {op} else {operator_versions.infix.unwrap()};
}
let next_child = next_child.unwrap();
// an odd value of 'n_vertical_bars_on_right' is taken to mean this bar opens
if operator_versions.prefix.is_some() && (n_vertical_bars_on_right & 0x1 != 0) {
return operator_versions.prefix.unwrap(); }
// otherwise decide based on whether the next node is an operator, and of what kind
let next_child = get_possible_embellished_node(next_child);
let next_child_op = if name(&next_child) != "mo" {
None
} else {
let next_next_children = next_child.following_siblings();
let next_next_child = if next_next_children.is_empty() { None } else { Some( as_element(next_next_children[0]) )};
Some( self.find_operator(next_child, operator_versions.infix,
top(parse_stack).last_child_in_mrow(), next_next_child) )
};
// followed by an operator that is neither a left fence nor a prefix operator: this bar closes;
// otherwise (operand, left fence, or prefix operator follows) it is infix when an infix version exists
if next_child_op.is_some() && !next_child_op.unwrap().is_left_fence() && !next_child_op.unwrap().is_prefix() {
if operator_versions.postfix.is_some() {
return operator_versions.postfix.unwrap();
}
} else if operator_versions.infix.is_some() {
return operator_versions.infix.unwrap();
}
// no suitable version exists: use the dictionary entry as is
return op;
}
/// Judge whether `node` followed by `right_sibling` could still be a function call, or is
/// more likely a chemical formula followed by a state.
///
/// Returns `True` (could be a function) unless `node` carries `MAYBE_CHEMISTRY`,
/// `right_sibling` is an `mrow`, and `likely_chem_state` scores it above zero. In that case
/// the score is recorded on `right_sibling`, and the result is `False` when the two
/// likelihoods sum to more than 2 and `Maybe` otherwise.
/// Panics if the parent of `node` is not an `mrow`.
fn is_likely_chemical_state<'a>(&self, node: Element<'a>, right_sibling: Element<'a>) -> FunctionNameCertainty {
    assert_eq!(name(&node.parent().unwrap().element().unwrap()), "mrow");

    let node_chem_likelihood = match node.attribute_value(MAYBE_CHEMISTRY) {
        None => return FunctionNameCertainty::True,
        Some(likelihood) => likelihood,
    };
    if name(&right_sibling) != "mrow" {
        return FunctionNameCertainty::True;
    }

    let state_likelihood = likely_chem_state(right_sibling);
    if state_likelihood <= 0 {
        return FunctionNameCertainty::True;
    }

    right_sibling.set_attribute_value(MAYBE_CHEMISTRY, state_likelihood.to_string().as_str());
    let combined_likelihood = state_likelihood + node_chem_likelihood.parse::<isize>().unwrap();
    return if combined_likelihood > 2 {
        FunctionNameCertainty::False
    } else {
        FunctionNameCertainty::Maybe
    };
}
fn is_function_name<'a>(&self, node: Element<'a>, right_siblings: Option<&[ChildOfElement<'a>]>) -> FunctionNameCertainty {
let base_of_name = get_possible_embellished_node(node);
let node_name = name(&base_of_name);
if node_name != "mi" && node_name != "mtext" {
return FunctionNameCertainty::False;
}
let base_name = as_text(base_of_name).trim();
if base_name.is_empty() {
return FunctionNameCertainty::False;
}
return crate::definitions::DEFINITIONS.with(|defs| {
let defs = defs.borrow();
let names = defs.get_hashset("FunctionNames").unwrap();
if names.contains(&base_name.to_ascii_lowercase()) {
return FunctionNameCertainty::True; }
let shapes = defs.get_hashset("GeometryShapes").unwrap();
if shapes.contains(base_name) {
return FunctionNameCertainty::True; }
if right_siblings.is_none() {
return FunctionNameCertainty::False; }
assert_eq!(name(&node.parent().unwrap().element().unwrap()), "mrow");
let right_siblings = right_siblings.unwrap();
if right_siblings.is_empty() {
return FunctionNameCertainty::False;
}
let first_child = as_element(right_siblings[0]);
let chem_state_certainty = self.is_likely_chemical_state(node, first_child);
if chem_state_certainty != FunctionNameCertainty::True {
return chem_state_certainty;
}
if name(&first_child) == "mrow" && is_left_paren(as_element(first_child.children()[0])) {
return self.is_function_name(node, Some(&first_child.children()));
}
if right_siblings.len() < 2 {
return FunctionNameCertainty::False; }
let first_sibling = as_element(right_siblings[0]);
if name(&first_sibling) != "mo" || !is_left_paren(first_sibling) {
return FunctionNameCertainty::False;
}
let likely_names = defs.get_hashset("LikelyFunctionNames").unwrap();
if likely_names.contains(base_name) {
return FunctionNameCertainty::True; }
if is_single_arg(as_text(first_sibling), &right_siblings[1..]) {
return FunctionNameCertainty::True; };
if is_comma_arg(as_text(first_sibling), &right_siblings[1..]) {
return FunctionNameCertainty::True; };
if node.attribute(MAYBE_CHEMISTRY).is_some() &&
as_element(right_siblings[1]).attribute(MAYBE_CHEMISTRY).is_some() {
return FunctionNameCertainty::False;
}
let mut chars = base_name.chars();
let first_char = chars.next().unwrap(); if chars.next().is_some() && first_char.is_uppercase() {
return FunctionNameCertainty::True;
}
return FunctionNameCertainty::Maybe; });
/// Decides whether the nodes following an opening bracket look like a single argument.
///
/// `open` is the text of the opening bracket; `following_nodes` are the siblings after it.
/// Returns `true` when there is nothing after the bracket, when the bracket is closed
/// immediately, or when exactly one non-`mrow` node sits between the bracket and its match.
fn is_single_arg<'a>(open: &str, following_nodes: &[ChildOfElement<'a>]) -> bool {
    let arg = match following_nodes.first() {
        None => return true,
        Some(&child) => as_element(child),
    };
    if is_matching_right_paren(open, arg) {
        return true;
    }
    if following_nodes.len() < 2 || name(&arg) == "mrow" {
        return false;
    }
    return is_matching_right_paren(open, as_element(following_nodes[1]));
}
/// Decides whether the nodes following an opening bracket contain a `,` before the
/// matching close bracket.
///
/// `open` is the text of the opening bracket; `following_nodes` are the siblings after it.
/// If the node at index 1 is an `mrow`, the search recurses into that `mrow`'s children.
/// Returns `false` when fewer than two nodes follow the bracket.
fn is_comma_arg<'a>(open: &str, following_nodes: &[ChildOfElement<'a>]) -> bool {
    // Index 1 is read below, so anything shorter than two nodes cannot be inspected.
    // (The recursive call can pass the children of an empty `mrow`.)
    if following_nodes.len() < 2 {
        return false;
    }
    // NOTE(review): this inspects index 1 rather than index 0 -- confirm that is intended.
    let first_child = as_element(following_nodes[1]);
    if name(&first_child) == "mrow" {
        return is_comma_arg(open, &first_child.children()[..]);
    }
    // Simple linear scan: a comma seen before the matching close bracket wins.
    for child in following_nodes {
        let child = as_element(*child);
        if name(&child) == "mo" {
            if as_text(child) == "," {
                return true;
            }
            if is_matching_right_paren(open, child) {
                return false;
            }
        }
    }
    return false;
}
/// `true` when `node` is an `mo` whose text is `(` or `[`.
fn is_left_paren(node: Element) -> bool {
    // `as_text` is only called once we know this is an `mo` leaf.
    return name(&node) == "mo" && matches!(as_text(node), "(" | "[");
}
/// `true` when `node` is an `mo` that closes the bracket `open` (`(`…`)` or `[`…`]`).
fn is_matching_right_paren(open: &str, node: Element) -> bool {
    if name(&node) != "mo" {
        return false;
    }
    let close = as_text(node);
    return match open {
        "(" => close == ")",
        "[" => close == "]",
        _ => false,
    };
}
}
/// Decides whether `integer_part` followed by `fraction_children` forms a mixed fraction.
///
/// The fraction may be an `mfrac` of two integers, or a linear `int / int` given either as
/// three sibling nodes or as a three-child `mrow`. The integer part must be an `mn` without a
/// decimal separator, optionally wrapped in an `mrow` with a leading `-`.
/// Errors from `canonicalize_mrows` (used on the linear form) are propagated.
fn is_mixed_fraction<'a>(&self, integer_part: &'a Element<'a>, fraction_children: &[ChildOfElement<'a>]) -> Result<bool> {
    if fraction_children.is_empty() {
        return Ok( false );
    }
    let candidate = as_element(fraction_children[0]);
    let candidate_name = name(&candidate);
    // Cheap structural screen before looking at any text.
    let plausible_shape = match candidate_name {
        "mfrac" => true,
        "mrow" => candidate.children().len() == 3,
        "mn" => fraction_children.len() >= 3,
        _ => false,
    };
    if !plausible_shape || !is_integer_part_ok(integer_part) {
        return Ok( false );
    }
    return if candidate_name == "mfrac" {
        Ok( is_mfrac_ok(&candidate) )
    } else {
        is_linear_fraction(self, fraction_children)
    };

    /// `true` for an `mn` whose text contains no decimal separator.
    fn is_int<'a>(integer_part: &'a Element<'a>) -> bool {
        if name(integer_part) != "mn" {
            return false;
        }
        return !as_text(*integer_part).contains(DECIMAL_SEPARATOR);
    }

    /// Accepts a bare integer, or an `mrow` made of exactly `-` followed by an integer.
    fn is_integer_part_ok<'a>(integer_part: &'a Element<'a>) -> bool {
        if name(integer_part) != "mrow" {
            return is_int(integer_part);
        }
        let children = integer_part.children();
        if children.len() != 2 {
            return false;
        }
        let sign = as_element(children[0]);
        if name(&sign) != "mo" || as_text(sign) != "-" {
            return false;
        }
        let magnitude = as_element(children[1]);
        return is_int(&magnitude);
    }

    /// Accepts an `mfrac` whose two children are both integers.
    fn is_mfrac_ok<'a>(fraction_part: &'a Element<'a>) -> bool {
        let parts = fraction_part.children();
        if parts.len() != 2 {
            return false;
        }
        let numerator = as_element(parts[0]);
        if !is_int(&numerator) {
            return false;
        }
        let denominator = as_element(parts[1]);
        return is_int(&denominator);
    }

    /// Accepts `int / int`, either as the first three nodes or inside a three-child `mrow`.
    fn is_linear_fraction<'a>(canonicalize: &CanonicalizeContext, fraction_children: &[ChildOfElement<'a>]) -> Result<bool> {
        let first_child = as_element(fraction_children[0]);
        if name(&first_child) == "mrow" {
            let grouped = first_child.children();
            return if grouped.len() == 3 {
                is_linear_fraction(canonicalize, &grouped)
            } else {
                Ok( false )
            };
        }
        assert!(fraction_children.len() >= 3);
        if !is_int(&first_child) {
            return Ok( false );
        }
        let slash_part = canonicalize.canonicalize_mrows(as_element(fraction_children[1]))?;
        if name(&slash_part) != "mo" || as_text(slash_part) != "/" {
            return Ok( false );
        }
        let denom = canonicalize.canonicalize_mrows(as_element(fraction_children[2]))?;
        return Ok( is_int(&denom) );
    }
}
/// `true` when two adjacent `mn`s should be separated by an invisible comma.
///
/// That is the case when their parent `mrow` is a script (a non-first child) of an
/// `msub`, `msubsup` or `msup`. Panics if `current` has no element parent/grandparent or if
/// its parent is not an `mrow`.
fn is_implied_comma<'a>(&self, prev: &'a Element<'a>, current: &'a Element<'a>) -> bool {
    let both_numbers = name(prev) == "mn" && name(current) == "mn";
    if !both_numbers {
        return false;
    }
    let enclosing_mrow = current.parent().unwrap().element().unwrap();
    assert_eq!(name(&enclosing_mrow), "mrow");
    let script_container = enclosing_mrow.parent().unwrap().element().unwrap();
    if !matches!(name(&script_container), "msub" | "msubsup" | "msup") {
        return false;
    }
    // The base of a scripted element has no preceding siblings; only scripts qualify.
    return !enclosing_mrow.preceding_siblings().is_empty();
}
/// `true` when `prev` and `current` both carry the `MAYBE_CHEMISTRY` attribute and every
/// sibling before `prev` and after `current` is compatible with chemistry.
///
/// A sibling is compatible when its (possibly embellished) base carries `MAYBE_CHEMISTRY`
/// or is neither an `mi` nor an `mtext`.
fn is_implied_chemical_bond<'a>(&self, prev: &'a Element<'a>, current: &'a Element<'a>) -> bool {
    let both_marked = prev.attribute(MAYBE_CHEMISTRY).is_some()
        && current.attribute(MAYBE_CHEMISTRY).is_some();
    if !both_marked {
        return false;
    }
    let left_ok = prev.preceding_siblings()
        .into_iter()
        .all(|sibling| is_valid_chemistry(as_element(sibling)));
    return left_ok
        && current.following_siblings()
            .into_iter()
            .all(|sibling| is_valid_chemistry(as_element(sibling)));

    /// Chemistry-marked nodes pass; so does anything that is not an identifier/text leaf.
    fn is_valid_chemistry(child: Element) -> bool {
        let base = get_possible_embellished_node(child);
        if base.attribute(MAYBE_CHEMISTRY).is_some() {
            return true;
        }
        let base_name = name(&base);
        return base_name != "mi" && base_name != "mtext";
    }
}
/// `true` when `prev` and `current` are both `mi`s holding a single ASCII capital letter.
fn is_implied_separator<'a>(&self, prev: &'a Element<'a>, current: &'a Element<'a>) -> bool {
    if name(prev) != "mi" || name(current) != "mi" {
        return false;
    }
    let texts = [as_text(*prev), as_text(*current)];
    // `len()` is a byte length, so a length of 1 already implies a single ASCII character.
    return texts.iter().all(|text| text.len() == 1 && is_cap(text));

    /// `true` for a one-byte string that is an ASCII uppercase letter.
    fn is_cap(text: &str) -> bool {
        assert_eq!(text.len(), 1);
        return text.as_bytes()[0].is_ascii_uppercase();
    }
}
/// Shift step of the operator-precedence parse: places `current_child`/`current_op` relative
/// to the entry on top of `parse_stack`.
///
/// Returns the child and operator the caller should add to the (possibly new) top of stack.
/// For a right fence and for a postfix operator the returned child is the `mrow` that was just
/// completed and the returned operator is a fresh `OperatorPair::new()`; otherwise the inputs
/// are returned unchanged.
///
/// Panics if the stack is empty, or if a closing fence completes an `mrow` with more than
/// three children.
fn shift_stack<'s, 'a:'s, 'op:'a>(
        &self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>,
        current_child: Element<'a>,
        current_op: OperatorPair<'op>) -> (Element<'a>, OperatorPair<'op>) {
    let mut new_current_child = current_child;
    let mut new_current_op = current_op.clone();
    let previous_op = top(parse_stack).op_pair.clone();
    // When the operator continues an n-ary run there is nothing to restructure.
    if !previous_op.op.is_nary(current_op.op) {
        let mut top_of_stack = parse_stack.pop().unwrap();
        if top_of_stack.mrow.children().is_empty() || (!top_of_stack.is_operand && !current_op.op.is_right_fence()) {
            // Nothing usable on top as a left operand: keep it and start a new level.
            parse_stack.push( top_of_stack );
            parse_stack.push( StackInfo::new(current_child.document()) );
        } else if current_op.op.is_right_fence() {
            // Close the fenced group; the completed mrow becomes the child handed back to the caller.
            let mrow = top_of_stack.mrow;
            top_of_stack.add_child_to_mrow(current_child, current_op);
            new_current_op = OperatorPair::new();
            new_current_child = mrow;
            let children = mrow.children();
            if children.len() == 2 &&
               ( name(&as_element(children[0])) != "mo" ||
                 !self.find_operator(as_element(children[0]),
                        None, Some(as_element(children[0])), Some(mrow) ).is_left_fence()) {
                // Two children that do not start with a left fence: open a new level for the mrow.
                parse_stack.push( StackInfo::new(mrow.document()) );
            } else if children.len() <= 3 {
                new_current_child = self.potentially_lift_script(mrow)
            } else {
                panic!("Wrong number of children in mrow when handling a close fence");
            }
        } else if current_op.op.is_postfix() {
            // Bind the postfix operator to the last operand in its own mrow.
            let previous_child = top_of_stack.remove_last_operand_from_mrow();
            parse_stack.push(top_of_stack);
            let mut new_top_of_stack = StackInfo::with_op(&current_child.document(), previous_child, current_op.clone());
            new_top_of_stack.add_child_to_mrow(current_child, current_op);
            new_current_child = new_top_of_stack.mrow;
            new_current_op = OperatorPair::new();
        } else {
            // Normal infix case: the last operand moves to a new level headed by this operator.
            let previous_child = top_of_stack.remove_last_operand_from_mrow();
            parse_stack.push(top_of_stack);
            parse_stack.push( StackInfo::with_op(&current_child.document(),previous_child, current_op) );
        }
    }
    return (new_current_child, new_current_op);
}
/// Repeatedly reduces the top of `parse_stack` while its priority is higher than
/// `current_priority`, never reducing the last remaining entry.
fn reduce_stack<'s, 'a:'s, 'op:'a>(&self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>, current_priority: usize) {
    let mut top_priority = top(parse_stack).priority();
    while current_priority < top_priority && parse_stack.len() != 1 {
        top_priority = self.reduce_stack_one_time(parse_stack);
    }
}
/// Pops the top entry of `parse_stack` and appends its `mrow` (or its only child, when the
/// `mrow` has exactly one) to the entry below it.
///
/// Returns the priority of the entry that is now on top. Panics if fewer than two entries
/// are on the stack.
fn reduce_stack_one_time<'s, 'a:'s, 'op:'a>(&self, parse_stack: &'s mut Vec<StackInfo<'a, 'op>>) -> usize {
    let mut finished = parse_stack.pop().unwrap();
    // A one-child mrow is redundant: pass the child itself up instead.
    let completed = if finished.mrow.children().len() == 1 {
        finished.remove_last_operand_from_mrow()
    } else {
        finished.mrow
    };
    let mut parent = parse_stack.pop().unwrap();
    parent.add_child_to_mrow(completed, OperatorPair::new());
    let parent_priority = parent.priority();
    parse_stack.push(parent);
    return parent_priority;
}
/// Decides whether `current_child` continues the argument of a trig function, so that the
/// implied multiplication before it should bind at high priority.
///
/// Returns `false` when `current_child` is not "simple" (per `IsNode::is_simple`), when
/// `previous_child` is bracketed with `()` or `[]`, or when `current_child` is itself a
/// certain function name. Otherwise the decision depends on the operator on top of
/// `parse_stack`.
///
/// Side effect: in the prefix-minus case that follows a trig function application, the stack
/// is reduced once before returning `true`.
fn is_trig_arg<'a, 'op:'a>(&self, previous_child: Element<'a>, current_child: Element<'a>, parse_stack: &mut Vec<StackInfo<'a, 'op>>) -> bool {
    use crate::xpath_functions::IsNode;
    if !IsNode::is_simple(&current_child) {
        return false;
    }
    if IsBracketed::is_bracketed(&previous_child, "(", ")", false, false) ||
       IsBracketed::is_bracketed(&previous_child, "[", "]", false, false) {
        return false;
    }
    if self.is_function_name(current_child, None) == FunctionNameCertainty::True {
        return false;
    }
    let op_on_top = &top(parse_stack).op_pair;
    if ptr_eq(op_on_top.op, *INVISIBLE_FUNCTION_APPLICATION) {
        // Directly after a function application: the first child of that mrow is the function.
        let function_element = as_element(top(parse_stack).mrow.children()[0]);
        return is_trig(function_element);
    }
    if ptr_eq(op_on_top.op, *PREFIX_MINUS) {
        // Look one level below the prefix minus for a function application.
        if parse_stack.len() < 2 {
            return false;
        }
        let next_stack_info = &parse_stack[parse_stack.len()-2];
        if !ptr_eq(next_stack_info.op_pair.op, *INVISIBLE_FUNCTION_APPLICATION) {
            return false;
        }
        let function_element = as_element(next_stack_info.mrow.children()[0]);
        if is_trig(function_element) {
            // Fold the "-operand" level into the level below before continuing the argument.
            self.reduce_stack_one_time(parse_stack);
            return true;
        }
        return false;
    }
    // Already inside a high-priority implied-times run.
    return ptr_eq(op_on_top.op, &*IMPLIED_TIMES_HIGH_PRIORITY);

    /// `true` when the (possibly embellished) base of `node` is an `mi`/`mtext` whose trimmed,
    /// ASCII-lowercased text is listed in the "TrigFunctionNames" definitions set.
    fn is_trig(node: Element) -> bool {
        let base_of_name = get_possible_embellished_node(node);
        let node_name = name(&base_of_name);
        if node_name != "mi" && node_name != "mtext" {
            return false;
        }
        let base_name = as_text(base_of_name).trim();
        if base_name.is_empty() {
            return false;
        }
        return crate::definitions::DEFINITIONS.with(|defs| {
            let defs = defs.borrow();
            let names = defs.get_hashset("TrigFunctionNames").unwrap();
            return names.contains(&base_name.to_ascii_lowercase());
        });
    }
}
fn canonicalize_mrows_in_mrow<'a>(&self, mrow: Element<'a>) -> Result<Element<'a>> {
let saved_mrow_attrs = mrow.attributes();
assert_eq!(name(&mrow), "mrow");
let mut parse_stack = vec![StackInfo::new(mrow.document())];
let mut children = mrow.children();
let num_children = children.len();
for i_child in 0..num_children {
let mut current_child = self.canonicalize_mrows(as_element(children[i_child]))?;
children[i_child] = ChildOfElement::Element( current_child );
let base_of_child = get_possible_embellished_node(current_child);
let mut current_op = OperatorPair::new();
if name(&base_of_child) == "mo" &&
!( base_of_child.children().is_empty() || IS_WHITESPACE.is_match(as_text(base_of_child)) ) { let previous_op = if top(&parse_stack).is_operand {None} else {Some( top(&parse_stack).op_pair.op )};
let next_node = if i_child + 1 < num_children {Some(as_element(children[i_child+1]))} else {None};
current_op = OperatorPair{
ch: as_text(base_of_child),
op: self.find_operator(base_of_child, previous_op,
top(&parse_stack).last_child_in_mrow(), next_node)
};
current_op.op = self.determine_vertical_bar_op(
current_op.op,
base_of_child,
next_node,
&mut parse_stack,
self.n_vertical_bars_on_right(&children[i_child+1..], current_op.ch)
);
} else if top(&parse_stack).last_child_in_mrow().is_some() {
let previous_child = top(&parse_stack).last_child_in_mrow().unwrap();
let base_of_previous_child = get_possible_embellished_node(previous_child);
if name(&base_of_previous_child) != "mo" {
let likely_function_name = self.is_function_name(previous_child, Some(&children[i_child..]));
current_op = if likely_function_name == FunctionNameCertainty::True {
OperatorPair{ ch: "\u{2061}", op: &INVISIBLE_FUNCTION_APPLICATION }
} else if self.is_mixed_fraction(&previous_child, &children[i_child..])? {
OperatorPair{ ch: "\u{2064}", op: &IMPLIED_INVISIBLE_PLUS }
} else if self.is_implied_comma(&previous_child, ¤t_child) {
OperatorPair{ch: "\u{2063}", op: &IMPLIED_INVISIBLE_COMMA }
} else if self.is_implied_chemical_bond(&previous_child, ¤t_child) {
OperatorPair{ch: "\u{2063}", op: &IMPLIED_CHEMICAL_BOND }
} else if self.is_implied_separator(&previous_child, ¤t_child) {
OperatorPair{ch: "\u{2063}", op: &IMPLIED_SEPARATOR_HIGH_PRIORITY }
} else if self.is_trig_arg(base_of_previous_child, base_of_child, &mut parse_stack) {
OperatorPair{ch: "\u{2062}", op: &IMPLIED_TIMES_HIGH_PRIORITY }
} else {
OperatorPair{ ch: "\u{2062}", op: &IMPLIED_TIMES }
};
if name(&base_of_child) == "mo" {
current_op.ch = as_text(base_of_child);
} else {
self.reduce_stack(&mut parse_stack, current_op.op.priority);
let implied_mo = create_mo(current_child.document(), current_op.ch, ADDED_ATTR_VALUE);
if likely_function_name == FunctionNameCertainty::Maybe {
implied_mo.set_attribute_value("data-function-guess", "true");
}
let shift_result = self.shift_stack(&mut parse_stack, implied_mo, current_op.clone());
assert_eq!(implied_mo, shift_result.0);
assert!( ptr_eq(current_op.op, shift_result.1.op) );
let mut top_of_stack = parse_stack.pop().unwrap();
top_of_stack.add_child_to_mrow(implied_mo, current_op);
parse_stack.push(top_of_stack);
current_op = OperatorPair::new();
}
}
}
if !ptr_eq(current_op.op, *ILLEGAL_OPERATOR_INFO) {
if current_op.op.is_left_fence() || current_op.op.is_prefix() {
if top(&parse_stack).is_operand {
let likely_function_name = self.is_function_name(as_element(children[i_child-1]), Some(&children[i_child..]));
let implied_operator = if likely_function_name== FunctionNameCertainty::True {
OperatorPair{ ch: "\u{2061}", op: &INVISIBLE_FUNCTION_APPLICATION }
} else {
OperatorPair{ ch: "\u{2062}", op: &IMPLIED_TIMES }
};
let implied_mo = create_mo(current_child.document(), implied_operator.ch, ADDED_ATTR_VALUE);
if likely_function_name == FunctionNameCertainty::Maybe {
implied_mo.set_attribute_value("data-function-guess", "true");
}
let shift_result = self.shift_stack(&mut parse_stack, implied_mo, implied_operator.clone());
assert_eq!(implied_mo, shift_result.0);
assert!( ptr_eq(implied_operator.op, shift_result.1.op) );
let mut top_of_stack = parse_stack.pop().unwrap();
top_of_stack.add_child_to_mrow(implied_mo, implied_operator);
parse_stack.push(top_of_stack);
}
parse_stack.push( StackInfo::new(current_child.document()) );
} else {
if current_op.ch == "/" && top(&parse_stack).op_pair.ch == "\u{2064}" {
current_op.op = &IMPLIED_PLUS_SLASH_HIGH_PRIORITY;
}
self.reduce_stack(&mut parse_stack, current_op.op.priority);
let shift_result = self.shift_stack(&mut parse_stack, current_child, current_op);
current_child = shift_result.0;
current_op = shift_result.1;
}
}
let mut top_of_stack = parse_stack.pop().unwrap();
top_of_stack.add_child_to_mrow(current_child, current_op);
parse_stack.push(top_of_stack);
}
self.reduce_stack(&mut parse_stack, LEFT_FENCEPOST.priority);
let mut top_of_stack = parse_stack.pop().unwrap();
assert_eq!(parse_stack.len(), 0);
let mut parsed_mrow = top_of_stack.mrow;
assert_eq!( name(&top_of_stack.mrow), "mrow");
if parsed_mrow.children().len() == 1 {
parsed_mrow = top_of_stack.remove_last_operand_from_mrow();
}
parsed_mrow.remove_attribute(CHANGED_ATTR);
return Ok( add_attrs(parsed_mrow, saved_mrow_attrs) );
}
}
/// Returns the last (top) entry of the parse stack.
///
/// Panics with an explicit message if the stack is empty, which would violate the parser's
/// invariant that at least one entry is always present.
fn top<'s, 'a:'s, 'op:'a>(vec: &'s[StackInfo<'a, 'op>]) -> &'s StackInfo<'a, 'op> {
    return vec.last().expect("top: parse stack must never be empty");
}
/// Prunes and then extends the attributes of `mathml`, returning the same element.
///
/// Existing attributes are removed unless their local name starts with `data-`, starts with
/// `on`, or is listed in `GLOBAL_ATTRS`. Every attribute in `attrs` is then set on the
/// element, overwriting any attribute of the same name.
fn add_attrs<'a>(mathml: Element<'a>, attrs: Vec<Attribute>) -> Element<'a> {
    static GLOBAL_ATTRS: phf::Set<&str> = phf_set! {
        "class", "dir", "displaystyle", "id", "mathbackground", "mathcolor", "mathsize",
        "mathvariant", "nonce", "scriptlevel", "style", "tabindex",
        "intent", "arg",
    };
    let is_preserved = |attr_name: &str| {
        attr_name.starts_with("data-") || GLOBAL_ATTRS.contains(attr_name) || attr_name.starts_with("on")
    };
    for existing in mathml.attributes() {
        if !is_preserved(existing.name().local_part()) {
            mathml.remove_attribute(existing.name());
        }
    }
    for incoming in attrs {
        mathml.set_attribute_value(incoming.name(), incoming.value());
    }
    return mathml;
}
/// Returns the local part of the element's qualified name (e.g. `"mrow"`).
pub fn name<'a>(node: &'a Element<'a>) -> &str {
    let qualified_name = node.name();
    return qualified_name.local_part();
}
/// Unwraps a `ChildOfElement` that is expected to be an element.
///
/// Panics (reporting any text content found) when the child is not an element.
pub fn as_element(child: ChildOfElement) -> Element {
    if let ChildOfElement::Element(element) = child {
        return element;
    }
    panic!("as_element: internal error -- found non-element child (text? '{:?}')", child.text());
}
/// Returns the text content of a leaf element, or `""` when it has no children.
///
/// Panics if `leaf_child` is neither a leaf (per `is_leaf`) nor named
/// `crate::infer_intent::LITERAL_NAME`, if it has more than one child, or if its only child
/// is not a text node.
pub fn as_text(leaf_child: Element) -> &str {
    assert!(is_leaf(leaf_child) || name(&leaf_child) == crate::infer_intent::LITERAL_NAME);
    let contents = leaf_child.children();
    if contents.is_empty() {
        return "";
    }
    assert!(contents.len() == 1);
    if let ChildOfElement::Text(text_node) = contents[0] {
        return text_node.text();
    }
    panic!("as_text: internal error -- found non-text child of leaf element");
}
/// Builds a short `name<detail>` description of an element: for a leaf the detail is its text
/// (passed through `show_invisible_op_char`), otherwise it is the number of children.
#[allow(dead_code)] fn element_summary(mathml: Element) -> String {
    let detail = if is_leaf(mathml) {
        show_invisible_op_char(as_text(mathml)).to_string()
    } else {
        mathml.children().len().to_string()
    };
    return format!("{}<{}>", name(&mathml), detail);
}
/// Creates a new `mo` element in `doc` whose text is `ch` and whose `CHANGED_ATTR`
/// attribute is set to `attr_value`.
fn create_mo<'a, 'd:'a>(doc: Document<'d>, ch: &'a str, attr_value: &str) -> Element<'d> {
    let mo = create_mathml_element(&doc, "mo");
    mo.set_attribute_value(CHANGED_ATTR, attr_value);
    mo.append_child(doc.create_text(ch));
    return mo;
}
/// `true` when `node` is a scripted or under/over element
/// (`msub`, `msup`, `msubsup`, `munder`, `mover`, `munderover`, `mmultiscripts`).
fn is_adorned_node<'a>(node: &'a Element<'a>) -> bool {
    return matches!(
        name(node),
        "msub" | "msup" | "msubsup" | "munder" | "mover" | "munderover" | "mmultiscripts"
    );
}
/// Strips script/under/over wrappers by repeatedly descending into the first child, and
/// returns the first element found that is not an adorned node (possibly `node` itself).
pub fn get_possible_embellished_node(node: Element) -> Element {
    let mut base = node;
    loop {
        if !is_adorned_node(&base) {
            return base;
        }
        base = as_element(base.children()[0]);
    }
}
/// Debugging aid: makes an invisible operator printable.
///
/// If `ch` starts with U+2061..U+2064 (function application, invisible times, invisible
/// separator, invisible plus) or U+E000, the matching XML numeric character reference
/// (e.g. `"&#x2061;"`) is returned. Any other input, including the empty string, is
/// returned unchanged.
#[allow(dead_code)] fn show_invisible_op_char(ch: &str) -> &str {
    // Only the first character is inspected; `None` (empty input) falls through unchanged.
    return match ch.chars().next() {
        Some('\u{2061}') => "&#x2061;",
        Some('\u{2062}') => "&#x2062;",
        Some('\u{2063}') => "&#x2063;",
        Some('\u{2064}') => "&#x2064;",
        Some('\u{E000}') => "&#xE000;",
        _ => ch,
    };
}
#[cfg(test)]
mod canonicalize_tests {
#[allow(unused_imports)]
use super::super::init_logger;
use super::super::are_strs_canonically_equal;
use super::*;
use sxd_document::parser;
// An `mrow` holding a prefix minus and an identifier is compared against itself.
#[test]
fn canonical_same() {
let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
assert!(are_strs_canonically_equal(target_str, target_str));
}
// `mathvariant` handling for the common variants (normal, italic, bold, double-struck,
// fraktur, bold-fraktur, script, bold-script): the expected output keeps the attribute and
// replaces the text with the corresponding styled characters where the fixture shows them.
#[test]
fn plane1_common() {
let test_str = "<math>
<mi mathvariant='normal'>sin</mi> <mo>,</mo> <!-- shouldn't change -->
<mi mathvariant='italic'>bB4</mi> <mo>,</mo> <!-- shouldn't change -->
<mi mathvariant='bold'>a</mi> <mo>,</mo> <!-- single char id tests -->
<mi mathvariant='bold'>Z</mi> <mo>,</mo>
<mn mathvariant='bold'>19=𝟗</mn> <mo>,</mo> <!-- '=' and plane1 shouldn't change -->
<mn mathvariant='double-struck'>024689</mn> <mo>,</mo> <!-- '=' and plane1 shouldn't change -->
<mi mathvariant='double-struck'>yzCHNPQRZ</mi> <mo>,</mo>
<mi mathvariant='fraktur'>0yACHIRZ</mi> <mo>,</mo> <!-- 0 stays as ASCII -->
<mi mathvariant='bold-fraktur'>nC</mi> <mo>,</mo>
<mi mathvariant='script'>ABEFHILMRegow</mi> <mo>,</mo>
<mi mathvariant='bold-script'>fG*</mi> <!-- '*' shouldn't change -->
</math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi mathvariant='normal'>sin</mi>
<mo >,</mo>
<mi mathvariant='italic'>bB4</mi>
<mo>,</mo>
<mi mathvariant='bold'>𝐚</mi>
<mo>,</mo>
<mi mathvariant='bold'>𝐙</mi>
<mo>,</mo>
<mn mathvariant='bold'>𝟏𝟗=𝟗</mn>
<mo>,</mo>
<mn mathvariant='double-struck'>𝟘𝟚𝟜𝟞𝟠𝟡</mn>
<mo>,</mo>
<mi mathvariant='double-struck'>𝕪𝕫ℂℍℕℙℚℝℤ</mi>
<mo>,</mo>
<mi mathvariant='fraktur'>0𝔶𝔄ℭℌℑℜℨ</mi>
<mo>,</mo>
<mi mathvariant='bold-fraktur'>𝖓𝕮</mi>
<mo>,</mo>
<mi mathvariant='script'>𝒜ℬℰℱℋℐℒℳℛℯℊℴ𝓌</mi>
<mo>,</mo>
<mi mathvariant='bold-script'>𝓯𝓖*</mi>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `mathvariant` handling for the sans-serif family and monospace on ASCII letters and digits.
#[test]
fn plane1_font_styles() {
let test_str = "<math>
<mi mathvariant='sans-serif'>aA09=</mi> <mo>,</mo> <!-- '=' shouldn't change -->
<mi mathvariant='bold-sans-serif'>zZ09</mi> <mo>,</mo>
<mi mathvariant='sans-serif-italic'>azAZ09</mi> <mo>,</mo> <!-- italic digits don't exist: revert to sans-serif -->
<mi mathvariant='sans-serif-bold-italic'>AZaz09</mi> <mo>,</mo> <!-- italic digits don't exist: revert to just bold -->
<mi mathvariant='monospace'>aA09</mi>
</math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi mathvariant='sans-serif'>𝖺𝖠𝟢𝟫=</mi>
<mo>,</mo>
<mi mathvariant='bold-sans-serif'>𝘇𝗭𝟬𝟵</mi>
<mo>,</mo>
<mi mathvariant='sans-serif-italic'>𝘢𝘻𝘈𝘡𝟢𝟫</mi>
<mo>,</mo>
<mi mathvariant='sans-serif-bold-italic'>𝘼𝙕𝙖𝙯𝟬𝟵</mi>
<mo>,</mo>
<mi mathvariant='monospace'>𝚊𝙰𝟶𝟿</mi>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `mathvariant` handling for Greek letters and related symbols under the common variants;
// the fixture's inline comments mark which variants are expected to leave the text unchanged.
#[test]
fn plane1_greek() {
let test_str = "<math>
<mi mathvariant='normal'>ΑΩαω∇∂ϵ=</mi> <mo>,</mo> <!-- shouldn't change -->
<mi mathvariant='italic'>ϴΑΩαω∇∂ϵ</mi> <mo>,</mo>
<mi mathvariant='bold'>ΑΩαωϝϜ</mi> <mo>,</mo>
<mi mathvariant='double-struck'>Σβ∇</mi> <mo>,</mo> <!-- shouldn't change -->
<mi mathvariant='fraktur'>ΞΦλϱ</mi> <mo>,</mo> <!-- shouldn't change -->
<mi mathvariant='bold-fraktur'>ψΓ</mi> <mo>,</mo> <!-- map to bold -->
<mi mathvariant='script'>μΨ</mi> <mo>,</mo> <!-- shouldn't change -->
<mi mathvariant='bold-script'>Σπ</mi> <!-- map to bold -->
</math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi mathvariant='normal'>ΑΩαω∇∂ϵ=</mi>
<mo>,</mo>
<mi mathvariant='italic'>𝛳𝛢𝛺𝛼𝜔𝛻𝜕𝜖</mi>
<mo>,</mo>
<mi mathvariant='bold'>𝚨𝛀𝛂𝛚𝟋𝟊</mi>
<mo>,</mo>
<mi mathvariant='double-struck'>Σβ∇</mi>
<mo>,</mo>
<mi mathvariant='fraktur'>ΞΦλϱ</mi>
<mo>,</mo>
<mi mathvariant='bold-fraktur'>𝛙𝚪</mi>
<mo>,</mo>
<mi mathvariant='script'>μΨ</mi>
<mo>,</mo>
<mi mathvariant='bold-script'>𝚺𝛑</mi>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `mathvariant` handling for Greek letters under the sans-serif family and monospace.
#[test]
fn plane1_greek_font_styles() {
let test_str = "<math>
<mi mathvariant='sans-serif'>ΑΩαω∇∂ϵ=</mi> <mo>,</mo> <!-- '=' shouldn't change -->
<mi mathvariant='bold-sans-serif'>ϴ0ΑΩαω∇∂ϵ</mi> <mo>,</mo>
<mi mathvariant='sans-serif-italic'>aΑΩαω∇∂ϵ</mi> <mo>,</mo> <!-- italic digits don't exist: revert to sans-serif -->
<mi mathvariant='sans-serif-bold-italic'>ZΑΩαωϰϕϱϖ</mi> <mo>,</mo> <!-- italic digits don't exist: revert to just bold -->
<mi mathvariant='monospace'>zΑΩαω∇∂</mi>
</math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi mathvariant='sans-serif'>ΑΩαω∇∂ϵ=</mi>
<mo>,</mo>
<mi mathvariant='bold-sans-serif'>𝝧𝟬𝝖𝝮𝝰𝞈𝝯𝞉𝞊</mi>
<mo>,</mo>
<mi mathvariant='sans-serif-italic'>𝘢ΑΩαω∇∂ϵ</mi>
<mo>,</mo>
<mi mathvariant='sans-serif-bold-italic'>𝙕𝞐𝞨𝞪𝟂𝟆𝟇𝟈𝟉</mi>
<mo>,</mo>
<mi mathvariant='monospace'>𝚣ΑΩαω∇∂</mi>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Runs of ASCII hyphens used as leaf text: "--" in an `mi` and "----" in an `mtext` are each
// expected to become a single dash character, as shown in the target.
#[test]
fn short_and_long_dash() {
let test_str = "<math><mi>x</mi> <mo>=</mo> <mi>--</mi><mo>+</mo><mtext>----</mtext></math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi>x</mi>
<mo>=</mo>
<mrow data-changed='added'>
<mi>—</mi>
<mo>+</mo>
<mtext>―</mtext>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// An unknown element (`foo`) inside `math` must make `canonicalize` return an error.
#[test]
fn illegal_mathml_element() {
use crate::interface::*;
let test_str = "<math><foo><mi>f</mi></foo></math>";
let package1 = &parser::parse(test_str).expect("Failed to parse test input");
let mathml = get_element(package1);
// The input is trimmed before being canonicalized.
trim_element(&mathml);
assert!(canonicalize(mathml).is_err());
}
// `mfenced` around an empty `mrow`: expected to become "(" ")" marked `from_mfenced`, with an
// added invisible operator between `f` and the fenced group.
#[test]
fn mfenced_no_children() {
let test_str = "<math><mi>f</mi><mfenced><mrow/></mfenced></math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow>
<mo data-changed='from_mfenced'>(</mo>
<mo data-changed='from_mfenced'>)</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `mfenced` with explicit `open`/`close` attributes and one child: the given brackets are used.
#[test]
fn mfenced_one_child() {
let test_str = "<math><mi>f</mi><mfenced open='[' close=']'><mi>x</mi></mfenced></math>";
let target_str = " <math>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow>
<mo data-changed='from_mfenced'>[</mo>
<mi>x</mi>
<mo data-changed='from_mfenced'>]</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `mfenced` without attributes around an `mrow` that already contains explicit commas:
// parentheses are added and the inner `mrow` is kept.
#[test]
fn mfenced_no_attrs() {
let test_str = "<math><mi>f</mi><mfenced><mrow><mi>x</mi><mo>,</mo><mi>y</mi><mo>,</mo><mi>z</mi></mrow></mfenced></math>";
let target_str = " <math>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow>
<mo data-changed='from_mfenced'>(</mo>
<mrow>
<mi>x</mi>
<mo>,</mo>
<mi>y</mi>
<mo>,</mo>
<mi>z</mi>
</mrow>
<mo data-changed='from_mfenced'>)</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `mfenced` with `separators=',;'` and four children: the target shows the separators
// inserted as ",", ";", "," and the result grouped around the ";".
#[test]
fn mfenced_with_separators() {
let test_str = "<math><mi>f</mi><mfenced separators=',;'><mi>x</mi><mi>y</mi><mi>z</mi><mi>a</mi></mfenced></math>";
let target_str = "<math>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow>
<mo data-changed='from_mfenced'>(</mo>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>x</mi>
<mo data-changed='from_mfenced'>,</mo>
<mi>y</mi>
</mrow>
<mo data-changed='from_mfenced'>;</mo>
<mrow data-changed='added'>
<mi>z</mi>
<mo data-changed='from_mfenced'>,</mo>
<mi>a</mi>
</mrow>
</mrow>
<mo data-changed='from_mfenced'>)</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// An `mrow` whose only child is another `mrow` is expected to collapse to the inner `mrow`.
#[test]
fn canonical_one_element_mrow_around_mrow() {
let test_str = "<math><mrow><mrow><mo>-</mo><mi>a</mi></mrow></mrow></math>";
let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// An `mrow` wrapping a single `mo` is expected to be replaced by that `mo`.
#[test]
fn canonical_one_element_mrow_around_mo() {
let test_str = "<math><mrow><mrow><mo>-</mo></mrow><mi>a</mi></mrow></math>";
let target_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Flat `c + x y`: an invisible operator is added between `x` and `y`, and that product is
// grouped as the right operand of `+`.
#[test]
fn canonical_flat_to_times_and_plus() {
let test_str = "<math><mi>c</mi><mo>+</mo><mi>x</mi><mi>y</mi></math>";
let target_str = "<math>
<mrow data-changed='added'><mi>c</mi><mo>+</mo>
<mrow data-changed='added'><mi>x</mi><mo data-changed='added'>⁢</mo><mi>y</mi></mrow>
</mrow></math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `- a - b`: the leading minus is grouped with `a` in an added `mrow`; the second minus stays
// at the outer level.
#[test]
fn canonical_prefix_and_infix() {
let test_str = "<math><mrow><mo>-</mo><mi>a</mi><mo>-</mo><mi>b</mi></mrow></math>";
let target_str = "<math>
<mrow>
<mrow data-changed='added'>
<mo>-</mo>
<mi>a</mi>
</mrow>
<mo>-</mo>
<mi>b</mi>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `sin(x) + f(x) + t(x)` (the last with its parentheses already inside an `mrow`): each name
// is expected to be followed by an added invisible operator and a grouped parenthesized argument.
#[test]
fn function_with_single_arg() {
let test_str = "<math><mrow>
<mi>sin</mi><mo>(</mo><mi>x</mi><mo>)</mo>
<mo>+</mo>
<mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo>
<mo>+</mo>
<mi>t</mi><mrow><mo>(</mo><mi>x</mi><mo>)</mo></mrow>
</mrow></math>";
let target_str = "<math>
<mrow>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mi>x</mi>
<mo>)</mo>
</mrow>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mi>x</mi>
<mo>)</mo>
</mrow>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>t</mi>
<mo data-changed='added'>⁡</mo>
<mrow>
<mo>(</mo>
<mi>x</mi>
<mo>)</mo>
</mrow>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `P(A ∩ B)`: the added invisible operator after `P` is expected to carry
// `data-function-guess='true'`.
#[test]
fn maybe_function() {
let test_str = "<math>
<mrow>
<mi>P</mi>
<mo>(</mo>
<mi>A</mi>
<mo>∩</mo>
<mi>B</mi>
<mo>)</mo>
</mrow>
</math>";
let target_str = "<math>
<mrow>
<mi>P</mi>
<mo data-function-guess='true' data-changed='added'>⁢</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mi>A</mi>
<mo>∩</mo>
<mi>B</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `sin(x+y) + f(x+y) + t(x+y) + w(x,y)`: per the target, only the operator added after `t`
// carries `data-function-guess='true'`.
#[test]
fn function_with_multiple_args() {
let test_str = "<math>
<mi>sin</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
<mo>+</mo>
<mi>f</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
<mo>+</mo>
<mi>t</mi><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo>
<mo>+</mo>
<mi>w</mi><mo>(</mo><mi>x</mi><mo>,</mo><mi>y</mi><mo>)</mo>
</math>";
let target_str = " <math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>t</mi>
<mo data-changed='added' data-function-guess='true'>⁢</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>w</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>,</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `sin x + f x + t x` without parentheses: each pair is grouped with an added invisible
// operator between the two identifiers.
#[test]
fn function_with_no_args() {
let test_str = "<math><mrow>
<mi>sin</mi><mi>x</mi>
<mo>+</mo>
<mi>f</mi><mi>x</mi>
<mo>+</mo>
<mi>t</mi><mi>x</mi>
</mrow></math>";
let target_str = " <math>
<mrow>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mi>x</mi>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>t</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `f(x) y`: `f(x)` is grouped first, then an added invisible operator joins it with `y`.
#[test]
fn function_call_vs_implied_times() {
let test_str = "<math><mi>f</mi><mo>(</mo><mi>x</mi><mo>)</mo><mi>y</mi></math>";
let target_str = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>f</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'> <mo>(</mo> <mi>x</mi> <mo>)</mo> </mrow>
</mrow>
<mo data-changed='added'>⁢</mo>
<mi>y</mi> </mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Mixed fraction `2 3/4` written with `mfrac`: an invisible operator is added between the
// integer and the fraction.
#[test]
fn implied_plus() {
let test_str = "<math><mrow>
<mn>2</mn><mfrac><mn>3</mn><mn>4</mn></mfrac>
</mrow></math>";
let target_str = "<math>
<mrow>
<mn>2</mn>
<mo data-changed='added'>⁤</mo>
<mfrac>
<mn>3</mn>
<mn>4</mn>
</mfrac>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Mixed fraction written linearly as `2 3 / 4`: an invisible operator is added after the
// integer and `3 / 4` is grouped into an added `mrow`.
#[test]
fn implied_plus_linear() {
let test_str = "<math><mrow>
<mn>2</mn><mn>3</mn><mo>/</mo><mn>4</mn>
</mrow></math>";
// The expected `mrow` start tag previously had a stray extra '>' after it, which put a
// text node inside the `mrow`; it has been removed so the fixture is element-only.
let target_str = "<math>
<mrow>
<mn>2</mn>
<mo data-changed='added'>⁤</mo>
<mrow data-changed='added'>
<mn>3</mn>
<mo>/</mo>
<mn>4</mn>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Mixed fraction where `3 / 4` is already grouped in an `mrow`: only the invisible operator
// after the integer is added.
#[test]
fn implied_plus_linear2() {
let test_str = "<math><mrow>
<mn>2</mn><mrow><mn>3</mn><mo>/</mo><mn>4</mn></mrow>
</mrow></math>";
let target_str = "<math>
<mrow>
<mn>2</mn>
<mo data-changed='added'>⁤</mo>
<mrow>
<mn>3</mn>
<mo>/</mo>
<mn>4</mn>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Two adjacent numbers in the subscript of an `msub`: an invisible operator is added
// between them.
#[test]
fn implied_comma() {
let test_str = "<math><msub><mi>b</mi><mrow><mn>1</mn><mn>2</mn></mrow></msub></math>";
let target_str = "<math>
<msub><mi>b</mi><mrow><mn>1</mn><mo data-changed='added'>⁣</mo><mn>2</mn></mrow></msub>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// Same adjacent numbers as in `implied_comma`, but inside an `mfrac` denominator: the added
// invisible operator is a different character from the one used in the `msub` case.
#[test]
fn no_implied_comma() {
let test_str = "<math><mfrac><mi>b</mi><mrow><mn>1</mn><mn>2</mn></mrow></mfrac></math>";
let target_str = "<math>
<mfrac><mi>b</mi><mrow><mn>1</mn><mo data-changed='added'>⁢</mo><mn>2</mn></mrow></mfrac>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `|x| + |a+1|`: each pair of vertical bars is expected to be grouped as a fenced `mrow`.
#[test]
fn vertical_bars() {
let test_str = "<math>
<mo>|</mo> <mi>x</mi> <mo>|</mo><mo>+</mo><mo>|</mo>
<mi>a</mi><mo>+</mo><mn>1</mn> <mo>|</mo>
</math>";
let target_str = " <math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mo>|</mo>
<mi>x</mi>
<mo>|</mo>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mo>|</mo>
<mrow data-changed='added'>
<mi>a</mi>
<mo>+</mo>
<mn>1</mn>
</mrow>
<mo>|</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `|x|y|z|`: expected to parse as `|x|`, `y`, `|z|` joined by added invisible operators.
#[test]
fn vertical_bars_nested() {
let test_str = "<math><mo>|</mo><mi>x</mi><mo>|</mo><mi>y</mi><mo>|</mo><mi>z</mi><mo>|</mo></math>";
let target_str = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mo>|</mo>
<mi>x</mi>
<mo>|</mo>
</mrow>
<mo data-changed='added'>⁢</mo>
<mi>y</mi>
<mo data-changed='added'>⁢</mo>
<mrow data-changed='added'>
<mo>|</mo>
<mi>z</mi>
<mo>|</mo>
</mrow>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
// `{ x | x ∈ S }`: the single vertical bar inside braces is expected to act as an infix
// operator between `x` and `x ∈ S`.
#[test]
fn vertical_bar_such_that() {
let test_str = "<math>
<mo>{</mo><mi>x</mi><mo>|</mo><mi>x</mi><mo>∈</mo><mi>S</mi><mo>}</mo>
</math>";
let target_str = "<math>
<mrow data-changed='added'>
<mo>{</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>|</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>∈</mo>
<mi>S</mi>
</mrow>
</mrow>
<mo>}</mo>
</mrow>
</math>";
assert!(are_strs_canonically_equal(test_str, target_str));
}
/// A lone bar between `x+y` and `12`.
/// The expected form replaces `|` with the "divides" character and groups
/// `x+y` as its left operand. The test is currently ignored.
#[test]
#[ignore]
fn vertical_bar_divides() {
    let input = "<math>
<mi>x</mi><mo>+</mo><mi>y</mi> <mo>|</mo><mn>12</mn>
</math>";
    let expected = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>∣ <!--divides--></mo>
<mn>12</mn>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Function names marked up as `<mo>` (`sin`, `cos`, and `lim` inside an
/// `<munder>`). The expected form turns each into an `<mi>` and groups it
/// with its argument through an added invisible `<mo>`.
#[test]
fn trig_mo() {
    let input = "<math><mo>sin</mo><mi>x</mi>
<mo>+</mo><mo>cos</mo><mi>y</mi>
<mo>+</mo><munder><mo>lim</mo><mi>D</mi></munder><mi>y</mi>
</math>";
    let expected = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mi>x</mi>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<mi>cos</mi>
<mo data-changed='added'>⁡</mo>
<mi>y</mi>
</mrow>
<mo>+</mo>
<mrow data-changed='added'>
<munder>
<mi>lim</mi>
<mi>D</mi>
</munder>
<mo data-changed='added'>⁡</mo>
<mi>y</mi>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `sin` followed by `-2πx`.
/// The whole product, including the prefix minus on `2`, is expected to
/// become the single argument of `sin`.
#[test]
fn trig_negative_args() {
    let input = "<math><mi>sin</mi><mo>-</mo><mn>2</mn><mi>π</mi><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mo>-</mo>
<mn>2</mn>
</mrow>
<mo data-changed='added'>⁢</mo>
<mi>π</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `ker` followed by `-2πx`.
/// Unlike the `sin` case, only `-2` is expected to be the argument of
/// `ker`; `π` and `x` are multiplied onto the result.
#[test]
fn not_trig_negative_args() {
    let input = "<math><mi>ker</mi><mo>-</mo><mn>2</mn><mi>π</mi><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>ker</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo>-</mo>
<mn>2</mn>
</mrow>
</mrow>
<mo data-changed='added'>⁢</mo>
<mi>π</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `sin` followed by `2πx`.
/// The full product `2πx` is expected to be grouped as the argument of `sin`.
#[test]
fn trig_args() {
    let input = "<math><mi>sin</mi><mn>2</mn><mi>π</mi><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mn>2</mn>
<mo data-changed='added'>⁢</mo>
<mi>π</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `ker` followed by `2πx`.
/// Only `2` is expected to be taken as the argument of `ker`; the remaining
/// factors are multiplied onto the grouped `ker 2`.
#[test]
fn not_trig_args() {
    let input = "<math><mi>ker</mi><mn>2</mn><mi>π</mi><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>ker</mi>
<mo data-changed='added'>⁡</mo>
<mn>2</mn>
</mrow>
<mo data-changed='added'>⁢</mo>
<mi>π</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `sin x cos y` as four adjacent `<mi>` elements.
/// Each function is expected to take only the identifier after it, and the
/// two applications are joined by an added invisible operator.
#[test]
fn trig_trig() {
    let input = "<math><mi>sin</mi><mi>x</mi><mi>cos</mi><mi>y</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<mi>x</mi>
</mrow>
<mo data-changed='added'>⁢</mo>
<mrow data-changed='added'>
<mi>cos</mi>
<mo data-changed='added'>⁡</mo>
<mi>y</mi>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `(sin - cos) x`: function names used as operands of a binary minus.
/// No invisible operator is expected after `sin` or `cos`; the parenthesized
/// difference is joined to `x` by a single added invisible `<mo>`.
#[test]
fn trig_function_composition() {
    let input = "<math><mo>(</mo><mi>sin</mi><mo>-</mo><mi>cos</mi><mo>)</mo><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mi>sin</mi>
<mo>-</mo>
<mi>cos</mi>
</mrow>
<mo>)</mo>
</mrow>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A whitespace-only `<mtext>` after an `<mi>`.
/// The expected form has no `<mtext>`; the `<mi>` content carries trailing
/// whitespace instead.
#[test]
fn mtext_whitespace_string() {
    let input = "<math><mi>t</mi><mtext>  </mtext></math>";
    let expected = "<math><mi>t </mi></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A whitespace-only `<mtext>` before an `<mi>`.
/// The expected form has no `<mtext>`; the `<mi>` content carries leading
/// whitespace instead.
#[test]
fn mtext_whitespace_string_before() {
    let input = "<math><mtext>  </mtext><mi>t</mi></math>";
    let expected = "<math><mi> t</mi></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `t`, a whitespace-only `<mtext>`, then a parenthesized `x+y`.
/// The whitespace is expected to end up inside the `<mi>`, and the added
/// invisible `<mo>` carries `data-function-guess='true'`.
#[test]
fn mtext_whitespace_1() {
    let input = "<math><mi>t</mi><mtext>  </mtext>
<mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
    let expected = " <math>
<mrow data-changed='added'>
<mi>t </mi>
<mo data-changed='added' data-function-guess='true'>⁢</mo>
<mrow>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `f`, a whitespace-only `<mtext>`, then a parenthesized `x+y`.
/// The whitespace is expected to end up inside the `<mi>`; the added
/// invisible `<mo>` has no `data-function-guess` attribute here.
#[test]
fn mtext_whitespace_2() {
    let input = "<math><mi>f</mi><mtext>  </mtext>
<mrow><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
    let expected = " <math>
<mrow data-changed='added'>
<mi>f </mi>
<mo data-changed='added'>⁡</mo>
<mrow>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A whitespace-only `<mtext>` as the first child of the parenthesized
/// `<mrow>`. It is expected to disappear entirely: `t` stays unchanged and
/// the `<mrow>` starts with `(`.
#[test]
fn remove_mtext_whitespace_3() {
    let input = "<math><mi>t</mi>
<mrow><mtext> </mtext><mo>(</mo><mi>x</mi><mo>+</mo><mi>y</mi><mo>)</mo></mrow></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mi>t</mi>
<mo data-changed='added' data-function-guess='true'>⁢</mo>
<mrow>
<mo>(</mo>
<mrow data-changed='added'>
<mi>x</mi>
<mo>+</mo>
<mi>y</mi>
</mrow>
<mo>)</mo>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// An `<mfrac>` whose two children are only spacing (`<mspace>` in an
/// `<mrow>`, and a whitespace `<mtext>`). Both slots are expected to remain
/// as `<mtext>` tagged `data-changed='empty_content'`, the first keeping
/// the `width` attribute.
#[test]
fn do_not_remove_any_whitespace() {
    let input = "<math><mfrac>
<mrow><mspace width='3em'/></mrow>
<mtext> </mtext>
</mfrac></math>";
    let expected = " <math> <mfrac>
<mtext width='3em' data-changed='empty_content'> </mtext>
<mtext data-changed='empty_content'> </mtext>
</mfrac> </math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A whitespace-only `<mo>` between `cos` and `x`.
/// The expected form has no such `<mo>`: the whitespace is attached to the
/// `cos` leaf and an invisible `<mo>` is added before `x`.
#[test]
fn remove_mo_whitespace() {
    let input = "<math><mi>cos</mi><mo> </mo><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mi>cos </mi>
<mo data-changed='added'>⁡</mo>
<mi>x</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// An `<mroot>` whose base holds `b` plus an `<mphantom>`, and whose index
/// is a whitespace `<mtext>`. The `<mphantom>` and wrapping `<mrow>` are
/// expected to go away; the index stays as an `empty_content` `<mtext>`.
#[test]
fn do_not_remove_some_whitespace() {
    let input = "<math><mroot>
<mrow><mi>b</mi><mphantom><mi>y</mi></mphantom></mrow>
<mtext> </mtext>
</mroot></math>";
    let expected = "<math><mroot>
<mi>b</mi>
<mtext data-changed='empty_content'> </mtext>
</mroot></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// An `<msqrt>` containing `<mstyle>`, `<mphantom>`, a whitespace `<mtext>`
/// and an `<mspace>`. Only a single `<mi>` (with trailing whitespace) is
/// expected to remain inside the `<msqrt>`.
#[test]
fn remove_all_extra_elements() {
    let input = "<math><msqrt>
<mstyle> <mi>b</mi> </mstyle>
<mphantom><mi>y</mi></mphantom>
<mtext> </mtext>
<mspace width='3em'/>
</msqrt></math>";
    let expected = "<math><msqrt>
<mi>b </mi>
</msqrt></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A `<semantics>` element with a presentation child and three annotations.
/// The single-child `<mrow>` around `z` is expected to be dropped, while
/// every annotation is kept as written.
#[test]
fn clean_semantics() {
    let input = "<math>
<semantics>
<mrow><mi>z</mi></mrow>
<annotation-xml encoding='MathML-Content'>
<ci>𝑧</ci>
</annotation-xml>
<annotation encoding='application/x-tex'>z</annotation>
<annotation encoding='application/x-llamapun'>italic_z</annotation>
</semantics>
</math>";
    let expected = "<math>
<semantics>
<mi>z</mi>
<annotation-xml encoding='MathML-Content'>
<ci>𝑧</ci>
</annotation-xml>
<annotation encoding='application/x-tex'>z</annotation>
<annotation encoding='application/x-llamapun'>italic_z</annotation>
</semantics>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// The angle sign marked up as `<mi>` followed by `A`, `B`, `C`.
/// The sign is expected to become an `<mo>`, and the three letters are
/// grouped with added invisible separators between them.
#[test]
fn clean_up_mi_operator() {
    let input = "<math><mrow><mi>∠</mi><mi>A</mi><mi>B</mi><mi>C</mi></mrow></math>";
    let expected = " <math>
<mrow>
<mo>∠</mo>
<mrow data-changed='added'>
<mi>A</mi>
<mo data-changed='added'>⁣</mo>
<mi>B</mi>
<mo data-changed='added'>⁣</mo>
<mi>C</mi>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `arc` (with trailing whitespace) in an `<mtext>`, followed by `cos`.
/// The two are expected to merge into a single `<mi>arccos</mi>`, with an
/// added invisible `<mo>` before `x`.
#[test]
fn clean_up_arc() {
    let input = "<math><mtext>arc </mtext><mi>cos</mi><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mi>arccos</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `arc` in an `<mtext>` with no trailing whitespace, followed by `cos`.
/// The expected form is the same merged `<mi>arccos</mi>` as in
/// `clean_up_arc`.
#[test]
fn clean_up_arc_nospace() {
    let input = "<math><mtext>arc</mtext><mi>cos</mi><mi>x</mi></math>";
    let expected = "<math>
<mrow data-changed='added'>
<mi>arccos</mi>
<mo data-changed='added'>⁢</mo>
<mi>x</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Roman numerals given as `<mtext>XLVIII</mtext>` and `<mn>mmxxvi</mn>`.
/// The expected form is written identically to the input.
#[test]
fn roman_numeral() {
    let input = "<math><mrow><mtext>XLVIII</mtext> <mo>+</mo><mn>mmxxvi</mn></mrow></math>";
    let expected = "<math><mrow><mtext>XLVIII</mtext> <mo>+</mo><mn>mmxxvi</mn></mrow></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Three 4-digit `<mn>` blocks separated by `<mspace width="thickmathspace">`
/// inside parentheses. They are expected to merge into one `<mn>` whose
/// blocks are separated by U+00A0.
#[test]
fn digit_block_binary() {
    let input = "<math><mo>(</mo><mn>0110</mn><mspace width=\"thickmathspace\"></mspace><mn>1110</mn><mspace width=\"thickmathspace\"></mspace><mn>0110</mn><mo>)</mo></math>";
    let expected = " <math>
<mrow data-changed='added'>
<mo>(</mo>
<mn>0110\u{A0}1110\u{A0}0110</mn>
<mo>)</mo>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Numbers split across `<mn>`/`<mo>` tokens: `8 , 123 , 456` and `4 . 32`.
/// They are expected to merge into `<mn>8,123,456</mn>` and
/// `<mn>4.32</mn>`.
#[test]
fn digit_block_decimal() {
    let input = "<math><mn>8</mn><mo>,</mo><mn>123</mn><mo>,</mo><mn>456</mn><mo>+</mo>
<mn>4</mn><mo>.</mo><mn>32</mn></math>";
    let expected = " <math>
<mrow data-changed='added'>
<mn>8,123,456</mn>
<mo>+</mo>
<mn>4.32</mn>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Numbers with a leading or trailing decimal point given as separate
/// tokens. Expected merged results are `8,123.`, `4.` and `.01`.
#[test]
fn digit_block_decimal_pt() {
    let input = "<math><mn>8</mn><mo>,</mo><mn>123</mn><mo>.</mo>
<mo>+</mo><mn>4</mn><mo>.</mo>
<mo>+</mo><mo>.</mo><mn>01</mn></math>";
    let expected = " <math>
<mrow data-changed='added'>
<mn>8,123.</mn>
<mo>+</mo>
<mn>4.</mn>
<mo>+</mo>
<mn>.01</mn>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `(451, 231)` as flat tokens.
/// The two numbers are expected to stay separate `<mn>` elements around the
/// comma, not be merged into one digit block.
#[test]
fn not_digit_block_parens() {
    let input = "<math><mo>(</mo><mn>451</mn><mo>,</mo><mn>231</mn><mo>)</mo></math>";
    let expected = " <math> <mrow data-changed='added'>
<mo>(</mo>
<mrow data-changed='added'>
<mn>451</mn> <mo>,</mo> <mn>231</mn>
</mrow>
<mo>)</mo>
</mrow></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `(451, 231)` where the contents already sit in an `<mrow>`.
/// The numbers are expected to stay separate, and the existing inner
/// `<mrow>` is kept without a `data-changed` attribute.
#[test]
fn not_digit_block_parens_mrow() {
    let input = "<math><mo>(</mo><mrow><mn>451</mn><mo>,</mo><mn>231</mn></mrow><mo>)</mo></math>";
    let expected = " <math> <mrow data-changed='added'>
<mo>(</mo>
<mrow>
<mn>451</mn> <mo>,</mo> <mn>231</mn>
</mrow>
<mo>)</mo>
</mrow></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Comma-separated groups that are not all three digits long (`8,49,456`,
/// `1,234,56`), plus `4` and `32` separated by a whitespace `<mtext>`.
/// None are expected to merge into a single `<mn>`.
#[test]
fn not_digit_block_decimal() {
    let input = "<math><mn>8</mn><mo>,</mo><mn>49</mn><mo>,</mo><mn>456</mn><mo>+</mo>
<mn>4</mn><mtext> </mtext><mn>32</mn><mo>+</mo>
<mn>1</mn><mo>,</mo><mn>234</mn><mo>,</mo><mn>56</mn></math>";
    let expected = " <math>
<mrow data-changed='added'>
<mn>8</mn>
<mo>,</mo>
<mn>49</mn>
<mo>,</mo>
<mrow data-changed='added'>
<mn>456</mn>
<mo>+</mo>
<mrow data-changed='added'>
<mn>4</mn>
<mo data-changed='added'>⁢</mo>
<mn>32</mn>
</mrow>
<mo>+</mo>
<mn>1</mn>
</mrow>
<mo>,</mo>
<mn>234</mn>
<mo>,</mo>
<mn>56</mn>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// `8, 123, 456, …` inside an `<mrow>`.
/// With the trailing ellipsis present, the numbers are expected to stay a
/// comma-separated list, not merge into `8,123,456`.
#[test]
fn not_digit_block_ellipsis() {
    let input = "<math><mrow><mn>8</mn><mo>,</mo><mn>123</mn><mo>,</mo><mn>456</mn><mo>,</mo>
<mi>…</mi></mrow></math>";
    let expected = "<math>
<mrow>
<mn>8</mn>
<mo>,</mo>
<mn>123</mn>
<mo>,</mo>
<mn>456</mn>
<mo>,</mo>
<mi>…</mi>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Runs of `<mo>.</mo>` between commas.
/// A run of three dots is expected to become `<mi>…</mi>`; a run of only
/// two dots stays as two `<mo>.</mo>` grouped in an added `<mrow>`.
#[test]
fn ellipsis() {
    let input = "<math><mn>5</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>8</mn><mo>,</mo>
<mn>9</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>11</mn><mo>,</mo>
<mn>5</mn><mo>,</mo><mo>.</mo><mo>.</mo><mo>,</mo><mn>8</mn>
</math>";
    let expected = "<math><mrow data-changed='added'>
<mn>5</mn><mo>,</mo><mi>…</mi><mo>,</mo><mn>8</mn><mo>,</mo>
<mn>9</mn><mo>,</mo><mi>…</mi><mo>,</mo><mn>11</mn><mo>,</mo>
<mn>5</mn><mo>,</mo><mrow data-changed='added'><mo>.</mo><mo>.</mo></mrow>
<mo>,</mo><mn>8</mn></mrow></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// ASCII apostrophes used as prime superscripts: `'`, `''`, and two
/// separate `'` in an `<mrow>`. The expected superscripts are the single
/// prime and double prime characters.
#[test]
fn primes_common() {
    let input = "<math><msup><mn>5</mn><mo>'</mo></msup>
<msup><mn>5</mn><mo>''</mo></msup>
<msup><mn>8</mn><mrow><mo>'</mo><mo>'</mo></mrow></msup></math>";
    let expected = "<math>
<mrow data-changed='added'>
<msup>
<mn>5</mn>
<mo>′</mo>
</msup>
<mo data-changed='added'>⁢</mo>
<msup>
<mn>5</mn>
<mo>″</mo>
</msup>
<mo data-changed='added'>⁢</mo>
<msup>
<mn>8</mn>
<mo>″</mo>
</msup>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Mixed apostrophes and prime characters totalling three, four and five
/// primes. Expected superscripts are the triple prime, the quadruple prime,
/// and quadruple prime followed by a single prime.
#[test]
fn primes_uncommon() {
    let input = "<math><msup><mn>5</mn><mo>''′</mo></msup>
<msup><mn>5</mn><mo>''''</mo></msup>
<msup><mn>8</mn><mrow><mo>′</mo><mo>⁗</mo></mrow></msup></math>";
    let expected = " <math>
<mrow data-changed='added'>
<msup>
<mn>5</mn>
<mo>‴</mo>
</msup>
<mo data-changed='added'>⁢</mo>
<msup>
<mn>5</mn>
<mo>⁗</mo>
</msup>
<mo data-changed='added'>⁢</mo>
<msup>
<mn>8</mn>
<mo>⁗′</mo>
</msup>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A closing `|` that sits in the base of an `<msup>`, with the opening `|`
/// earlier in the same `<msqrt>`. The expected form moves the superscript
/// onto the whole `|a_k|` group, drops the redundant `<mrow>`/`<mstyle>`
/// wrappers, and puts the `<mstyle>` attributes on the `<msqrt>`.
#[test]
fn lift_script() {
    let input = "<math xmlns='http://www.w3.org/1998/Math/MathML' >
<mrow>
<mstyle scriptlevel='0' displaystyle='true'>
<mrow>
<msqrt>
<munder>
<mo>∑<!-- ∑ --></mo>
<mrow>
<mn>0</mn>
<mo>≤<!-- ≤ --></mo>
<mi>k</mi>
<mo>≤<!-- ≤ --></mo>
<mi>n</mi>
</mrow>
</munder>
<mrow>
<mo stretchy='false'>|</mo>
</mrow>
<msub>
<mi>a</mi>
<mrow>
<mi>k</mi>
</mrow>
</msub>
<msup>
<mrow>
<mo stretchy='false'>|</mo>
</mrow>
<mrow>
<mn>2</mn>
</mrow>
</msup>
</msqrt>
</mrow>
</mstyle>
</mrow>
</math>";
    let expected = "<math>
<msqrt scriptlevel='0' displaystyle='true'>
<mrow data-changed='added'>
<munder>
<mo>∑</mo>
<mrow>
<mn>0</mn>
<mo>≤</mo>
<mi>k</mi>
<mo>≤</mo>
<mi>n</mi>
</mrow>
</munder>
<msup>
<mrow data-changed='added'>
<mo stretchy='false'>|</mo>
<msub>
<mi>a</mi>
<mi>k</mi>
</msub>
<mo stretchy='false'>|</mo>
</mrow>
<mn>2</mn>
</msup>
</mrow>
</msqrt>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// A degree-like sign and a prime written on the baseline after a number.
/// Each is expected to be wrapped with its number in an added `<msup>`,
/// which then serves as the argument of the preceding `cos` or `sin`.
#[test]
fn pseudo_scripts() {
    let input = "<math><mrow>
<mi>cos</mi><mn>30</mn><mo>º</mo>
<mi>sin</mi><mn>60</mn><mo>′</mo>
</mrow></math>";
    let expected = "<math>
<mrow>
<mrow data-changed='added'>
<mi>cos</mi>
<mo data-changed='added'>⁡</mo>
<msup data-changed='added'><mn>30</mn><mo>º</mo></msup>
</mrow>
<mo data-changed='added'>⁢</mo>
<mrow data-changed='added'>
<mi>sin</mi>
<mo data-changed='added'>⁡</mo>
<msup data-changed='added'><mn>60</mn><mo>′</mo></msup>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// An `<msub>` with an empty `<mtext/>` base placed before `U`.
/// The expected form is an `<mmultiscripts>` on `U` with `92` as a
/// pre-subscript.
#[test]
fn prescript_only() {
    let input = "<math><msub><mtext/><mn>92</mn></msub><mi>U</mi></math>";
    let expected = "<math><mmultiscripts><mi>U</mi><mprescripts/> <mn>92</mn><none/> </mmultiscripts></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// An empty-base `<msub>` (`0`) followed by `F` with subscript `1` and a
/// parenthesized argument list. The expected form is one `<mmultiscripts>`
/// on `F` carrying both scripts, applied to the grouped arguments.
#[test]
fn pre_and_postscript_only() {
    let input = "<math>
<msub><mrow/><mn>0</mn></msub>
<msub><mi>F</mi><mn>1</mn></msub>
<mo stretchy='false'>(</mo>
<mi>a</mi><mo>,</mo><mi>b</mi><mo>;</mo><mi>c</mi><mo>;</mo><mi>z</mi>
<mo stretchy='false'>)</mo>
</math>";
    let expected = " <math>
<mrow data-changed='added'>
<mmultiscripts>
<mi>F</mi>
<mn>1</mn>
<none></none>
<mprescripts></mprescripts>
<mn>0</mn>
<none></none>
</mmultiscripts>
<mo data-changed='added'>⁡</mo>
<mrow data-changed='added'>
<mo stretchy='false'>(</mo>
<mrow data-changed='added'>
<mrow data-changed='added'>
<mi>a</mi>
<mo>,</mo>
<mi>b</mi>
</mrow>
<mo>;</mo>
<mi>c</mi>
<mo>;</mo>
<mi>z</mi>
</mrow>
<mo stretchy='false'>)</mo>
</mrow>
</mrow>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// An `<mmultiscripts>` whose postscript pair is `<none/> <none/>`.
/// The empty pair is expected to be removed, leaving the base,
/// `<mprescripts/>` and the prescripts.
#[test]
fn pointless_nones_in_mmultiscripts() {
    let input = "<math><mmultiscripts>
<mtext>C</mtext>
<none />
<none />
<mprescripts />
<mn>6</mn>
<mn>14</mn>
</mmultiscripts></math>";
    let expected = "<math><mmultiscripts>
<mtext>C</mtext>
<mprescripts />
<mn>6</mn>
<mn>14</mn>
</mmultiscripts></math>";
    assert!(are_strs_canonically_equal(input, expected));
}
/// Tensor-style indices written as `R_i` followed by empty-base scripts
/// for `j`, `k` and `l`. The expected form is a single `<mmultiscripts>`
/// on `R`. The test is currently ignored.
#[test]
#[ignore]
fn tensor() {
    let input = "<math>
<msub><mi>R</mi><mi>i</mi></msub>
<msup><mrow/><mi>j</mi></msup>
<msub><mrow/><mi>k</mi></msub>
<msub><mrow/><mi>l</mi></msub>
</math>";
    let expected = "<math>
<mmultiscripts>
<mi> R </mi>
<mi> i </mi>
<none/>
<none/>
<mi> j </mi>
<mi> k </mi>
<none/>
<mi> l </mi>
<none/>
</mmultiscripts>
</math>";
    assert!(are_strs_canonically_equal(input, expected));
}
}