#![allow(non_snake_case)]
#![allow(clippy::needless_return)]
use std::cell::RefCell;
use std::sync::LazyLock;
use crate::canonicalize::{as_text, create_mathml_element};
use crate::errors::*;
use phf::phf_map;
use regex::{Captures, Regex};
use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
use sxd_document::parser;
use sxd_document::Package;
use crate::canonicalize::{as_element, name};
use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
use log::{debug, error};
use crate::navigate::*;
use crate::pretty_print::mml_to_string;
use crate::xpath_functions::{is_leaf, IsNode};
use std::panic::{catch_unwind, AssertUnwindSafe};
/// Upper bound on MathML nesting depth.
///
/// `copy_mathml_recursive` stops copying attributes and children once its recursion
/// depth exceeds this value.
pub const MAX_DEPTH: usize = 512;
// `Once` guard used by `enable_logs`; only compiled with the "enable-logs" feature.
#[cfg(feature = "enable-logs")]
use std::sync::Once;
#[cfg(feature = "enable-logs")]
static INIT: Once = Once::new();
/// Sets up logging the first time it is called; later calls do nothing.
///
/// Without the "enable-logs" feature the guarded statement is compiled out and this
/// function is a no-op. With the feature, the only logger configured here is the
/// Android logger (tag "MathCat", max level `Trace`), and only on Android targets.
fn enable_logs() {
#[cfg(feature = "enable-logs")]
INIT.call_once(||{
#[cfg(target_os = "android")]
{
use log::*;
use android_logger::*;
android_logger::init_once(
Config::default()
.with_max_level(LevelFilter::Trace)
.with_tag("MathCat")
);
trace!("Activated Android logger!");
}
});
}
thread_local! {
// Per-thread slot describing the most recent panic: (message, location string, `u32` line field).
// Written by the hook installed in `init_panic_handler`, taken (and cleared) by `report_any_panic`.
static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
}
/// Installs a panic hook that records the panic message and source location in the
/// thread-local `PANIC_INFO` slot so that `report_any_panic` can turn a caught panic
/// into a descriptive error.
///
/// The file name and line number are stored as separate tuple fields because
/// `report_any_panic` formats them as `{file}:{line}`. Storing a pre-joined
/// "file:line" string together with a dummy line of `0` made every report end in a
/// spurious ":0".
pub fn init_panic_handler() {
    std::panic::set_hook(Box::new(|info| {
        let (file, line) = match info.location() {
            Some(location) => (location.file().to_string(), location.line()),
            None => ("unknown".to_string(), 0),
        };
        let payload = info.payload();
        let msg = if let Some(s) = payload.downcast_ref::<&'static str>() {
            (*s).to_string()
        } else if let Some(s) = payload.downcast_ref::<String>() {
            s.clone()
        } else {
            "Unknown panic payload".to_string()
        };
        // `try_with` / `try_borrow_mut` never panic, so the hook cannot trigger a second
        // panic if the slot is being destroyed or is already borrowed.
        let _ = PANIC_INFO.try_with(|cell| {
            if let Ok(mut slot) = cell.try_borrow_mut() {
                *slot = Some((msg, file, line));
            }
        });
    }));
}
/// Flattens the result of a `catch_unwind` call into an ordinary `Result`.
///
/// * `Ok(inner)` (no panic) is returned unchanged.
/// * `Err(_)` (a panic was caught) becomes an error; if the panic hook left details in
///   `PANIC_INFO` they are taken out of the slot and included in the message.
pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
    if let Ok(outcome) = result {
        return outcome;
    }
    // `take()` empties the slot so stale details are not reported for a later panic.
    let captured = PANIC_INFO.with(|cell| cell.borrow_mut().take());
    match captured {
        Some((msg, file, line)) => Err(anyhow::anyhow!(
            "MathCAT crash! Please report the following information: '{}' at {}:{}",
            msg, file, line
        )),
        None => Err(anyhow::anyhow!("MathCAT crash! -- please report")),
    }
}
/// Prepares freshly parsed MathML for use: trims it with `trim_element`, canonicalizes
/// it, and finally runs `add_ids` on the canonical tree.
///
/// Returns the canonicalized element, or the error reported by canonicalization.
fn cleanup_mathml(mathml: Element) -> Result<Element> {
    trim_element(mathml, false);
    let canonical = crate::canonicalize::canonicalize(mathml)?;
    Ok(add_ids(canonical))
}
thread_local! {
// Per-thread package holding the current MathML document.
// Starts out as an empty `<math></math>`; `set_mathml` replaces its contents.
pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
}
/// Builds the initial value of `MATHML_INSTANCE`: a package containing an empty `<math>` element.
///
/// # Panics
/// Panics if the hard-coded initializer string fails to parse.
fn init_mathml_instance() -> RefCell<Package> {
    RefCell::new(
        parser::parse("<math></math>")
            .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string"),
    )
}
/// Initializes the preference manager with the rules directory.
///
/// If `dir` is empty, the value of the `MathCATRulesDir` environment variable is used
/// instead (or an empty path when that is unset). Also enables logging and installs
/// the panic hook. A panic during initialization is converted into an error.
pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
    enable_logs();
    init_panic_handler();
    let requested = dir.as_ref().to_string();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        let rules_path = if requested.is_empty() {
            std::env::var_os("MathCATRulesDir").unwrap_or_default()
        } else {
            std::ffi::OsString::from(&requested)
        };
        crate::prefs::PreferenceManager::get()
            .borrow_mut()
            .initialize(std::path::PathBuf::from(rules_path))
    }));
    report_any_panic(outcome)
}
/// Returns the crate version recorded by Cargo at build time (`CARGO_PKG_VERSION`).
pub fn get_version() -> String {
    enable_logs();
    env!("CARGO_PKG_VERSION").to_string()
}
/// Parses `mathml_str`, cleans it up, stores it as the current MathML for this thread,
/// and returns the cleaned-up MathML as a string.
///
/// Processing steps, in order:
/// 1. reset the navigation state and (re)read the speech rule files;
/// 2. reject input larger than 1MB;
/// 3. strip XML comments and processing instructions;
/// 4. replace named HTML entities using the `entities.in` table (an unknown entity is an error);
/// 5. strip MathJax-generated `class` attributes, the first namespace-prefixed `xmlns:`
///    declaration, and tag prefixes;
/// 6. parse, run `cleanup_mathml`, and store the new package in `MATHML_INSTANCE`.
///
/// # Errors
/// Returns an error for oversized input, unknown entities, unparsable MathML, or a
/// failure in cleanup. On an entity or parse error the stored MathML is reset to an
/// empty `<math></math>`. Panics are caught and reported as errors.
pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
    enable_logs();
    static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
    static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
    static PROCESSING_INSTRUCTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"<\?[\s\S]{1,2048}\?>"#).unwrap());
    // `(?s)` lets `.` span newlines so multi-line comments are matched; the lazy `.*?`
    // stops at the first `-->`. (A bare `(?s)` pattern only matches empty strings, which
    // made the comment-stripping pass a no-op.)
    static XML_COMMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(?s)<!--.*?-->"#).unwrap());
    static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]{1,32}"#).unwrap());
    static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]{1,32}:"#).unwrap());
    static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]{2,10});"#).unwrap());

    let result = catch_unwind(AssertUnwindSafe(|| {
        NAVIGATION_STATE.with(|nav_stack| {
            nav_stack.borrow_mut().reset();
        });
        crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;

        let mathml_str = mathml_str.as_ref();
        if mathml_str.len() > 1024 * 1024 {
            bail!("MathML string of size {} bytes exceeds length limit of 1MB", mathml_str.len());
        }

        MATHML_INSTANCE.with(|old_package| {
            static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");

            let mut error_message = String::new();
            let mathml_str = XML_COMMENT.replace_all(mathml_str, "");
            let mathml_str = PROCESSING_INSTRUCTION.replace_all(&mathml_str, "");
            let mathml_str = HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
                None => {
                    // Remember the failure (the last unknown entity wins) and leave the text untouched.
                    error_message = format!("No entity named '{}'", &cap[0]);
                    cap[0].to_string()
                }
                Some(&ch) => ch.to_string(),
            });
            if !error_message.is_empty() {
                // Don't leave stale MathML around after a failed call.
                old_package.replace(parser::parse("<math></math>").unwrap());
                bail!(error_message);
            }

            let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
            let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
            // Only the first prefixed namespace declaration is rewritten (`replace`, not `replace_all`).
            let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns");
            let mathml_str = PREFIX.replace_all(&mathml_str, "$1");

            let new_package = match parser::parse(&mathml_str) {
                Ok(package) => package,
                Err(e) => {
                    old_package.replace(parser::parse("<math></math>").unwrap());
                    bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
                }
            };

            let mathml = get_element(&new_package);
            let mathml = cleanup_mathml(mathml)?;
            let mathml_string = mml_to_string(mathml);
            old_package.replace(new_package);
            Ok(mathml_string)
        })
    }));
    report_any_panic(result)
}
/// Produces speech text for the MathML currently stored in `MATHML_INSTANCE`.
///
/// The intent tree is built in a fresh scratch package, logged at debug level, and
/// then passed to `speak_mathml`. Panics are caught and reported as errors.
pub fn get_spoken_text() -> Result<String> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let mathml = get_element(&package);
            let intent_package = Package::new();
            let intent = crate::speech::intent_from_mathml(mathml, intent_package.as_document())?;
            debug!("Intent tree:\n{}", mml_to_string(intent));
            let spoken = crate::speech::speak_mathml(intent, "", 0)?;
            Ok(spoken)
        })
    }));
    report_any_panic(outcome)
}
/// Produces the overview text for the MathML currently stored in `MATHML_INSTANCE`
/// by calling `overview_mathml`. Panics are caught and reported as errors.
pub fn get_overview_text() -> Result<String> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let overview = crate::speech::overview_mathml(get_element(&package), "", 0)?;
            Ok(overview)
        })
    }));
    report_any_panic(outcome)
}
/// Returns the string value of the preference called `name`.
///
/// # Errors
/// Returns an error if the preference manager reports `NO_PREFERENCE` for `name`.
/// Panics are caught and reported as errors.
pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
    enable_logs();
    let pref_name = name.as_ref().to_string();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        use crate::prefs::NO_PREFERENCE;
        crate::speech::SPEECH_RULES.with(|rules| {
            let rules = rules.borrow();
            let prefs = rules.pref_manager.borrow();
            let mut found = prefs.pref_to_string(&pref_name);
            // NOTE(review): this retry repeats the exact same lookup -- presumably a leftover
            // from when two different preference sources were consulted; confirm and simplify.
            if found == NO_PREFERENCE {
                found = prefs.pref_to_string(&pref_name);
            }
            if found != NO_PREFERENCE {
                return Ok(found);
            }
            bail!("No preference named '{}'", pref_name);
        })
    }));
    report_any_panic(outcome)
}
/// Sets the preference `name` to `value`.
///
/// Thin panic-safe wrapper around `set_preference_impl`, which does the validation
/// and the actual update.
pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
    enable_logs();
    let pref_name = name.as_ref().to_string();
    let pref_value = value.as_ref().to_string();
    let outcome = catch_unwind(AssertUnwindSafe(|| set_preference_impl(&pref_name, &pref_value)));
    report_any_panic(outcome)
}
/// Validates and stores a preference.
///
/// * `Language` / `LanguageAuto`: a value other than "Auto" must start with a
///   two-character language code, optionally followed by `-country`; anything after
///   a second `-` is dropped. `LanguageAuto` may not be "Auto", and may only be set
///   while `Language` is "Auto".
/// * A value equal to "true"/"false" (case-insensitive) is stored as a boolean.
/// * `Pitch`, `Rate`, `Volume`, `CapitalLetters_Pitch`, `MathRate`, and `PauseFactor`
///   are parsed as floats.
/// * Everything else is stored as a string.
///
/// # Errors
/// Returns an error for a malformed language value, for the `LanguageAuto` rule
/// violations above, when the speech rules report a pending error, when a float
/// preference doesn't parse, or when storing a string preference fails.
fn set_preference_impl(name: &str, value: &str) -> Result<()> {
    let mut value = value.to_string();
    if name == "Language" || name == "LanguageAuto" {
        if value == "Auto" {
            if name == "LanguageAuto" {
                bail!("'LanguageAuto' can not have the value 'Auto'");
            }
        } else {
            let mut parts = value.split('-');
            let language = parts.next().unwrap_or("");
            let country = parts.next().unwrap_or("");
            if language.len() != 2 {
                bail!(
                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
                    value
                );
            }
            // Built as a separate string because `language`/`country` still borrow `value`.
            let normalized = if country.is_empty() {
                language.to_string()
            } else {
                format!("{language}-{country}")
            };
            value = normalized;
        }
    }

    // Surface any error already recorded by the speech rules before changing anything.
    crate::speech::SPEECH_RULES.with(|rules| -> Result<()> {
        match rules.borrow().get_error() {
            Some(error_string) => bail!("{}", error_string),
            None => Ok(()),
        }
    })?;

    let manager = crate::prefs::PreferenceManager::get();
    let mut prefs = manager.borrow_mut();
    if name == "LanguageAuto" {
        let language_pref = prefs.pref_to_string("Language");
        if language_pref != "Auto" {
            bail!(
                "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
                language_pref
            );
        }
    }

    let lowered = value.to_lowercase();
    if lowered == "true" || lowered == "false" {
        prefs.set_api_boolean_pref(name, lowered == "true");
    } else if matches!(
        name,
        "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor"
    ) {
        prefs.set_api_float_pref(name, to_float(name, &value)?);
    } else {
        prefs.set_string_pref(name, &value)?;
    }
    Ok(())
}
/// Parses `value` as an `f64`; `name` is used only in the error message.
///
/// # Errors
/// Returns an error when `value` is not a valid float.
fn to_float(name: &str, value: &str) -> Result<f64> {
    if let Ok(parsed) = value.parse::<f64>() {
        return Ok(parsed);
    }
    bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value);
}
/// Returns the braille for the current MathML.
///
/// `nav_node_id` is passed through to `braille_mathml`; only the first component of
/// its result (the braille string) is returned. Panics are caught and reported as errors.
pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
    enable_logs();
    let node_id = nav_node_id.as_ref().to_string();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let (braille, ..) = crate::braille::braille_mathml(get_element(&package), &node_id)?;
            Ok(braille)
        })
    }));
    report_any_panic(outcome)
}
/// Returns the braille for the current navigation node.
///
/// The navigation node is turned into a `<math>`-rooted tree before brailling:
/// * offset 0 and the node is already `<math>`: used as is;
/// * offset 0 otherwise: a copy of the node is wrapped in a new `<math>`;
/// * non-zero offset on a leaf: a new leaf with the same name holding only the
///   character at `offset` is wrapped in a new `<math>`.
///
/// # Errors
/// Returns an error when a non-zero offset is reported for a non-leaf, when the
/// offset is past the end of the leaf's text, or when navigation/braille fails.
/// Panics are caught and reported as errors.
pub fn get_navigation_braille() -> Result<String> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let mathml = get_element(&package);
            // Scratch document that owns any wrapper elements created below.
            let scratch_package = Package::new();
            let scratch_doc = scratch_package.as_document();

            let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
                match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
                    Err(e) => Err(e),
                    Ok((found, 0)) if name(found) == "math" => Ok(found),
                    Ok((found, 0)) => {
                        let wrapper = create_mathml_element(&scratch_doc, "math");
                        wrapper.append_child(copy_mathml(found));
                        scratch_doc.root().append_child(wrapper);
                        Ok(wrapper)
                    }
                    Ok((found, offset)) if !is_leaf(found) => bail!(
                        "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
                        offset,
                        name(found)
                    ),
                    Ok((found, offset)) => match as_text(found).chars().nth(offset) {
                        Some(ch) => {
                            let single_char_leaf = create_mathml_element(&scratch_doc, name(found));
                            single_char_leaf.set_text(&ch.to_string());
                            let wrapper = create_mathml_element(&scratch_doc, "math");
                            wrapper.append_child(single_char_leaf);
                            scratch_doc.root().append_child(wrapper);
                            Ok(wrapper)
                        }
                        None => bail!(
                            "Internal error: offset '{}' on leaf element '{}' doesn't exist",
                            offset,
                            mml_to_string(found)
                        ),
                    },
                }
            })?;

            let (braille, ..) = crate::braille::braille_mathml(nav_mathml, "")?;
            Ok(braille)
        })
    }));
    report_any_panic(outcome)
}
/// Forwards a key press (key code plus modifier states) to
/// `do_mathml_navigate_key_press` for the current MathML and returns its result.
/// Panics are caught and reported as errors.
pub fn do_navigate_keypress(
    key: usize,
    shift_key: bool,
    control_key: bool,
    alt_key: bool,
    meta_key: bool,
) -> Result<String> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            do_mathml_navigate_key_press(
                get_element(&package),
                key,
                shift_key,
                control_key,
                alt_key,
                meta_key,
            )
        })
    }));
    report_any_panic(outcome)
}
pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
enable_logs();
let command = command.as_ref().to_string();
let result = catch_unwind(AssertUnwindSafe(|| {
let cmd = NAV_COMMANDS.get_key(&command); if cmd.is_none() {
bail!("Unknown command in call to DoNavigateCommand()");
};
let cmd = *cmd.unwrap();
MATHML_INSTANCE.with(|package_instance| {
let package_instance = package_instance.borrow();
let mathml = get_element(&package_instance);
return do_navigate_command_string(mathml, cmd);
})
}));
return report_any_panic(result);
}
/// Sets the navigation position to the node with the given `id` and `offset` by
/// delegating to `set_navigation_node_from_id`. Panics are caught and reported as errors.
pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
    enable_logs();
    let node_id = id.as_ref().to_string();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            set_navigation_node_from_id(get_element(&package), &node_id, offset)
        })
    }));
    report_any_panic(outcome)
}
/// Returns the current navigation node serialized with `mml_to_string`, together
/// with the offset reported by the navigation state.
/// Panics are caught and reported as errors.
pub fn get_navigation_mathml() -> Result<(String, usize)> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let mathml = get_element(&package);
            NAVIGATION_STATE.with(|nav_stack| {
                nav_stack
                    .borrow_mut()
                    .get_navigation_mathml(mathml)
                    .map(|(found, offset)| (mml_to_string(found), offset))
            })
        })
    }));
    report_any_panic(outcome)
}
/// Returns the `(id, offset)` pair that the navigation state reports for the current
/// MathML. Panics are caught and reported as errors.
pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let mathml = get_element(&package);
            let id_and_offset =
                NAVIGATION_STATE.with(|nav_stack| nav_stack.borrow().get_navigation_mathml_id(mathml));
            Ok(id_and_offset)
        })
    }));
    report_any_panic(outcome)
}
/// Returns the `(start, end)` pair that `braille_mathml` reports when given the id of
/// the current navigation node. Panics are caught and reported as errors.
pub fn get_braille_position() -> Result<(usize, usize)> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            let mathml = get_element(&package);
            let (nav_id, _offset) = get_navigation_mathml_id()?;
            let (_braille, start, end) = crate::braille::braille_mathml(mathml, &nav_id)?;
            Ok((start, end))
        })
    }));
    report_any_panic(outcome)
}
/// Maps a braille `position` back to a navigation node by delegating to
/// `crate::braille::get_navigation_node_from_braille_position` on the current MathML.
/// Panics are caught and reported as errors.
pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
    enable_logs();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        MATHML_INSTANCE.with(|instance| {
            let package = instance.borrow();
            crate::braille::get_navigation_node_from_braille_position(get_element(&package), position)
        })
    }));
    report_any_panic(outcome)
}
pub fn get_supported_braille_codes() -> Result<Vec<String>> {
enable_logs();
let result = catch_unwind(AssertUnwindSafe(|| {
let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
let braille_dir = rules_dir.join("Braille");
let mut braille_code_paths = Vec::new();
find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
let mut braille_code_paths = braille_code_paths.iter()
.map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
.filter(|string_path| !string_path.is_empty() )
.collect::<Vec<String>>();
braille_code_paths.sort();
Ok(braille_code_paths)
}));
return report_any_panic(result);
}
pub fn get_supported_languages() -> Result<Vec<String>> {
enable_logs();
let result = catch_unwind(AssertUnwindSafe(|| {
let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
let lang_dir = rules_dir.join("Languages");
let mut lang_paths = Vec::new();
find_all_dirs_shim(&lang_dir, &mut lang_paths);
let mut language_paths = lang_paths.iter()
.map(|path| path.strip_prefix(&lang_dir).unwrap()
.to_string_lossy()
.replace(std::path::MAIN_SEPARATOR, "-")
.to_string())
.filter(|string_path| !string_path.is_empty() )
.collect::<Vec<String>>();
language_paths.retain(|s| !s.starts_with("zz"));
language_paths.sort();
Ok(language_paths)
}));
return report_any_panic(result);
}
/// Lists the speech styles available for `lang`.
///
/// Looks in `<rules dir>/Languages/<lang>` for files ending in `_Rules.yaml`, strips
/// that suffix from each name, and returns the sorted, de-duplicated names.
/// Panics are caught and reported as errors.
pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
    enable_logs();
    let language = lang.as_ref().to_string();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        let lang_dir = crate::prefs::PreferenceManager::get()
            .borrow()
            .get_rules_dir()
            .join("Languages")
            .join(&language);
        let mut styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
        for file_name in styles.iter_mut() {
            // The names were selected by this suffix, so it is simply cut off.
            let stem_len = file_name.len() - "_Rules.yaml".len();
            file_name.truncate(stem_len);
        }
        styles.sort();
        styles.dedup();
        Ok(styles)
    }));
    report_any_panic(outcome)
}
/// Returns a deep copy of `mathml`, created in the same document as the original.
///
/// Copying starts at depth 0; see `copy_mathml_recursive` for the depth cut-off.
pub fn copy_mathml(mathml: Element) -> Element {
    copy_mathml_recursive(mathml, 0)
}
fn copy_mathml_recursive(mathml: Element, depth: usize) -> Element {
if depth > MAX_DEPTH {
return create_mathml_element(&mathml.document(), name(mathml));
}
let children = mathml.children();
let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
mathml.attributes().iter().for_each(|attr| {
new_mathml.set_attribute_value(attr.name(), attr.value());
});
if children.len() == 1 &&
let Some(text) = children[0].text() {
new_mathml.set_text(text.text());
return new_mathml;
}
let mut new_children = Vec::with_capacity(children.len());
for child in children {
let child = as_element(child);
let new_child = copy_mathml_recursive(child, depth + 1);
new_children.push(new_child);
}
new_mathml.append_children(new_children);
return new_mathml;
}
/// Formats an error and its chain of causes as text.
///
/// The first line is the error itself; each underlying cause follows on its own
/// line prefixed with "caused by: ".
pub fn errors_to_string(e: &Error) -> String {
    enable_logs();
    let mut report = format!("{e}\n");
    e.chain()
        .skip(1)
        .for_each(|cause| report.push_str(&format!("caused by: {cause}\n")));
    report
}
/// Gives every element in the tree that lacks an `id` attribute a generated one and
/// marks it with `data-id-added="true"`. Returns `mathml`.
///
/// Generated ids look like `M<ttt><rrrr>-<n>`: `ttt` is the last three base-36 digits
/// of a time value (a random number on wasm targets, milliseconds since the Unix
/// epoch elsewhere), `rrrr` is the last four base-36 digits of a random `u32`, and
/// `n` is a counter that starts at 0 and advances for each id added.
fn add_ids(mathml: Element) -> Element {
    use std::time::SystemTime;

    /// Adds ids to `mathml` and its descendants; returns the next unused counter value.
    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
        let mut next = count;
        if mathml.attribute("id").is_none() {
            mathml.set_attribute_value("id", &format!("{id_prefix}{next}"));
            mathml.set_attribute_value("data-id-added", "true");
            next += 1;
        }
        if crate::xpath_functions::is_leaf(mathml) {
            return next;
        }
        mathml
            .children()
            .into_iter()
            .fold(next, |counter, child| add_ids_to_all(as_element(child), id_prefix, counter))
    }

    /// Returns the last `wanted` characters of `digits`, appending `padding` first
    /// when `digits` is too short.
    fn padded_tail(mut digits: String, wanted: usize, padding: &str) -> String {
        if digits.len() < wanted {
            digits.push_str(padding);
        }
        digits[digits.len() - wanted..].to_string()
    }

    let time = if cfg!(target_family = "wasm") {
        fastrand::usize(..)
    } else {
        SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap()
            .as_millis() as usize
    };
    let time_part = padded_tail(radix_fmt::radix(time, 36).to_string(), 3, "a2c");
    let random_part = padded_tail(radix_fmt::radix(fastrand::u32(..), 36).to_string(), 4, "a1b2");
    let prefix = format!("M{time_part}{random_part}-");

    add_ids_to_all(mathml, &prefix, 0);
    mathml
}
/// Returns the single element child of the package's document root.
///
/// # Panics
/// Panics if the root has more than one element child, or none at all.
pub fn get_element(package: &Package) -> Element<'_> {
    enable_logs();
    let doc = package.as_document();
    let mut result = None;
    for root_child in doc.root().children() {
        // Comments and processing instructions at the root are ignored.
        let ChildOfRoot::Element(e) = root_child else {
            continue;
        };
        assert!(result.is_none());
        result = Some(e);
    }
    result.unwrap()
}
/// Builds the intent tree for `mathml` inside `doc`.
///
/// Reads the speech rule files, cleans up `mathml` with `cleanup_mathml`, and then
/// calls `intent_from_mathml`.
///
/// # Errors
/// Returns an error if the rule files can't be read (this used to panic via
/// `unwrap`), if cleanup fails, or if the intent can't be generated.
#[allow(dead_code)]
pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
    // Propagate rule-loading failures the same way `set_mathml` does instead of panicking.
    crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
    let mathml = cleanup_mathml(mathml)?;
    crate::speech::intent_from_mathml(mathml, doc)
}
/// Trims a whole document: element children of the root are trimmed with
/// `trim_element`; every other root child is removed.
#[allow(dead_code)]
fn trim_doc(doc: &Document) {
    for root_child in doc.root().children() {
        match root_child {
            ChildOfRoot::Element(e) => {
                trim_element(e, false);
            }
            other => {
                doc.root().remove_child(other);
            }
        }
    }
}
pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());
if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
make_leaf_element(e);
return;
}
let mut single_text = "".to_string();
for child in e.children() {
match child {
ChildOfElement::Element(c) => {
trim_element(c, allow_structure_in_leaves);
}
ChildOfElement::Text(t) => {
single_text += t.text();
e.remove_child(child);
}
_ => {
e.remove_child(child);
}
}
}
if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
if !single_text.trim_matches(WHITESPACE).is_empty() {
error!(
"trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
);
}
return;
}
if e.children().is_empty() && !single_text.is_empty() {
e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
}
fn make_leaf_element(mathml_leaf: Element) {
let children = mathml_leaf.children();
if children.is_empty() {
return;
}
if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
return;
}
let mut text = "".to_string();
for child in children {
let child_text = match child {
ChildOfElement::Element(child) => {
if name(child) == "mglyph" {
child.attribute_value("alt").unwrap_or("").to_string()
} else {
gather_text(child)
}
}
ChildOfElement::Text(t) => {
t.text().to_string()
}
_ => "".to_string(),
};
if !child_text.is_empty() {
text += &child_text;
}
}
mathml_leaf.clear_children();
mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
fn gather_text(html: Element) -> String {
let mut text = "".to_string(); for child in html.children() {
match child {
ChildOfElement::Element(child) => {
text += &gather_text(child);
}
ChildOfElement::Text(t) => text += t.text(),
_ => (),
}
}
return text;
}
}
fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
let mut needs_rewrite = false;
for child in mathml_leaf.children() {
if let Some(element) = child.element() {
if name(element) != "math" {
return false; }
needs_rewrite = true;
}
};
if !needs_rewrite {
return false;
}
let leaf_name = name(mathml_leaf);
let doc = mathml_leaf.document();
let mut new_children = Vec::new();
let mut is_last_mtext = false;
for child in mathml_leaf.children() {
if let Some(element) = child.element() {
trim_element(element, true);
new_children.append(&mut element.children()); is_last_mtext = false;
} else if let Some(text) = child.text() {
if is_last_mtext {
let last_child = new_children.last_mut().unwrap().element().unwrap();
let new_text = as_text(last_child).to_string() + text.text();
last_child.set_text(&new_text);
} else {
let new_leaf_node = create_mathml_element(&doc, leaf_name);
new_leaf_node.set_text(text.text());
new_children.push(ChildOfElement::Element(new_leaf_node));
is_last_mtext = true;
}
}
};
for child in &mut new_children {
if let Some(element) = child.element() && is_leaf(element) {
let text = as_text(element);
let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
element.set_text(&cleaned_text);
}
}
crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
mathml_leaf.clear_children();
mathml_leaf.append_children(new_children);
return true;
}
}
/// Checks that two documents have equivalent root children.
///
/// The roots must have the same number of children; children are compared pairwise
/// in order: elements with `is_same_element` (no ignored attributes), comments by
/// text, processing instructions by target and value.
///
/// # Errors
/// Returns an error describing the first difference found.
#[allow(dead_code)]
fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
    if doc1.root().children().len() != doc2.root().children().len() {
        bail!(
            "Children of docs have {} != {} children",
            doc1.root().children().len(),
            doc2.root().children().len()
        );
    }

    let children1 = doc1.root().children();
    let children2 = doc2.root().children();
    for (i, pair) in children1.iter().zip(children2.iter()).enumerate() {
        match pair {
            (ChildOfRoot::Element(e1), ChildOfRoot::Element(e2)) => {
                is_same_element(*e1, *e2, &[])?;
            }
            (ChildOfRoot::Element(_), _) => {
                bail!("child #{}, first is element, second is something else", i);
            }
            (ChildOfRoot::Comment(com1), ChildOfRoot::Comment(com2)) => {
                if com1.text() != com2.text() {
                    bail!("child #{} -- comment text differs", i);
                }
            }
            (ChildOfRoot::Comment(_), _) => {
                bail!("child #{}, first is comment, second is something else", i);
            }
            (ChildOfRoot::ProcessingInstruction(p1), ChildOfRoot::ProcessingInstruction(p2)) => {
                if p1.target() != p2.target() || p1.value() != p2.value() {
                    bail!("child #{} -- processing instruction differs", i);
                }
            }
            (ChildOfRoot::ProcessingInstruction(_), _) => {
                bail!(
                    "child #{}, first is processing instruction, second is something else",
                    i
                );
            }
        }
    }
    Ok(())
}
#[allow(dead_code)]
pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
enable_logs();
if name(e1) != name(e2) {
bail!("Names not the same: {}, {}", name(e1), name(e2));
}
if e1.children().len() != e2.children().len() {
bail!(
"Children of {} have {} != {} children",
name(e1),
e1.children().len(),
e2.children().len()
);
}
if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
bail!("In element {}, {}", name(e1), e);
}
for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
match c1 {
ChildOfElement::Element(child1) => {
if let ChildOfElement::Element(child2) = c2 {
is_same_element(*child1, *child2, ignore_attrs)?;
} else {
bail!("{} child #{}, first is element, second is something else", name(e1), i);
}
}
ChildOfElement::Comment(com1) => {
if let ChildOfElement::Comment(com2) = c2 {
if com1.text() != com2.text() {
bail!("{} child #{} -- comment text differs", name(e1), i);
}
} else {
bail!("{} child #{}, first is comment, second is something else", name(e1), i);
}
}
ChildOfElement::ProcessingInstruction(p1) => {
if let ChildOfElement::ProcessingInstruction(p2) = c2 {
if p1.target() != p2.target() || p1.value() != p2.value() {
bail!("{} child #{} -- processing instruction differs", name(e1), i);
}
} else {
bail!(
"{} child #{}, first is processing instruction, second is something else",
name(e1),
i
);
}
}
ChildOfElement::Text(t1) => {
if let ChildOfElement::Text(t2) = c2 {
if t1.text() != t2.text() {
bail!("{} child #{} -- text differs", name(e1), i);
}
} else {
bail!("{} child #{}, first is text, second is something else", name(e1), i);
}
}
}
}
return Ok(());
fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
let attrs1 = attrs1.iter()
.filter(|a| !ignore.contains(&a.name().local_part())).cloned()
.collect::<Vec<Attribute>>();
let attrs2 = attrs2.iter()
.filter(|a| !ignore.contains(&a.name().local_part())).cloned()
.collect::<Vec<Attribute>>();
if attrs1.len() != attrs2.len() {
bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
}
for attr1 in attrs1 {
if let Some(found_attr2) = attrs2
.iter()
.find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
{
if attr1.value() == found_attr2.value() {
continue;
} else {
bail!(
"Attribute named {} has differing values:\n '{}'\n '{}'",
attr1.name().local_part(),
attr1.value(),
found_attr2.value()
);
}
} else {
bail!(
"Attribute name {} not in [{}]",
print_attr(&attr1),
print_attrs(&attrs2)
);
}
}
return Ok(());
fn print_attr(attr: &Attribute) -> String {
return format!("@{}='{}'", attr.name().local_part(), attr.value());
}
fn print_attrs(attrs: &[Attribute]) -> String {
return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
}
}
}
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses and trims both strings, then panics with the difference if the
    /// resulting documents are not the same; returns `true` otherwise.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let test_package = &parser::parse(test).expect("Failed to parse input");
        let test_doc = test_package.as_document();
        trim_doc(&test_doc);
        debug!("test:\n{}", mml_to_string(get_element(test_package)));

        let target_package = &parser::parse(target).expect("Failed to parse input");
        let target_doc = target_package.as_document();
        trim_doc(&target_doc);
        debug!("target:\n{}", mml_to_string(get_element(target_package)));

        match is_same_doc(&test_doc, &target_doc) {
            Ok(_) => true,
            Err(e) => panic!("{}", e),
        }
    }

    #[test]
    fn trim_same() {
        let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
    }

    #[test]
    fn trim_whitespace() {
        let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
    }

    #[test]
    fn no_trim_whitespace_nbsp() {
        // The no-break spaces are written as escapes so they are visible in the source
        // and can't silently turn into ordinary spaces.
        let trimmed_str = "<math><mrow><mo>-</mo><mtext>\u{00A0}\u{00A0}a\u{00A0}</mtext></mrow></math>";
        let whitespace_str = "<math> <mrow ><mo>-</mo><mtext>\u{00A0}\u{00A0}a\u{00A0}</mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
    }

    #[test]
    fn trim_comment() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(comment_str, whitespace_str));
    }

    #[test]
    fn replace_mglyph() {
        let mglyph_str = "<math>
<mrow>
<mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
<mo>+</mo>
<mi>
<mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
</mi>
<mo>=</mo>
<mi>
<mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
</mi>
</mrow>
</math>";
        let result_str = "<math>
<mrow>
<mi>X23braid</mi>
<mo>+</mo>
<mi>132braidY</mi>
<mo>=</mo>
<mi>13braid</mi>
</mrow>
</math>";
        assert!(are_parsed_strs_equal(mglyph_str, result_str));
    }

    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        let package1 = &parser::parse(whitespace_str).expect("Failed to parse input");
        let doc1 = package1.as_document();
        trim_doc(&doc1);
        debug!("doc1:\n{}", mml_to_string(get_element(package1)));

        let package2 = parser::parse(different_str).expect("Failed to parse input");
        let doc2 = package2.as_document();
        trim_doc(&doc2);
        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));

        assert!(is_same_doc(&doc1, &doc2).is_err());
    }

    #[test]
    fn test_entities() {
        // Forces initialization of the rules.
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // Named entities must give the same result as the equivalent numeric references.
        let entity_str = set_mathml("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>").unwrap();
        let converted_str =
            set_mathml("<math><mrow><mo>&#x2212;</mo><mi>&#x1D55E;</mi></mrow></math>").unwrap();

        // Generated ids differ between calls, so strip them before comparing.
        static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
        assert_eq!(entity_str, converted_str, "normal entity test failed");

        let entity_str = set_mathml(
            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
        )
        .unwrap();
        let converted_str =
            set_mathml("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>").unwrap();
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
        assert_eq!(entity_str, converted_str, "special entities quote test failed");

        let entity_str =
            set_mathml("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>").unwrap();
        let converted_str =
            set_mathml("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>")
                .unwrap();
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
        assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
    }

    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        use std::env;
        // SAFETY: NOTE(review) -- `set_var` is unsafe because of concurrent environment
        // access; this assumes no other thread touches the environment during the test.
        unsafe { env::set_var("MathCATRulesDir", "MathCATRulesDir"); }
        assert!(set_rules_dir("someInvalidRulesDir").is_err());
        assert!(
            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );
        assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
    }

    #[test]
    fn single_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn multiple_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn nested_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn empty_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn mathml_in_mtext() {
        let test = "<math><mtext>if&#xa0;<math> <msup><mi>n</mi><mn>2</mn></msup></math>&#xa0;is real</mtext></math>";
        let target = "<math><mrow><mtext>if&#xa0;</mtext><msup><mi>n</mi><mn>2</mn></msup><mtext>&#xa0;is real</mtext></mrow></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn stack_overflow_protection() {
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        // Nest one level deeper than the limit.
        let mut bad_mathml = String::from("<math>");
        for _ in 0..MAX_DEPTH+1 {
            bad_mathml.push_str("<msqrt><mi>n</mi>");
        }
        for _ in 0..MAX_DEPTH+1 {
            bad_mathml.push_str("</msqrt>");
        }
        bad_mathml.push_str("</math>");
        assert_eq!(set_mathml(bad_mathml).unwrap_err().to_string(), "MathML is too deeply nested to process");
    }

    #[test]
    fn old_mathml_cleared_on_error() {
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        let good_mathml = "<math><mn>3</mn></math>";

        // An unknown entity must clear the previously stored MathML.
        set_mathml(good_mathml).unwrap();
        let bad_mathml = "<math><mi>&xabc;</mi></math>";
        assert!(set_mathml(bad_mathml).is_err());
        assert!(get_spoken_text().unwrap() == "");

        // So must a parse failure.
        set_mathml(good_mathml).unwrap();
        let bad_mathml = "<math>garbage";
        assert!(set_mathml(bad_mathml).is_err());
        assert!(get_spoken_text().unwrap() == "");
    }
}