1#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6use std::sync::LazyLock;
7
8use crate::canonicalize::{as_text, create_mathml_element};
9use crate::errors::*;
10use phf::phf_map;
11use regex::{Captures, Regex};
12use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
13use sxd_document::parser;
14use sxd_document::Package;
15
16use crate::canonicalize::{as_element, name};
17use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18use log::{debug, error};
19
20use crate::navigate::*;
21use crate::pretty_print::mml_to_string;
22use crate::xpath_functions::{is_leaf, IsNode};
23use std::panic::{catch_unwind, AssertUnwindSafe};
24
25pub const MAX_DEPTH: usize = 512;
27
#[cfg(feature = "enable-logs")]
use std::sync::Once;
#[cfg(feature = "enable-logs")]
static INIT: Once = Once::new();

/// Installs the platform logger, at most once per process.
///
/// Without the `enable-logs` feature this function has an empty body. With the feature on,
/// the only logger set up here is the Android one (tag `MathCat`, max level `Trace`).
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            let config = android_logger::Config::default()
                .with_max_level(log::LevelFilter::Trace)
                .with_tag("MathCat");
            android_logger::init_once(config);
            log::trace!("Activated Android logger!");
        }
    });
}
50
thread_local! {
    /// Details of the most recent panic on this thread, stored as `(message, file, line)`.
    /// Filled in by the hook installed in `init_panic_handler` and drained by `report_any_panic`.
    static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
}

/// Installs a process-wide panic hook that records the panic message and source location
/// in the thread-local `PANIC_INFO` slot.
///
/// The file and line are stored in their own tuple fields so that a consumer formatting the
/// tuple as `{file}:{line}` gets a correct location. If the panic carries no location, the
/// file is recorded as `"unknown"` and the line as `0`.
pub fn init_panic_handler() {
    use std::panic;

    panic::set_hook(Box::new(|info| {
        let (file, line) = info
            .location()
            .map(|l| (l.file().to_string(), l.line()))
            .unwrap_or_else(|| ("unknown".to_string(), 0));

        // Panic payloads are normally either a `&'static str` or a `String`.
        let payload = info.payload();
        let msg = payload
            .downcast_ref::<&'static str>()
            .map(|s| s.to_string())
            .or_else(|| payload.downcast_ref::<String>().cloned())
            .unwrap_or_else(|| "Unknown panic payload".to_string());

        // `try_with` / `try_borrow_mut` are used so the hook itself can never panic, e.g. when
        // the thread-local is being destroyed or the slot is already borrowed.
        let _ = PANIC_INFO.try_with(|cell| {
            if let Ok(mut slot) = cell.try_borrow_mut() {
                *slot = Some((msg, file, line));
            }
        });
    }));
}
83
84pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
85 match result {
86 Ok(val) => val,
87 Err(_) => {
88 let details = PANIC_INFO.with(|cell| cell.borrow_mut().take());
90
91 if let Some((msg, file, line)) = details {
92 Err(anyhow::anyhow!(
93 "MathCAT crash! Please report the following information: '{}' at {}:{}",
94 msg, file, line
95 ))
96 } else {
97 Err(anyhow::anyhow!("MathCAT crash! -- please report"))
98 }
99 }
100 }
101}
102
103fn cleanup_mathml(mathml: Element) -> Result<Element> {
105 trim_element(mathml, false);
106 let mathml = crate::canonicalize::canonicalize(mathml)?;
107 let mathml = add_ids(mathml);
108 return Ok(mathml);
109}
110
111thread_local! {
112 pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
114}
115
116fn init_mathml_instance() -> RefCell<Package> {
117 let package = parser::parse("<math></math>")
118 .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
119 return RefCell::new(package);
120}
121
122pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
125 enable_logs();
126 init_panic_handler();
127 let dir = dir.as_ref().to_string();
128 let result = catch_unwind(AssertUnwindSafe(|| {
129 use std::path::PathBuf;
130 let dir_os = if dir.is_empty() {
131 std::env::var_os("MathCATRulesDir").unwrap_or_default()
132 } else {
133 std::ffi::OsString::from(&dir)
134 };
135 let pref_manager = crate::prefs::PreferenceManager::get();
136 pref_manager.borrow_mut().initialize(PathBuf::from(dir_os))
137 }));
138 return report_any_panic(result);
139}
140
141pub fn get_version() -> String {
143 enable_logs();
144 const VERSION: &str = env!("CARGO_PKG_VERSION");
145 return VERSION.to_string();
146}
147
148pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
152 enable_logs();
153 static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
155 static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
156
157 static PROCESSING_INSTRUCTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"<\?[\s\S]{1,2048}\?>"#).unwrap());
159 static XML_COMMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(?s)"#).unwrap());
160
161 static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]{1,32}"#).unwrap());
163 static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]{1,32}:"#).unwrap());
164 static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]{2,10});"#).unwrap());
165 let result = catch_unwind(AssertUnwindSafe(|| {
166 NAVIGATION_STATE.with(|nav_stack| {
167 nav_stack.borrow_mut().reset();
168 });
169
170 crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
173
174 let mathml_str = mathml_str.as_ref();
175 if mathml_str.len() > 1024 * 1024 {
177 bail!("MathML string of size {} bytes exceeds length limit of 1MB", mathml_str.len());
178 }
179
180 return MATHML_INSTANCE.with(|old_package| {
181 static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
182
183 let mut error_message = "".to_string(); let mathml_str = XML_COMMENT.replace_all(mathml_str, "");
186 let mathml_str = PROCESSING_INSTRUCTION.replace_all(&mathml_str, "");
187 let mathml_str = HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
189 None => {
190 error_message = format!("No entity named '{}'", &cap[0]);
191 cap[0].to_string()
192 }
193 Some(&ch) => ch.to_string(),
194 });
195
196 if !error_message.is_empty() {
197 old_package.replace(parser::parse("<math></math>").unwrap());
199 bail!(error_message);
200 }
201 let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
202 let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
203
204 let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
209
210 let new_package = parser::parse(&mathml_str);
211 if let Err(e) = new_package {
212 old_package.replace(parser::parse("<math></math>").unwrap());
214 bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
215 }
216
217 let new_package = new_package.unwrap();
218 let mathml = get_element(&new_package);
219 let mathml = cleanup_mathml(mathml)?;
220 let mathml_string = mml_to_string(mathml);
221 old_package.replace(new_package);
222
223 return Ok(mathml_string);
224 });
225 }));
226
227 return report_any_panic(result);
228}
229
230pub fn get_spoken_text() -> Result<String> {
233 enable_logs();
234 let result = catch_unwind(AssertUnwindSafe(|| {
235 MATHML_INSTANCE.with(|package_instance| {
236 let package_instance = package_instance.borrow();
237 let mathml = get_element(&package_instance);
238 let new_package = Package::new();
239 let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
240 debug!("Intent tree:\n{}", mml_to_string(intent));
241 let speech = crate::speech::speak_mathml(intent, "", 0)?;
242 return Ok(speech);
243 })
244 }));
245 return report_any_panic(result);
246}
247
248pub fn get_overview_text() -> Result<String> {
252 enable_logs();
253 let result = catch_unwind(AssertUnwindSafe(|| {
254 MATHML_INSTANCE.with(|package_instance| {
255 let package_instance = package_instance.borrow();
256 let mathml = get_element(&package_instance);
257 let speech = crate::speech::overview_mathml(mathml, "", 0)?;
258 return Ok(speech);
259 })
260 }));
261 return report_any_panic(result);
262}
263
264pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
267 enable_logs();
268 let name = name.as_ref().to_string();
269 let result = catch_unwind(AssertUnwindSafe(|| {
270 use crate::prefs::NO_PREFERENCE;
271 crate::speech::SPEECH_RULES.with(|rules| {
272 let rules = rules.borrow();
273 let pref_manager = rules.pref_manager.borrow();
274 let mut value = pref_manager.pref_to_string(&name);
275 if value == NO_PREFERENCE {
276 value = pref_manager.pref_to_string(&name);
277 }
278 if value == NO_PREFERENCE {
279 bail!("No preference named '{}'", name);
280 } else {
281 return Ok(value);
282 }
283 })
284 }));
285 return report_any_panic(result);
286}
287
288pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
309 enable_logs();
310 let name = name.as_ref().to_string();
311 let value = value.as_ref().to_string();
312 let result = catch_unwind(AssertUnwindSafe(|| {
313 set_preference_impl(&name, &value)
314 }));
315 return report_any_panic(result);
316}
317
318fn set_preference_impl(name: &str, value: &str) -> Result<()> {
319 let mut value = value.to_string();
320 if name == "Language" || name == "LanguageAuto" {
321 if value != "Auto" {
323 let mut lang_country_split = value.split('-');
325 let language = lang_country_split.next().unwrap_or("");
326 let country = lang_country_split.next().unwrap_or("");
327 if language.len() != 2 {
328 bail!(
329 "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
330 value
331 );
332 }
333 let mut new_lang_country = language.to_string(); if !country.is_empty() {
335 new_lang_country.push('-');
336 new_lang_country.push_str(country);
337 }
338 value = new_lang_country;
339 }
340 if name == "LanguageAuto" && value == "Auto" {
341 bail!("'LanguageAuto' can not have the value 'Auto'");
342 }
343 }
344
345 crate::speech::SPEECH_RULES.with(|rules| -> Result<()> {
346 if let Some(error_string) = rules.borrow().get_error() {
347 bail!("{}", error_string);
348 }
349 Ok(())
350 })?;
351
352 let pref_manager = crate::prefs::PreferenceManager::get();
354 let mut pref_manager = pref_manager.borrow_mut();
355 if name == "LanguageAuto" {
356 let language_pref = pref_manager.pref_to_string("Language");
357 if language_pref != "Auto" {
358 bail!(
359 "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
360 language_pref
361 );
362 }
363 }
364 let lower_case_value = value.to_lowercase();
365 if lower_case_value == "true" || lower_case_value == "false" {
366 pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
367 } else {
368 match name {
369 "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
370 pref_manager.set_api_float_pref(name, to_float(name, &value)?)
371 }
372 _ => {
373 pref_manager.set_string_pref(name, &value)?;
374 }
375 }
376 };
377
378 return Ok(());
379}
380
381fn to_float(name: &str, value: &str) -> Result<f64> {
382 return match value.parse::<f64>() {
383 Ok(val) => Ok(val),
384 Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
385 };
386}
387
388pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
392 enable_logs();
393 let nav_node_id = nav_node_id.as_ref().to_string();
394 let result = catch_unwind(AssertUnwindSafe(|| {
395 MATHML_INSTANCE.with(|package_instance| {
396 let package_instance = package_instance.borrow();
397 let mathml = get_element(&package_instance);
398 let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
399 return Ok(braille);
400 })
401 }));
402 return report_any_panic(result);
403}
404
405pub fn get_navigation_braille() -> Result<String> {
409 enable_logs();
410 let result = catch_unwind(AssertUnwindSafe(|| {
411 MATHML_INSTANCE.with(|package_instance| {
412 let package_instance = package_instance.borrow();
413 let mathml = get_element(&package_instance);
414 let new_package = Package::new(); let new_doc = new_package.as_document();
416 let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
417 return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
418 Err(e) => Err(e),
419 Ok((found, offset)) => {
420 if offset == 0 {
423 if name(found) == "math" {
424 Ok(found)
425 } else {
426 let new_mathml = create_mathml_element(&new_doc, "math");
427 new_mathml.append_child(copy_mathml(found));
428 new_doc.root().append_child(new_mathml);
429 Ok(new_mathml)
430 }
431 } else if !is_leaf(found) {
432 bail!(
433 "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
434 offset,
435 name(found)
436 );
437 } else if let Some(ch) = as_text(found).chars().nth(offset) {
438 let internal_mathml = create_mathml_element(&new_doc, name(found));
439 internal_mathml.set_text(&ch.to_string());
440 let new_mathml = create_mathml_element(&new_doc, "math");
441 new_mathml.append_child(internal_mathml);
442 new_doc.root().append_child(new_mathml);
443 Ok(new_mathml)
444 } else {
445 bail!(
446 "Internal error: offset '{}' on leaf element '{}' doesn't exist",
447 offset,
448 mml_to_string(found)
449 );
450 }
451 }
452 };
453 })?;
454
455 let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
456 return Ok(braille);
457 })
458 }));
459 return report_any_panic(result);
460}
461
462pub fn do_navigate_keypress(
466 key: usize,
467 shift_key: bool,
468 control_key: bool,
469 alt_key: bool,
470 meta_key: bool,
471) -> Result<String> {
472 enable_logs();
473 let result = catch_unwind(AssertUnwindSafe(|| {
474 MATHML_INSTANCE.with(|package_instance| {
475 let package_instance = package_instance.borrow();
476 let mathml = get_element(&package_instance);
477 return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
478 })
479 }));
480 return report_any_panic(result);
481}
482
483pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
517 enable_logs();
518 let command = command.as_ref().to_string();
519 let result = catch_unwind(AssertUnwindSafe(|| {
520 let cmd = NAV_COMMANDS.get_key(&command); if cmd.is_none() {
522 bail!("Unknown command in call to DoNavigateCommand()");
523 };
524 let cmd = *cmd.unwrap();
525 MATHML_INSTANCE.with(|package_instance| {
526 let package_instance = package_instance.borrow();
527 let mathml = get_element(&package_instance);
528 return do_navigate_command_string(mathml, cmd);
529 })
530 }));
531 return report_any_panic(result);
532}
533
534pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
537 enable_logs();
538 let id = id.as_ref().to_string();
539 let result = catch_unwind(AssertUnwindSafe(|| {
540 MATHML_INSTANCE.with(|package_instance| {
541 let package_instance = package_instance.borrow();
542 let mathml = get_element(&package_instance);
543 return set_navigation_node_from_id(mathml, &id, offset);
544 })
545 }));
546 return report_any_panic(result);
547}
548
549pub fn get_navigation_mathml() -> Result<(String, usize)> {
552 enable_logs();
553 let result = catch_unwind(AssertUnwindSafe(|| {
554 MATHML_INSTANCE.with(|package_instance| {
555 let package_instance = package_instance.borrow();
556 let mathml = get_element(&package_instance);
557 return NAVIGATION_STATE.with(|nav_stack| {
558 return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
559 Err(e) => Err(e),
560 Ok((found, offset)) => Ok((mml_to_string(found), offset)),
561 };
562 });
563 })
564 }));
565 return report_any_panic(result);
566}
567
568pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
572 enable_logs();
573 let result = catch_unwind(AssertUnwindSafe(|| {
574 MATHML_INSTANCE.with(|package_instance| {
575 let package_instance = package_instance.borrow();
576 let mathml = get_element(&package_instance);
577 return Ok(NAVIGATION_STATE.with(|nav_stack| {
578 return nav_stack.borrow().get_navigation_mathml_id(mathml);
579 }));
580 })
581 }));
582 return report_any_panic(result);
583}
584
585pub fn get_braille_position() -> Result<(usize, usize)> {
587 enable_logs();
588 let result = catch_unwind(AssertUnwindSafe(|| {
589 MATHML_INSTANCE.with(|package_instance| {
590 let package_instance = package_instance.borrow();
591 let mathml = get_element(&package_instance);
592 let nav_node = get_navigation_mathml_id()?;
593 let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
594 return Ok((start, end));
595 })
596 }));
597 return report_any_panic(result);
598}
599
600pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
603 enable_logs();
604 let result = catch_unwind(AssertUnwindSafe(|| {
605 MATHML_INSTANCE.with(|package_instance| {
606 let package_instance = package_instance.borrow();
607 let mathml = get_element(&package_instance);
608 return crate::braille::get_navigation_node_from_braille_position(mathml, position);
609 })
610 }));
611 return report_any_panic(result);
612}
613
614pub fn get_supported_braille_codes() -> Result<Vec<String>> {
615 enable_logs();
616 let result = catch_unwind(AssertUnwindSafe(|| {
617 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
618 let braille_dir = rules_dir.join("Braille");
619 let mut braille_code_paths = Vec::new();
620
621 find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
622 let mut braille_code_paths = braille_code_paths.iter()
623 .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
624 .filter(|string_path| !string_path.is_empty() )
625 .collect::<Vec<String>>();
626 braille_code_paths.sort();
627
628 Ok(braille_code_paths)
629 }));
630 return report_any_panic(result);
631 }
632
633pub fn get_supported_languages() -> Result<Vec<String>> {
635 enable_logs();
636 let result = catch_unwind(AssertUnwindSafe(|| {
637 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
638 let lang_dir = rules_dir.join("Languages");
639 let mut lang_paths = Vec::new();
640
641 find_all_dirs_shim(&lang_dir, &mut lang_paths);
642 let mut language_paths = lang_paths.iter()
643 .map(|path| path.strip_prefix(&lang_dir).unwrap()
644 .to_string_lossy()
645 .replace(std::path::MAIN_SEPARATOR, "-")
646 .to_string())
647 .filter(|string_path| !string_path.is_empty() )
648 .collect::<Vec<String>>();
649
650 language_paths.retain(|s| !s.starts_with("zz"));
652 language_paths.sort();
653 Ok(language_paths)
654 }));
655 return report_any_panic(result);
656 }
657
658 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
659 enable_logs();
660 let lang = lang.as_ref().to_string();
661 let result = catch_unwind(AssertUnwindSafe(|| {
662 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
663 let lang_dir = rules_dir.join("Languages").join(&lang);
664 let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
665 for file_name in &mut speech_styles {
666 file_name.truncate(file_name.len() - "_Rules.yaml".len())
667 }
668 speech_styles.sort();
669 speech_styles.dedup(); Ok(speech_styles)
671 }));
672 return report_any_panic(result);
673 }
674
675pub fn copy_mathml(mathml: Element) -> Element {
681 return copy_mathml_recursive(mathml, 0);
682}
683
684fn copy_mathml_recursive(mathml: Element, depth: usize) -> Element {
685 if depth > MAX_DEPTH {
687 return create_mathml_element(&mathml.document(), name(mathml));
689 }
690
691 let children = mathml.children();
693 let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
694 mathml.attributes().iter().for_each(|attr| {
695 new_mathml.set_attribute_value(attr.name(), attr.value());
696 });
697
698 if children.len() == 1 &&
700 let Some(text) = children[0].text() {
701 new_mathml.set_text(text.text());
702 return new_mathml;
703 }
704
705 let mut new_children = Vec::with_capacity(children.len());
706 for child in children {
707 let child = as_element(child);
708 let new_child = copy_mathml_recursive(child, depth + 1);
709 new_children.push(new_child);
710 }
711 new_mathml.append_children(new_children);
712 return new_mathml;
713}
714
715pub fn errors_to_string(e: &Error) -> String {
716 enable_logs();
717 let mut result = format!("{e}\n");
718 for cause in e.chain().skip(1) { result += &format!("caused by: {cause}\n");
720 }
721 result
722}
723
724fn add_ids(mathml: Element) -> Element {
725 use std::time::SystemTime;
726 let time = if cfg!(target_family = "wasm") {
727 fastrand::usize(..)
728 } else {
729 SystemTime::now()
730 .duration_since(SystemTime::UNIX_EPOCH)
731 .unwrap()
732 .as_millis() as usize
733 };
734 let mut time_part = radix_fmt::radix(time, 36).to_string();
735 if time_part.len() < 3 {
736 time_part.push_str("a2c"); }
738 let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
739 if random_part.len() < 4 {
740 random_part.push_str("a1b2"); }
742 let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; add_ids_to_all(mathml, &prefix, 0);
744 return mathml;
745
746 fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
747 let mut count = count;
748 if mathml.attribute("id").is_none() {
749 mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
750 mathml.set_attribute_value("data-id-added", "true");
751 count += 1;
752 };
753
754 if crate::xpath_functions::is_leaf(mathml) {
755 return count;
756 }
757
758 for child in mathml.children() {
759 let child = as_element(child);
760 count = add_ids_to_all(child, id_prefix, count);
761 }
762 return count;
763 }
764}
765
766pub fn get_element(package: &Package) -> Element<'_> {
767 enable_logs();
768 let doc = package.as_document();
769 let mut result = None;
770 for root_child in doc.root().children() {
771 if let ChildOfRoot::Element(e) = root_child {
772 assert!(result.is_none());
773 result = Some(e);
774 }
775 }
776 return result.unwrap();
777}
778
779#[allow(dead_code)]
782pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
783 crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files().unwrap());
784 let mathml = cleanup_mathml(mathml)?;
785 return crate::speech::intent_from_mathml(mathml, doc);
786}
787
788#[allow(dead_code)]
789fn trim_doc(doc: &Document) {
790 for root_child in doc.root().children() {
791 if let ChildOfRoot::Element(e) = root_child {
792 trim_element(e, false);
793 } else {
794 doc.root().remove_child(root_child); }
796 }
797}
798
799pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
801 const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
806 static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());
807
808 if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
809 make_leaf_element(e);
811 return;
812 }
813
814 let mut single_text = "".to_string();
815 for child in e.children() {
816 match child {
817 ChildOfElement::Element(c) => {
818 trim_element(c, allow_structure_in_leaves);
819 }
820 ChildOfElement::Text(t) => {
821 single_text += t.text();
822 e.remove_child(child);
823 }
824 _ => {
825 e.remove_child(child);
826 }
827 }
828 }
829
830 if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
832 if !single_text.trim_matches(WHITESPACE).is_empty() {
836 error!(
837 "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
838 );
839 }
840 return;
841 }
842 if e.children().is_empty() && !single_text.is_empty() {
843 e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
845 }
846
847 fn make_leaf_element(mathml_leaf: Element) {
848 let children = mathml_leaf.children();
853 if children.is_empty() {
854 return;
855 }
856
857 if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
858 return;
859 }
860
861 let mut text = "".to_string();
863 for child in children {
864 let child_text = match child {
865 ChildOfElement::Element(child) => {
866 if name(child) == "mglyph" {
867 child.attribute_value("alt").unwrap_or("").to_string()
868 } else {
869 gather_text(child)
870 }
871 }
872 ChildOfElement::Text(t) => {
873 t.text().to_string()
875 }
876 _ => "".to_string(),
877 };
878 if !child_text.is_empty() {
879 text += &child_text;
880 }
881 }
882
883 mathml_leaf.clear_children();
885 mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
886 fn gather_text(html: Element) -> String {
890 let mut text = "".to_string(); for child in html.children() {
892 match child {
893 ChildOfElement::Element(child) => {
894 text += &gather_text(child);
895 }
896 ChildOfElement::Text(t) => text += t.text(),
897 _ => (),
898 }
899 }
900 return text;
902 }
903 }
904
905 fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
906 let mut needs_rewrite = false;
909 for child in mathml_leaf.children() {
910 if let Some(element) = child.element() {
911 if name(element) != "math" {
912 return false; }
914 needs_rewrite = true;
915 }
916 };
917
918 if !needs_rewrite {
919 return false;
920 }
921
922 let leaf_name = name(mathml_leaf);
924 let doc = mathml_leaf.document();
925 let mut new_children = Vec::new();
926 let mut is_last_mtext = false;
927 for child in mathml_leaf.children() {
928 if let Some(element) = child.element() {
929 trim_element(element, true);
930 new_children.append(&mut element.children()); is_last_mtext = false;
932 } else if let Some(text) = child.text() {
933 if is_last_mtext {
935 let last_child = new_children.last_mut().unwrap().element().unwrap();
936 let new_text = as_text(last_child).to_string() + text.text();
937 last_child.set_text(&new_text);
938 } else {
939 let new_leaf_node = create_mathml_element(&doc, leaf_name);
940 new_leaf_node.set_text(text.text());
941 new_children.push(ChildOfElement::Element(new_leaf_node));
942 is_last_mtext = true;
943 }
944 }
945 };
946
947 for child in &mut new_children {
949 if let Some(element) = child.element() && is_leaf(element) {
950 let text = as_text(element);
951 let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
952 element.set_text(&cleaned_text);
953 }
954 }
955
956 crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
957 mathml_leaf.clear_children();
958 mathml_leaf.append_children(new_children);
959
960 return true;
962 }
963}
964
965#[allow(dead_code)]
968fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
969 if doc1.root().children().len() != doc2.root().children().len() {
972 bail!(
973 "Children of docs have {} != {} children",
974 doc1.root().children().len(),
975 doc2.root().children().len()
976 );
977 }
978
979 for (i, (c1, c2)) in doc1
980 .root()
981 .children()
982 .iter()
983 .zip(doc2.root().children().iter())
984 .enumerate()
985 {
986 match c1 {
987 ChildOfRoot::Element(e1) => {
988 if let ChildOfRoot::Element(e2) = c2 {
989 is_same_element(*e1, *e2, &[])?;
990 } else {
991 bail!("child #{}, first is element, second is something else", i);
992 }
993 }
994 ChildOfRoot::Comment(com1) => {
995 if let ChildOfRoot::Comment(com2) = c2 {
996 if com1.text() != com2.text() {
997 bail!("child #{} -- comment text differs", i);
998 }
999 } else {
1000 bail!("child #{}, first is comment, second is something else", i);
1001 }
1002 }
1003 ChildOfRoot::ProcessingInstruction(p1) => {
1004 if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
1005 if p1.target() != p2.target() || p1.value() != p2.value() {
1006 bail!("child #{} -- processing instruction differs", i);
1007 }
1008 } else {
1009 bail!(
1010 "child #{}, first is processing instruction, second is something else",
1011 i
1012 );
1013 }
1014 }
1015 }
1016 }
1017 return Ok(());
1018}
1019
1020#[allow(dead_code)]
1023pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
1024 enable_logs();
1025 if name(e1) != name(e2) {
1026 bail!("Names not the same: {}, {}", name(e1), name(e2));
1027 }
1028
1029 if e1.children().len() != e2.children().len() {
1032 bail!(
1033 "Children of {} have {} != {} children",
1034 name(e1),
1035 e1.children().len(),
1036 e2.children().len()
1037 );
1038 }
1039
1040 if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
1041 bail!("In element {}, {}", name(e1), e);
1042 }
1043
1044 for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
1045 match c1 {
1046 ChildOfElement::Element(child1) => {
1047 if let ChildOfElement::Element(child2) = c2 {
1048 is_same_element(*child1, *child2, ignore_attrs)?;
1049 } else {
1050 bail!("{} child #{}, first is element, second is something else", name(e1), i);
1051 }
1052 }
1053 ChildOfElement::Comment(com1) => {
1054 if let ChildOfElement::Comment(com2) = c2 {
1055 if com1.text() != com2.text() {
1056 bail!("{} child #{} -- comment text differs", name(e1), i);
1057 }
1058 } else {
1059 bail!("{} child #{}, first is comment, second is something else", name(e1), i);
1060 }
1061 }
1062 ChildOfElement::ProcessingInstruction(p1) => {
1063 if let ChildOfElement::ProcessingInstruction(p2) = c2 {
1064 if p1.target() != p2.target() || p1.value() != p2.value() {
1065 bail!("{} child #{} -- processing instruction differs", name(e1), i);
1066 }
1067 } else {
1068 bail!(
1069 "{} child #{}, first is processing instruction, second is something else",
1070 name(e1),
1071 i
1072 );
1073 }
1074 }
1075 ChildOfElement::Text(t1) => {
1076 if let ChildOfElement::Text(t2) = c2 {
1077 if t1.text() != t2.text() {
1078 bail!("{} child #{} -- text differs", name(e1), i);
1079 }
1080 } else {
1081 bail!("{} child #{}, first is text, second is something else", name(e1), i);
1082 }
1083 }
1084 }
1085 }
1086 return Ok(());
1087
1088 fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
1090 let attrs1 = attrs1.iter()
1091 .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1092 .collect::<Vec<Attribute>>();
1093 let attrs2 = attrs2.iter()
1094 .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1095 .collect::<Vec<Attribute>>();
1096 if attrs1.len() != attrs2.len() {
1097 bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
1098 }
1099 for attr1 in attrs1 {
1101 if let Some(found_attr2) = attrs2
1102 .iter()
1103 .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
1104 {
1105 if attr1.value() == found_attr2.value() {
1106 continue;
1107 } else {
1108 bail!(
1109 "Attribute named {} has differing values:\n '{}'\n '{}'",
1110 attr1.name().local_part(),
1111 attr1.value(),
1112 found_attr2.value()
1113 );
1114 }
1115 } else {
1116 bail!(
1117 "Attribute name {} not in [{}]",
1118 print_attr(&attr1),
1119 print_attrs(&attrs2)
1120 );
1121 }
1122 }
1123 return Ok(());
1124
1125 fn print_attr(attr: &Attribute) -> String {
1126 return format!("@{}='{}'", attr.name().local_part(), attr.value());
1127 }
1128 fn print_attrs(attrs: &[Attribute]) -> String {
1129 return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
1130 }
1131 }
1132}
1133
1134#[cfg(test)]
1135mod tests {
1136 #[allow(unused_imports)]
1137 use super::super::init_logger;
1138 use super::*;
1139
1140 fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
1141 let test_package = &parser::parse(test).expect("Failed to parse input");
1142 let test_doc = test_package.as_document();
1143 trim_doc(&test_doc);
1144 debug!("test:\n{}", mml_to_string(get_element(test_package)));
1145
1146 let target_package = &parser::parse(target).expect("Failed to parse input");
1147 let target_doc = target_package.as_document();
1148 trim_doc(&target_doc);
1149 debug!("target:\n{}", mml_to_string(get_element(target_package)));
1150
1151 match is_same_doc(&test_doc, &target_doc) {
1152 Ok(_) => return true,
1153 Err(e) => panic!("{}", e),
1154 }
1155 }
1156
1157 #[test]
1158 fn trim_same() {
1159 let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
1160 assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
1161 }
1162
1163 #[test]
1164 fn trim_whitespace() {
1165 let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
1166 let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1167 assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1168 }
1169
1170 #[test]
1171 fn no_trim_whitespace_nbsp() {
1172 let trimmed_str = "<math><mrow><mo>-</mo><mtext>  a </mtext></mrow></math>";
1173 let whitespace_str = "<math> <mrow ><mo>-</mo><mtext>  a </mtext></mrow ></math>";
1174 assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1175 }
1176
1177 #[test]
1178 fn trim_comment() {
1179 let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1180 let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
1181 assert!(are_parsed_strs_equal(comment_str, whitespace_str));
1182 }
1183
1184 #[test]
1185 fn replace_mglyph() {
1186 let mglyph_str = "<math>
1187 <mrow>
1188 <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
1189 <mo>+</mo>
1190 <mi>
1191 <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
1192 </mi>
1193 <mo>=</mo>
1194 <mi>
1195 <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
1196 </mi>
1197 </mrow>
1198 </math>";
1199 let result_str = "<math>
1200 <mrow>
1201 <mi>X23braid</mi>
1202 <mo>+</mo>
1203 <mi>132braidY</mi>
1204 <mo>=</mo>
1205 <mi>13braid</mi>
1206 </mrow>
1207 </math>";
1208 assert!(are_parsed_strs_equal(mglyph_str, result_str));
1209 }
1210
1211 #[test]
1212 fn trim_differs() {
1213 let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1214 let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";
1215
1216 let package1 = &parser::parse(whitespace_str).expect("Failed to parse input");
1218 let doc1 = package1.as_document();
1219 trim_doc(&doc1);
1220 debug!("doc1:\n{}", mml_to_string(get_element(package1)));
1221
1222 let package2 = parser::parse(different_str).expect("Failed to parse input");
1223 let doc2 = package2.as_document();
1224 trim_doc(&doc2);
1225 debug!("doc2:\n{}", mml_to_string(get_element(&package2)));
1226
1227 assert!(is_same_doc(&doc1, &doc2).is_err());
1228 }
1229
1230 #[test]
1231 fn test_entities() {
1232 set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1234
1235 let entity_str = set_mathml("<math><mrow><mo>−</mo><mi>𝕞</mi></mrow></math>").unwrap();
1236 let converted_str =
1237 set_mathml("<math><mrow><mo>−</mo><mi>𝕞</mi></mrow></math>").unwrap();
1238
1239 static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());
1241 let entity_str = ID_MATCH.replace_all(&entity_str, "");
1242 let converted_str = ID_MATCH.replace_all(&converted_str, "");
1243 assert_eq!(entity_str, converted_str, "normal entity test failed");
1244
1245 let entity_str = set_mathml(
1246 "<math data-quot=\""value"\" data-apos=''value''><mi>XXX</mi></math>",
1247 )
1248 .unwrap();
1249 let converted_str =
1250 set_mathml("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>").unwrap();
1251 let entity_str = ID_MATCH.replace_all(&entity_str, "");
1252 let converted_str = ID_MATCH.replace_all(&converted_str, "");
1253 assert_eq!(entity_str, converted_str, "special entities quote test failed");
1254
1255 let entity_str =
1256 set_mathml("<math><mo><</mo><mo>></mo><mtext>&lt;</mtext></math>").unwrap();
1257 let converted_str =
1258 set_mathml("<math><mo><</mo><mo>></mo><mtext>&lt;</mtext></math>")
1259 .unwrap();
1260 let entity_str = ID_MATCH.replace_all(&entity_str, "");
1261 let converted_str = ID_MATCH.replace_all(&converted_str, "");
1262 assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
1263 }
1264
1265 #[test]
1266 fn can_recover_from_invalid_set_rules_dir() {
1267 use std::env;
1268 unsafe { env::set_var("MathCATRulesDir", "MathCATRulesDir"); } assert!(set_rules_dir("someInvalidRulesDir").is_err());
1271 assert!(
1272 set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
1273 "\nset_rules_dir to '{}' failed",
1274 super::super::abs_rules_dir_path()
1275 );
1276 assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
1277 }
1278
1279 #[test]
1280 fn single_html_in_mtext() {
1281 let test = "<math><mn>1</mn> <mtext>a<p> para 1</p>bc</mtext> <mi>y</mi></math>";
1282 let target = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
1283 assert!(are_parsed_strs_equal(test, target));
1284 }
1285
1286 #[test]
1287 fn multiple_html_in_mtext() {
1288 let test = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc </mtext> <mi>y</mi></math>";
1289 let target = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
1290 assert!(are_parsed_strs_equal(test, target));
1291 }
1292
1293 #[test]
1294 fn nested_html_in_mtext() {
1295 let test = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
1296 let target = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
1297 assert!(are_parsed_strs_equal(test, target));
1298 }
1299
1300 #[test]
1301 fn empty_html_in_mtext() {
1302 let test = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
1303 let target = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
1304 assert!(are_parsed_strs_equal(test, target));
1305 }
1306
1307 #[test]
1308 fn mathml_in_mtext() {
1309 let test = "<math><mtext>if <math> <msup><mi>n</mi><mn>2</mn></msup></math> is real</mtext></math>";
1310 let target = "<math><mrow><mtext>if </mtext><msup><mi>n</mi><mn>2</mn></msup><mtext> is real</mtext></mrow></math>";
1311 assert!(are_parsed_strs_equal(test, target));
1312 }
1313
1314 #[test]
1315 fn stack_overflow_protection() {
1316 set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1317 let mut bad_mathml = String::from("<math>");
1318 for _ in 0..MAX_DEPTH+1 {
1319 bad_mathml.push_str("<msqrt><mi>n</mi>");
1320 }
1321 for _ in 0..MAX_DEPTH+1 {
1322 bad_mathml.push_str("</msqrt>");
1323 }
1324 bad_mathml.push_str("</math>");
1325 assert_eq!(set_mathml(bad_mathml).unwrap_err().to_string(), "MathML is too deeply nested to process");
1326 }
1327
1328 #[test]
1329 fn old_mathml_cleared_on_error() {
1330 set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1331 let good_mathml = "<math><mn>3</mn></math>";
1332 set_mathml(good_mathml).unwrap();
1333 let bad_mathml = "<math><mi>&xabc;</mi></math>";
1334 assert!(set_mathml(bad_mathml).is_err());
1335 assert!(get_spoken_text().unwrap() == "");
1336 set_mathml(good_mathml).unwrap();
1337 let bad_mathml = "<math>garbage";
1338 assert!(set_mathml(bad_mathml).is_err());
1339 assert!(get_spoken_text().unwrap() == "");
1340 }
1341}