1#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6use std::sync::LazyLock;
7
8use crate::canonicalize::{as_text, create_mathml_element};
9use crate::errors::*;
10use phf::phf_map;
11use regex::{Captures, Regex};
12use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
13use sxd_document::parser;
14use sxd_document::Package;
15
16use crate::canonicalize::{as_element, name};
17use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18use log::{debug, error};
19
20use crate::navigate::*;
21use crate::pretty_print::mml_to_string;
22use crate::xpath_functions::{is_leaf, IsNode};
23use std::panic::{catch_unwind, AssertUnwindSafe};
24
25#[cfg(feature = "enable-logs")]
26use std::sync::Once;
27#[cfg(feature = "enable-logs")]
28static INIT: Once = Once::new();
29
/// Initializes logging at most once per process.
///
/// Does nothing unless the `enable-logs` feature is on; the only backend set up
/// here is the Android logger (when compiling for Android).
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            let config = android_logger::Config::default()
                .with_max_level(log::LevelFilter::Trace)
                .with_tag("MathCat");
            android_logger::init_once(config);
            log::trace!("Activated Android logger!");
        }
    });
}
47
thread_local! {
    // Per-thread slot written by the panic hook installed in `init_panic_handler`
    // and emptied by `report_any_panic`; `None` means no panic has been recorded
    // (or the last one was already reported).
    static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
}
53
54pub fn init_panic_handler() {
56 use std::panic;
57
58 panic::set_hook(Box::new(|info| {
59 let location = info.location()
60 .map(|l| format!("{}:{}", l.file(), l.line()))
61 .unwrap_or_else(|| "unknown".to_string());
62
63 let payload = info.payload();
64 let msg = if let Some(s) = payload.downcast_ref::<&'static str>() {
65 s.to_string()
66 } else if let Some(s) = payload.downcast_ref::<String>() {
67 s.clone()
68 } else {
69 "Unknown panic payload".to_string()
70 };
71
72 let _ = PANIC_INFO.try_with(|cell| {
74 if let Ok(mut slot) = cell.try_borrow_mut() {
75 *slot = Some((msg, location, 0));
76 }
77 });
78 }));
79}
80
81pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
82 match result {
83 Ok(val) => val,
84 Err(_) => {
85 let details = PANIC_INFO.with(|cell| cell.borrow_mut().take());
87
88 if let Some((msg, file, line)) = details {
89 Err(anyhow::anyhow!(
90 "MathCAT crash! Please report the following information: '{}' at {}:{}",
91 msg, file, line
92 ))
93 } else {
94 Err(anyhow::anyhow!("MathCAT crash! -- please report"))
95 }
96 }
97 }
98}
99
100fn cleanup_mathml(mathml: Element) -> Result<Element> {
102 trim_element(mathml, false);
103 let mathml = crate::canonicalize::canonicalize(mathml)?;
104 let mathml = add_ids(mathml);
105 return Ok(mathml);
106}
107
thread_local! {
    // Per-thread package holding the current MathML document. It starts out as an
    // empty `<math>` element and is replaced by `set_mathml`.
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
}
112
113fn init_mathml_instance() -> RefCell<Package> {
114 let package = parser::parse("<math></math>")
115 .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
116 return RefCell::new(package);
117}
118
119pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
122 enable_logs();
123 init_panic_handler();
124 let dir = dir.as_ref().to_string();
125 let result = catch_unwind(AssertUnwindSafe(|| {
126 use std::path::PathBuf;
127 let dir_os = if dir.is_empty() {
128 std::env::var_os("MathCATRulesDir").unwrap_or_default()
129 } else {
130 std::ffi::OsString::from(&dir)
131 };
132 let pref_manager = crate::prefs::PreferenceManager::get();
133 pref_manager.borrow_mut().initialize(PathBuf::from(dir_os))
134 }));
135 return report_any_panic(result);
136}
137
138pub fn get_version() -> String {
140 enable_logs();
141 const VERSION: &str = env!("CARGO_PKG_VERSION");
142 return VERSION.to_string();
143}
144
145pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
149 enable_logs();
150 static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
152 static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
153 static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap()); static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap()); static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]+?);"#).unwrap());
156
157 let result = catch_unwind(AssertUnwindSafe(|| {
158 NAVIGATION_STATE.with(|nav_stack| {
159 nav_stack.borrow_mut().reset();
160 });
161
162 crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
165
166 let mathml_str = mathml_str.as_ref();
167 return MATHML_INSTANCE.with(|old_package| {
168 static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
169
170 let mut error_message = "".to_string(); let mathml_str =
173 HTML_ENTITIES.replace_all(mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
174 None => {
175 error_message = format!("No entity named '{}'", &cap[0]);
176 cap[0].to_string()
177 }
178 Some(&ch) => ch.to_string(),
179 });
180
181 if !error_message.is_empty() {
182 bail!(error_message);
183 }
184 let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
185 let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
186
187 let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
192
193 let new_package = parser::parse(&mathml_str);
194 if let Err(e) = new_package {
195 bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
196 }
197
198 let new_package = new_package.unwrap();
199 let mathml = get_element(&new_package);
200 let mathml = cleanup_mathml(mathml)?;
201 let mathml_string = mml_to_string(mathml);
202 old_package.replace(new_package);
203
204 return Ok(mathml_string);
205 });
206 }));
207
208 return report_any_panic(result);
209}
210
211pub fn get_spoken_text() -> Result<String> {
214 enable_logs();
215 let result = catch_unwind(AssertUnwindSafe(|| {
216 MATHML_INSTANCE.with(|package_instance| {
217 let package_instance = package_instance.borrow();
218 let mathml = get_element(&package_instance);
219 let new_package = Package::new();
220 let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
221 debug!("Intent tree:\n{}", mml_to_string(intent));
222 let speech = crate::speech::speak_mathml(intent, "", 0)?;
223 return Ok(speech);
224 })
225 }));
226 return report_any_panic(result);
227}
228
229pub fn get_overview_text() -> Result<String> {
233 enable_logs();
234 let result = catch_unwind(AssertUnwindSafe(|| {
235 MATHML_INSTANCE.with(|package_instance| {
236 let package_instance = package_instance.borrow();
237 let mathml = get_element(&package_instance);
238 let speech = crate::speech::overview_mathml(mathml, "", 0)?;
239 return Ok(speech);
240 })
241 }));
242 return report_any_panic(result);
243}
244
245pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
248 enable_logs();
249 let name = name.as_ref().to_string();
250 let result = catch_unwind(AssertUnwindSafe(|| {
251 use crate::prefs::NO_PREFERENCE;
252 crate::speech::SPEECH_RULES.with(|rules| {
253 let rules = rules.borrow();
254 let pref_manager = rules.pref_manager.borrow();
255 let mut value = pref_manager.pref_to_string(&name);
256 if value == NO_PREFERENCE {
257 value = pref_manager.pref_to_string(&name);
258 }
259 if value == NO_PREFERENCE {
260 bail!("No preference named '{}'", name);
261 } else {
262 return Ok(value);
263 }
264 })
265 }));
266 return report_any_panic(result);
267}
268
269pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
290 enable_logs();
291 let name = name.as_ref().to_string();
292 let value = value.as_ref().to_string();
293 let result = catch_unwind(AssertUnwindSafe(|| {
294 set_preference_impl(&name, &value)
295 }));
296 return report_any_panic(result);
297}
298
299fn set_preference_impl(name: &str, value: &str) -> Result<()> {
300 let mut value = value.to_string();
301 if name == "Language" || name == "LanguageAuto" {
302 if value != "Auto" {
304 let mut lang_country_split = value.split('-');
306 let language = lang_country_split.next().unwrap_or("");
307 let country = lang_country_split.next().unwrap_or("");
308 if language.len() != 2 {
309 bail!(
310 "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
311 value
312 );
313 }
314 let mut new_lang_country = language.to_string(); if !country.is_empty() {
316 new_lang_country.push('-');
317 new_lang_country.push_str(country);
318 }
319 value = new_lang_country;
320 }
321 if name == "LanguageAuto" && value == "Auto" {
322 bail!("'LanguageAuto' can not have the value 'Auto'");
323 }
324 }
325
326 crate::speech::SPEECH_RULES.with(|rules| {
327 let rules = rules.borrow_mut();
328 if let Some(error_string) = rules.get_error() {
329 bail!("{}", error_string);
330 }
331
332 let mut pref_manager = rules.pref_manager.borrow_mut();
334 if name == "LanguageAuto" {
335 let language_pref = pref_manager.pref_to_string("Language");
336 if language_pref != "Auto" {
337 bail!(
338 "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
339 language_pref
340 );
341 }
342 }
343 let lower_case_value = value.to_lowercase();
344 if lower_case_value == "true" || lower_case_value == "false" {
345 pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
346 } else {
347 match name {
348 "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
349 pref_manager.set_api_float_pref(name, to_float(name, &value)?)
350 }
351 _ => {
352 pref_manager.set_string_pref(name, &value)?;
353 }
354 }
355 };
356 return Ok::<(), Error>(());
357 })?;
358
359 return Ok(());
360}
361
362fn to_float(name: &str, value: &str) -> Result<f64> {
363 return match value.parse::<f64>() {
364 Ok(val) => Ok(val),
365 Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
366 };
367}
368
369pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
373 enable_logs();
374 let nav_node_id = nav_node_id.as_ref().to_string();
375 let result = catch_unwind(AssertUnwindSafe(|| {
376 MATHML_INSTANCE.with(|package_instance| {
377 let package_instance = package_instance.borrow();
378 let mathml = get_element(&package_instance);
379 let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
380 return Ok(braille);
381 })
382 }));
383 return report_any_panic(result);
384}
385
386pub fn get_navigation_braille() -> Result<String> {
390 enable_logs();
391 let result = catch_unwind(AssertUnwindSafe(|| {
392 MATHML_INSTANCE.with(|package_instance| {
393 let package_instance = package_instance.borrow();
394 let mathml = get_element(&package_instance);
395 let new_package = Package::new(); let new_doc = new_package.as_document();
397 let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
398 return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
399 Err(e) => Err(e),
400 Ok((found, offset)) => {
401 if offset == 0 {
404 if name(found) == "math" {
405 Ok(found)
406 } else {
407 let new_mathml = create_mathml_element(&new_doc, "math");
408 new_mathml.append_child(copy_mathml(found));
409 new_doc.root().append_child(new_mathml);
410 Ok(new_mathml)
411 }
412 } else if !is_leaf(found) {
413 bail!(
414 "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
415 offset,
416 name(found)
417 );
418 } else if let Some(ch) = as_text(found).chars().nth(offset) {
419 let internal_mathml = create_mathml_element(&new_doc, name(found));
420 internal_mathml.set_text(&ch.to_string());
421 let new_mathml = create_mathml_element(&new_doc, "math");
422 new_mathml.append_child(internal_mathml);
423 new_doc.root().append_child(new_mathml);
424 Ok(new_mathml)
425 } else {
426 bail!(
427 "Internal error: offset '{}' on leaf element '{}' doesn't exist",
428 offset,
429 mml_to_string(found)
430 );
431 }
432 }
433 };
434 })?;
435
436 let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
437 return Ok(braille);
438 })
439 }));
440 return report_any_panic(result);
441}
442
443pub fn do_navigate_keypress(
447 key: usize,
448 shift_key: bool,
449 control_key: bool,
450 alt_key: bool,
451 meta_key: bool,
452) -> Result<String> {
453 enable_logs();
454 let result = catch_unwind(AssertUnwindSafe(|| {
455 MATHML_INSTANCE.with(|package_instance| {
456 let package_instance = package_instance.borrow();
457 let mathml = get_element(&package_instance);
458 return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
459 })
460 }));
461 return report_any_panic(result);
462}
463
464pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
498 enable_logs();
499 let command = command.as_ref().to_string();
500 let result = catch_unwind(AssertUnwindSafe(|| {
501 let cmd = NAV_COMMANDS.get_key(&command); if cmd.is_none() {
503 bail!("Unknown command in call to DoNavigateCommand()");
504 };
505 let cmd = *cmd.unwrap();
506 MATHML_INSTANCE.with(|package_instance| {
507 let package_instance = package_instance.borrow();
508 let mathml = get_element(&package_instance);
509 return do_navigate_command_string(mathml, cmd);
510 })
511 }));
512 return report_any_panic(result);
513}
514
515pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
518 enable_logs();
519 let id = id.as_ref().to_string();
520 let result = catch_unwind(AssertUnwindSafe(|| {
521 MATHML_INSTANCE.with(|package_instance| {
522 let package_instance = package_instance.borrow();
523 let mathml = get_element(&package_instance);
524 return set_navigation_node_from_id(mathml, &id, offset);
525 })
526 }));
527 return report_any_panic(result);
528}
529
530pub fn get_navigation_mathml() -> Result<(String, usize)> {
533 enable_logs();
534 let result = catch_unwind(AssertUnwindSafe(|| {
535 MATHML_INSTANCE.with(|package_instance| {
536 let package_instance = package_instance.borrow();
537 let mathml = get_element(&package_instance);
538 return NAVIGATION_STATE.with(|nav_stack| {
539 return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
540 Err(e) => Err(e),
541 Ok((found, offset)) => Ok((mml_to_string(found), offset)),
542 };
543 });
544 })
545 }));
546 return report_any_panic(result);
547}
548
549pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
553 enable_logs();
554 let result = catch_unwind(AssertUnwindSafe(|| {
555 MATHML_INSTANCE.with(|package_instance| {
556 let package_instance = package_instance.borrow();
557 let mathml = get_element(&package_instance);
558 return Ok(NAVIGATION_STATE.with(|nav_stack| {
559 return nav_stack.borrow().get_navigation_mathml_id(mathml);
560 }));
561 })
562 }));
563 return report_any_panic(result);
564}
565
566pub fn get_braille_position() -> Result<(usize, usize)> {
568 enable_logs();
569 let result = catch_unwind(AssertUnwindSafe(|| {
570 MATHML_INSTANCE.with(|package_instance| {
571 let package_instance = package_instance.borrow();
572 let mathml = get_element(&package_instance);
573 let nav_node = get_navigation_mathml_id()?;
574 let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
575 return Ok((start, end));
576 })
577 }));
578 return report_any_panic(result);
579}
580
581pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
584 enable_logs();
585 let result = catch_unwind(AssertUnwindSafe(|| {
586 MATHML_INSTANCE.with(|package_instance| {
587 let package_instance = package_instance.borrow();
588 let mathml = get_element(&package_instance);
589 return crate::braille::get_navigation_node_from_braille_position(mathml, position);
590 })
591 }));
592 return report_any_panic(result);
593}
594
595pub fn get_supported_braille_codes() -> Result<Vec<String>> {
596 enable_logs();
597 let result = catch_unwind(AssertUnwindSafe(|| {
598 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
599 let braille_dir = rules_dir.join("Braille");
600 let mut braille_code_paths = Vec::new();
601
602 find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
603 let mut braille_code_paths = braille_code_paths.iter()
604 .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
605 .filter(|string_path| !string_path.is_empty() )
606 .collect::<Vec<String>>();
607 braille_code_paths.sort();
608
609 Ok(braille_code_paths)
610 }));
611 return report_any_panic(result);
612 }
613
614pub fn get_supported_languages() -> Result<Vec<String>> {
616 enable_logs();
617 let result = catch_unwind(AssertUnwindSafe(|| {
618 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
619 let lang_dir = rules_dir.join("Languages");
620 let mut lang_paths = Vec::new();
621
622 find_all_dirs_shim(&lang_dir, &mut lang_paths);
623 let mut language_paths = lang_paths.iter()
624 .map(|path| path.strip_prefix(&lang_dir).unwrap()
625 .to_string_lossy()
626 .replace(std::path::MAIN_SEPARATOR, "-")
627 .to_string())
628 .filter(|string_path| !string_path.is_empty() )
629 .collect::<Vec<String>>();
630
631 language_paths.retain(|s| !s.starts_with("zz"));
633 language_paths.sort();
634 Ok(language_paths)
635 }));
636 return report_any_panic(result);
637 }
638
639 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
640 enable_logs();
641 let lang = lang.as_ref().to_string();
642 let result = catch_unwind(AssertUnwindSafe(|| {
643 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
644 let lang_dir = rules_dir.join("Languages").join(&lang);
645 let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
646 for file_name in &mut speech_styles {
647 file_name.truncate(file_name.len() - "_Rules.yaml".len())
648 }
649 speech_styles.sort();
650 speech_styles.dedup(); Ok(speech_styles)
652 }));
653 return report_any_panic(result);
654 }
655
656pub fn copy_mathml(mathml: Element) -> Element {
662 let children = mathml.children();
664 let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
665 mathml.attributes().iter().for_each(|attr| {
666 new_mathml.set_attribute_value(attr.name(), attr.value());
667 });
668
669 if children.len() == 1 &&
671 let Some(text) = children[0].text() {
672 new_mathml.set_text(text.text());
673 return new_mathml;
674 }
675
676 let mut new_children = Vec::with_capacity(children.len());
677 for child in children {
678 let child = as_element(child);
679 let new_child = copy_mathml(child);
680 new_children.push(new_child);
681 }
682 new_mathml.append_children(new_children);
683 return new_mathml;
684}
685
686pub fn errors_to_string(e: &Error) -> String {
687 enable_logs();
688 let mut result = format!("{e}\n");
689 for cause in e.chain().skip(1) { result += &format!("caused by: {cause}\n");
691 }
692 result
693}
694
695fn add_ids(mathml: Element) -> Element {
696 use std::time::SystemTime;
697 let time = if cfg!(target_family = "wasm") {
698 fastrand::usize(..)
699 } else {
700 SystemTime::now()
701 .duration_since(SystemTime::UNIX_EPOCH)
702 .unwrap()
703 .as_millis() as usize
704 };
705 let mut time_part = radix_fmt::radix(time, 36).to_string();
706 if time_part.len() < 3 {
707 time_part.push_str("a2c"); }
709 let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
710 if random_part.len() < 4 {
711 random_part.push_str("a1b2"); }
713 let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; add_ids_to_all(mathml, &prefix, 0);
715 return mathml;
716
717 fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
718 let mut count = count;
719 if mathml.attribute("id").is_none() {
720 mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
721 mathml.set_attribute_value("data-id-added", "true");
722 count += 1;
723 };
724
725 if crate::xpath_functions::is_leaf(mathml) {
726 return count;
727 }
728
729 for child in mathml.children() {
730 let child = as_element(child);
731 count = add_ids_to_all(child, id_prefix, count);
732 }
733 return count;
734 }
735}
736
737pub fn get_element(package: &Package) -> Element<'_> {
738 enable_logs();
739 let doc = package.as_document();
740 let mut result = None;
741 for root_child in doc.root().children() {
742 if let ChildOfRoot::Element(e) = root_child {
743 assert!(result.is_none());
744 result = Some(e);
745 }
746 }
747 return result.unwrap();
748}
749
750#[allow(dead_code)]
753pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
754 crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files().unwrap());
755 let mathml = cleanup_mathml(mathml)?;
756 return crate::speech::intent_from_mathml(mathml, doc);
757}
758
759#[allow(dead_code)]
760fn trim_doc(doc: &Document) {
761 for root_child in doc.root().children() {
762 if let ChildOfRoot::Element(e) = root_child {
763 trim_element(e, false);
764 } else {
765 doc.root().remove_child(root_child); }
767 }
768}
769
770pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
772 const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
777 static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());
778
779 if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
780 make_leaf_element(e);
782 return;
783 }
784
785 let mut single_text = "".to_string();
786 for child in e.children() {
787 match child {
788 ChildOfElement::Element(c) => {
789 trim_element(c, allow_structure_in_leaves);
790 }
791 ChildOfElement::Text(t) => {
792 single_text += t.text();
793 e.remove_child(child);
794 }
795 _ => {
796 e.remove_child(child);
797 }
798 }
799 }
800
801 if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
803 if !single_text.trim_matches(WHITESPACE).is_empty() {
807 error!(
808 "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
809 );
810 }
811 return;
812 }
813 if e.children().is_empty() && !single_text.is_empty() {
814 e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
816 }
817
818 fn make_leaf_element(mathml_leaf: Element) {
819 let children = mathml_leaf.children();
824 if children.is_empty() {
825 return;
826 }
827
828 if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
829 return;
830 }
831
832 let mut text = "".to_string();
834 for child in children {
835 let child_text = match child {
836 ChildOfElement::Element(child) => {
837 if name(child) == "mglyph" {
838 child.attribute_value("alt").unwrap_or("").to_string()
839 } else {
840 gather_text(child)
841 }
842 }
843 ChildOfElement::Text(t) => {
844 t.text().to_string()
846 }
847 _ => "".to_string(),
848 };
849 if !child_text.is_empty() {
850 text += &child_text;
851 }
852 }
853
854 mathml_leaf.clear_children();
856 mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
857 fn gather_text(html: Element) -> String {
861 let mut text = "".to_string(); for child in html.children() {
863 match child {
864 ChildOfElement::Element(child) => {
865 text += &gather_text(child);
866 }
867 ChildOfElement::Text(t) => text += t.text(),
868 _ => (),
869 }
870 }
871 return text;
873 }
874 }
875
876 fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
877 let mut needs_rewrite = false;
880 for child in mathml_leaf.children() {
881 if let Some(element) = child.element() {
882 if name(element) != "math" {
883 return false; }
885 needs_rewrite = true;
886 }
887 };
888
889 if !needs_rewrite {
890 return false;
891 }
892
893 let leaf_name = name(mathml_leaf);
895 let doc = mathml_leaf.document();
896 let mut new_children = Vec::new();
897 let mut is_last_mtext = false;
898 for child in mathml_leaf.children() {
899 if let Some(element) = child.element() {
900 trim_element(element, true);
901 new_children.append(&mut element.children()); is_last_mtext = false;
903 } else if let Some(text) = child.text() {
904 if is_last_mtext {
906 let last_child = new_children.last_mut().unwrap().element().unwrap();
907 let new_text = as_text(last_child).to_string() + text.text();
908 last_child.set_text(&new_text);
909 } else {
910 let new_leaf_node = create_mathml_element(&doc, leaf_name);
911 new_leaf_node.set_text(text.text());
912 new_children.push(ChildOfElement::Element(new_leaf_node));
913 is_last_mtext = true;
914 }
915 }
916 };
917
918 for child in &mut new_children {
920 if let Some(element) = child.element() && is_leaf(element) {
921 let text = as_text(element);
922 let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
923 element.set_text(&cleaned_text);
924 }
925 }
926
927 crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
928 mathml_leaf.clear_children();
929 mathml_leaf.append_children(new_children);
930
931 return true;
933 }
934}
935
936#[allow(dead_code)]
939fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
940 if doc1.root().children().len() != doc2.root().children().len() {
943 bail!(
944 "Children of docs have {} != {} children",
945 doc1.root().children().len(),
946 doc2.root().children().len()
947 );
948 }
949
950 for (i, (c1, c2)) in doc1
951 .root()
952 .children()
953 .iter()
954 .zip(doc2.root().children().iter())
955 .enumerate()
956 {
957 match c1 {
958 ChildOfRoot::Element(e1) => {
959 if let ChildOfRoot::Element(e2) = c2 {
960 is_same_element(*e1, *e2, &[])?;
961 } else {
962 bail!("child #{}, first is element, second is something else", i);
963 }
964 }
965 ChildOfRoot::Comment(com1) => {
966 if let ChildOfRoot::Comment(com2) = c2 {
967 if com1.text() != com2.text() {
968 bail!("child #{} -- comment text differs", i);
969 }
970 } else {
971 bail!("child #{}, first is comment, second is something else", i);
972 }
973 }
974 ChildOfRoot::ProcessingInstruction(p1) => {
975 if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
976 if p1.target() != p2.target() || p1.value() != p2.value() {
977 bail!("child #{} -- processing instruction differs", i);
978 }
979 } else {
980 bail!(
981 "child #{}, first is processing instruction, second is something else",
982 i
983 );
984 }
985 }
986 }
987 }
988 return Ok(());
989}
990
991#[allow(dead_code)]
994pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
995 enable_logs();
996 if name(e1) != name(e2) {
997 bail!("Names not the same: {}, {}", name(e1), name(e2));
998 }
999
1000 if e1.children().len() != e2.children().len() {
1003 bail!(
1004 "Children of {} have {} != {} children",
1005 name(e1),
1006 e1.children().len(),
1007 e2.children().len()
1008 );
1009 }
1010
1011 if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
1012 bail!("In element {}, {}", name(e1), e);
1013 }
1014
1015 for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
1016 match c1 {
1017 ChildOfElement::Element(child1) => {
1018 if let ChildOfElement::Element(child2) = c2 {
1019 is_same_element(*child1, *child2, ignore_attrs)?;
1020 } else {
1021 bail!("{} child #{}, first is element, second is something else", name(e1), i);
1022 }
1023 }
1024 ChildOfElement::Comment(com1) => {
1025 if let ChildOfElement::Comment(com2) = c2 {
1026 if com1.text() != com2.text() {
1027 bail!("{} child #{} -- comment text differs", name(e1), i);
1028 }
1029 } else {
1030 bail!("{} child #{}, first is comment, second is something else", name(e1), i);
1031 }
1032 }
1033 ChildOfElement::ProcessingInstruction(p1) => {
1034 if let ChildOfElement::ProcessingInstruction(p2) = c2 {
1035 if p1.target() != p2.target() || p1.value() != p2.value() {
1036 bail!("{} child #{} -- processing instruction differs", name(e1), i);
1037 }
1038 } else {
1039 bail!(
1040 "{} child #{}, first is processing instruction, second is something else",
1041 name(e1),
1042 i
1043 );
1044 }
1045 }
1046 ChildOfElement::Text(t1) => {
1047 if let ChildOfElement::Text(t2) = c2 {
1048 if t1.text() != t2.text() {
1049 bail!("{} child #{} -- text differs", name(e1), i);
1050 }
1051 } else {
1052 bail!("{} child #{}, first is text, second is something else", name(e1), i);
1053 }
1054 }
1055 }
1056 }
1057 return Ok(());
1058
1059 fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
1061 let attrs1 = attrs1.iter()
1062 .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1063 .collect::<Vec<Attribute>>();
1064 let attrs2 = attrs2.iter()
1065 .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1066 .collect::<Vec<Attribute>>();
1067 if attrs1.len() != attrs2.len() {
1068 bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
1069 }
1070 for attr1 in attrs1 {
1072 if let Some(found_attr2) = attrs2
1073 .iter()
1074 .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
1075 {
1076 if attr1.value() == found_attr2.value() {
1077 continue;
1078 } else {
1079 bail!(
1080 "Attribute named {} has differing values:\n '{}'\n '{}'",
1081 attr1.name().local_part(),
1082 attr1.value(),
1083 found_attr2.value()
1084 );
1085 }
1086 } else {
1087 bail!(
1088 "Attribute name {} not in [{}]",
1089 print_attr(&attr1),
1090 print_attrs(&attrs2)
1091 );
1092 }
1093 }
1094 return Ok(());
1095
1096 fn print_attr(attr: &Attribute) -> String {
1097 return format!("@{}='{}'", attr.name().local_part(), attr.value());
1098 }
1099 fn print_attrs(attrs: &[Attribute]) -> String {
1100 return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
1101 }
1102 }
1103}
1104
1105#[cfg(test)]
1106mod tests {
1107 #[allow(unused_imports)]
1108 use super::super::init_logger;
1109 use super::*;
1110
1111 fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
1112 let test_package = &parser::parse(test).expect("Failed to parse input");
1113 let test_doc = test_package.as_document();
1114 trim_doc(&test_doc);
1115 debug!("test:\n{}", mml_to_string(get_element(&test_package)));
1116
1117 let target_package = &parser::parse(target).expect("Failed to parse input");
1118 let target_doc = target_package.as_document();
1119 trim_doc(&target_doc);
1120 debug!("target:\n{}", mml_to_string(get_element(&target_package)));
1121
1122 match is_same_doc(&test_doc, &target_doc) {
1123 Ok(_) => return true,
1124 Err(e) => panic!("{}", e),
1125 }
1126 }
1127
1128 #[test]
1129 fn trim_same() {
1130 let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
1131 assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
1132 }
1133
1134 #[test]
1135 fn trim_whitespace() {
1136 let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
1137 let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1138 assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1139 }
1140
1141 #[test]
1142 fn no_trim_whitespace_nbsp() {
1143 let trimmed_str = "<math><mrow><mo>-</mo><mtext>  a </mtext></mrow></math>";
1144 let whitespace_str = "<math> <mrow ><mo>-</mo><mtext>  a </mtext></mrow ></math>";
1145 assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1146 }
1147
1148 #[test]
1149 fn trim_comment() {
1150 let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1151 let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
1152 assert!(are_parsed_strs_equal(comment_str, whitespace_str));
1153 }
1154
/// Each `mglyph` inside a leaf is expected to be replaced by its `alt` text,
/// joined with whatever text surrounds it in the leaf.
#[test]
fn replace_mglyph() {
    let with_mglyphs = "<math>
            <mrow>
                <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                <mo>+</mo>
                <mi>
                    <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                </mi>
                <mo>=</mo>
                <mi>
                    <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                </mi>
            </mrow>
        </math>";
    let with_alt_text = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
    assert!(are_parsed_strs_equal(with_mglyphs, with_alt_text));
}
1181
/// Two documents that differ in leaf content (`a` vs `b`) must still be
/// reported as different after trimming.
#[test]
fn trim_differs() {
    let parse = |mathml: &str| parser::parse(mathml).expect("Failed to parse input");

    let mathml_a = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
    let mathml_b = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

    let package_a = parse(mathml_a);
    let doc_a = package_a.as_document();
    trim_doc(&doc_a);
    debug!("doc1:\n{}", mml_to_string(get_element(&package_a)));

    let package_b = parse(mathml_b);
    let doc_b = package_b.as_document();
    trim_doc(&doc_b);
    debug!("doc2:\n{}", mml_to_string(get_element(&package_b)));

    let comparison = is_same_doc(&doc_a, &doc_b);
    assert!(comparison.is_err());
}
1200
/// Checks that `set_mathml` produces the same result for entity references as
/// for the equivalent numeric character references / literal characters.
///
/// Three groups are covered:
/// * ordinary named entities (`&minus;`, `&mopf;`),
/// * the quote entities (`&quot;`, `&apos;`) inside attribute values,
/// * the markup-significant entities (`&lt;`, `&gt;`, `&amp;`) in leaf content.
///
/// The left-hand input of every pair must actually contain entity references:
/// with the characters written out literally the two inputs are identical (so
/// nothing is tested), a raw `<` inside `<mo>` is not well-formed XML, and raw
/// double quotes terminate the Rust string literal.
#[test]
fn test_entities() {
    // Set the rules directory before calling `set_mathml`; fail fast if that is impossible.
    set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

    // `id` attributes are stripped before comparing -- presumably because they are
    // generated per call and therefore differ between the two conversions.
    static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());
    let without_ids = |mathml: &str| -> String {
        let converted = set_mathml(mathml).unwrap();
        ID_MATCH.replace_all(&converted, "").into_owned()
    };

    // Named entities vs. the numeric character references for the same characters.
    let entity_str = without_ids("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>");
    let converted_str = without_ids("<math><mrow><mo>&#x2212;</mo><mi>&#x1D55E;</mi></mrow></math>");
    assert_eq!(entity_str, converted_str, "normal entity test failed");

    // Quote entities in attribute values vs. literal quotes using the other delimiter.
    let entity_str = without_ids(
        "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
    );
    let converted_str =
        without_ids("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>");
    assert_eq!(entity_str, converted_str, "special entities quote test failed");

    // `<`, `>` and `&` must be escaped in content; `&amp;lt;` yields the literal text `&lt;`.
    let entity_str = without_ids("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>");
    let converted_str =
        without_ids("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>");
    assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
}
1235
/// After `set_rules_dir` has rejected a bogus directory, a later call with the
/// real rules directory must succeed and `set_mathml` must work again.
#[test]
fn can_recover_from_invalid_set_rules_dir() {
    // Point the environment variable at a bogus value first -- presumably it is
    // consulted as an override, which could otherwise let the invalid call succeed.
    // SAFETY: NOTE(review): `set_var` is only sound while no other thread reads or
    // writes the process environment; the test harness may run tests on several
    // threads, so confirm nothing else touches the environment concurrently.
    unsafe {
        std::env::set_var("MathCATRulesDir", "MathCATRulesDir");
    }
    assert!(set_rules_dir("someInvalidRulesDir").is_err());

    let recovery = set_rules_dir(super::super::abs_rules_dir_path());
    assert!(
        recovery.is_ok(),
        "\nset_rules_dir to '{}' failed",
        super::super::abs_rules_dir_path()
    );

    let after_recovery = set_mathml("<math><mn>1</mn></math>");
    assert!(after_recovery.is_ok());
}
1249
/// A single HTML element inside `mtext` is expected to be replaced by its text content.
#[test]
fn single_html_in_mtext() {
    let with_html = "<math><mn>1</mn> <mtext>a<p> para 1</p>bc</mtext> <mi>y</mi></math>";
    let flattened = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
    assert!(are_parsed_strs_equal(with_html, flattened));
}
1256
/// Several sibling HTML elements inside `mtext` are each expected to be
/// replaced by their text content.
#[test]
fn multiple_html_in_mtext() {
    let with_html = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc </mtext> <mi>y</mi></math>";
    let flattened = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
    assert!(are_parsed_strs_equal(with_html, flattened));
}
1263
/// Nested HTML elements inside `mtext` are expected to collapse to the
/// concatenated text of their leaves.
#[test]
fn nested_html_in_mtext() {
    let with_html = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
    let flattened = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
    assert!(are_parsed_strs_equal(with_html, flattened));
}
1270
/// An empty HTML element (`<br/>`) inside `mtext` is expected to vanish
/// without leaving any text behind.
#[test]
fn empty_html_in_mtext() {
    let with_html = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
    let flattened = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
    assert!(are_parsed_strs_equal(with_html, flattened));
}
1277
/// A `math` element nested inside `mtext` is expected to be lifted out: the
/// surrounding text becomes separate `mtext` siblings inside an `mrow`.
#[test]
fn mathml_in_mtext() {
    let nested_math = "<math><mtext>if <math> <msup><mi>n</mi><mn>2</mn></msup></math> is real</mtext></math>";
    let lifted = "<math><mrow><mtext>if </mtext><msup><mi>n</mi><mn>2</mn></msup><mtext> is real</mtext></mrow></math>";
    assert!(are_parsed_strs_equal(nested_math, lifted));
}
1284}