1#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6use std::sync::LazyLock;
7
8use crate::canonicalize::{as_text, create_mathml_element};
9use crate::errors::*;
10use phf::phf_map;
11use regex::{Captures, Regex};
12use sxd_document::dom::*;
13use sxd_document::parser;
14use sxd_document::Package;
15
16use crate::canonicalize::{as_element, name};
17use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18use log::{debug, error};
19
20use crate::navigate::*;
21use crate::pretty_print::mml_to_string;
22use crate::xpath_functions::{is_leaf, IsNode};
23
24#[cfg(feature = "enable-logs")]
25use std::sync::Once;
26#[cfg(feature = "enable-logs")]
27static INIT: Once = Once::new();
28
/// Sets up logging the first time it is called when the `enable-logs` feature is on.
///
/// Without that feature the body is compiled out. With it, `INIT` guarantees the
/// setup closure runs at most once; the only logger installed here is the Android
/// one, and only when the target OS is Android.
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            use android_logger::*;
            use log::*;

            let config = Config::default()
                .with_max_level(LevelFilter::Trace)
                .with_tag("MathCat");
            android_logger::init_once(config);
            trace!("Activated Android logger!");
        }
    });
}
46
47fn cleanup_mathml(mathml: Element) -> Result<Element> {
49 trim_element(mathml, false);
50 let mathml = crate::canonicalize::canonicalize(mathml)?;
51 let mathml = add_ids(mathml);
52 return Ok(mathml);
53}
54
55thread_local! {
56 pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
58}
59
60fn init_mathml_instance() -> RefCell<Package> {
61 let package = parser::parse("<math></math>")
62 .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
63 return RefCell::new(package);
64}
65
66pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
69 enable_logs();
70 use std::path::PathBuf;
71 let dir = dir.as_ref();
72 let dir = if dir.is_empty() {
73 std::env::var_os("MathCATRulesDir").unwrap_or_default()
74 } else {
75 std::ffi::OsString::from(dir)
76 };
77 let pref_manager = crate::prefs::PreferenceManager::get();
78 return pref_manager.borrow_mut().initialize(PathBuf::from(dir));
79}
80
81pub fn get_version() -> String {
83 enable_logs();
84 const VERSION: &str = env!("CARGO_PKG_VERSION");
85 return VERSION.to_string();
86}
87
88pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
92 enable_logs();
93 static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
95 static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
96 static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap()); static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap()); static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]+?);"#).unwrap());
99
100 NAVIGATION_STATE.with(|nav_stack| {
101 nav_stack.borrow_mut().reset();
102 });
103
104 crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
107
108 let mathml_str = mathml_str.as_ref();
109 return MATHML_INSTANCE.with(|old_package| {
110 static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
111
112 let mut error_message = "".to_string(); let mathml_str =
115 HTML_ENTITIES.replace_all(mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
116 None => {
117 error_message = format!("No entity named '{}'", &cap[0]);
118 cap[0].to_string()
119 }
120 Some(&ch) => ch.to_string(),
121 });
122
123 if !error_message.is_empty() {
124 bail!(error_message);
125 }
126 let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
127 let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
128
129 let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
134
135 let new_package = parser::parse(&mathml_str);
136 if let Err(e) = new_package {
137 bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
138 }
139
140 let new_package = new_package.unwrap();
141 let mathml = get_element(&new_package);
142 let mathml = cleanup_mathml(mathml)?;
143 let mathml_string = mml_to_string(mathml);
144 old_package.replace(new_package);
145
146 return Ok(mathml_string);
147 });
148}
149
150pub fn get_spoken_text() -> Result<String> {
153 enable_logs();
154 return MATHML_INSTANCE.with(|package_instance| {
157 let package_instance = package_instance.borrow();
158 let mathml = get_element(&package_instance);
159 let new_package = Package::new();
160 let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
161 debug!("Intent tree:\n{}", mml_to_string(intent));
162 let speech = crate::speech::speak_mathml(intent, "", 0)?;
163 return Ok(speech);
165 });
166}
167
168pub fn get_overview_text() -> Result<String> {
172 enable_logs();
173 return MATHML_INSTANCE.with(|package_instance| {
176 let package_instance = package_instance.borrow();
177 let mathml = get_element(&package_instance);
178 let speech = crate::speech::overview_mathml(mathml, "", 0)?;
179 return Ok(speech);
181 });
182}
183
184pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
187 enable_logs();
188 let name = name.as_ref();
189 use crate::prefs::NO_PREFERENCE;
190 return crate::speech::SPEECH_RULES.with(|rules| {
191 let rules = rules.borrow();
192 let pref_manager = rules.pref_manager.borrow();
193 let mut value = pref_manager.pref_to_string(name);
194 if value == NO_PREFERENCE {
195 value = pref_manager.pref_to_string(name);
196 }
197 if value == NO_PREFERENCE {
198 bail!("No preference named '{}'", name);
199 } else {
200 return Ok(value);
201 }
202 });
203}
204
205pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
226 enable_logs();
227 let name = name.as_ref();
228 let mut value = value.as_ref().to_string();
230 if name == "Language" || name == "LanguageAuto" {
231 if value != "Auto" {
233 let mut lang_country_split = value.split('-');
235 let language = lang_country_split.next().unwrap_or("");
236 let country = lang_country_split.next().unwrap_or("");
237 if language.len() != 2 {
238 bail!(
239 "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
240 value
241 );
242 }
243 let mut new_lang_country = language.to_string(); if !country.is_empty() {
245 new_lang_country.push('-');
246 new_lang_country.push_str(country);
247 }
248 value = new_lang_country;
249 }
250 if name == "LanguageAuto" && value == "Auto" {
251 bail!("'LanguageAuto' can not have the value 'Auto'");
252 }
253 }
254
255 crate::speech::SPEECH_RULES.with(|rules| {
256 let rules = rules.borrow_mut();
257 if let Some(error_string) = rules.get_error() {
258 bail!("{}", error_string);
259 }
260
261 let mut pref_manager = rules.pref_manager.borrow_mut();
263 if name == "LanguageAuto" {
264 let language_pref = pref_manager.pref_to_string("Language");
265 if language_pref != "Auto" {
266 bail!(
267 "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
268 language_pref
269 );
270 }
271 }
272 let lower_case_value = value.to_lowercase();
273 if lower_case_value == "true" || lower_case_value == "false" {
274 pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
275 } else {
276 match name {
277 "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
278 pref_manager.set_api_float_pref(name, to_float(name, &value)?)
279 }
280 _ => {
281 pref_manager.set_string_pref(name, &value)?;
282 }
283 }
284 };
285 return Ok::<(), Error>(());
286 })?;
287
288 return Ok(());
289
290 fn to_float(name: &str, value: &str) -> Result<f64> {
291 return match value.parse::<f64>() {
292 Ok(val) => Ok(val),
293 Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
294 };
295 }
296}
297
298pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
302 enable_logs();
303 return MATHML_INSTANCE.with(|package_instance| {
306 let package_instance = package_instance.borrow();
307 let mathml = get_element(&package_instance);
308 let braille = crate::braille::braille_mathml(mathml, nav_node_id.as_ref())?.0;
309 return Ok(braille);
311 });
312}
313
314pub fn get_navigation_braille() -> Result<String> {
318 enable_logs();
319 return MATHML_INSTANCE.with(|package_instance| {
320 let package_instance = package_instance.borrow();
321 let mathml = get_element(&package_instance);
322 let new_package = Package::new(); let new_doc = new_package.as_document();
324 let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
325 return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
326 Err(e) => Err(e),
327 Ok((found, offset)) => {
328 if offset == 0 {
331 if name(found) == "math" {
332 Ok(found)
333 } else {
334 let new_mathml = create_mathml_element(&new_doc, "math");
335 new_mathml.append_child(copy_mathml(found));
336 new_doc.root().append_child(new_mathml);
337 Ok(new_mathml)
338 }
339 } else if !is_leaf(found) {
340 bail!(
341 "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
342 offset,
343 name(found)
344 );
345 } else if let Some(ch) = as_text(found).chars().nth(offset) {
346 let internal_mathml = create_mathml_element(&new_doc, name(found));
347 internal_mathml.set_text(&ch.to_string());
348 let new_mathml = create_mathml_element(&new_doc, "math");
349 new_mathml.append_child(internal_mathml);
350 new_doc.root().append_child(new_mathml);
351 Ok(new_mathml)
352 } else {
353 bail!(
354 "Internal error: offset '{}' on leaf element '{}' doesn't exist",
355 offset,
356 mml_to_string(found)
357 );
358 }
359 }
360 };
361 })?;
362
363 let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
364 return Ok(braille);
365 });
366}
367
368pub fn do_navigate_keypress(
372 key: usize,
373 shift_key: bool,
374 control_key: bool,
375 alt_key: bool,
376 meta_key: bool,
377) -> Result<String> {
378 return MATHML_INSTANCE.with(|package_instance| {
379 let package_instance = package_instance.borrow();
380 let mathml = get_element(&package_instance);
381 return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
382 });
383}
384
385pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
419 enable_logs();
420 let command = NAV_COMMANDS.get_key(command.as_ref()); if command.is_none() {
422 bail!("Unknown command in call to DoNavigateCommand()");
423 };
424 let command = *command.unwrap();
425 return MATHML_INSTANCE.with(|package_instance| {
426 let package_instance = package_instance.borrow();
427 let mathml = get_element(&package_instance);
428 return do_navigate_command_string(mathml, command);
429 });
430}
431
432pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
435 enable_logs();
436 return MATHML_INSTANCE.with(|package_instance| {
437 let package_instance = package_instance.borrow();
438 let mathml = get_element(&package_instance);
439 return set_navigation_node_from_id(mathml, id.as_ref(), offset);
440 });
441}
442
443pub fn get_navigation_mathml() -> Result<(String, usize)> {
446 return MATHML_INSTANCE.with(|package_instance| {
447 let package_instance = package_instance.borrow();
448 let mathml = get_element(&package_instance);
449 return NAVIGATION_STATE.with(|nav_stack| {
450 return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
451 Err(e) => Err(e),
452 Ok((found, offset)) => Ok((mml_to_string(found), offset)),
453 };
454 });
455 });
456}
457
458pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
462 enable_logs();
463 return MATHML_INSTANCE.with(|package_instance| {
464 let package_instance = package_instance.borrow();
465 let mathml = get_element(&package_instance);
466 return Ok(NAVIGATION_STATE.with(|nav_stack| {
467 return nav_stack.borrow().get_navigation_mathml_id(mathml);
468 }));
469 });
470}
471
472pub fn get_braille_position() -> Result<(usize, usize)> {
474 enable_logs();
475 return MATHML_INSTANCE.with(|package_instance| {
476 let package_instance = package_instance.borrow();
477 let mathml = get_element(&package_instance);
478 let nav_node = get_navigation_mathml_id()?;
479 let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
480 return Ok((start, end));
481 });
482}
483
484pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
487 enable_logs();
488 return MATHML_INSTANCE.with(|package_instance| {
489 let package_instance = package_instance.borrow();
490 let mathml = get_element(&package_instance);
491 return crate::braille::get_navigation_node_from_braille_position(mathml, position);
492 });
493}
494
495pub fn get_supported_braille_codes() -> Vec<String> {
496 enable_logs();
497 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
498 let braille_dir = rules_dir.join("Braille");
499 let mut braille_code_paths = Vec::new();
500
501 find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
502 let mut braille_code_paths = braille_code_paths.iter()
503 .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
504 .filter(|string_path| !string_path.is_empty() )
505 .collect::<Vec<String>>();
506 braille_code_paths.sort();
507
508 return braille_code_paths;
509 }
510
511pub fn get_supported_languages() -> Vec<String> {
513 enable_logs();
514 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
515 let lang_dir = rules_dir.join("Languages");
516 let mut lang_paths = Vec::new();
517
518 find_all_dirs_shim(&lang_dir, &mut lang_paths);
519 let mut language_paths = lang_paths.iter()
520 .map(|path| path.strip_prefix(&lang_dir).unwrap()
521 .to_string_lossy()
522 .replace(std::path::MAIN_SEPARATOR, "-")
523 .to_string())
524 .filter(|string_path| !string_path.is_empty() )
525 .collect::<Vec<String>>();
526
527 language_paths.retain(|s| !s.starts_with("zz"));
529 language_paths.sort();
530 return language_paths;
531 }
532
533 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Vec<String> {
534 enable_logs();
535 let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
536 let lang_dir = rules_dir.join("Languages").join(lang.as_ref());
537 let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
538 for file_name in &mut speech_styles {
539 file_name.truncate(file_name.len() - "_Rules.yaml".len())
540 }
541 speech_styles.sort();
542 speech_styles.dedup(); return speech_styles;
544 }
545
546pub fn copy_mathml(mathml: Element) -> Element {
552 let children = mathml.children();
554 let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
555 mathml.attributes().iter().for_each(|attr| {
556 new_mathml.set_attribute_value(attr.name(), attr.value());
557 });
558
559 if children.len() == 1 &&
561 let Some(text) = children[0].text() {
562 new_mathml.set_text(text.text());
563 return new_mathml;
564 }
565
566 let mut new_children = Vec::with_capacity(children.len());
567 for child in children {
568 let child = as_element(child);
569 let new_child = copy_mathml(child);
570 new_children.push(new_child);
571 }
572 new_mathml.append_children(new_children);
573 return new_mathml;
574}
575
576pub fn errors_to_string(e: &Error) -> String {
577 enable_logs();
578 let mut result = format!("{e}\n");
579 for cause in e.chain().skip(1) { result += &format!("caused by: {cause}\n");
581 }
582 result
583}
584
585fn add_ids(mathml: Element) -> Element {
586 use std::time::SystemTime;
587 let time = if cfg!(target_family = "wasm") {
588 fastrand::usize(..)
589 } else {
590 SystemTime::now()
591 .duration_since(SystemTime::UNIX_EPOCH)
592 .unwrap()
593 .as_millis() as usize
594 };
595 let mut time_part = radix_fmt::radix(time, 36).to_string();
596 if time_part.len() < 3 {
597 time_part.push_str("a2c"); }
599 let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
600 if random_part.len() < 4 {
601 random_part.push_str("a1b2"); }
603 let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; add_ids_to_all(mathml, &prefix, 0);
605 return mathml;
606
607 fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
608 let mut count = count;
609 if mathml.attribute("id").is_none() {
610 mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
611 mathml.set_attribute_value("data-id-added", "true");
612 count += 1;
613 };
614
615 if crate::xpath_functions::is_leaf(mathml) {
616 return count;
617 }
618
619 for child in mathml.children() {
620 let child = as_element(child);
621 count = add_ids_to_all(child, id_prefix, count);
622 }
623 return count;
624 }
625}
626
627pub fn get_element(package: &Package) -> Element<'_> {
628 enable_logs();
629 let doc = package.as_document();
630 let mut result = None;
631 for root_child in doc.root().children() {
632 if let ChildOfRoot::Element(e) = root_child {
633 assert!(result.is_none());
634 result = Some(e);
635 }
636 }
637 return result.unwrap();
638}
639
640#[allow(dead_code)]
643pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
644 crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files().unwrap());
645 let mathml = cleanup_mathml(mathml)?;
646 return crate::speech::intent_from_mathml(mathml, doc);
647}
648
649#[allow(dead_code)]
650fn trim_doc(doc: &Document) {
651 for root_child in doc.root().children() {
652 if let ChildOfRoot::Element(e) = root_child {
653 trim_element(e, false);
654 } else {
655 doc.root().remove_child(root_child); }
657 }
658}
659
660pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
662 const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
667 static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());
668
669 if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
670 make_leaf_element(e);
672 return;
673 }
674
675 let mut single_text = "".to_string();
676 for child in e.children() {
677 match child {
678 ChildOfElement::Element(c) => {
679 trim_element(c, allow_structure_in_leaves);
680 }
681 ChildOfElement::Text(t) => {
682 single_text += t.text();
683 e.remove_child(child);
684 }
685 _ => {
686 e.remove_child(child);
687 }
688 }
689 }
690
691 if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
693 if !single_text.trim_matches(WHITESPACE).is_empty() {
697 error!(
698 "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
699 );
700 }
701 return;
702 }
703 if e.children().is_empty() && !single_text.is_empty() {
704 e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
706 }
707
708 fn make_leaf_element(mathml_leaf: Element) {
709 let children = mathml_leaf.children();
714 if children.is_empty() {
715 return;
716 }
717
718 if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
719 return;
720 }
721
722 let mut text = "".to_string();
724 for child in children {
725 let child_text = match child {
726 ChildOfElement::Element(child) => {
727 if name(child) == "mglyph" {
728 child.attribute_value("alt").unwrap_or("").to_string()
729 } else {
730 gather_text(child)
731 }
732 }
733 ChildOfElement::Text(t) => {
734 t.text().to_string()
736 }
737 _ => "".to_string(),
738 };
739 if !child_text.is_empty() {
740 text += &child_text;
741 }
742 }
743
744 mathml_leaf.clear_children();
746 mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
747 fn gather_text(html: Element) -> String {
751 let mut text = "".to_string(); for child in html.children() {
753 match child {
754 ChildOfElement::Element(child) => {
755 text += &gather_text(child);
756 }
757 ChildOfElement::Text(t) => text += t.text(),
758 _ => (),
759 }
760 }
761 return text;
763 }
764 }
765
766 fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
767 let mut needs_rewrite = false;
770 for child in mathml_leaf.children() {
771 if let Some(element) = child.element() {
772 if name(element) != "math" {
773 return false; }
775 needs_rewrite = true;
776 }
777 };
778
779 if !needs_rewrite {
780 return false;
781 }
782
783 let leaf_name = name(mathml_leaf);
785 let doc = mathml_leaf.document();
786 let mut new_children = Vec::new();
787 let mut is_last_mtext = false;
788 for child in mathml_leaf.children() {
789 if let Some(element) = child.element() {
790 trim_element(element, true);
791 new_children.append(&mut element.children()); is_last_mtext = false;
793 } else if let Some(text) = child.text() {
794 if is_last_mtext {
796 let last_child = new_children.last_mut().unwrap().element().unwrap();
797 let new_text = as_text(last_child).to_string() + text.text();
798 last_child.set_text(&new_text);
799 } else {
800 let new_leaf_node = create_mathml_element(&doc, leaf_name);
801 new_leaf_node.set_text(text.text());
802 new_children.push(ChildOfElement::Element(new_leaf_node));
803 is_last_mtext = true;
804 }
805 }
806 };
807
808 for child in &mut new_children {
810 if let Some(element) = child.element() && is_leaf(element) {
811 let text = as_text(element);
812 let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
813 element.set_text(&cleaned_text);
814 }
815 }
816
817 crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
818 mathml_leaf.clear_children();
819 mathml_leaf.append_children(new_children);
820
821 return true;
823 }
824}
825
826#[allow(dead_code)]
829fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
830 if doc1.root().children().len() != doc2.root().children().len() {
833 bail!(
834 "Children of docs have {} != {} children",
835 doc1.root().children().len(),
836 doc2.root().children().len()
837 );
838 }
839
840 for (i, (c1, c2)) in doc1
841 .root()
842 .children()
843 .iter()
844 .zip(doc2.root().children().iter())
845 .enumerate()
846 {
847 match c1 {
848 ChildOfRoot::Element(e1) => {
849 if let ChildOfRoot::Element(e2) = c2 {
850 is_same_element(*e1, *e2, &[])?;
851 } else {
852 bail!("child #{}, first is element, second is something else", i);
853 }
854 }
855 ChildOfRoot::Comment(com1) => {
856 if let ChildOfRoot::Comment(com2) = c2 {
857 if com1.text() != com2.text() {
858 bail!("child #{} -- comment text differs", i);
859 }
860 } else {
861 bail!("child #{}, first is comment, second is something else", i);
862 }
863 }
864 ChildOfRoot::ProcessingInstruction(p1) => {
865 if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
866 if p1.target() != p2.target() || p1.value() != p2.value() {
867 bail!("child #{} -- processing instruction differs", i);
868 }
869 } else {
870 bail!(
871 "child #{}, first is processing instruction, second is something else",
872 i
873 );
874 }
875 }
876 }
877 }
878 return Ok(());
879}
880
881#[allow(dead_code)]
884pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
885 enable_logs();
886 if name(e1) != name(e2) {
887 bail!("Names not the same: {}, {}", name(e1), name(e2));
888 }
889
890 if e1.children().len() != e2.children().len() {
893 bail!(
894 "Children of {} have {} != {} children",
895 name(e1),
896 e1.children().len(),
897 e2.children().len()
898 );
899 }
900
901 if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
902 bail!("In element {}, {}", name(e1), e);
903 }
904
905 for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
906 match c1 {
907 ChildOfElement::Element(child1) => {
908 if let ChildOfElement::Element(child2) = c2 {
909 is_same_element(*child1, *child2, ignore_attrs)?;
910 } else {
911 bail!("{} child #{}, first is element, second is something else", name(e1), i);
912 }
913 }
914 ChildOfElement::Comment(com1) => {
915 if let ChildOfElement::Comment(com2) = c2 {
916 if com1.text() != com2.text() {
917 bail!("{} child #{} -- comment text differs", name(e1), i);
918 }
919 } else {
920 bail!("{} child #{}, first is comment, second is something else", name(e1), i);
921 }
922 }
923 ChildOfElement::ProcessingInstruction(p1) => {
924 if let ChildOfElement::ProcessingInstruction(p2) = c2 {
925 if p1.target() != p2.target() || p1.value() != p2.value() {
926 bail!("{} child #{} -- processing instruction differs", name(e1), i);
927 }
928 } else {
929 bail!(
930 "{} child #{}, first is processing instruction, second is something else",
931 name(e1),
932 i
933 );
934 }
935 }
936 ChildOfElement::Text(t1) => {
937 if let ChildOfElement::Text(t2) = c2 {
938 if t1.text() != t2.text() {
939 bail!("{} child #{} -- text differs", name(e1), i);
940 }
941 } else {
942 bail!("{} child #{}, first is text, second is something else", name(e1), i);
943 }
944 }
945 }
946 }
947 return Ok(());
948
949 fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
951 let attrs1 = attrs1.iter()
952 .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
953 .collect::<Vec<Attribute>>();
954 let attrs2 = attrs2.iter()
955 .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
956 .collect::<Vec<Attribute>>();
957 if attrs1.len() != attrs2.len() {
958 bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
959 }
960 for attr1 in attrs1 {
962 if let Some(found_attr2) = attrs2
963 .iter()
964 .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
965 {
966 if attr1.value() == found_attr2.value() {
967 continue;
968 } else {
969 bail!(
970 "Attribute named {} has differing values:\n '{}'\n '{}'",
971 attr1.name().local_part(),
972 attr1.value(),
973 found_attr2.value()
974 );
975 }
976 } else {
977 bail!(
978 "Attribute name {} not in [{}]",
979 print_attr(&attr1),
980 print_attrs(&attrs2)
981 );
982 }
983 }
984 return Ok(());
985
986 fn print_attr(attr: &Attribute) -> String {
987 return format!("@{}='{}'", attr.name().local_part(), attr.value());
988 }
989 fn print_attrs(attrs: &[Attribute]) -> String {
990 return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
991 }
992 }
993}
994
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses and trims both strings, then compares the resulting documents.
    ///
    /// Returns `true` when they are the same and panics with the difference otherwise,
    /// so that a failing test shows what differed.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let test_package = parser::parse(test).expect("Failed to parse input");
        let test_doc = test_package.as_document();
        trim_doc(&test_doc);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        let target_package = parser::parse(target).expect("Failed to parse input");
        let target_doc = target_package.as_document();
        trim_doc(&target_doc);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        if let Err(e) = is_same_doc(&test_doc, &target_doc) {
            panic!("{}", e);
        }
        true
    }

    #[test]
    fn trim_same() {
        let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
    }

    #[test]
    fn trim_whitespace() {
        let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
    }

    #[test]
    fn no_trim_whitespace_nbsp() {
        // The no-break space is written as a character reference so it is visible
        // in the source; it is not part of the whitespace set that gets trimmed.
        let trimmed_str = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let whitespace_str = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
    }

    #[test]
    fn trim_comment() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(comment_str, whitespace_str));
    }

    #[test]
    fn replace_mglyph() {
        let mglyph_str = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let result_str = "<math>
                <mrow>
                    <mi>X23braid</mi>
                    <mo>+</mo>
                    <mi>132braidY</mi>
                    <mo>=</mo>
                    <mi>13braid</mi>
                </mrow>
            </math>";
        assert!(are_parsed_strs_equal(mglyph_str, result_str));
    }

    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        let package1 = parser::parse(whitespace_str).expect("Failed to parse input");
        let doc1 = package1.as_document();
        trim_doc(&doc1);
        debug!("doc1:\n{}", mml_to_string(get_element(&package1)));

        let package2 = parser::parse(different_str).expect("Failed to parse input");
        let doc2 = package2.as_document();
        trim_doc(&doc2);
        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));

        assert!(is_same_doc(&doc1, &doc2).is_err());
    }

    #[test]
    fn test_entities() {
        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // named entities must give the same result as the equivalent numeric references
        let entity_str = set_mathml("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>").unwrap();
        let converted_str =
            set_mathml("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>").unwrap();

        // the generated ids differ from call to call, so remove them before comparing
        static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
        assert_eq!(entity_str, converted_str, "normal entity test failed");

        let entity_str = set_mathml(
            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
        )
        .unwrap();
        let converted_str =
            set_mathml("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>").unwrap();
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
        assert_eq!(entity_str, converted_str, "special entities quote test failed");

        let entity_str =
            set_mathml("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>").unwrap();
        let converted_str =
            set_mathml("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>")
                .unwrap();
        let entity_str = ID_MATCH.replace_all(&entity_str, "");
        let converted_str = ID_MATCH.replace_all(&converted_str, "");
        assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
    }

    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        use std::env;
        // SAFETY: NOTE(review) -- `set_var` is only sound while no other thread reads
        // or writes the process environment. The test harness runs tests in parallel,
        // so confirm that nothing else touches the environment during this test.
        unsafe {
            env::set_var("MathCATRulesDir", "MathCATRulesDir");
        }
        assert!(set_rules_dir("someInvalidRulesDir").is_err());
        assert!(
            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );
        assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
    }

    #[test]
    fn single_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<p> para 1</p>bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn multiple_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc </mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn nested_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn empty_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn mathml_in_mtext() {
        let test = "<math><mtext>if <math> <msup><mi>n</mi><mn>2</mn></msup></math> is real</mtext></math>";
        let target = "<math><mrow><mtext>if </mtext><msup><mi>n</mi><mn>2</mn></msup><mtext> is real</mtext></mrow></math>";
        assert!(are_parsed_strs_equal(test, target));
    }
}