Skip to main content

libmathcat/
interface.rs

1//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2//!
3#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6use std::sync::LazyLock;
7
8use crate::canonicalize::{as_text, create_mathml_element};
9use crate::errors::*;
10use phf::phf_map;
11use regex::{Captures, Regex};
12use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
13use sxd_document::parser;
14use sxd_document::Package;
15
16use crate::canonicalize::{as_element, name};
17use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18use log::{debug, error};
19
20use crate::navigate::*;
21use crate::pretty_print::mml_to_string;
22use crate::xpath_functions::{is_leaf, IsNode};
23use std::panic::{catch_unwind, AssertUnwindSafe};
24
25#[cfg(feature = "enable-logs")]
26use std::sync::Once;
27#[cfg(feature = "enable-logs")]
28static INIT: Once = Once::new();
29
/// Set up logging the first time it is called when the `enable-logs` feature is on.
///
/// Without that feature the body compiles to nothing. With it, the guarded closure
/// runs at most once (via `INIT`) and installs the Android logger only when the
/// target OS is Android.
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            android_logger::init_once(
                android_logger::Config::default()
                    .with_max_level(log::LevelFilter::Trace)
                    .with_tag("MathCat"),
            );
            log::trace!("Activated Android logger!");
        }
    });
}
47
// For getting a message from a panic
thread_local! {
    /// The most recent panic recorded on this thread, stored as `(message, file, line)`.
    static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
}

/// Install a panic hook that records the panic message, source file, and line number
/// in `PANIC_INFO` for the panicking thread.
///
/// The file and line are stored as separate values so that a consumer formatting them
/// as `file:line` reports the real line number. If the panic carries no location,
/// the file is `"unknown"` and the line is `0`.
///
/// The hook replaces any previously installed hook.
pub fn init_panic_handler() {
    use std::panic;

    panic::set_hook(Box::new(|info| {
        let (file, line) = info
            .location()
            .map(|l| (l.file().to_string(), l.line()))
            .unwrap_or_else(|| ("unknown".to_string(), 0));

        // `panic!("literal")` yields a `&'static str`; formatted panics yield a `String`
        let payload = info.payload();
        let msg = if let Some(s) = payload.downcast_ref::<&'static str>() {
            s.to_string()
        } else if let Some(s) = payload.downcast_ref::<String>() {
            s.clone()
        } else {
            "Unknown panic payload".to_string()
        };

        // Use try_with/try_borrow_mut to ensure the hook never panics itself
        let _ = PANIC_INFO.try_with(|cell| {
            if let Ok(mut slot) = cell.try_borrow_mut() {
                *slot = Some((msg, file, line));
            }
        });
    }));
}
80
81pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
82    match result {
83        Ok(val) => val,
84        Err(_) => {
85            // Retrieve the smuggled info
86            let details = PANIC_INFO.with(|cell| cell.borrow_mut().take());
87            
88            if let Some((msg, file, line)) = details {
89                Err(anyhow::anyhow!(
90                    "MathCAT crash! Please report the following information: '{}' at {}:{}",
91                    msg, file, line
92                ))
93            } else {
94                Err(anyhow::anyhow!("MathCAT crash! -- please report"))
95            }
96        }
97    }
98} 
99
100// wrap up some common functionality between the call from 'main' and AT
101fn cleanup_mathml(mathml: Element) -> Result<Element> {
102    trim_element(mathml, false);
103    let mathml = crate::canonicalize::canonicalize(mathml)?;
104    let mathml = add_ids(mathml);
105    return Ok(mathml);
106}
107
108thread_local! {
109    /// The current node being navigated (also spoken and brailled) is stored in `MATHML_INSTANCE`.
110    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
111}
112
113fn init_mathml_instance() -> RefCell<Package> {
114    let package = parser::parse("<math></math>")
115        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
116    return RefCell::new(package);
117}
118
119/// Set the Rules directory
120/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
121pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
122    enable_logs();
123    init_panic_handler();
124    let dir = dir.as_ref().to_string();
125    let result = catch_unwind(AssertUnwindSafe(|| {
126        use std::path::PathBuf;
127        let dir_os = if dir.is_empty() {
128            std::env::var_os("MathCATRulesDir").unwrap_or_default()
129        } else {
130            std::ffi::OsString::from(&dir)
131        };
132        let pref_manager = crate::prefs::PreferenceManager::get();
133        pref_manager.borrow_mut().initialize(PathBuf::from(dir_os))
134    }));
135    return report_any_panic(result);
136}
137
138/// Returns the version number (from Cargo.toml) of the build
139pub fn get_version() -> String {
140    enable_logs();
141    const VERSION: &str = env!("CARGO_PKG_VERSION");
142    return VERSION.to_string();
143}
144
145/// This will override any previous MathML that was set.
146/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
147/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
148pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
149    enable_logs();
150    // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
151    static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
152    static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
153    static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap()); // very limited namespace prefix match
154    static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap()); // very limited namespace prefix match
155    static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]+?);"#).unwrap());
156
157    let result = catch_unwind(AssertUnwindSafe(|| {
158        NAVIGATION_STATE.with(|nav_stack| {
159            nav_stack.borrow_mut().reset();
160        });
161
162        // We need the main definitions files to be read in so canonicalize can work.
163        // This call reads all of them for the current preferences, but that's ok since they will likely be used
164        crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
165
166        let mathml_str = mathml_str.as_ref();
167        return MATHML_INSTANCE.with(|old_package| {
168            static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
169
170            let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
171                                                    // need to deal with character data and convert to something the parser knows
172            let mathml_str =
173                HTML_ENTITIES.replace_all(mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
174                    None => {
175                        error_message = format!("No entity named '{}'", &cap[0]);
176                        cap[0].to_string()
177                    }
178                    Some(&ch) => ch.to_string(),
179                });
180
181            if !error_message.is_empty() {
182                bail!(error_message);
183            }
184            let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
185            let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
186
187            // the speech rules use the xpath "name" function and that includes the prefix
188            // getting rid of the prefix properly probably involves a recursive replacement in the tree
189            // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
190            let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
191            let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
192
193            let new_package = parser::parse(&mathml_str);
194            if let Err(e) = new_package {
195                bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
196            }
197
198            let new_package = new_package.unwrap();
199            let mathml = get_element(&new_package);
200            let mathml = cleanup_mathml(mathml)?;
201            let mathml_string = mml_to_string(mathml);
202            old_package.replace(new_package);
203
204            return Ok(mathml_string);
205        });
206    }));
207
208    return report_any_panic(result);
209}
210
211/// Get the spoken text of the MathML that was set.
212/// The speech takes into account any AT or user preferences.
213pub fn get_spoken_text() -> Result<String> {
214    enable_logs();
215    let result = catch_unwind(AssertUnwindSafe(|| {
216        MATHML_INSTANCE.with(|package_instance| {
217            let package_instance = package_instance.borrow();
218            let mathml = get_element(&package_instance);
219            let new_package = Package::new();
220            let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
221            debug!("Intent tree:\n{}", mml_to_string(intent));
222            let speech = crate::speech::speak_mathml(intent, "", 0)?;
223            return Ok(speech);
224        })
225    }));
226    return report_any_panic(result);
227}
228
229/// Get the spoken text for an overview of the MathML that was set.
230/// The speech takes into account any AT or user preferences.
231/// Note: this implementation for is currently minimal and should not be used.
232pub fn get_overview_text() -> Result<String> {
233    enable_logs();
234    let result = catch_unwind(AssertUnwindSafe(|| {
235        MATHML_INSTANCE.with(|package_instance| {
236            let package_instance = package_instance.borrow();
237            let mathml = get_element(&package_instance);
238            let speech = crate::speech::overview_mathml(mathml, "", 0)?;
239            return Ok(speech);
240        })
241    }));
242    return report_any_panic(result);
243}
244
245/// Get the value of the named preference.
246/// None is returned if `name` is not a known preference.
247pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
248    enable_logs();
249    let name = name.as_ref().to_string();
250    let result = catch_unwind(AssertUnwindSafe(|| {
251        use crate::prefs::NO_PREFERENCE;
252        crate::speech::SPEECH_RULES.with(|rules| {
253            let rules = rules.borrow();
254            let pref_manager = rules.pref_manager.borrow();
255            let mut value = pref_manager.pref_to_string(&name);
256            if value == NO_PREFERENCE {
257                value = pref_manager.pref_to_string(&name);
258            }
259            if value == NO_PREFERENCE {
260                bail!("No preference named '{}'", name);
261            } else {
262                return Ok(value);
263            }
264        })
265    }));
266    return report_any_panic(result);
267}
268
269/// Set a MathCAT preference. The preference name should be a known preference name.
270/// The value should either be a string or a number (depending upon the preference being set)
271/// The list of known user preferences is in the MathCAT user documentation.
272/// Here are common preferences set by programs (not settable by the user):
273/// * TTS -- SSML, SAPI5, None
274/// * Pitch -- normalized at '1.0'
275/// * Rate -- words per minute (should match current speech rate).
276///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
277/// * Volume -- default 100
278/// * Voice -- set a voice to use (not implemented)
279/// * Gender -- set pick any voice of the given gender (not implemented)
280/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
281///
282/// Important: both the preference name and value are case-sensitive
283///
284/// This function can be called multiple times to set different values.
285/// The values are persistent and extend beyond calls to [`set_mathml`].
286/// A value can be overwritten by calling this function again with a different value.
287///
288/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
289pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
290    enable_logs();
291    let name = name.as_ref().to_string();
292    let value = value.as_ref().to_string();
293    let result = catch_unwind(AssertUnwindSafe(|| {
294        set_preference_impl(&name, &value)
295    }));
296    return report_any_panic(result);
297}
298
299fn set_preference_impl(name: &str, value: &str) -> Result<()> {
300    let mut value = value.to_string();
301    if name == "Language" || name == "LanguageAuto" {
302        // check the format
303        if value != "Auto" {
304            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
305            let mut lang_country_split = value.split('-');
306            let language = lang_country_split.next().unwrap_or("");
307            let country = lang_country_split.next().unwrap_or("");
308            if language.len() != 2 {
309                bail!(
310                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
311                    value
312                );
313            }
314            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
315            if !country.is_empty() {
316                new_lang_country.push('-');
317                new_lang_country.push_str(country);
318            }
319            value = new_lang_country;
320        }
321        if name == "LanguageAuto" && value == "Auto" {
322            bail!("'LanguageAuto' can not have the value 'Auto'");
323        }
324    }
325
326    crate::speech::SPEECH_RULES.with(|rules| {
327        let rules = rules.borrow_mut();
328        if let Some(error_string) = rules.get_error() {
329            bail!("{}", error_string);
330        }
331
332        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
333        let mut pref_manager = rules.pref_manager.borrow_mut();
334        if name == "LanguageAuto" {
335            let language_pref = pref_manager.pref_to_string("Language");
336            if language_pref != "Auto" {
337                bail!(
338                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
339                    language_pref
340                );
341            }
342        }
343        let lower_case_value = value.to_lowercase();
344        if lower_case_value == "true" || lower_case_value == "false" {
345            pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
346        } else {
347            match name {
348                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
349                    pref_manager.set_api_float_pref(name, to_float(name, &value)?)
350                }
351                _ => {
352                    pref_manager.set_string_pref(name, &value)?;
353                }
354            }
355        };
356        return Ok::<(), Error>(());
357    })?;
358
359    return Ok(());
360}
361
362fn to_float(name: &str, value: &str) -> Result<f64> {
363    return match value.parse::<f64>() {
364        Ok(val) => Ok(val),
365        Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
366    };
367}
368
369/// Get the braille associated with the MathML that was set by [`set_mathml`].
370/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
371/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
372pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
373    enable_logs();
374    let nav_node_id = nav_node_id.as_ref().to_string();
375    let result = catch_unwind(AssertUnwindSafe(|| {
376        MATHML_INSTANCE.with(|package_instance| {
377            let package_instance = package_instance.borrow();
378            let mathml = get_element(&package_instance);
379            let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
380            return Ok(braille);
381        })
382    }));
383    return report_any_panic(result);
384}
385
386/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
387/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
388/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
389pub fn get_navigation_braille() -> Result<String> {
390    enable_logs();
391    let result = catch_unwind(AssertUnwindSafe(|| {
392        MATHML_INSTANCE.with(|package_instance| {
393            let package_instance = package_instance.borrow();
394            let mathml = get_element(&package_instance);
395            let new_package = Package::new(); // used if we need to create a new tree
396            let new_doc = new_package.as_document();
397            let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
398                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
399                    Err(e) => Err(e),
400                    Ok((found, offset)) => {
401                        // get the MathML node and wrap it inside of a <math> element
402                        // if the offset is given, we need to get the character it references
403                        if offset == 0 {
404                            if name(found) == "math" {
405                                Ok(found)
406                            } else {
407                                let new_mathml = create_mathml_element(&new_doc, "math");
408                                new_mathml.append_child(copy_mathml(found));
409                                new_doc.root().append_child(new_mathml);
410                                Ok(new_mathml)
411                            }
412                        } else if !is_leaf(found) {
413                            bail!(
414                                "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
415                                offset,
416                                name(found)
417                            );
418                        } else if let Some(ch) = as_text(found).chars().nth(offset) {
419                            let internal_mathml = create_mathml_element(&new_doc, name(found));
420                            internal_mathml.set_text(&ch.to_string());
421                            let new_mathml = create_mathml_element(&new_doc, "math");
422                            new_mathml.append_child(internal_mathml);
423                            new_doc.root().append_child(new_mathml);
424                            Ok(new_mathml)
425                        } else {
426                            bail!(
427                                "Internal error: offset '{}' on leaf element '{}' doesn't exist",
428                                offset,
429                                mml_to_string(found)
430                            );
431                        }
432                    }
433                };
434            })?;
435
436            let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
437            return Ok(braille);
438        })
439    }));
440    return report_any_panic(result);
441}
442
443/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
444/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
445/// The spoken text for the new current node is returned.
446pub fn do_navigate_keypress(
447    key: usize,
448    shift_key: bool,
449    control_key: bool,
450    alt_key: bool,
451    meta_key: bool,
452) -> Result<String> {
453    enable_logs();
454    let result = catch_unwind(AssertUnwindSafe(|| {
455        MATHML_INSTANCE.with(|package_instance| {
456            let package_instance = package_instance.borrow();
457            let mathml = get_element(&package_instance);
458            return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
459        })
460    }));
461    return report_any_panic(result);
462}
463
464/// Given a navigation command, the current node is moved accordingly.
465/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
466/// The valid commands are:
467/// * Standard move commands:
468///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
469/// * Movement in a table or elementary math:
470///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
471/// * Moving into children or out to parents:
472///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
473/// * Undo the last movement command:
474///   `MoveLastLocation`
475/// * Read commands (standard speech):
476///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
477/// * Describe commands (overview):
478///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
479/// * Location information:
480///   `WhereAmI`, `WhereAmIAll`
481/// * Change navigation modes (circle up/down):
482///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
483/// * Speak the current navigation mode
484///   `ToggleSpeakMode`
485///
486/// There are 10 place markers that can be set/read/described or moved to.
487/// * Setting:
488///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
489/// * Reading:
490///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
491/// * Describing:
492///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
493/// * Moving:
494///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
495///
496/// When done with Navigation, call with `Exit`
497pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
498    enable_logs();
499    let command = command.as_ref().to_string();
500    let result = catch_unwind(AssertUnwindSafe(|| {
501        let cmd = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
502        if cmd.is_none() {
503            bail!("Unknown command in call to DoNavigateCommand()");
504        };
505        let cmd = *cmd.unwrap();
506        MATHML_INSTANCE.with(|package_instance| {
507            let package_instance = package_instance.borrow();
508            let mathml = get_element(&package_instance);
509            return do_navigate_command_string(mathml, cmd);
510        })
511    }));
512    return report_any_panic(result);
513}
514
515/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
516/// An error is returned if the 'id' doesn't exist
517pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
518    enable_logs();
519    let id = id.as_ref().to_string();
520    let result = catch_unwind(AssertUnwindSafe(|| {
521        MATHML_INSTANCE.with(|package_instance| {
522            let package_instance = package_instance.borrow();
523            let mathml = get_element(&package_instance);
524            return set_navigation_node_from_id(mathml, &id, offset);
525        })
526    }));
527    return report_any_panic(result);
528}
529
530/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
531/// The offset is needed for token elements that have multiple characters.
532pub fn get_navigation_mathml() -> Result<(String, usize)> {
533    enable_logs();
534    let result = catch_unwind(AssertUnwindSafe(|| {
535        MATHML_INSTANCE.with(|package_instance| {
536            let package_instance = package_instance.borrow();
537            let mathml = get_element(&package_instance);
538            return NAVIGATION_STATE.with(|nav_stack| {
539                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
540                    Err(e) => Err(e),
541                    Ok((found, offset)) => Ok((mml_to_string(found), offset)),
542                };
543            });
544        })
545    }));
546    return report_any_panic(result);
547}
548
549/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
550/// `offset` (not yet implemented)
551/// The offset is needed for token elements that have multiple characters.
552pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
553    enable_logs();
554    let result = catch_unwind(AssertUnwindSafe(|| {
555        MATHML_INSTANCE.with(|package_instance| {
556            let package_instance = package_instance.borrow();
557            let mathml = get_element(&package_instance);
558            return Ok(NAVIGATION_STATE.with(|nav_stack| {
559                return nav_stack.borrow().get_navigation_mathml_id(mathml);
560            }));
561        })
562    }));
563    return report_any_panic(result);
564}
565
566/// Return the start and end braille character positions associated with the current (navigation) node.
567pub fn get_braille_position() -> Result<(usize, usize)> {
568    enable_logs();
569    let result = catch_unwind(AssertUnwindSafe(|| {
570        MATHML_INSTANCE.with(|package_instance| {
571            let package_instance = package_instance.borrow();
572            let mathml = get_element(&package_instance);
573            let nav_node = get_navigation_mathml_id()?;
574            let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
575            return Ok((start, end));
576        })
577    }));
578    return report_any_panic(result);
579}
580
581/// Given a 0-based braille position, return the smallest MathML node enclosing it.
582/// This node might be a leaf with an offset.
583pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
584    enable_logs();
585    let result = catch_unwind(AssertUnwindSafe(|| {
586        MATHML_INSTANCE.with(|package_instance| {
587            let package_instance = package_instance.borrow();
588            let mathml = get_element(&package_instance);
589            return crate::braille::get_navigation_node_from_braille_position(mathml, position);
590        })
591    }));
592    return report_any_panic(result);
593}
594
595pub fn get_supported_braille_codes() -> Result<Vec<String>> {
596    enable_logs();
597    let result = catch_unwind(AssertUnwindSafe(|| {
598        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
599        let braille_dir = rules_dir.join("Braille");
600        let mut braille_code_paths = Vec::new();
601
602        find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
603        let mut braille_code_paths = braille_code_paths.iter()
604                        .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
605                        .filter(|string_path| !string_path.is_empty() )
606                        .collect::<Vec<String>>();
607        braille_code_paths.sort();
608
609        Ok(braille_code_paths)
610    }));
611    return report_any_panic(result);
612 }
613
614/// Returns a Vec of all supported languages ("en", "es", ...)
615pub fn get_supported_languages() -> Result<Vec<String>> {
616    enable_logs();
617    let result = catch_unwind(AssertUnwindSafe(|| {
618        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
619        let lang_dir = rules_dir.join("Languages");
620        let mut lang_paths = Vec::new();
621
622        find_all_dirs_shim(&lang_dir, &mut lang_paths);
623        let mut language_paths = lang_paths.iter()
624                        .map(|path| path.strip_prefix(&lang_dir).unwrap()
625                                                  .to_string_lossy()
626                                                  .replace(std::path::MAIN_SEPARATOR, "-")
627                                                  .to_string())
628                        .filter(|string_path| !string_path.is_empty() )
629                        .collect::<Vec<String>>();
630
631        // make sure the 'zz' test dir isn't included (build.rs removes it, but for debugging is there)
632        language_paths.retain(|s| !s.starts_with("zz"));
633        language_paths.sort();
634        Ok(language_paths)
635    }));
636    return report_any_panic(result);
637 }
638
639 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
640    enable_logs();
641    let lang = lang.as_ref().to_string();
642    let result = catch_unwind(AssertUnwindSafe(|| {
643        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
644        let lang_dir = rules_dir.join("Languages").join(&lang);
645        let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
646        for file_name in &mut speech_styles {
647            file_name.truncate(file_name.len() - "_Rules.yaml".len())
648        }
649        speech_styles.sort();
650        speech_styles.dedup(); // remove duplicates -- shouldn't be any, but just in case
651        Ok(speech_styles)
652    }));
653    return report_any_panic(result);
654 }
655
656// utility functions
657
658/// Copy (recursively) the (MathML) element and return the new one.
659/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
660/// Convert the returned error from set_mathml, etc., to a useful string for display
661pub fn copy_mathml(mathml: Element) -> Element {
662    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
663    let children = mathml.children();
664    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
665    mathml.attributes().iter().for_each(|attr| {
666        new_mathml.set_attribute_value(attr.name(), attr.value());
667    });
668
669    // can't use is_leaf/as_text because this is also used with the intent tree
670    if children.len() == 1 &&
671       let Some(text) = children[0].text() {
672        new_mathml.set_text(text.text());
673        return new_mathml;
674        }
675
676    let mut new_children = Vec::with_capacity(children.len());
677    for child in children {
678        let child = as_element(child);
679        let new_child = copy_mathml(child);
680        new_children.push(new_child);
681    }
682    new_mathml.append_children(new_children);
683    return new_mathml;
684}
685
686pub fn errors_to_string(e: &Error) -> String {
687    enable_logs();
688    let mut result = format!("{e}\n");
689    for cause in e.chain().skip(1) { // skips original error
690        result += &format!("caused by: {cause}\n");
691    }
692    result
693}
694
695fn add_ids(mathml: Element) -> Element {
696    use std::time::SystemTime;
697    let time = if cfg!(target_family = "wasm") {
698        fastrand::usize(..)
699    } else {
700        SystemTime::now()
701            .duration_since(SystemTime::UNIX_EPOCH)
702            .unwrap()
703            .as_millis() as usize
704    };
705    let mut time_part = radix_fmt::radix(time, 36).to_string();
706    if time_part.len() < 3 {
707        time_part.push_str("a2c");      // needs to be at least three chars
708    }
709    let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
710    if random_part.len() < 4 {
711        random_part.push_str("a1b2");      // needs to be at least four chars
712    }
713    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
714    add_ids_to_all(mathml, &prefix, 0);
715    return mathml;
716
717    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
718        let mut count = count;
719        if mathml.attribute("id").is_none() {
720            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
721            mathml.set_attribute_value("data-id-added", "true");
722            count += 1;
723        };
724
725        if crate::xpath_functions::is_leaf(mathml) {
726            return count;
727        }
728
729        for child in mathml.children() {
730            let child = as_element(child);
731            count = add_ids_to_all(child, id_prefix, count);
732        }
733        return count;
734    }
735}
736
737pub fn get_element(package: &Package) -> Element<'_> {
738    enable_logs();
739    let doc = package.as_document();
740    let mut result = None;
741    for root_child in doc.root().children() {
742        if let ChildOfRoot::Element(e) = root_child {
743            assert!(result.is_none());
744            result = Some(e);
745        }
746    }
747    return result.unwrap();
748}
749
750/// Get the intent after setting the MathML
751/// Used in testing
752#[allow(dead_code)]
753pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
754    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
755    let mathml = cleanup_mathml(mathml)?;
756    return crate::speech::intent_from_mathml(mathml, doc);
757}
758
759#[allow(dead_code)]
760fn trim_doc(doc: &Document) {
761    for root_child in doc.root().children() {
762        if let ChildOfRoot::Element(e) = root_child {
763            trim_element(e, false);
764        } else {
765            doc.root().remove_child(root_child); // comment or processing instruction
766        }
767    }
768}
769
770/// Not really meant to be public -- used by tests in some packages
771pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
772    // "<mtext>this is text</mtext" results in 3 text children
773    // these are combined into one child as it makes code downstream simpler
774
775    // space, tab, newline, carriage return all get collapsed to a single space
776    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
777    static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());
778
779    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
780        // Assume it is HTML inside of the leaf -- turn the HTML into a string
781        make_leaf_element(e);
782        return;
783    }
784
785    let mut single_text = "".to_string();
786    for child in e.children() {
787        match child {
788            ChildOfElement::Element(c) => {
789                trim_element(c, allow_structure_in_leaves);
790            }
791            ChildOfElement::Text(t) => {
792                single_text += t.text();
793                e.remove_child(child);
794            }
795            _ => {
796                e.remove_child(child);
797            }
798        }
799    }
800
801    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
802    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
803        // intent-literal comes from testing intent
804        // FIX: we have a problem -- what should happen???
805        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
806        if !single_text.trim_matches(WHITESPACE).is_empty() {
807            error!(
808                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
809            );
810        }
811        return;
812    }
813    if e.children().is_empty() && !single_text.is_empty() {
814        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
815        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
816    }
817
818    fn make_leaf_element(mathml_leaf: Element) {
819        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
820        // Here, we convert them to leaves by grabbing up all the text and making that the content
821        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
822        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
823        let children = mathml_leaf.children();
824        if children.is_empty() {
825            return;
826        }
827
828        if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
829            return;
830        }
831
832        // gather up the text
833        let mut text = "".to_string();
834        for child in children {
835            let child_text = match child {
836                ChildOfElement::Element(child) => {
837                    if name(child) == "mglyph" {
838                        child.attribute_value("alt").unwrap_or("").to_string()
839                    } else {
840                        gather_text(child)
841                    }
842                }
843                ChildOfElement::Text(t) => {
844                    // debug!("ChildOfElement::Text: '{}'", t.text());
845                    t.text().to_string()
846                }
847                _ => "".to_string(),
848            };
849            if !child_text.is_empty() {
850                text += &child_text;
851            }
852        }
853
854        // get rid of the old children and replace with the text we just built
855        mathml_leaf.clear_children();
856        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
857        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));
858
859        /// gather up all the contents of the element and return them with a leading space
860        fn gather_text(html: Element) -> String {
861            let mut text = "".to_string(); // since we are throwing out the element tag, add a space between the contents
862            for child in html.children() {
863                match child {
864                    ChildOfElement::Element(child) => {
865                        text += &gather_text(child);
866                    }
867                    ChildOfElement::Text(t) => text += t.text(),
868                    _ => (),
869                }
870            }
871            // debug!("gather_text: '{}'", text);
872            return text;
873        }
874    }
875
876    fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
877        // first see if it can or needs to be rewritten
878        // this is likely rare, so we do a check and if true, to a second pass building the result
879        let mut needs_rewrite = false;
880        for child in mathml_leaf.children() {
881            if let Some(element) = child.element() {
882                if name(element) != "math" {
883                    return false; // something other than MathML as a child -- can't rewrite
884                }
885                needs_rewrite = true;
886            }
887        };
888
889        if !needs_rewrite {
890            return false;
891        }
892
893        // now do the rewrite, flatting out the mathml and returning an mrow with the children
894        let leaf_name = name(mathml_leaf);
895        let doc = mathml_leaf.document();
896        let mut new_children = Vec::new();
897        let mut is_last_mtext = false;
898        for child in mathml_leaf.children() {
899            if let Some(element) = child.element() {
900                trim_element(element, true);
901                new_children.append(&mut element.children());   // don't want 'math' wrapper
902                is_last_mtext = false;
903            } else if let Some(text) = child.text() {
904                // combine adjacent text nodes into single nodes
905                if is_last_mtext {
906                    let last_child = new_children.last_mut().unwrap().element().unwrap();
907                    let new_text = as_text(last_child).to_string() + text.text();
908                    last_child.set_text(&new_text);
909                } else {
910                    let new_leaf_node = create_mathml_element(&doc, leaf_name);
911                    new_leaf_node.set_text(text.text());
912                    new_children.push(ChildOfElement::Element(new_leaf_node));
913                    is_last_mtext = true;
914                }
915            }
916        };
917
918        // clean up whitespace in text nodes
919        for child in &mut new_children {    
920            if let Some(element) = child.element() && is_leaf(element) {
921                let text = as_text(element);
922                let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
923                element.set_text(&cleaned_text);
924            }
925        }
926        
927        crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
928        mathml_leaf.clear_children();
929        mathml_leaf.append_children(new_children);
930
931        // debug!("rewrite_and_flatten_embedded_mathml: flattened\n'{}'", mml_to_string(mathml_leaf));
932        return true;
933    }
934}
935
936// used for testing trim
937/// returns Ok() if two Documents are equal or some info where they differ in the Err
938#[allow(dead_code)]
939fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
940    // assume 'e' doesn't have element children until proven otherwise
941    // this means we keep Text children until we are proven they aren't needed
942    if doc1.root().children().len() != doc2.root().children().len() {
943        bail!(
944            "Children of docs have {} != {} children",
945            doc1.root().children().len(),
946            doc2.root().children().len()
947        );
948    }
949
950    for (i, (c1, c2)) in doc1
951        .root()
952        .children()
953        .iter()
954        .zip(doc2.root().children().iter())
955        .enumerate()
956    {
957        match c1 {
958            ChildOfRoot::Element(e1) => {
959                if let ChildOfRoot::Element(e2) = c2 {
960                    is_same_element(*e1, *e2, &[])?;
961                } else {
962                    bail!("child #{}, first is element, second is something else", i);
963                }
964            }
965            ChildOfRoot::Comment(com1) => {
966                if let ChildOfRoot::Comment(com2) = c2 {
967                    if com1.text() != com2.text() {
968                        bail!("child #{} -- comment text differs", i);
969                    }
970                } else {
971                    bail!("child #{}, first is comment, second is something else", i);
972                }
973            }
974            ChildOfRoot::ProcessingInstruction(p1) => {
975                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
976                    if p1.target() != p2.target() || p1.value() != p2.value() {
977                        bail!("child #{} -- processing instruction differs", i);
978                    }
979                } else {
980                    bail!(
981                        "child #{}, first is processing instruction, second is something else",
982                        i
983                    );
984                }
985            }
986        }
987    }
988    return Ok(());
989}
990
991/// returns Ok() if two Documents are equal or some info where they differ in the Err
992// Not really meant to be public -- used by tests in some packages
993#[allow(dead_code)]
994pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
995    enable_logs();
996    if name(e1) != name(e2) {
997        bail!("Names not the same: {}, {}", name(e1), name(e2));
998    }
999
1000    // assume 'e' doesn't have element children until proven otherwise
1001    // this means we keep Text children until we are proven they aren't needed
1002    if e1.children().len() != e2.children().len() {
1003        bail!(
1004            "Children of {} have {} != {} children",
1005            name(e1),
1006            e1.children().len(),
1007            e2.children().len()
1008        );
1009    }
1010
1011    if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
1012        bail!("In element {}, {}", name(e1), e);
1013    }
1014
1015    for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
1016        match c1 {
1017            ChildOfElement::Element(child1) => {
1018                if let ChildOfElement::Element(child2) = c2 {
1019                    is_same_element(*child1, *child2, ignore_attrs)?;
1020                } else {
1021                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
1022                }
1023            }
1024            ChildOfElement::Comment(com1) => {
1025                if let ChildOfElement::Comment(com2) = c2 {
1026                    if com1.text() != com2.text() {
1027                        bail!("{} child #{} -- comment text differs", name(e1), i);
1028                    }
1029                } else {
1030                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
1031                }
1032            }
1033            ChildOfElement::ProcessingInstruction(p1) => {
1034                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
1035                    if p1.target() != p2.target() || p1.value() != p2.value() {
1036                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
1037                    }
1038                } else {
1039                    bail!(
1040                        "{} child #{}, first is processing instruction, second is something else",
1041                        name(e1),
1042                        i
1043                    );
1044                }
1045            }
1046            ChildOfElement::Text(t1) => {
1047                if let ChildOfElement::Text(t2) = c2 {
1048                    if t1.text() != t2.text() {
1049                        bail!("{} child #{} --  text differs", name(e1), i);
1050                    }
1051                } else {
1052                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
1053                }
1054            }
1055        }
1056    }
1057    return Ok(());
1058
1059    /// compares attributes -- '==' didn't seems to work
1060    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
1061        let attrs1 = attrs1.iter()
1062                .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1063                .collect::<Vec<Attribute>>();
1064        let attrs2 = attrs2.iter()
1065                .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1066                .collect::<Vec<Attribute>>();
1067        if attrs1.len() != attrs2.len() {
1068            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
1069        }
1070        // can't guarantee attrs are in the same order
1071        for attr1 in attrs1 {
1072            if let Some(found_attr2) = attrs2
1073                .iter()
1074                .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
1075            {
1076                if attr1.value() == found_attr2.value() {
1077                    continue;
1078                } else {
1079                    bail!(
1080                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
1081                        attr1.name().local_part(),
1082                        attr1.value(),
1083                        found_attr2.value()
1084                    );
1085                }
1086            } else {
1087                bail!(
1088                    "Attribute name {} not in [{}]",
1089                    print_attr(&attr1),
1090                    print_attrs(&attrs2)
1091                );
1092            }
1093        }
1094        return Ok(());
1095
1096        fn print_attr(attr: &Attribute) -> String {
1097            return format!("@{}='{}'", attr.name().local_part(), attr.value());
1098        }
1099        fn print_attrs(attrs: &[Attribute]) -> String {
1100            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
1101        }
1102    }
1103}
1104
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses and trims both strings, then compares the resulting documents.
    /// Returns `true` if they are the same and panics with a description of the difference if not.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let test_package = parser::parse(test).expect("Failed to parse input");
        let test_doc = test_package.as_document();
        trim_doc(&test_doc);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        let target_package = parser::parse(target).expect("Failed to parse input");
        let target_doc = target_package.as_document();
        trim_doc(&target_doc);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        if let Err(e) = is_same_doc(&test_doc, &target_doc) {
            panic!("{}", e);
        }
        true
    }

    #[test]
    fn trim_same() {
        let mathml = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(mathml, mathml));
    }

    #[test]
    fn trim_whitespace() {
        let without_extra_space = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let with_extra_space = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(without_extra_space, with_extra_space));
    }

    #[test]
    fn no_trim_whitespace_nbsp() {
        let without_extra_space = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let with_extra_space = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(without_extra_space, with_extra_space));
    }

    #[test]
    fn trim_comment() {
        let with_extra_space = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let with_comment = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(with_comment, with_extra_space));
    }

    #[test]
    fn replace_mglyph() {
        // each 'mglyph' should be replaced by the text of its 'alt' attribute
        let with_mglyphs = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let expected = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
        assert!(are_parsed_strs_equal(with_mglyphs, expected));
    }

    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        // need to manually do this (rather than use 'are_parsed_strs_equal') since failure shouldn't be a panic
        let package1 = parser::parse(whitespace_str).expect("Failed to parse input");
        let doc1 = package1.as_document();
        trim_doc(&doc1);
        debug!("doc1:\n{}", mml_to_string(get_element(&package1)));

        let package2 = parser::parse(different_str).expect("Failed to parse input");
        let doc2 = package2.as_document();
        trim_doc(&doc2);
        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));

        assert!(is_same_doc(&doc1, &doc2).is_err());
    }

    #[test]
    fn test_entities() {
        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // the ids that get added are unique, so they need to be removed before comparing
        static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());

        // 'set_mathml' on both strings should give the same result once the added ids are removed
        let assert_same_result = |with_entities: &str, with_equivalents: &str, message: &str| {
            let entity_str = set_mathml(with_entities).unwrap();
            let converted_str = set_mathml(with_equivalents).unwrap();
            let entity_str = ID_MATCH.replace_all(&entity_str, "");
            let converted_str = ID_MATCH.replace_all(&converted_str, "");
            assert_eq!(entity_str, converted_str, "{message}");
        };

        assert_same_result(
            "<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>",
            "<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>",
            "normal entity test failed",
        );
        assert_same_result(
            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
            "<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>",
            "special entities quote test failed",
        );
        assert_same_result(
            "<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>",
            "<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>",
            "special entities <,>,& test failed",
        );
    }

    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        use std::env;
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
        // SAFETY: relies on no other thread reading or writing the environment at the same time.
        // NOTE(review): the test harness normally runs tests on several threads -- confirm this assumption holds.
        unsafe { env::set_var("MathCATRulesDir", "MathCATRulesDir"); }
        assert!(set_rules_dir("someInvalidRulesDir").is_err());
        assert!(
            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );
        assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
    }

    #[test]
    fn single_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn multiple_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn nested_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn empty_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn mathml_in_mtext() {
        // the embedded 'math' is flattened: the 'mtext' becomes an 'mrow' with the text split into 'mtext's
        let with_math = "<math><mtext>if&#xa0;<math> <msup><mi>n</mi><mn>2</mn></msup></math>&#xa0;is real</mtext></math>";
        let expected = "<math><mrow><mtext>if&#xa0;</mtext><msup><mi>n</mi><mn>2</mn></msup><mtext>&#xa0;is real</mtext></mrow></math>";
        assert!(are_parsed_strs_equal(with_math, expected));
    }
}