Skip to main content

libmathcat/
interface.rs

1//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2//!
3#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6use std::sync::LazyLock;
7
8use crate::canonicalize::{as_text, create_mathml_element};
9use crate::errors::*;
10use phf::phf_map;
11use regex::{Captures, Regex};
12use sxd_document::dom::{Element, Document, ChildOfRoot, ChildOfElement, Attribute};
13use sxd_document::parser;
14use sxd_document::Package;
15
16use crate::canonicalize::{as_element, name};
17use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
18use log::{debug, error};
19
20use crate::navigate::*;
21use crate::pretty_print::mml_to_string;
22use crate::xpath_functions::{is_leaf, IsNode};
23use std::panic::{catch_unwind, AssertUnwindSafe};
24
25/// Maximum depth to prevent stack overflow on deeply nested MathML
26pub const MAX_DEPTH: usize = 512;
27
#[cfg(feature = "enable-logs")]
use std::sync::Once;
#[cfg(feature = "enable-logs")]
static INIT: Once = Once::new();

/// Turn on logging, at most once per process.
///
/// This is a no-op unless the `enable-logs` feature is on. With the feature on, the only
/// logger that gets installed is the Android one (when the target OS is Android).
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    {
        INIT.call_once(|| {
            #[cfg(target_os = "android")]
            {
                use android_logger::*;
                use log::*;

                let logger_config = Config::default()
                    .with_max_level(LevelFilter::Trace)
                    .with_tag("MathCat");
                android_logger::init_once(logger_config);
                trace!("Activated Android logger!");
            }
        });
    }
}
50
// For getting a message from a panic
thread_local! {
    /// Details of the most recent panic on this thread, stored as `(message, file, line)`.
    /// It is written by the hook installed in `init_panic_handler` and taken by `report_any_panic`.
    static PANIC_INFO: RefCell<Option<(String, String, u32)>> = const { RefCell::new(None) };
}

/// Initialize the panic handler to catch panics and store the message, file, and line number in `PANIC_INFO`.
///
/// The file and line are stored in their own tuple slots: `report_any_panic` formats them as
/// `{file}:{line}`, so packing `"file:line"` into the file slot (with a line of 0) would produce
/// a bogus trailing `:0` in the reported location.
/// If the panic carries no location, the file is `"unknown"` and the line is `0`.
pub fn init_panic_handler() {
    use std::panic;

    panic::set_hook(Box::new(|info| {
        let (file, line) = info
            .location()
            .map(|loc| (loc.file().to_string(), loc.line()))
            .unwrap_or_else(|| ("unknown".to_string(), 0));

        // A panic payload is normally a `&'static str` or a `String`; anything else can't be displayed.
        let payload = info.payload();
        let msg = if let Some(s) = payload.downcast_ref::<&'static str>() {
            s.to_string()
        } else if let Some(s) = payload.downcast_ref::<String>() {
            s.clone()
        } else {
            "Unknown panic payload".to_string()
        };

        // Use try_with/try_borrow_mut to ensure the hook never panics itself
        let _ = PANIC_INFO.try_with(|cell| {
            if let Ok(mut slot) = cell.try_borrow_mut() {
                *slot = Some((msg, file, line));
            }
        });
    }));
}
83
84pub fn report_any_panic<T>(result: Result<Result<T, Error>, Box<dyn std::any::Any + Send>>) -> Result<T, Error> {
85    match result {
86        Ok(val) => val,
87        Err(_) => {
88            // Retrieve the smuggled info
89            let details = PANIC_INFO.with(|cell| cell.borrow_mut().take());
90            
91            if let Some((msg, file, line)) = details {
92                Err(anyhow::anyhow!(
93                    "MathCAT crash! Please report the following information: '{}' at {}:{}",
94                    msg, file, line
95                ))
96            } else {
97                Err(anyhow::anyhow!("MathCAT crash! -- please report"))
98            }
99        }
100    }
101} 
102
103// wrap up some common functionality between the call from 'main' and AT
104fn cleanup_mathml(mathml: Element) -> Result<Element> {
105    trim_element(mathml, false);
106    let mathml = crate::canonicalize::canonicalize(mathml)?;
107    let mathml = add_ids(mathml);
108    return Ok(mathml);
109}
110
111thread_local! {
112    /// The current node being navigated (also spoken and brailled) is stored in `MATHML_INSTANCE`.
113    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
114}
115
116fn init_mathml_instance() -> RefCell<Package> {
117    let package = parser::parse("<math></math>")
118        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
119    return RefCell::new(package);
120}
121
122/// Set the Rules directory
123/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
124pub fn set_rules_dir(dir: impl AsRef<str>) -> Result<()> {
125    enable_logs();
126    init_panic_handler();
127    let dir = dir.as_ref().to_string();
128    let result = catch_unwind(AssertUnwindSafe(|| {
129        use std::path::PathBuf;
130        let dir_os = if dir.is_empty() {
131            std::env::var_os("MathCATRulesDir").unwrap_or_default()
132        } else {
133            std::ffi::OsString::from(&dir)
134        };
135        let pref_manager = crate::prefs::PreferenceManager::get();
136        pref_manager.borrow_mut().initialize(PathBuf::from(dir_os))
137    }));
138    return report_any_panic(result);
139}
140
141/// Returns the version number (from Cargo.toml) of the build
142pub fn get_version() -> String {
143    enable_logs();
144    const VERSION: &str = env!("CARGO_PKG_VERSION");
145    return VERSION.to_string();
146}
147
148/// This will override any previous MathML that was set.
149/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
150/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
151pub fn set_mathml(mathml_str: impl AsRef<str>) -> Result<String> {
152    enable_logs();
153    // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
154    static MATHJAX_V2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap());
155    static MATHJAX_V3: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap());
156
157    // Strip out processing instructions and comments -- these are not MathML and can cause DOS problems in the parser
158    static PROCESSING_INSTRUCTION: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"<\?[\s\S]{1,2048}\?>"#).unwrap());
159    static XML_COMMENT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(?s)"#).unwrap());
160
161    // These have some length limits to avoid DOS attacks via long strings
162    static NAMESPACE_DECL: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"xmlns:[[:alpha:]]{1,32}"#).unwrap());
163    static PREFIX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"(</?)[[:alpha:]]{1,32}:"#).unwrap());
164    static HTML_ENTITIES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"&([a-zA-Z]{2,10});"#).unwrap());
165    let result = catch_unwind(AssertUnwindSafe(|| {
166        NAVIGATION_STATE.with(|nav_stack| {
167            nav_stack.borrow_mut().reset();
168        });
169
170        // We need the main definitions files to be read in so canonicalize can work.
171        // This call reads all of them for the current preferences, but that's ok since they will likely be used
172        crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
173
174        let mathml_str = mathml_str.as_ref();
175        // Safety guard: Reject strings > 1MB to prevent DoS/Stack issues
176        if mathml_str.len() > 1024 * 1024 {
177            bail!("MathML string of size {} bytes exceeds length limit of 1MB", mathml_str.len());
178        }
179
180        return MATHML_INSTANCE.with(|old_package| {
181            static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
182
183            let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
184                                                                     
185            let mathml_str = XML_COMMENT.replace_all(mathml_str, "");
186            let mathml_str = PROCESSING_INSTRUCTION.replace_all(&mathml_str, "");
187            // FIX: need to deal with character data and convert to something the parser knows
188            let mathml_str = HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
189                    None => {
190                        error_message = format!("No entity named '{}'", &cap[0]);
191                        cap[0].to_string()
192                    }
193                    Some(&ch) => ch.to_string(),
194                });
195
196            if !error_message.is_empty() {
197                // Clear stale state so subsequent API calls do not return previous user's data (security issue)
198                old_package.replace(parser::parse("<math></math>").unwrap());
199                bail!(error_message);
200            }
201            let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
202            let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
203
204            // the speech rules use the xpath "name" function and that includes the prefix
205            // getting rid of the prefix properly probably involves a recursive replacement in the tree
206            // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
207            let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
208            let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
209
210            let new_package = parser::parse(&mathml_str);
211            if let Err(e) = new_package {
212                // Clear stale state so subsequent API calls do not return previous user's data (security issue)
213                old_package.replace(parser::parse("<math></math>").unwrap());
214                bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
215            }
216
217            let new_package = new_package.unwrap();
218            let mathml = get_element(&new_package);
219            let mathml = cleanup_mathml(mathml)?;
220            let mathml_string = mml_to_string(mathml);
221            old_package.replace(new_package);
222
223            return Ok(mathml_string);
224        });
225    }));
226
227    return report_any_panic(result);
228}
229
230/// Get the spoken text of the MathML that was set.
231/// The speech takes into account any AT or user preferences.
232pub fn get_spoken_text() -> Result<String> {
233    enable_logs();
234    let result = catch_unwind(AssertUnwindSafe(|| {
235        MATHML_INSTANCE.with(|package_instance| {
236            let package_instance = package_instance.borrow();
237            let mathml = get_element(&package_instance);
238            let new_package = Package::new();
239            let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
240            debug!("Intent tree:\n{}", mml_to_string(intent));
241            let speech = crate::speech::speak_mathml(intent, "", 0)?;
242            return Ok(speech);
243        })
244    }));
245    return report_any_panic(result);
246}
247
248/// Get the spoken text for an overview of the MathML that was set.
249/// The speech takes into account any AT or user preferences.
250/// Note: this implementation for is currently minimal and should not be used.
251pub fn get_overview_text() -> Result<String> {
252    enable_logs();
253    let result = catch_unwind(AssertUnwindSafe(|| {
254        MATHML_INSTANCE.with(|package_instance| {
255            let package_instance = package_instance.borrow();
256            let mathml = get_element(&package_instance);
257            let speech = crate::speech::overview_mathml(mathml, "", 0)?;
258            return Ok(speech);
259        })
260    }));
261    return report_any_panic(result);
262}
263
264/// Get the value of the named preference.
265/// None is returned if `name` is not a known preference.
266pub fn get_preference(name: impl AsRef<str>) -> Result<String> {
267    enable_logs();
268    let name = name.as_ref().to_string();
269    let result = catch_unwind(AssertUnwindSafe(|| {
270        use crate::prefs::NO_PREFERENCE;
271        crate::speech::SPEECH_RULES.with(|rules| {
272            let rules = rules.borrow();
273            let pref_manager = rules.pref_manager.borrow();
274            let mut value = pref_manager.pref_to_string(&name);
275            if value == NO_PREFERENCE {
276                value = pref_manager.pref_to_string(&name);
277            }
278            if value == NO_PREFERENCE {
279                bail!("No preference named '{}'", name);
280            } else {
281                return Ok(value);
282            }
283        })
284    }));
285    return report_any_panic(result);
286}
287
288/// Set a MathCAT preference. The preference name should be a known preference name.
289/// The value should either be a string or a number (depending upon the preference being set)
290/// The list of known user preferences is in the MathCAT user documentation.
291/// Here are common preferences set by programs (not settable by the user):
292/// * TTS -- SSML, SAPI5, None
293/// * Pitch -- normalized at '1.0'
294/// * Rate -- words per minute (should match current speech rate).
295///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
296/// * Volume -- default 100
297/// * Voice -- set a voice to use (not implemented)
298/// * Gender -- set pick any voice of the given gender (not implemented)
299/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
300///
301/// Important: both the preference name and value are case-sensitive
302///
303/// This function can be called multiple times to set different values.
304/// The values are persistent and extend beyond calls to [`set_mathml`].
305/// A value can be overwritten by calling this function again with a different value.
306///
307/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
308pub fn set_preference(name: impl AsRef<str>, value: impl AsRef<str>) -> Result<()> {
309    enable_logs();
310    let name = name.as_ref().to_string();
311    let value = value.as_ref().to_string();
312    let result = catch_unwind(AssertUnwindSafe(|| {
313        set_preference_impl(&name, &value)
314    }));
315    return report_any_panic(result);
316}
317
318fn set_preference_impl(name: &str, value: &str) -> Result<()> {
319    let mut value = value.to_string();
320    if name == "Language" || name == "LanguageAuto" {
321        // check the format
322        if value != "Auto" {
323            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
324            let mut lang_country_split = value.split('-');
325            let language = lang_country_split.next().unwrap_or("");
326            let country = lang_country_split.next().unwrap_or("");
327            if language.len() != 2 {
328                bail!(
329                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
330                    value
331                );
332            }
333            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
334            if !country.is_empty() {
335                new_lang_country.push('-');
336                new_lang_country.push_str(country);
337            }
338            value = new_lang_country;
339        }
340        if name == "LanguageAuto" && value == "Auto" {
341            bail!("'LanguageAuto' can not have the value 'Auto'");
342        }
343    }
344
345    crate::speech::SPEECH_RULES.with(|rules| -> Result<()> {
346        if let Some(error_string) = rules.borrow().get_error() {
347            bail!("{}", error_string);
348        }
349        Ok(())
350    })?;
351
352    // Do not hold a SpeechRules borrow while updating preferences: invalidation clears rule caches.
353    let pref_manager = crate::prefs::PreferenceManager::get();
354    let mut pref_manager = pref_manager.borrow_mut();
355    if name == "LanguageAuto" {
356        let language_pref = pref_manager.pref_to_string("Language");
357        if language_pref != "Auto" {
358            bail!(
359                "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
360                language_pref
361            );
362        }
363    }
364    let lower_case_value = value.to_lowercase();
365    if lower_case_value == "true" || lower_case_value == "false" {
366        pref_manager.set_api_boolean_pref(name, value.to_lowercase() == "true");
367    } else {
368        match name {
369            "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
370                pref_manager.set_api_float_pref(name, to_float(name, &value)?)
371            }
372            _ => {
373                pref_manager.set_string_pref(name, &value)?;
374            }
375        }
376    };
377
378    return Ok(());
379}
380
381fn to_float(name: &str, value: &str) -> Result<f64> {
382    return match value.parse::<f64>() {
383        Ok(val) => Ok(val),
384        Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
385    };
386}
387
388/// Get the braille associated with the MathML that was set by [`set_mathml`].
389/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
390/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
391pub fn get_braille(nav_node_id: impl AsRef<str>) -> Result<String> {
392    enable_logs();
393    let nav_node_id = nav_node_id.as_ref().to_string();
394    let result = catch_unwind(AssertUnwindSafe(|| {
395        MATHML_INSTANCE.with(|package_instance| {
396            let package_instance = package_instance.borrow();
397            let mathml = get_element(&package_instance);
398            let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
399            return Ok(braille);
400        })
401    }));
402    return report_any_panic(result);
403}
404
405/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
406/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
407/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
408pub fn get_navigation_braille() -> Result<String> {
409    enable_logs();
410    let result = catch_unwind(AssertUnwindSafe(|| {
411        MATHML_INSTANCE.with(|package_instance| {
412            let package_instance = package_instance.borrow();
413            let mathml = get_element(&package_instance);
414            let new_package = Package::new(); // used if we need to create a new tree
415            let new_doc = new_package.as_document();
416            let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
417                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
418                    Err(e) => Err(e),
419                    Ok((found, offset)) => {
420                        // get the MathML node and wrap it inside of a <math> element
421                        // if the offset is given, we need to get the character it references
422                        if offset == 0 {
423                            if name(found) == "math" {
424                                Ok(found)
425                            } else {
426                                let new_mathml = create_mathml_element(&new_doc, "math");
427                                new_mathml.append_child(copy_mathml(found));
428                                new_doc.root().append_child(new_mathml);
429                                Ok(new_mathml)
430                            }
431                        } else if !is_leaf(found) {
432                            bail!(
433                                "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
434                                offset,
435                                name(found)
436                            );
437                        } else if let Some(ch) = as_text(found).chars().nth(offset) {
438                            let internal_mathml = create_mathml_element(&new_doc, name(found));
439                            internal_mathml.set_text(&ch.to_string());
440                            let new_mathml = create_mathml_element(&new_doc, "math");
441                            new_mathml.append_child(internal_mathml);
442                            new_doc.root().append_child(new_mathml);
443                            Ok(new_mathml)
444                        } else {
445                            bail!(
446                                "Internal error: offset '{}' on leaf element '{}' doesn't exist",
447                                offset,
448                                mml_to_string(found)
449                            );
450                        }
451                    }
452                };
453            })?;
454
455            let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
456            return Ok(braille);
457        })
458    }));
459    return report_any_panic(result);
460}
461
462/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
463/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
464/// The spoken text for the new current node is returned.
465pub fn do_navigate_keypress(
466    key: usize,
467    shift_key: bool,
468    control_key: bool,
469    alt_key: bool,
470    meta_key: bool,
471) -> Result<String> {
472    enable_logs();
473    let result = catch_unwind(AssertUnwindSafe(|| {
474        MATHML_INSTANCE.with(|package_instance| {
475            let package_instance = package_instance.borrow();
476            let mathml = get_element(&package_instance);
477            return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
478        })
479    }));
480    return report_any_panic(result);
481}
482
483/// Given a navigation command, the current node is moved accordingly.
484/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
485/// The valid commands are:
486/// * Standard move commands:
487///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
488/// * Movement in a table or elementary math:
489///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
490/// * Moving into children or out to parents:
491///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
492/// * Undo the last movement command:
493///   `MoveLastLocation`
494/// * Read commands (standard speech):
495///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
496/// * Describe commands (overview):
497///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
498/// * Location information:
499///   `WhereAmI`, `WhereAmIAll`
500/// * Change navigation modes (circle up/down):
501///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
502/// * Speak the current navigation mode
503///   `ToggleSpeakMode`
504///
505/// There are 10 place markers that can be set/read/described or moved to.
506/// * Setting:
507///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
508/// * Reading:
509///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
510/// * Describing:
511///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
512/// * Moving:
513///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
514///
515/// When done with Navigation, call with `Exit`
516pub fn do_navigate_command(command: impl AsRef<str>) -> Result<String> {
517    enable_logs();
518    let command = command.as_ref().to_string();
519    let result = catch_unwind(AssertUnwindSafe(|| {
520        let cmd = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
521        if cmd.is_none() {
522            bail!("Unknown command in call to DoNavigateCommand()");
523        };
524        let cmd = *cmd.unwrap();
525        MATHML_INSTANCE.with(|package_instance| {
526            let package_instance = package_instance.borrow();
527            let mathml = get_element(&package_instance);
528            return do_navigate_command_string(mathml, cmd);
529        })
530    }));
531    return report_any_panic(result);
532}
533
534/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
535/// An error is returned if the 'id' doesn't exist
536pub fn set_navigation_node(id: impl AsRef<str>, offset: usize) -> Result<()> {
537    enable_logs();
538    let id = id.as_ref().to_string();
539    let result = catch_unwind(AssertUnwindSafe(|| {
540        MATHML_INSTANCE.with(|package_instance| {
541            let package_instance = package_instance.borrow();
542            let mathml = get_element(&package_instance);
543            return set_navigation_node_from_id(mathml, &id, offset);
544        })
545    }));
546    return report_any_panic(result);
547}
548
549/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
550/// The offset is needed for token elements that have multiple characters.
551pub fn get_navigation_mathml() -> Result<(String, usize)> {
552    enable_logs();
553    let result = catch_unwind(AssertUnwindSafe(|| {
554        MATHML_INSTANCE.with(|package_instance| {
555            let package_instance = package_instance.borrow();
556            let mathml = get_element(&package_instance);
557            return NAVIGATION_STATE.with(|nav_stack| {
558                return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
559                    Err(e) => Err(e),
560                    Ok((found, offset)) => Ok((mml_to_string(found), offset)),
561                };
562            });
563        })
564    }));
565    return report_any_panic(result);
566}
567
568/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
569/// `offset` (not yet implemented)
570/// The offset is needed for token elements that have multiple characters.
571pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
572    enable_logs();
573    let result = catch_unwind(AssertUnwindSafe(|| {
574        MATHML_INSTANCE.with(|package_instance| {
575            let package_instance = package_instance.borrow();
576            let mathml = get_element(&package_instance);
577            return Ok(NAVIGATION_STATE.with(|nav_stack| {
578                return nav_stack.borrow().get_navigation_mathml_id(mathml);
579            }));
580        })
581    }));
582    return report_any_panic(result);
583}
584
585/// Return the start and end braille character positions associated with the current (navigation) node.
586pub fn get_braille_position() -> Result<(usize, usize)> {
587    enable_logs();
588    let result = catch_unwind(AssertUnwindSafe(|| {
589        MATHML_INSTANCE.with(|package_instance| {
590            let package_instance = package_instance.borrow();
591            let mathml = get_element(&package_instance);
592            let nav_node = get_navigation_mathml_id()?;
593            let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
594            return Ok((start, end));
595        })
596    }));
597    return report_any_panic(result);
598}
599
600/// Given a 0-based braille position, return the smallest MathML node enclosing it.
601/// This node might be a leaf with an offset.
602pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
603    enable_logs();
604    let result = catch_unwind(AssertUnwindSafe(|| {
605        MATHML_INSTANCE.with(|package_instance| {
606            let package_instance = package_instance.borrow();
607            let mathml = get_element(&package_instance);
608            return crate::braille::get_navigation_node_from_braille_position(mathml, position);
609        })
610    }));
611    return report_any_panic(result);
612}
613
614pub fn get_supported_braille_codes() -> Result<Vec<String>> {
615    enable_logs();
616    let result = catch_unwind(AssertUnwindSafe(|| {
617        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
618        let braille_dir = rules_dir.join("Braille");
619        let mut braille_code_paths = Vec::new();
620
621        find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
622        let mut braille_code_paths = braille_code_paths.iter()
623                        .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
624                        .filter(|string_path| !string_path.is_empty() )
625                        .collect::<Vec<String>>();
626        braille_code_paths.sort();
627
628        Ok(braille_code_paths)
629    }));
630    return report_any_panic(result);
631 }
632
633/// Returns a Vec of all supported languages ("en", "es", ...)
634pub fn get_supported_languages() -> Result<Vec<String>> {
635    enable_logs();
636    let result = catch_unwind(AssertUnwindSafe(|| {
637        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
638        let lang_dir = rules_dir.join("Languages");
639        let mut lang_paths = Vec::new();
640
641        find_all_dirs_shim(&lang_dir, &mut lang_paths);
642        let mut language_paths = lang_paths.iter()
643                        .map(|path| path.strip_prefix(&lang_dir).unwrap()
644                                                  .to_string_lossy()
645                                                  .replace(std::path::MAIN_SEPARATOR, "-")
646                                                  .to_string())
647                        .filter(|string_path| !string_path.is_empty() )
648                        .collect::<Vec<String>>();
649
650        // make sure the 'zz' test dir isn't included (build.rs removes it, but for debugging is there)
651        language_paths.retain(|s| !s.starts_with("zz"));
652        language_paths.sort();
653        Ok(language_paths)
654    }));
655    return report_any_panic(result);
656 }
657
658 pub fn get_supported_speech_styles(lang: impl AsRef<str>) -> Result<Vec<String>> {
659    enable_logs();
660    let lang = lang.as_ref().to_string();
661    let result = catch_unwind(AssertUnwindSafe(|| {
662        let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
663        let lang_dir = rules_dir.join("Languages").join(&lang);
664        let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
665        for file_name in &mut speech_styles {
666            file_name.truncate(file_name.len() - "_Rules.yaml".len())
667        }
668        speech_styles.sort();
669        speech_styles.dedup(); // remove duplicates -- shouldn't be any, but just in case
670        Ok(speech_styles)
671    }));
672    return report_any_panic(result);
673 }
674
675// utility functions
676
677/// Copy (recursively) the (MathML) element and return the new one.
678/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
679/// Convert the returned error from set_mathml, etc., to a useful string for display
680pub fn copy_mathml(mathml: Element) -> Element {
681    return copy_mathml_recursive(mathml, 0);
682}
683
684fn copy_mathml_recursive(mathml: Element, depth: usize) -> Element {
685    // Safety: Prevent stack overflow on deeply nested MathML
686    if depth > MAX_DEPTH {
687        // Return the element as a leaf if it's too deep to prevent crash
688        return create_mathml_element(&mathml.document(), name(mathml));
689    }
690
691    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
692    let children = mathml.children();
693    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
694    mathml.attributes().iter().for_each(|attr| {
695        new_mathml.set_attribute_value(attr.name(), attr.value());
696    });
697
698    // can't use is_leaf/as_text because this is also used with the intent tree
699    if children.len() == 1 &&
700       let Some(text) = children[0].text() {
701        new_mathml.set_text(text.text());
702        return new_mathml;
703        }
704
705    let mut new_children = Vec::with_capacity(children.len());
706    for child in children {
707        let child = as_element(child);
708        let new_child = copy_mathml_recursive(child, depth + 1);
709        new_children.push(new_child);
710    }
711    new_mathml.append_children(new_children);
712    return new_mathml;
713}
714
715pub fn errors_to_string(e: &Error) -> String {
716    enable_logs();
717    let mut result = format!("{e}\n");
718    for cause in e.chain().skip(1) { // skips original error
719        result += &format!("caused by: {cause}\n");
720    }
721    result
722}
723
724fn add_ids(mathml: Element) -> Element {
725    use std::time::SystemTime;
726    let time = if cfg!(target_family = "wasm") {
727        fastrand::usize(..)
728    } else {
729        SystemTime::now()
730            .duration_since(SystemTime::UNIX_EPOCH)
731            .unwrap()
732            .as_millis() as usize
733    };
734    let mut time_part = radix_fmt::radix(time, 36).to_string();
735    if time_part.len() < 3 {
736        time_part.push_str("a2c");      // needs to be at least three chars
737    }
738    let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
739    if random_part.len() < 4 {
740        random_part.push_str("a1b2");      // needs to be at least four chars
741    }
742    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
743    add_ids_to_all(mathml, &prefix, 0);
744    return mathml;
745
746    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
747        let mut count = count;
748        if mathml.attribute("id").is_none() {
749            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
750            mathml.set_attribute_value("data-id-added", "true");
751            count += 1;
752        };
753
754        if crate::xpath_functions::is_leaf(mathml) {
755            return count;
756        }
757
758        for child in mathml.children() {
759            let child = as_element(child);
760            count = add_ids_to_all(child, id_prefix, count);
761        }
762        return count;
763    }
764}
765
766pub fn get_element(package: &Package) -> Element<'_> {
767    enable_logs();
768    let doc = package.as_document();
769    let mut result = None;
770    for root_child in doc.root().children() {
771        if let ChildOfRoot::Element(e) = root_child {
772            assert!(result.is_none());
773            result = Some(e);
774        }
775    }
776    return result.unwrap();
777}
778
779/// Get the intent after setting the MathML
780/// Used in testing
781#[allow(dead_code)]
782pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
783    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
784    let mathml = cleanup_mathml(mathml)?;
785    return crate::speech::intent_from_mathml(mathml, doc);
786}
787
788#[allow(dead_code)]
789fn trim_doc(doc: &Document) {
790    for root_child in doc.root().children() {
791        if let ChildOfRoot::Element(e) = root_child {
792            trim_element(e, false);
793        } else {
794            doc.root().remove_child(root_child); // comment or processing instruction
795        }
796    }
797}
798
/// Normalizes `e` and its descendants in place: comments and processing instructions are removed,
/// text children are merged, and runs of whitespace are collapsed to a single space.
///
/// Leaf elements have their content turned into a single text child (see `make_leaf_element`).
/// If `allow_structure_in_leaves` is true, that is only done for leaves for which
/// `IsNode::is_mathml` is true; other leaves are processed like non-leaf elements.
/// For non-leaf elements, text that sits next to element children is dropped
/// (an error is logged if that text is more than whitespace).
///
/// Not really meant to be public -- used by tests in some packages
pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
    // "<mtext>this is text</mtext>" results in 3 text children
    // these are combined into one child as it makes code downstream simpler

    // space, tab, newline, form feed, carriage return all get collapsed to a single space
    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}','\u{000C}', '\u{000D}'];
    static WHITESPACE_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"[ \u{0009}\u{000A}\u{00C}\u{000D}]+"#).unwrap());

    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
        // Assume it is HTML inside of the leaf -- turn the HTML into a string
        make_leaf_element(e);
        return;
    }

    // Recurse into element children; pull all text children out of 'e' and remember their combined text.
    // Anything else (comments, processing instructions) is just removed.
    let mut single_text = "".to_string();
    for child in e.children() {
        match child {
            ChildOfElement::Element(c) => {
                trim_element(c, allow_structure_in_leaves);
            }
            ChildOfElement::Text(t) => {
                single_text += t.text();
                e.remove_child(child);
            }
            _ => {
                e.remove_child(child);
            }
        }
    }

    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
    // (WHITESPACE above additionally includes form feed)
    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
        // intent-literal comes from testing intent
        // FIX: we have a problem -- what should happen???
        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
        if !single_text.trim_matches(WHITESPACE).is_empty() {
            error!(
                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
            );
        }
        return;
    }
    // Only text was present: put it back as a single text child with whitespace runs collapsed.
    // Note: unlike make_leaf_element, leading/trailing whitespace is not trimmed here.
    if e.children().is_empty() && !single_text.is_empty() {
        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
    }

    /// Replaces the children of `mathml_leaf` with a single text child built from all of the text in it.
    ///
    /// If the only element children are `math` elements, the leaf is instead rewritten into an `mrow`
    /// (see `rewrite_and_flatten_embedded_mathml`). Otherwise, `mglyph` children contribute their `alt`
    /// attribute (or nothing if it is missing), other element children contribute all of their nested text,
    /// and the result has whitespace runs collapsed and leading/trailing whitespace removed.
    fn make_leaf_element(mathml_leaf: Element) {
        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
        // Here, we convert them to leaves by grabbing up all the text and making that the content
        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
        let children = mathml_leaf.children();
        if children.is_empty() {
            return;
        }

        if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
            return;
        }

        // gather up the text
        let mut text = "".to_string();
        for child in children {
            let child_text = match child {
                ChildOfElement::Element(child) => {
                    if name(child) == "mglyph" {
                        child.attribute_value("alt").unwrap_or("").to_string()
                    } else {
                        gather_text(child)
                    }
                }
                ChildOfElement::Text(t) => {
                    // debug!("ChildOfElement::Text: '{}'", t.text());
                    t.text().to_string()
                }
                _ => "".to_string(),
            };
            if !child_text.is_empty() {
                text += &child_text;
            }
        }

        // get rid of the old children and replace with the text we just built
        mathml_leaf.clear_children();
        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));

        /// gather up all the text inside of the element (recursively) and return it concatenated
        /// in document order; the element tags are dropped and no separator is added between the pieces
        fn gather_text(html: Element) -> String {
            let mut text = "".to_string(); // the element tags are thrown out; only the text is kept
            for child in html.children() {
                match child {
                    ChildOfElement::Element(child) => {
                        text += &gather_text(child);
                    }
                    ChildOfElement::Text(t) => text += t.text(),
                    _ => (),
                }
            }
            // debug!("gather_text: '{}'", text);
            return text;
        }
    }

    /// If `mathml_leaf` has element children and all of them are `math` elements, turns `mathml_leaf`
    /// into an `mrow` whose children are the (trimmed) children of those `math` elements interleaved
    /// with new leaves (named as `mathml_leaf` was) holding the text that was between them.
    ///
    /// Returns true if the rewrite was done, false if `mathml_leaf` was left untouched.
    fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
        // first see if it can or needs to be rewritten
        // this is likely rare, so we do a check and if true, do a second pass building the result
        let mut needs_rewrite = false;
        for child in mathml_leaf.children() {
            if let Some(element) = child.element() {
                if name(element) != "math" {
                    return false; // something other than MathML as a child -- can't rewrite
                }
                needs_rewrite = true;
            }
        };

        if !needs_rewrite {
            return false;
        }

        // now do the rewrite, flattening out the mathml and turning the leaf into an mrow with the children
        let leaf_name = name(mathml_leaf);
        let doc = mathml_leaf.document();
        let mut new_children = Vec::new();
        let mut is_last_mtext = false;      // true when the last thing pushed is a leaf created from text
        for child in mathml_leaf.children() {
            if let Some(element) = child.element() {
                trim_element(element, true);
                new_children.append(&mut element.children());   // don't want 'math' wrapper
                is_last_mtext = false;
            } else if let Some(text) = child.text() {
                // combine adjacent text nodes into single nodes
                if is_last_mtext {
                    let last_child = new_children.last_mut().unwrap().element().unwrap();
                    let new_text = as_text(last_child).to_string() + text.text();
                    last_child.set_text(&new_text);
                } else {
                    let new_leaf_node = create_mathml_element(&doc, leaf_name);
                    new_leaf_node.set_text(text.text());
                    new_children.push(ChildOfElement::Element(new_leaf_node));
                    is_last_mtext = true;
                }
            }
        };

        // clean up whitespace in text nodes
        for child in &mut new_children {    
            if let Some(element) = child.element() && is_leaf(element) {
                let text = as_text(element);
                let cleaned_text = WHITESPACE_MATCH.replace_all(text, " ").trim_matches(WHITESPACE).to_string();
                element.set_text(&cleaned_text);
            }
        }
        
        crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
        mathml_leaf.clear_children();
        mathml_leaf.append_children(new_children);

        // debug!("rewrite_and_flatten_embedded_mathml: flattened\n'{}'", mml_to_string(mathml_leaf));
        return true;
    }
}
964
965// used for testing trim
966/// returns Ok() if two Documents are equal or some info where they differ in the Err
967#[allow(dead_code)]
968fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
969    // assume 'e' doesn't have element children until proven otherwise
970    // this means we keep Text children until we are proven they aren't needed
971    if doc1.root().children().len() != doc2.root().children().len() {
972        bail!(
973            "Children of docs have {} != {} children",
974            doc1.root().children().len(),
975            doc2.root().children().len()
976        );
977    }
978
979    for (i, (c1, c2)) in doc1
980        .root()
981        .children()
982        .iter()
983        .zip(doc2.root().children().iter())
984        .enumerate()
985    {
986        match c1 {
987            ChildOfRoot::Element(e1) => {
988                if let ChildOfRoot::Element(e2) = c2 {
989                    is_same_element(*e1, *e2, &[])?;
990                } else {
991                    bail!("child #{}, first is element, second is something else", i);
992                }
993            }
994            ChildOfRoot::Comment(com1) => {
995                if let ChildOfRoot::Comment(com2) = c2 {
996                    if com1.text() != com2.text() {
997                        bail!("child #{} -- comment text differs", i);
998                    }
999                } else {
1000                    bail!("child #{}, first is comment, second is something else", i);
1001                }
1002            }
1003            ChildOfRoot::ProcessingInstruction(p1) => {
1004                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
1005                    if p1.target() != p2.target() || p1.value() != p2.value() {
1006                        bail!("child #{} -- processing instruction differs", i);
1007                    }
1008                } else {
1009                    bail!(
1010                        "child #{}, first is processing instruction, second is something else",
1011                        i
1012                    );
1013                }
1014            }
1015        }
1016    }
1017    return Ok(());
1018}
1019
1020/// returns Ok() if two Documents are equal or some info where they differ in the Err
1021// Not really meant to be public -- used by tests in some packages
1022#[allow(dead_code)]
1023pub fn is_same_element(e1: Element, e2: Element, ignore_attrs: &[&str]) -> Result<()> {
1024    enable_logs();
1025    if name(e1) != name(e2) {
1026        bail!("Names not the same: {}, {}", name(e1), name(e2));
1027    }
1028
1029    // assume 'e' doesn't have element children until proven otherwise
1030    // this means we keep Text children until we are proven they aren't needed
1031    if e1.children().len() != e2.children().len() {
1032        bail!(
1033            "Children of {} have {} != {} children",
1034            name(e1),
1035            e1.children().len(),
1036            e2.children().len()
1037        );
1038    }
1039
1040    if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes(), ignore_attrs) {
1041        bail!("In element {}, {}", name(e1), e);
1042    }
1043
1044    for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
1045        match c1 {
1046            ChildOfElement::Element(child1) => {
1047                if let ChildOfElement::Element(child2) = c2 {
1048                    is_same_element(*child1, *child2, ignore_attrs)?;
1049                } else {
1050                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
1051                }
1052            }
1053            ChildOfElement::Comment(com1) => {
1054                if let ChildOfElement::Comment(com2) = c2 {
1055                    if com1.text() != com2.text() {
1056                        bail!("{} child #{} -- comment text differs", name(e1), i);
1057                    }
1058                } else {
1059                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
1060                }
1061            }
1062            ChildOfElement::ProcessingInstruction(p1) => {
1063                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
1064                    if p1.target() != p2.target() || p1.value() != p2.value() {
1065                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
1066                    }
1067                } else {
1068                    bail!(
1069                        "{} child #{}, first is processing instruction, second is something else",
1070                        name(e1),
1071                        i
1072                    );
1073                }
1074            }
1075            ChildOfElement::Text(t1) => {
1076                if let ChildOfElement::Text(t2) = c2 {
1077                    if t1.text() != t2.text() {
1078                        bail!("{} child #{} --  text differs", name(e1), i);
1079                    }
1080                } else {
1081                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
1082                }
1083            }
1084        }
1085    }
1086    return Ok(());
1087
1088    /// compares attributes -- '==' didn't seems to work
1089    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>, ignore: &[&str]) -> Result<()> {
1090        let attrs1 = attrs1.iter()
1091                .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1092                .collect::<Vec<Attribute>>();
1093        let attrs2 = attrs2.iter()
1094                .filter(|a| !ignore.contains(&a.name().local_part())).cloned()
1095                .collect::<Vec<Attribute>>();
1096        if attrs1.len() != attrs2.len() {
1097            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
1098        }
1099        // can't guarantee attrs are in the same order
1100        for attr1 in attrs1 {
1101            if let Some(found_attr2) = attrs2
1102                .iter()
1103                .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
1104            {
1105                if attr1.value() == found_attr2.value() {
1106                    continue;
1107                } else {
1108                    bail!(
1109                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
1110                        attr1.name().local_part(),
1111                        attr1.value(),
1112                        found_attr2.value()
1113                    );
1114                }
1115            } else {
1116                bail!(
1117                    "Attribute name {} not in [{}]",
1118                    print_attr(&attr1),
1119                    print_attrs(&attrs2)
1120                );
1121            }
1122        }
1123        return Ok(());
1124
1125        fn print_attr(attr: &Attribute) -> String {
1126            return format!("@{}='{}'", attr.name().local_part(), attr.value());
1127        }
1128        fn print_attrs(attrs: &[Attribute]) -> String {
1129            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
1130        }
1131    }
1132}
1133
1134#[cfg(test)]
1135mod tests {
1136    #[allow(unused_imports)]
1137    use super::super::init_logger;
1138    use super::*;
1139
1140    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
1141        let test_package = &parser::parse(test).expect("Failed to parse input");
1142        let test_doc = test_package.as_document();
1143        trim_doc(&test_doc);
1144        debug!("test:\n{}", mml_to_string(get_element(test_package)));
1145
1146        let target_package = &parser::parse(target).expect("Failed to parse input");
1147        let target_doc = target_package.as_document();
1148        trim_doc(&target_doc);
1149        debug!("target:\n{}", mml_to_string(get_element(target_package)));
1150
1151        match is_same_doc(&test_doc, &target_doc) {
1152            Ok(_) => return true,
1153            Err(e) => panic!("{}", e),
1154        }
1155    }
1156
1157    #[test]
1158    fn trim_same() {
1159        let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
1160        assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
1161    }
1162
1163    #[test]
1164    fn trim_whitespace() {
1165        let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
1166        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1167        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1168    }
1169
1170    #[test]
1171    fn no_trim_whitespace_nbsp() {
1172        let trimmed_str = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
1173        let whitespace_str = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
1174        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
1175    }
1176
1177    #[test]
1178    fn trim_comment() {
1179        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1180        let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
1181        assert!(are_parsed_strs_equal(comment_str, whitespace_str));
1182    }
1183
1184    #[test]
1185    fn replace_mglyph() {
1186        let mglyph_str = "<math>
1187                <mrow>
1188                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
1189                    <mo>+</mo>
1190                    <mi>
1191                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
1192                    </mi>
1193                    <mo>=</mo>
1194                    <mi>
1195                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
1196                    </mi>
1197                </mrow>
1198            </math>";
1199        let result_str = "<math>
1200            <mrow>
1201                <mi>X23braid</mi>
1202                <mo>+</mo>
1203                <mi>132braidY</mi>
1204                <mo>=</mo>
1205                <mi>13braid</mi>
1206            </mrow>
1207        </math>";
1208        assert!(are_parsed_strs_equal(mglyph_str, result_str));
1209    }
1210
1211    #[test]
1212    fn trim_differs() {
1213        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
1214        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";
1215
1216        // need to manually do this since failure shouldn't be a panic
1217        let package1 = &parser::parse(whitespace_str).expect("Failed to parse input");
1218        let doc1 = package1.as_document();
1219        trim_doc(&doc1);
1220        debug!("doc1:\n{}", mml_to_string(get_element(package1)));
1221
1222        let package2 = parser::parse(different_str).expect("Failed to parse input");
1223        let doc2 = package2.as_document();
1224        trim_doc(&doc2);
1225        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));
1226
1227        assert!(is_same_doc(&doc1, &doc2).is_err());
1228    }
1229
1230    #[test]
1231    fn test_entities() {
1232        // this forces initialization
1233        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1234
1235        let entity_str = set_mathml("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>").unwrap();
1236        let converted_str =
1237            set_mathml("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>").unwrap();
1238
1239        // need to remove unique ids
1240        static ID_MATCH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"id='.+?' "#).unwrap());
1241        let entity_str = ID_MATCH.replace_all(&entity_str, "");
1242        let converted_str = ID_MATCH.replace_all(&converted_str, "");
1243        assert_eq!(entity_str, converted_str, "normal entity test failed");
1244
1245        let entity_str = set_mathml(
1246            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
1247        )
1248        .unwrap();
1249        let converted_str =
1250            set_mathml("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>").unwrap();
1251        let entity_str = ID_MATCH.replace_all(&entity_str, "");
1252        let converted_str = ID_MATCH.replace_all(&converted_str, "");
1253        assert_eq!(entity_str, converted_str, "special entities quote test failed");
1254
1255        let entity_str =
1256            set_mathml("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>").unwrap();
1257        let converted_str =
1258            set_mathml("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>")
1259                .unwrap();
1260        let entity_str = ID_MATCH.replace_all(&entity_str, "");
1261        let converted_str = ID_MATCH.replace_all(&converted_str, "");
1262        assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
1263    }
1264
1265    #[test]
1266    fn can_recover_from_invalid_set_rules_dir() {
1267        use std::env;
1268        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
1269        unsafe { env::set_var("MathCATRulesDir", "MathCATRulesDir"); }   // safe because we are single threaded
1270        assert!(set_rules_dir("someInvalidRulesDir").is_err());
1271        assert!(
1272            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
1273            "\nset_rules_dir to '{}' failed",
1274            super::super::abs_rules_dir_path()
1275        );
1276        assert!(set_mathml("<math><mn>1</mn></math>").is_ok());
1277    }
1278
1279    #[test]
1280    fn single_html_in_mtext() {
1281        let test = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
1282        let target = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
1283        assert!(are_parsed_strs_equal(test, target));
1284    }
1285
1286    #[test]
1287    fn multiple_html_in_mtext() {
1288        let test = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
1289        let target = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
1290        assert!(are_parsed_strs_equal(test, target));
1291    }
1292
1293    #[test]
1294    fn nested_html_in_mtext() {
1295        let test = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
1296        let target = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
1297        assert!(are_parsed_strs_equal(test, target));
1298    }
1299
1300    #[test]
1301    fn empty_html_in_mtext() {
1302        let test = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
1303        let target = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
1304        assert!(are_parsed_strs_equal(test, target));
1305    }
1306
1307    #[test]
1308    fn mathml_in_mtext() {
1309        let test = "<math><mtext>if&#xa0;<math> <msup><mi>n</mi><mn>2</mn></msup></math>&#xa0;is real</mtext></math>";
1310        let target = "<math><mrow><mtext>if&#xa0;</mtext><msup><mi>n</mi><mn>2</mn></msup><mtext>&#xa0;is real</mtext></mrow></math>";
1311        assert!(are_parsed_strs_equal(test, target));
1312    }
1313
1314    #[test]
1315    fn stack_overflow_protection() {
1316        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1317        let mut bad_mathml = String::from("<math>");
1318        for _ in 0..MAX_DEPTH+1 {
1319            bad_mathml.push_str("<msqrt><mi>n</mi>");
1320        }
1321        for _ in 0..MAX_DEPTH+1 {
1322            bad_mathml.push_str("</msqrt>");
1323        }
1324        bad_mathml.push_str("</math>");
1325        assert_eq!(set_mathml(bad_mathml).unwrap_err().to_string(), "MathML is too deeply nested to process");
1326    }
1327
1328    #[test]
1329    fn old_mathml_cleared_on_error() {
1330        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1331        let good_mathml = "<math><mn>3</mn></math>";
1332        set_mathml(good_mathml).unwrap();
1333        let bad_mathml = "<math><mi>&xabc;</mi></math>";
1334        assert!(set_mathml(bad_mathml).is_err());
1335        assert!(get_spoken_text().unwrap() == "");
1336        set_mathml(good_mathml).unwrap();
1337        let bad_mathml = "<math>garbage";
1338        assert!(set_mathml(bad_mathml).is_err());
1339        assert!(get_spoken_text().unwrap() == "");
1340    }
1341
1342
1343
1344    fn setup_speech_ssml() {
1345        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
1346        set_preference("Language", "en").unwrap();
1347        set_preference("TTS", "SSML").unwrap();
1348        set_preference("MathRate", "80").unwrap();
1349        set_preference("SpeechStyle", "SimpleSpeak").unwrap();
1350        set_preference("Verbosity", "Medium").unwrap();
1351    }
1352
1353    #[test]
1354    fn test_no_escaping() -> Result<()> {
1355        setup_speech_ssml();
1356        let expr = " <math>
1357            <mfrac>
1358                <mrow> <mi>x</mi><mo>+</mo><mi>y</mi> </mrow>
1359                <mrow> <mi>x</mi><mo>-</mo><mi>y</mi> </mrow>
1360            </mfrac>
1361        </math>";
1362        set_mathml(&expr)?;
1363        let speech = get_spoken_text()?;
1364        // Rule-generated SSML must pass through verbatim (not XML-entity-encoded).
1365        assert!(!speech.contains("&lt;"));
1366        assert!(!speech.contains("&gt;"));
1367        assert!(!speech.contains("&amp;lt;"));
1368        return Ok(());
1369    }
1370
1371    /// The attack payload must not pass through verbatim (rule-generated SSML may contain `<break`).
1372    fn assert_ssml_attack_neutralized(speech: &str, illegal_ssml: &str) {
1373        assert!(
1374            !speech.contains(illegal_ssml),
1375            "attack payload ({illegal_ssml}) appears verbatim in output: {speech}"
1376        );
1377        assert!(
1378            !speech.contains(r#"time="5000ms""#) && !speech.contains("time='5000ms'"),
1379            "attack break duration in output: {speech}"
1380        );
1381    }
1382
    /// SSML snippet an attacker might embed in MathML text or attributes:
    /// a `break` element with a `time` of 50000ms.
    const PAYLOAD: &str = r#"<break time="50000ms"/>"#;
    /// Same bytes as `PAYLOAD`, entity-encoded so attribute values are well-formed XML
    /// (`<`, `>`, and `"` are written as `&lt;`, `&gt;`, and `&quot;`).
    const PAYLOAD_ATTR_XML: &str = "&lt;break time=&quot;50000ms&quot;/&gt;";
    /// Entity-encoded payload plus trailing literal text (well-formed in leaf element text).
    /// The trailing `note` gives the tests some ordinary text to look for in the speech.
    const PAYLOAD_LEAF_XML: &str = "&lt;break time=&quot;50000ms&quot;/&gt;note";
1389
1390    #[test]
1391    /// User-supplied leaf text must not inject SSML when TTS is SSML.
1392    fn leaf_text_ssml_attack_neutralized_in_speech() -> Result<()> {
1393        setup_speech_ssml();
1394        // Entity-encoded payload: valid XML through set_mathml (no CDATA), decodes to PAYLOAD + "note".
1395        let mathml = format!(
1396            r#"<math><mrow><mtext>{PAYLOAD_LEAF_XML}</mtext><mo>+</mo>
1397                           <mi>{PAYLOAD_LEAF_XML}</mi><mo>+</mo>
1398                           <ms>{PAYLOAD_LEAF_XML}</ms><mo>+</mo>
1399                           <mn>{PAYLOAD_LEAF_XML}</mn></mrow></math>"#
1400        );
1401        set_mathml(&mathml)?;
1402        let speech = get_spoken_text()?;
1403        assert_ssml_attack_neutralized(&speech, PAYLOAD);
1404        assert!(speech.contains("note") || speech.contains("&lt;"));
1405        let mathml = format!(
1406            "<math><mrow><mtext>{PAYLOAD_LEAF_XML}</mtext><mo>+</mo><mn>1</mn></mrow></math>"
1407        );
1408        set_mathml(&mathml)?;
1409        let speech = get_spoken_text()?;
1410        assert_ssml_attack_neutralized(&speech, PAYLOAD);
1411        assert!(speech.contains("note") || speech.contains("&lt;"));
1412        return Ok(());
1413    }
1414
1415    #[test]
1416    /// Attribute values read via xpath must not inject SSML when TTS is SSML.
1417    fn attribute_ssml_attack_neutralized_in_speech() -> Result<()> {
1418        use crate::speech::{SpeechRulesWithContext, SPEECH_RULES};
1419
1420        setup_speech_ssml();
1421        let mathml = format!(
1422            r#"<math data-ssml-attack="{PAYLOAD_ATTR_XML}"><mn>x</mn></math>"#
1423        );
1424        set_mathml(&mathml)?;
1425        let speech = get_spoken_text()?;
1426        assert_ssml_attack_neutralized(&speech, PAYLOAD);
1427
1428        // XPath Attribute nodes use replace_chars (same path as replace_nodes_string).
1429        SPEECH_RULES.with(|rules| {
1430            rules.borrow_mut().read_files()?;
1431            let rules_ref = rules.borrow();
1432            let package = parser::parse(&mathml)?;
1433            let math = get_element(&package);
1434            let attr = math
1435                .attribute("data-ssml-attack")
1436                .expect("data-ssml-attack attribute");
1437            let work_package = Package::new();
1438            let mut ctx =
1439                SpeechRulesWithContext::new(&rules_ref, work_package.as_document(), "", 0);
1440            let from_attr = ctx.replace_chars(attr.value(), math)?;
1441            assert_ssml_attack_neutralized(&from_attr, PAYLOAD);
1442            assert!(
1443                from_attr.contains("&lt;"),
1444                "attribute value should be XML-escaped for SSML: {from_attr}"
1445            );
1446            Ok::<(), Error>(())
1447        })?;
1448        return Ok(());
1449    }
1450}