libmathcat/
interface.rs

1//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2//!
3#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6
7use crate::canonicalize::{as_text, create_mathml_element};
8use crate::errors::*;
9use phf::phf_map;
10use regex::{Captures, Regex};
11use sxd_document::dom::*;
12use sxd_document::parser;
13use sxd_document::Package;
14
15use crate::canonicalize::{as_element, name};
16use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
17
18use crate::navigate::*;
19use crate::pretty_print::mml_to_string;
20use crate::xpath_functions::{is_leaf, IsNode};
21
22#[cfg(feature = "enable-logs")]
23use std::sync::Once;
24#[cfg(feature = "enable-logs")]
25static INIT: Once = Once::new();
26
/// Set up logging the first time any API entry point is called.
///
/// With the `enable-logs` feature off, this function does nothing.
/// With it on, the closure is run at most once (guarded by the `INIT` `Once`);
/// on Android that closure installs `android_logger` with the maximum level set to `Trace`
/// and the tag "MathCat". On other targets the closure body is empty.
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(||{
        #[cfg(target_os = "android")]
        {
            extern crate log;
            extern crate android_logger;

            // glob imports supply `LevelFilter`/`trace!` (log) and `Config` (android_logger)
            use log::*;
            use android_logger::*;

            android_logger::init_once(
                Config::default()
                .with_max_level(LevelFilter::Trace)
                .with_tag("MathCat")
            );
            trace!("Activated Android logger!");
        }
    });
}
47
48// wrap up some common functionality between the call from 'main' and AT
49fn cleanup_mathml(mathml: Element) -> Result<Element> {
50    trim_element(mathml, false);
51    let mathml = crate::canonicalize::canonicalize(mathml)?;
52    let mathml = add_ids(mathml);
53    return Ok(mathml);
54}
55
thread_local! {
    /// The current node being navigated (also spoken and brailled) is stored in `MATHML_INSTANCE`.
    /// It is a thread-local `Package`; `set_mathml` replaces its contents and the
    /// speech, braille, and navigation functions in this module read the MathML from it.
    /// Its initial value comes from `init_mathml_instance`.
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
}
60
61fn init_mathml_instance() -> RefCell<Package> {
62    let package = parser::parse("<math></math>")
63        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
64    return RefCell::new(package);
65}
66
67/// Set the Rules directory
68/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
69pub fn set_rules_dir(dir: String) -> Result<()> {
70    enable_logs();
71    use std::path::PathBuf;
72    let dir = if dir.is_empty() {
73        std::env::var_os("MathCATRulesDir")
74            .unwrap_or_default()
75            .to_str()
76            .unwrap()
77            .to_string()
78    } else {
79        dir
80    };
81    let pref_manager = crate::prefs::PreferenceManager::get();
82    return pref_manager.borrow_mut().initialize(PathBuf::from(dir));
83}
84
85/// Returns the version number (from Cargo.toml) of the build
86pub fn get_version() -> String {
87    enable_logs();
88    const VERSION: &str = env!("CARGO_PKG_VERSION");
89    return VERSION.to_string();
90}
91
92/// This will override any previous MathML that was set.
93/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
94/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
95pub fn set_mathml(mathml_str: String) -> Result<String> {
96    enable_logs();
97    lazy_static! {
98        // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
99        static ref MATHJAX_V2: Regex = Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap();
100        static ref MATHJAX_V3: Regex = Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap();
101        static ref NAMESPACE_DECL: Regex = Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap();     // very limited namespace prefix match
102        static ref PREFIX: Regex = Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap();     // very limited namespace prefix match
103        static ref HTML_ENTITIES: Regex = Regex::new(r#"&([a-zA-Z]+?);"#).unwrap();
104    }
105
106    NAVIGATION_STATE.with(|nav_stack| {
107        nav_stack.borrow_mut().reset();
108    });
109
110    // We need the main definitions files to be read in so canonicalize can work.
111    // This call reads all of them for the current preferences, but that's ok since they will likely be used
112    crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
113
114    return MATHML_INSTANCE.with(|old_package| {
115        static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
116
117        let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
118                                                // need to deal with character data and convert to something the parser knows
119        let mathml_str =
120            HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
121                None => {
122                    error_message = format!("No entity named '{}'", &cap[0]);
123                    cap[0].to_string()
124                }
125                Some(&ch) => ch.to_string(),
126            });
127
128        if !error_message.is_empty() {
129            bail!(error_message);
130        }
131        let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
132        let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
133
134        // the speech rules use the xpath "name" function and that includes the prefix
135        // getting rid of the prefix properly probably involves a recursive replacement in the tree
136        // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
137        let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
138        let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
139
140        let new_package = parser::parse(&mathml_str);
141        if let Err(e) = new_package {
142            bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
143        }
144
145        let new_package = new_package.unwrap();
146        let mathml = get_element(&new_package);
147        let mathml = cleanup_mathml(mathml)?;
148        let mathml_string = mml_to_string(mathml);
149        old_package.replace(new_package);
150
151        return Ok(mathml_string);
152    });
153}
154
155/// Get the spoken text of the MathML that was set.
156/// The speech takes into account any AT or user preferences.
157pub fn get_spoken_text() -> Result<String> {
158    enable_logs();
159    // use std::time::{Instant};
160    // let instant = Instant::now();
161    return MATHML_INSTANCE.with(|package_instance| {
162        let package_instance = package_instance.borrow();
163        let mathml = get_element(&package_instance);
164        let new_package = Package::new();
165        let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
166        debug!("Intent tree:\n{}", mml_to_string(intent));
167        let speech = crate::speech::speak_mathml(intent, "")?;
168        // info!("Time taken: {}ms", instant.elapsed().as_millis());
169        return Ok(speech);
170    });
171}
172
173/// Get the spoken text for an overview of the MathML that was set.
174/// The speech takes into account any AT or user preferences.
175/// Note: this implementation for is currently minimal and should not be used.
176pub fn get_overview_text() -> Result<String> {
177    enable_logs();
178    // use std::time::{Instant};
179    // let instant = Instant::now();
180    return MATHML_INSTANCE.with(|package_instance| {
181        let package_instance = package_instance.borrow();
182        let mathml = get_element(&package_instance);
183        let speech = crate::speech::overview_mathml(mathml, "")?;
184        // info!("Time taken: {}ms", instant.elapsed().as_millis());
185        return Ok(speech);
186    });
187}
188
189/// Get the value of the named preference.
190/// None is returned if `name` is not a known preference.
191pub fn get_preference(name: String) -> Result<String> {
192    enable_logs();
193    use crate::prefs::NO_PREFERENCE;
194    return crate::speech::SPEECH_RULES.with(|rules| {
195        let rules = rules.borrow();
196        let pref_manager = rules.pref_manager.borrow();
197        let mut value = pref_manager.pref_to_string(&name);
198        if value == NO_PREFERENCE {
199            value = pref_manager.pref_to_string(&name);
200        }
201        if value == NO_PREFERENCE {
202            bail!("No preference named '{}'", &name);
203        } else {
204            return Ok(value);
205        }
206    });
207}
208
209/// Set a MathCAT preference. The preference name should be a known preference name.
210/// The value should either be a string or a number (depending upon the preference being set)
211/// The list of known user preferences is in the MathCAT user documentation.
212/// Here are common preferences set by programs (not settable by the user):
213/// * TTS -- SSML, SAPI5, None
214/// * Pitch -- normalized at '1.0'
215/// * Rate -- words per minute (should match current speech rate).
216///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
217/// * Volume -- default 100
218/// * Voice -- set a voice to use (not implemented)
219/// * Gender -- set pick any voice of the given gender (not implemented)
220/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
221///
222/// Important: both the preference name and value are case-sensitive
223///
224/// This function can be called multiple times to set different values.
225/// The values are persistent and extend beyond calls to [`set_mathml`].
226/// A value can be overwritten by calling this function again with a different value.
227///
228/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
229pub fn set_preference(name: String, value: String) -> Result<()> {
230    enable_logs();
231    // "LanguageAuto" allows setting the language dir without actually changing the value of "Language" from Auto
232    let mut value = value;
233    if name == "Language" || name == "LanguageAuto" {
234        // check the format
235        if value != "Auto" {
236            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
237            let mut lang_country_split = value.split('-');
238            let language = lang_country_split.next().unwrap_or("");
239            let country = lang_country_split.next().unwrap_or("");
240            if language.len() != 2 {
241                bail!(
242                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
243                    value
244                );
245            }
246            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
247            if !country.is_empty() {
248                new_lang_country.push('-');
249                new_lang_country.push_str(country);
250            }
251            value = new_lang_country;
252        }
253        if name == "LanguageAuto" && value == "Auto" {
254            bail!("'LanguageAuto' can not have the value 'Auto'");
255        }
256    }
257
258    crate::speech::SPEECH_RULES.with(|rules| {
259        let rules = rules.borrow_mut();
260        if let Some(error_string) = rules.get_error() {
261            bail!("{}", error_string);
262        }
263
264        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
265        let mut pref_manager = rules.pref_manager.borrow_mut();
266        if name == "LanguageAuto" {
267            let language_pref = pref_manager.pref_to_string("Language");
268            if language_pref != "Auto" {
269                bail!(
270                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
271                    language_pref
272                );
273            }
274        }
275        let lower_case_value = value.to_lowercase();
276        if lower_case_value == "true" || lower_case_value == "false" {
277            pref_manager.set_api_boolean_pref(&name, value.to_lowercase() == "true");
278        } else {
279            match name.as_str() {
280                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
281                    pref_manager.set_api_float_pref(&name, to_float(&name, &value)?)
282                }
283                _ => {
284                    pref_manager.set_string_pref(&name, &value)?;
285                }
286            }
287        };
288        return Ok::<(), Error>(());
289    })?;
290
291    return Ok(());
292
293    fn to_float(name: &str, value: &str) -> Result<f64> {
294        return match value.parse::<f64>() {
295            Ok(val) => Ok(val),
296            Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
297        };
298    }
299}
300
301/// Get the braille associated with the MathML that was set by [`set_mathml`].
302/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
303/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
304pub fn get_braille(nav_node_id: String) -> Result<String> {
305    enable_logs();
306    // use std::time::{Instant};
307    // let instant = Instant::now();
308    return MATHML_INSTANCE.with(|package_instance| {
309        let package_instance = package_instance.borrow();
310        let mathml = get_element(&package_instance);
311        let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
312        // info!("Time taken: {}ms", instant.elapsed().as_millis());
313        return Ok(braille);
314    });
315}
316
317/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
318/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
319/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
320pub fn get_navigation_braille() -> Result<String> {
321    enable_logs();
322    return MATHML_INSTANCE.with(|package_instance| {
323        let package_instance = package_instance.borrow();
324        let mathml = get_element(&package_instance);
325        let new_package = Package::new(); // used if we need to create a new tree
326        let new_doc = new_package.as_document();
327        let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
328            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
329                Err(e) => Err(e),
330                Ok((found, offset)) => {
331                    // get the MathML node and wrap it inside of a <math> element
332                    // if the offset is given, we need to get the character it references
333                    if offset == 0 {
334                        if name(found) == "math" {
335                            Ok(found)
336                        } else {
337                            let new_mathml = create_mathml_element(&new_doc, "math");
338                            new_mathml.append_child(copy_mathml(found));
339                            new_doc.root().append_child(new_mathml);
340                            Ok(new_mathml)
341                        }
342                    } else if !is_leaf(found) {
343                        bail!(
344                            "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
345                            offset,
346                            name(found)
347                        );
348                    } else if let Some(ch) = as_text(found).chars().nth(offset) {
349                        let internal_mathml = create_mathml_element(&new_doc, name(found));
350                        internal_mathml.set_text(&ch.to_string());
351                        let new_mathml = create_mathml_element(&new_doc, "math");
352                        new_mathml.append_child(internal_mathml);
353                        new_doc.root().append_child(new_mathml);
354                        Ok(new_mathml)
355                    } else {
356                        bail!(
357                            "Internal error: offset '{}' on leaf element '{}' doesn't exist",
358                            offset,
359                            mml_to_string(found)
360                        );
361                    }
362                }
363            };
364        })?;
365
366        let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
367        return Ok(braille);
368    });
369}
370
371/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
372/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
373/// The spoken text for the new current node is returned.
374pub fn do_navigate_keypress(
375    key: usize,
376    shift_key: bool,
377    control_key: bool,
378    alt_key: bool,
379    meta_key: bool,
380) -> Result<String> {
381    return MATHML_INSTANCE.with(|package_instance| {
382        let package_instance = package_instance.borrow();
383        let mathml = get_element(&package_instance);
384        return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
385    });
386}
387
388/// Given a navigation command, the current node is moved accordingly.
389/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
390/// The valid commands are:
391/// * Standard move commands:
392///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
393/// * Movement in a table or elementary math:
394///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
395/// * Moving into children or out to parents:
396///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
397/// * Undo the last movement command:
398///   `MoveLastLocation`
399/// * Read commands (standard speech):
400///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
401/// * Describe commands (overview):
402///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
403/// * Location information:
404///   `WhereAmI`, `WhereAmIAll`
405/// * Change navigation modes (circle up/down):
406///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
407/// * Speak the current navigation mode
408///   `ToggleSpeakMode`
409///
410/// There are 10 place markers that can be set/read/described or moved to.
411/// * Setting:
412///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
413/// * Reading:
414///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
415/// * Describing:
416///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
417/// * Moving:
418///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
419///
420/// When done with Navigation, call with `Exit`
421pub fn do_navigate_command(command: String) -> Result<String> {
422    enable_logs();
423    let command = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
424    if command.is_none() {
425        bail!("Unknown command in call to DoNavigateCommand()");
426    };
427    let command = *command.unwrap();
428    return MATHML_INSTANCE.with(|package_instance| {
429        let package_instance = package_instance.borrow();
430        let mathml = get_element(&package_instance);
431        return do_navigate_command_string(mathml, command);
432    });
433}
434
435/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
436/// An error is returned if the 'id' doesn't exist
437pub fn set_navigation_node(id: String, offset: usize) -> Result<()> {
438    enable_logs();
439    return MATHML_INSTANCE.with(|package_instance| {
440        let package_instance = package_instance.borrow();
441        let mathml = get_element(&package_instance);
442        return set_navigation_node_from_id(mathml, id, offset);
443    });
444}
445
446/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
447/// The offset is needed for token elements that have multiple characters.
448pub fn get_navigation_mathml() -> Result<(String, usize)> {
449    return MATHML_INSTANCE.with(|package_instance| {
450        let package_instance = package_instance.borrow();
451        let mathml = get_element(&package_instance);
452        return NAVIGATION_STATE.with(|nav_stack| {
453            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
454                Err(e) => Err(e),
455                Ok((found, offset)) => Ok((mml_to_string(found), offset)),
456            };
457        });
458    });
459}
460
461/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
462/// `offset` (not yet implemented)
463/// The offset is needed for token elements that have multiple characters.
464pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
465    enable_logs();
466    return MATHML_INSTANCE.with(|package_instance| {
467        let package_instance = package_instance.borrow();
468        let mathml = get_element(&package_instance);
469        return Ok(NAVIGATION_STATE.with(|nav_stack| {
470            return nav_stack.borrow().get_navigation_mathml_id(mathml);
471        }));
472    });
473}
474
475/// Return the start and end braille character positions associated with the current (navigation) node.
476pub fn get_braille_position() -> Result<(usize, usize)> {
477    enable_logs();
478    return MATHML_INSTANCE.with(|package_instance| {
479        let package_instance = package_instance.borrow();
480        let mathml = get_element(&package_instance);
481        let nav_node = get_navigation_mathml_id()?;
482        let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
483        return Ok((start, end));
484    });
485}
486
487/// Given a 0-based braille position, return the smallest MathML node enclosing it.
488/// This node might be a leaf with an offset.
489pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
490    enable_logs();
491    return MATHML_INSTANCE.with(|package_instance| {
492        let package_instance = package_instance.borrow();
493        let mathml = get_element(&package_instance);
494        return crate::braille::get_navigation_node_from_braille_position(mathml, position);
495    });
496}
497
498pub fn get_supported_braille_codes() -> Vec<String> {
499    enable_logs();
500    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
501    let braille_dir = rules_dir.join("Braille");
502    let mut braille_code_paths = Vec::new();
503
504    find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
505    let mut braille_code_paths = braille_code_paths.iter()
506                    .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
507                    .filter(|string_path| !string_path.is_empty() )
508                    .collect::<Vec<String>>();
509    braille_code_paths.sort();
510
511    return braille_code_paths;
512 }
513
514pub fn get_supported_languages() -> Vec<String> {
515    enable_logs();
516    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
517    let lang_dir = rules_dir.join("Languages");
518    let mut lang_paths = Vec::new();
519
520    find_all_dirs_shim(&lang_dir, &mut lang_paths);
521    let mut language_paths = lang_paths.iter()
522                    .map(|path| path.strip_prefix(&lang_dir).unwrap()
523                                              .to_string_lossy()
524                                              .replace(std::path::MAIN_SEPARATOR, "-")
525                                              .to_string())
526                    .filter(|string_path| !string_path.is_empty() )
527                    .collect::<Vec<String>>();
528
529    language_paths.sort();
530    return language_paths;
531 }
532
533 pub fn get_supported_speech_styles(lang: String) -> Vec<String> {
534    enable_logs();
535    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
536    let lang_dir = rules_dir.join("Languages").join(lang);
537    let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
538    for file_name in &mut speech_styles {
539        file_name.truncate(file_name.len() - "_Rules.yaml".len())
540    }
541    speech_styles.sort();
542    let mut i = 1;
543    while i < speech_styles.len() {
544        if speech_styles[i-1] == speech_styles[i] {
545            speech_styles.remove(i);
546        } else {
547            i += 1;
548        }
549    }
550    return speech_styles;
551 }
552
553// utility functions
554
555/// Copy (recursively) the (MathML) element and return the new one.
556/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
557/// Convert the returned error from set_mathml, etc., to a useful string for display
558pub fn copy_mathml(mathml: Element) -> Element {
559    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
560    let children = mathml.children();
561    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
562    mathml.attributes().iter().for_each(|attr| {
563        new_mathml.set_attribute_value(attr.name(), attr.value());
564    });
565
566    // can't use is_leaf/as_text because this is also used with the intent tree
567    if children.len() == 1 {
568        if let Some(text) = children[0].text() {
569        new_mathml.set_text(text.text());
570        return new_mathml;
571        }
572    }
573
574    let mut new_children = Vec::with_capacity(children.len());
575    for child in children {
576        let child = as_element(child);
577        let new_child = copy_mathml(child);
578        new_children.push(new_child);
579    }
580    new_mathml.append_children(new_children);
581    return new_mathml;
582}
583
584pub fn errors_to_string(e: &Error) -> String {
585    enable_logs();
586    let mut result = String::default();
587    let mut first_time = true;
588    for e in e.iter() {
589        if first_time {
590            result = format!("{e}\n");
591            first_time = false;
592        } else {
593            result += &format!("caused by: {e}\n");
594        }
595    }
596    return result;
597}
598
599fn add_ids(mathml: Element) -> Element {
600    use std::time::SystemTime;
601    let time = if cfg!(target_family = "wasm") {
602        fastrand::usize(..)
603    } else {
604        SystemTime::now()
605            .duration_since(SystemTime::UNIX_EPOCH)
606            .unwrap()
607            .as_millis() as usize
608    };
609    let time_part = radix_fmt::radix(time, 36).to_string();
610    let random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
611    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
612    add_ids_to_all(mathml, &prefix, 0);
613    return mathml;
614
615    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
616        let mut count = count;
617        if mathml.attribute("id").is_none() {
618            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
619            mathml.set_attribute_value("data-id-added", "true");
620            count += 1;
621        };
622
623        if crate::xpath_functions::is_leaf(mathml) {
624            return count;
625        }
626
627        for child in mathml.children() {
628            let child = as_element(child);
629            count = add_ids_to_all(child, id_prefix, count);
630        }
631        return count;
632    }
633}
634
635pub fn get_element(package: &Package) -> Element<'_> {
636    enable_logs();
637    let doc = package.as_document();
638    let mut result = None;
639    for root_child in doc.root().children() {
640        if let ChildOfRoot::Element(e) = root_child {
641            assert!(result.is_none());
642            result = Some(e);
643        }
644    }
645    return result.unwrap();
646}
647
648/// Get the intent after setting the MathML
649/// Used in testing
650#[allow(dead_code)]
651pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
652    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
653    let mathml = cleanup_mathml(mathml)?;
654    return crate::speech::intent_from_mathml(mathml, doc);
655}
656
657#[allow(dead_code)]
658fn trim_doc(doc: &Document) {
659    for root_child in doc.root().children() {
660        if let ChildOfRoot::Element(e) = root_child {
661            trim_element(e, false);
662        } else {
663            doc.root().remove_child(root_child); // comment or processing instruction
664        }
665    }
666}
667
668/// Not really meant to be public -- used by tests in some packages
669pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
670    // "<mtext>this is text</mtext" results in 3 text children
671    // these are combined into one child as it makes code downstream simpler
672
673    // space, tab, newline, carriage return all get collapsed to a single space
674    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}', '\u{000D}'];
675    lazy_static! {
676        static ref WHITESPACE_MATCH: Regex = Regex::new(r#"[ \u{0009}\u{000A}\u{000D}]+"#).unwrap();
677    }
678
679    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
680        // Assume it is HTML inside of the leaf -- turn the HTML into a string
681        make_leaf_element(e);
682        return;
683    }
684
685    let mut single_text = "".to_string();
686    for child in e.children() {
687        match child {
688            ChildOfElement::Element(c) => {
689                trim_element(c, allow_structure_in_leaves);
690            }
691            ChildOfElement::Text(t) => {
692                single_text += t.text();
693                e.remove_child(child);
694            }
695            _ => {
696                e.remove_child(child);
697            }
698        }
699    }
700
701    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
702    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
703        // intent-literal comes from testing intent
704        // FIX: we have a problem -- what should happen???
705        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
706        if !single_text.trim_matches(WHITESPACE).is_empty() {
707            error!(
708                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
709            );
710        }
711        return;
712    }
713    if e.children().is_empty() && !single_text.is_empty() {
714        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
715        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
716    }
717
718    fn make_leaf_element(mathml_leaf: Element) {
719        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
720        // Here, we take convert them to leaves by grabbing up all the text and making that the content
721        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
722        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
723        let children = mathml_leaf.children();
724        if children.is_empty() {
725            return;
726        }
727
728        // gather up the text
729        let mut text = "".to_string();
730        for child in children {
731            let child_text = match child {
732                ChildOfElement::Element(child) => {
733                    if name(child) == "mglyph" {
734                        child.attribute_value("alt").unwrap_or("").to_string()
735                    } else {
736                        gather_text(child)
737                    }
738                }
739                ChildOfElement::Text(t) => {
740                    // debug!("ChildOfElement::Text: '{}'", t.text());
741                    t.text().to_string()
742                }
743                _ => "".to_string(),
744            };
745            if !child_text.is_empty() {
746                text += &child_text;
747            }
748        }
749
750        // get rid of the old children and replace with the text we just built
751        mathml_leaf.clear_children();
752        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
753        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));
754
755        /// gather up all the contents of the element and return them with a leading space
756        fn gather_text(html: Element) -> String {
757            let mut text = "".to_string(); // since we are throwing out the element tag, add a space between the contents
758            for child in html.children() {
759                match child {
760                    ChildOfElement::Element(child) => {
761                        text += &gather_text(child);
762                    }
763                    ChildOfElement::Text(t) => text += t.text(),
764                    _ => (),
765                }
766            }
767            // debug!("gather_text: '{}'", text);
768            return text;
769        }
770    }
771}
772
773// used for testing trim
774/// returns Ok() if two Documents are equal or some info where they differ in the Err
775#[allow(dead_code)]
776fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
777    // assume 'e' doesn't have element children until proven otherwise
778    // this means we keep Text children until we are proven they aren't needed
779    if doc1.root().children().len() != doc2.root().children().len() {
780        bail!(
781            "Children of docs have {} != {} children",
782            doc1.root().children().len(),
783            doc2.root().children().len()
784        );
785    }
786
787    for (i, (c1, c2)) in doc1
788        .root()
789        .children()
790        .iter()
791        .zip(doc2.root().children().iter())
792        .enumerate()
793    {
794        match c1 {
795            ChildOfRoot::Element(e1) => {
796                if let ChildOfRoot::Element(e2) = c2 {
797                    is_same_element(*e1, *e2)?;
798                } else {
799                    bail!("child #{}, first is element, second is something else", i);
800                }
801            }
802            ChildOfRoot::Comment(com1) => {
803                if let ChildOfRoot::Comment(com2) = c2 {
804                    if com1.text() != com2.text() {
805                        bail!("child #{} -- comment text differs", i);
806                    }
807                } else {
808                    bail!("child #{}, first is comment, second is something else", i);
809                }
810            }
811            ChildOfRoot::ProcessingInstruction(p1) => {
812                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
813                    if p1.target() != p2.target() || p1.value() != p2.value() {
814                        bail!("child #{} -- processing instruction differs", i);
815                    }
816                } else {
817                    bail!(
818                        "child #{}, first is processing instruction, second is something else",
819                        i
820                    );
821                }
822            }
823        }
824    }
825    return Ok(());
826}
827
828/// returns Ok() if two Documents are equal or some info where they differ in the Err
829// Not really meant to be public -- used by tests in some packages
830#[allow(dead_code)]
831pub fn is_same_element(e1: Element, e2: Element) -> Result<()> {
832    enable_logs();
833    if name(e1) != name(e2) {
834        bail!("Names not the same: {}, {}", name(e1), name(e2));
835    }
836
837    // assume 'e' doesn't have element children until proven otherwise
838    // this means we keep Text children until we are proven they aren't needed
839    if e1.children().len() != e2.children().len() {
840        bail!(
841            "Children of {} have {} != {} children",
842            name(e1),
843            e1.children().len(),
844            e2.children().len()
845        );
846    }
847
848    if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes()) {
849        bail!("In element {}, {}", name(e1), e);
850    }
851
852    for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
853        match c1 {
854            ChildOfElement::Element(child1) => {
855                if let ChildOfElement::Element(child2) = c2 {
856                    is_same_element(*child1, *child2)?;
857                } else {
858                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
859                }
860            }
861            ChildOfElement::Comment(com1) => {
862                if let ChildOfElement::Comment(com2) = c2 {
863                    if com1.text() != com2.text() {
864                        bail!("{} child #{} -- comment text differs", name(e1), i);
865                    }
866                } else {
867                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
868                }
869            }
870            ChildOfElement::ProcessingInstruction(p1) => {
871                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
872                    if p1.target() != p2.target() || p1.value() != p2.value() {
873                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
874                    }
875                } else {
876                    bail!(
877                        "{} child #{}, first is processing instruction, second is something else",
878                        name(e1),
879                        i
880                    );
881                }
882            }
883            ChildOfElement::Text(t1) => {
884                if let ChildOfElement::Text(t2) = c2 {
885                    if t1.text() != t2.text() {
886                        bail!("{} child #{} --  text differs", name(e1), i);
887                    }
888                } else {
889                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
890                }
891            }
892        }
893    }
894    return Ok(());
895
896    /// compares attributes -- '==' didn't seems to work
897    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>) -> Result<()> {
898        if attrs1.len() != attrs2.len() {
899            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
900        }
901        // can't guarantee attrs are in the same order
902        for attr1 in attrs1 {
903            if let Some(found_attr2) = attrs2
904                .iter()
905                .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
906            {
907                if attr1.value() == found_attr2.value() {
908                    continue;
909                } else {
910                    bail!(
911                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
912                        attr1.name().local_part(),
913                        attr1.value(),
914                        found_attr2.value()
915                    );
916                }
917            } else {
918                bail!(
919                    "Attribute name {} not in [{}]",
920                    print_attr(&attr1),
921                    print_attrs(&attrs2)
922                );
923            }
924        }
925        return Ok(());
926
927        fn print_attr(attr: &Attribute) -> String {
928            return format!("@{}='{}'", attr.name().local_part(), attr.value());
929        }
930        fn print_attrs(attrs: &[Attribute]) -> String {
931            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
932        }
933    }
934}
935
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses `xml` (panicking if it can't be parsed) and runs `trim_doc` on the resulting document.
    fn parse_and_trim(xml: &str) -> Package {
        let package = parser::parse(xml).expect("Failed to parse input");
        trim_doc(&package.as_document());
        return package;
    }

    /// Parses and trims both strings, then compares the resulting documents with `is_same_doc`.
    /// Returns `true` when they are the same and panics with the reported difference otherwise.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let target_package = parse_and_trim(target);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        let test_package = parse_and_trim(test);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        if let Err(e) = is_same_doc(&test_package.as_document(), &target_package.as_document()) {
            panic!("{}", e);
        }
        return true;
    }

    #[test]
    fn trim_same() {
        // a document is the same as itself
        let mathml = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(mathml, mathml));
    }

    #[test]
    fn trim_whitespace() {
        let without_whitespace = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let with_whitespace = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(without_whitespace, with_whitespace));
    }

    #[test]
    fn no_trim_whitespace_nbsp() {
        let without_whitespace = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let with_whitespace = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(without_whitespace, with_whitespace));
    }

    #[test]
    fn trim_comment() {
        let with_whitespace = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let with_comment = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(with_comment, with_whitespace));
    }

    #[test]
    fn replace_mglyph() {
        // each mglyph should be replaced by the value of its 'alt' attribute
        let with_mglyphs = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let expected = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
        assert!(are_parsed_strs_equal(with_mglyphs, expected));
    }

    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        // can't use 'are_parsed_strs_equal' because it panics when the docs differ, which is what is expected here
        let package1 = parse_and_trim(whitespace_str);
        debug!("doc1:\n{}", mml_to_string(get_element(&package1)));

        let package2 = parse_and_trim(different_str);
        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));

        let comparison = is_same_doc(&package1.as_document(), &package2.as_document());
        assert!(comparison.is_err());
    }

    #[test]
    fn test_entities() {
        /// Calls `set_mathml` on `mathml` (panicking on error) and removes the unique ids from the result
        fn set_mathml_without_ids(mathml: &str) -> String {
            lazy_static! {
                static ref ID_MATCH: Regex = Regex::new(r#"id='.+?' "#).unwrap();
            }
            let with_ids = set_mathml(mathml.to_string()).unwrap();
            return ID_MATCH.replace_all(&with_ids, "").into_owned();
        }

        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        let entity_str = set_mathml_without_ids("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>");
        let converted_str =
            set_mathml_without_ids("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>");
        assert_eq!(entity_str, converted_str, "normal entity test failed");

        let entity_str = set_mathml_without_ids(
            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>",
        );
        let converted_str =
            set_mathml_without_ids("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>");
        assert_eq!(entity_str, converted_str, "special entities quote test failed");

        let entity_str = set_mathml_without_ids("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>");
        let converted_str =
            set_mathml_without_ids("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>");
        assert_eq!(entity_str, converted_str, "special entities <,>,& test failed");
    }

    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
        std::env::set_var("MathCATRulesDir", "MathCATRulesDir");

        let invalid_dir_result = set_rules_dir("someInvalidRulesDir".to_string());
        assert!(invalid_dir_result.is_err());

        // a valid directory afterwards should work, as should setting the MathML
        assert!(
            set_rules_dir(super::super::abs_rules_dir_path()).is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );
        assert!(set_mathml("<math><mn>1</mn></math>".to_string()).is_ok());
    }

    #[test]
    fn single_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn multiple_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn nested_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }

    #[test]
    fn empty_html_in_mtext() {
        let with_html = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let expected = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(with_html, expected));
    }
}