libmathcat/
interface.rs

1//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2//!
3#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6
7use crate::canonicalize::{as_text, create_mathml_element};
8use crate::errors::*;
9use phf::phf_map;
10use regex::{Captures, Regex};
11use sxd_document::dom::*;
12use sxd_document::parser;
13use sxd_document::Package;
14
15use crate::canonicalize::{as_element, name};
16use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
17
18use crate::navigate::*;
19use crate::pretty_print::mml_to_string;
20use crate::xpath_functions::{is_leaf, IsNode};
21
22#[cfg(feature = "enable-logs")]
23use std::sync::Once;
24#[cfg(feature = "enable-logs")]
25static INIT: Once = Once::new();
26
/// Performs the one-time logger setup; this is a no-op unless the `enable-logs` feature is on.
///
/// With the feature enabled, the setup closure is run through the `INIT` `Once`, so every
/// entry point can call this unconditionally. The closure only does work on Android, where it
/// installs `android_logger` at `Trace` level with the tag "MathCat".
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            extern crate android_logger;
            extern crate log;

            let config = android_logger::Config::default()
                .with_max_level(log::LevelFilter::Trace)
                .with_tag("MathCat");
            android_logger::init_once(config);
            log::trace!("Activated Android logger!");
        }
    });
}
47
48// wrap up some common functionality between the call from 'main' and AT
49fn cleanup_mathml(mathml: Element) -> Result<Element> {
50    trim_element(mathml, false);
51    let mathml = crate::canonicalize::canonicalize(mathml)?;
52    let mathml = add_ids(mathml);
53    return Ok(mathml);
54}
55
thread_local! {
    /// The current MathML (the expression being navigated, spoken, and brailled) is stored in `MATHML_INSTANCE`.
    /// It is thread-local, starts out as the document built by `init_mathml_instance`,
    /// and is replaced by each successful call to `set_mathml`.
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
}
60
61fn init_mathml_instance() -> RefCell<Package> {
62    let package = parser::parse("<math></math>")
63        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
64    return RefCell::new(package);
65}
66
67/// Set the Rules directory
68/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
69pub fn set_rules_dir(dir: String) -> Result<()> {
70    enable_logs();
71    use std::path::PathBuf;
72    let dir = if dir.is_empty() {
73        std::env::var_os("MathCATRulesDir")
74            .unwrap_or_default()
75            .to_str()
76            .unwrap()
77            .to_string()
78    } else {
79        dir
80    };
81    let pref_manager = crate::prefs::PreferenceManager::get();
82    return pref_manager.borrow_mut().initialize(PathBuf::from(dir));
83}
84
85/// Returns the version number (from Cargo.toml) of the build
86pub fn get_version() -> String {
87    enable_logs();
88    const VERSION: &str = env!("CARGO_PKG_VERSION");
89    return VERSION.to_string();
90}
91
92/// This will override any previous MathML that was set.
93/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
94/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
95pub fn set_mathml(mathml_str: String) -> Result<String> {
96    enable_logs();
97    lazy_static! {
98        // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
99        static ref MATHJAX_V2: Regex = Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap();
100        static ref MATHJAX_V3: Regex = Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap();
101        static ref NAMESPACE_DECL: Regex = Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap();     // very limited namespace prefix match
102        static ref PREFIX: Regex = Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap();     // very limited namespace prefix match
103        static ref HTML_ENTITIES: Regex = Regex::new(r#"&([a-zA-Z]+?);"#).unwrap();
104    }
105
106    NAVIGATION_STATE.with(|nav_stack| {
107        nav_stack.borrow_mut().reset();
108    });
109
110    // We need the main definitions files to be read in so canonicalize can work.
111    // This call reads all of them for the current preferences, but that's ok since they will likely be used
112    crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
113
114    return MATHML_INSTANCE.with(|old_package| {
115        static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
116
117        let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
118                                                // need to deal with character data and convert to something the parser knows
119        let mathml_str =
120            HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
121                None => {
122                    error_message = format!("No entity named '{}'", &cap[0]);
123                    cap[0].to_string()
124                }
125                Some(&ch) => ch.to_string(),
126            });
127
128        if !error_message.is_empty() {
129            bail!(error_message);
130        }
131        let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
132        let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
133
134        // the speech rules use the xpath "name" function and that includes the prefix
135        // getting rid of the prefix properly probably involves a recursive replacement in the tree
136        // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
137        let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
138        let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
139
140        let new_package = parser::parse(&mathml_str);
141        if let Err(e) = new_package {
142            bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
143        }
144
145        let new_package = new_package.unwrap();
146        let mathml = get_element(&new_package);
147        let mathml = cleanup_mathml(mathml)?;
148        let mathml_string = mml_to_string(mathml);
149        old_package.replace(new_package);
150
151        return Ok(mathml_string);
152    });
153}
154
155/// Get the spoken text of the MathML that was set.
156/// The speech takes into account any AT or user preferences.
157pub fn get_spoken_text() -> Result<String> {
158    enable_logs();
159    // use std::time::{Instant};
160    // let instant = Instant::now();
161    return MATHML_INSTANCE.with(|package_instance| {
162        let package_instance = package_instance.borrow();
163        let mathml = get_element(&package_instance);
164        let new_package = Package::new();
165        let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
166        debug!("Intent tree:\n{}", mml_to_string(intent));
167        let speech = crate::speech::speak_mathml(intent, "")?;
168        // info!("Time taken: {}ms", instant.elapsed().as_millis());
169        return Ok(speech);
170    });
171}
172
173/// Get the spoken text for an overview of the MathML that was set.
174/// The speech takes into account any AT or user preferences.
175/// Note: this implementation for is currently minimal and should not be used.
176pub fn get_overview_text() -> Result<String> {
177    enable_logs();
178    // use std::time::{Instant};
179    // let instant = Instant::now();
180    return MATHML_INSTANCE.with(|package_instance| {
181        let package_instance = package_instance.borrow();
182        let mathml = get_element(&package_instance);
183        let speech = crate::speech::overview_mathml(mathml, "")?;
184        // info!("Time taken: {}ms", instant.elapsed().as_millis());
185        return Ok(speech);
186    });
187}
188
189/// Get the value of the named preference.
190/// None is returned if `name` is not a known preference.
191pub fn get_preference(name: String) -> Result<String> {
192    enable_logs();
193    use crate::prefs::NO_PREFERENCE;
194    return crate::speech::SPEECH_RULES.with(|rules| {
195        let rules = rules.borrow();
196        let pref_manager = rules.pref_manager.borrow();
197        let mut value = pref_manager.pref_to_string(&name);
198        if value == NO_PREFERENCE {
199            value = pref_manager.pref_to_string(&name);
200        }
201        if value == NO_PREFERENCE {
202            bail!("No preference named '{}'", &name);
203        } else {
204            return Ok(value);
205        }
206    });
207}
208
209/// Set a MathCAT preference. The preference name should be a known preference name.
210/// The value should either be a string or a number (depending upon the preference being set)
211/// The list of known user preferences is in the MathCAT user documentation.
212/// Here are common preferences set by programs (not settable by the user):
213/// * TTS -- SSML, SAPI5, None
214/// * Pitch -- normalized at '1.0'
215/// * Rate -- words per minute (should match current speech rate).
216///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
217/// * Volume -- default 100
218/// * Voice -- set a voice to use (not implemented)
219/// * Gender -- set pick any voice of the given gender (not implemented)
220/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
221///
222/// Important: both the preference name and value are case-sensitive
223///
224/// This function can be called multiple times to set different values.
225/// The values are persistent and extend beyond calls to [`set_mathml`].
226/// A value can be overwritten by calling this function again with a different value.
227///
228/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
229pub fn set_preference(name: String, value: String) -> Result<()> {
230    enable_logs();
231    // "LanguageAuto" allows setting the language dir without actually changing the value of "Language" from Auto
232    let mut value = value;
233    if name == "Language" || name == "LanguageAuto" {
234        // check the format
235        if value != "Auto" {
236            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
237            let mut lang_country_split = value.split('-');
238            let language = lang_country_split.next().unwrap_or("");
239            let country = lang_country_split.next().unwrap_or("");
240            if language.len() != 2 {
241                bail!(
242                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
243                    value
244                );
245            }
246            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
247            if !country.is_empty() {
248                new_lang_country.push('-');
249                new_lang_country.push_str(country);
250            }
251            value = new_lang_country;
252        }
253        if name == "LanguageAuto" && value == "Auto" {
254            bail!("'LanguageAuto' can not have the value 'Auto'");
255        }
256    }
257
258    crate::speech::SPEECH_RULES.with(|rules| {
259        let rules = rules.borrow_mut();
260        if let Some(error_string) = rules.get_error() {
261            bail!("{}", error_string);
262        }
263
264        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
265        let mut pref_manager = rules.pref_manager.borrow_mut();
266        if name == "LanguageAuto" {
267            let language_pref = pref_manager.pref_to_string("Language");
268            if language_pref != "Auto" {
269                bail!(
270                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
271                    language_pref
272                );
273            }
274        }
275        let lower_case_value = value.to_lowercase();
276        if lower_case_value == "true" || lower_case_value == "false" {
277            pref_manager.set_api_boolean_pref(&name, value.to_lowercase() == "true");
278        } else {
279            match name.as_str() {
280                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
281                    pref_manager.set_api_float_pref(&name, to_float(&name, &value)?)
282                }
283                _ => {
284                    pref_manager.set_string_pref(&name, &value)?;
285                }
286            }
287        };
288        return Ok::<(), Error>(());
289    })?;
290
291    return Ok(());
292
293    fn to_float(name: &str, value: &str) -> Result<f64> {
294        return match value.parse::<f64>() {
295            Ok(val) => Ok(val),
296            Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
297        };
298    }
299}
300
301/// Get the braille associated with the MathML that was set by [`set_mathml`].
302/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
303/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
304pub fn get_braille(nav_node_id: String) -> Result<String> {
305    enable_logs();
306    // use std::time::{Instant};
307    // let instant = Instant::now();
308    return MATHML_INSTANCE.with(|package_instance| {
309        let package_instance = package_instance.borrow();
310        let mathml = get_element(&package_instance);
311        let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
312        // info!("Time taken: {}ms", instant.elapsed().as_millis());
313        return Ok(braille);
314    });
315}
316
317/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
318/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
319/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
320pub fn get_navigation_braille() -> Result<String> {
321    enable_logs();
322    return MATHML_INSTANCE.with(|package_instance| {
323        let package_instance = package_instance.borrow();
324        let mathml = get_element(&package_instance);
325        let new_package = Package::new(); // used if we need to create a new tree
326        let new_doc = new_package.as_document();
327        let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
328            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
329                Err(e) => Err(e),
330                Ok((found, offset)) => {
331                    // get the MathML node and wrap it inside of a <math> element
332                    // if the offset is given, we need to get the character it references
333                    if offset == 0 {
334                        if name(found) == "math" {
335                            Ok(found)
336                        } else {
337                            let new_mathml = create_mathml_element(&new_doc, "math");
338                            new_mathml.append_child(copy_mathml(found));
339                            new_doc.root().append_child(new_mathml);
340                            Ok(new_mathml)
341                        }
342                    } else if !is_leaf(found) {
343                        bail!(
344                            "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
345                            offset,
346                            name(found)
347                        );
348                    } else if let Some(ch) = as_text(found).chars().nth(offset) {
349                        let internal_mathml = create_mathml_element(&new_doc, name(found));
350                        internal_mathml.set_text(&ch.to_string());
351                        let new_mathml = create_mathml_element(&new_doc, "math");
352                        new_mathml.append_child(internal_mathml);
353                        new_doc.root().append_child(new_mathml);
354                        Ok(new_mathml)
355                    } else {
356                        bail!(
357                            "Internal error: offset '{}' on leaf element '{}' doesn't exist",
358                            offset,
359                            mml_to_string(found)
360                        );
361                    }
362                }
363            };
364        })?;
365
366        let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
367        return Ok(braille);
368    });
369}
370
371/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
372/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
373/// The spoken text for the new current node is returned.
374pub fn do_navigate_keypress(
375    key: usize,
376    shift_key: bool,
377    control_key: bool,
378    alt_key: bool,
379    meta_key: bool,
380) -> Result<String> {
381    return MATHML_INSTANCE.with(|package_instance| {
382        let package_instance = package_instance.borrow();
383        let mathml = get_element(&package_instance);
384        return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
385    });
386}
387
388/// Given a navigation command, the current node is moved accordingly.
389/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
390/// The valid commands are:
391/// * Standard move commands:
392///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
393/// * Movement in a table or elementary math:
394///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
395/// * Moving into children or out to parents:
396///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
397/// * Undo the last movement command:
398///   `MoveLastLocation`
399/// * Read commands (standard speech):
400///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
401/// * Describe commands (overview):
402///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
403/// * Location information:
404///   `WhereAmI`, `WhereAmIAll`
405/// * Change navigation modes (circle up/down):
406///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
407/// * Speak the current navigation mode
408///   `ToggleSpeakMode`
409///
410/// There are 10 place markers that can be set/read/described or moved to.
411/// * Setting:
412///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
413/// * Reading:
414///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
415/// * Describing:
416///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
417/// * Moving:
418///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
419///
420/// When done with Navigation, call with `Exit`
421pub fn do_navigate_command(command: String) -> Result<String> {
422    enable_logs();
423    let command = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
424    if command.is_none() {
425        bail!("Unknown command in call to DoNavigateCommand()");
426    };
427    let command = *command.unwrap();
428    return MATHML_INSTANCE.with(|package_instance| {
429        let package_instance = package_instance.borrow();
430        let mathml = get_element(&package_instance);
431        return do_navigate_command_string(mathml, command);
432    });
433}
434
435/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
436/// An error is returned if the 'id' doesn't exist
437pub fn set_navigation_node(id: String, offset: usize) -> Result<()> {
438    enable_logs();
439    return MATHML_INSTANCE.with(|package_instance| {
440        let package_instance = package_instance.borrow();
441        let mathml = get_element(&package_instance);
442        return set_navigation_node_from_id(mathml, id, offset);
443    });
444}
445
446/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
447/// The offset is needed for token elements that have multiple characters.
448pub fn get_navigation_mathml() -> Result<(String, usize)> {
449    return MATHML_INSTANCE.with(|package_instance| {
450        let package_instance = package_instance.borrow();
451        let mathml = get_element(&package_instance);
452        return NAVIGATION_STATE.with(|nav_stack| {
453            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
454                Err(e) => Err(e),
455                Ok((found, offset)) => Ok((mml_to_string(found), offset)),
456            };
457        });
458    });
459}
460
461/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
462/// `offset` (not yet implemented)
463/// The offset is needed for token elements that have multiple characters.
464pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
465    enable_logs();
466    return MATHML_INSTANCE.with(|package_instance| {
467        let package_instance = package_instance.borrow();
468        let mathml = get_element(&package_instance);
469        return Ok(NAVIGATION_STATE.with(|nav_stack| {
470            return nav_stack.borrow().get_navigation_mathml_id(mathml);
471        }));
472    });
473}
474
475/// Return the start and end braille character positions associated with the current (navigation) node.
476pub fn get_braille_position() -> Result<(usize, usize)> {
477    enable_logs();
478    return MATHML_INSTANCE.with(|package_instance| {
479        let package_instance = package_instance.borrow();
480        let mathml = get_element(&package_instance);
481        let nav_node = get_navigation_mathml_id()?;
482        let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
483        return Ok((start, end));
484    });
485}
486
487/// Given a 0-based braille position, return the smallest MathML node enclosing it.
488/// This node might be a leaf with an offset.
489pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
490    enable_logs();
491    return MATHML_INSTANCE.with(|package_instance| {
492        let package_instance = package_instance.borrow();
493        let mathml = get_element(&package_instance);
494        return crate::braille::get_navigation_node_from_braille_position(mathml, position);
495    });
496}
497
498pub fn get_supported_braille_codes() -> Vec<String> {
499    enable_logs();
500    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
501    let braille_dir = rules_dir.join("Braille");
502    let mut braille_code_paths = Vec::new();
503
504    find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
505    let mut braille_code_paths = braille_code_paths.iter()
506                    .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
507                    .filter(|string_path| !string_path.is_empty() )
508                    .collect::<Vec<String>>();
509    braille_code_paths.sort();
510
511    return braille_code_paths;
512 }
513
514/// Returns a Vec of all supported languages ("en", "es", ...)
515pub fn get_supported_languages() -> Vec<String> {
516    enable_logs();
517    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
518    let lang_dir = rules_dir.join("Languages");
519    let mut lang_paths = Vec::new();
520
521    find_all_dirs_shim(&lang_dir, &mut lang_paths);
522    let mut language_paths = lang_paths.iter()
523                    .map(|path| path.strip_prefix(&lang_dir).unwrap()
524                                              .to_string_lossy()
525                                              .replace(std::path::MAIN_SEPARATOR, "-")
526                                              .to_string())
527                    .filter(|string_path| !string_path.is_empty() )
528                    .collect::<Vec<String>>();
529
530    // make sure the 'zz' test dir isn't included (build.rs removes it, but for debugging is there)
531    language_paths.retain(|s| !s.starts_with("zz"));
532    language_paths.sort();
533    return language_paths;
534 }
535
536 pub fn get_supported_speech_styles(lang: String) -> Vec<String> {
537    enable_logs();
538    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
539    let lang_dir = rules_dir.join("Languages").join(lang);
540    let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
541    for file_name in &mut speech_styles {
542        file_name.truncate(file_name.len() - "_Rules.yaml".len())
543    }
544    speech_styles.sort();
545    // remove duplicates -- shouldn't be any, but just in case
546    let mut i = 1;
547    while i < speech_styles.len() {
548        if speech_styles[i-1] == speech_styles[i] {
549            speech_styles.remove(i);
550        } else {
551            i += 1;
552        }
553    }
554    return speech_styles;
555 }
556
557// utility functions
558
559/// Copy (recursively) the (MathML) element and return the new one.
560/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
561/// Convert the returned error from set_mathml, etc., to a useful string for display
562pub fn copy_mathml(mathml: Element) -> Element {
563    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
564    let children = mathml.children();
565    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
566    mathml.attributes().iter().for_each(|attr| {
567        new_mathml.set_attribute_value(attr.name(), attr.value());
568    });
569
570    // can't use is_leaf/as_text because this is also used with the intent tree
571    if children.len() == 1 {
572        if let Some(text) = children[0].text() {
573        new_mathml.set_text(text.text());
574        return new_mathml;
575        }
576    }
577
578    let mut new_children = Vec::with_capacity(children.len());
579    for child in children {
580        let child = as_element(child);
581        let new_child = copy_mathml(child);
582        new_children.push(new_child);
583    }
584    new_mathml.append_children(new_children);
585    return new_mathml;
586}
587
588pub fn errors_to_string(e: &Error) -> String {
589    enable_logs();
590    let mut result = String::default();
591    let mut first_time = true;
592    for e in e.iter() {
593        if first_time {
594            result = format!("{e}\n");
595            first_time = false;
596        } else {
597            result += &format!("caused by: {e}\n");
598        }
599    }
600    return result;
601}
602
603fn add_ids(mathml: Element) -> Element {
604    use std::time::SystemTime;
605    let time = if cfg!(target_family = "wasm") {
606        fastrand::usize(..)
607    } else {
608        SystemTime::now()
609            .duration_since(SystemTime::UNIX_EPOCH)
610            .unwrap()
611            .as_millis() as usize
612    };
613    let mut time_part = radix_fmt::radix(time, 36).to_string();
614    if time_part.len() < 3 {
615        time_part.push_str("a2c");      // needs to be at least three chars
616    }
617    let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
618    if random_part.len() < 4 {
619        random_part.push_str("a1b2");      // needs to be at least four chars
620    }
621    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
622    add_ids_to_all(mathml, &prefix, 0);
623    return mathml;
624
625    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
626        let mut count = count;
627        if mathml.attribute("id").is_none() {
628            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
629            mathml.set_attribute_value("data-id-added", "true");
630            count += 1;
631        };
632
633        if crate::xpath_functions::is_leaf(mathml) {
634            return count;
635        }
636
637        for child in mathml.children() {
638            let child = as_element(child);
639            count = add_ids_to_all(child, id_prefix, count);
640        }
641        return count;
642    }
643}
644
645pub fn get_element(package: &Package) -> Element<'_> {
646    enable_logs();
647    let doc = package.as_document();
648    let mut result = None;
649    for root_child in doc.root().children() {
650        if let ChildOfRoot::Element(e) = root_child {
651            assert!(result.is_none());
652            result = Some(e);
653        }
654    }
655    return result.unwrap();
656}
657
658/// Get the intent after setting the MathML
659/// Used in testing
660#[allow(dead_code)]
661pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
662    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
663    let mathml = cleanup_mathml(mathml)?;
664    return crate::speech::intent_from_mathml(mathml, doc);
665}
666
667#[allow(dead_code)]
668fn trim_doc(doc: &Document) {
669    for root_child in doc.root().children() {
670        if let ChildOfRoot::Element(e) = root_child {
671            trim_element(e, false);
672        } else {
673            doc.root().remove_child(root_child); // comment or processing instruction
674        }
675    }
676}
677
678/// Not really meant to be public -- used by tests in some packages
679pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
680    // "<mtext>this is text</mtext" results in 3 text children
681    // these are combined into one child as it makes code downstream simpler
682
683    // space, tab, newline, carriage return all get collapsed to a single space
684    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}', '\u{000D}'];
685    lazy_static! {
686        static ref WHITESPACE_MATCH: Regex = Regex::new(r#"[ \u{0009}\u{000A}\u{000D}]+"#).unwrap();
687    }
688
689    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
690        // Assume it is HTML inside of the leaf -- turn the HTML into a string
691        make_leaf_element(e);
692        return;
693    }
694
695    let mut single_text = "".to_string();
696    for child in e.children() {
697        match child {
698            ChildOfElement::Element(c) => {
699                trim_element(c, allow_structure_in_leaves);
700            }
701            ChildOfElement::Text(t) => {
702                single_text += t.text();
703                e.remove_child(child);
704            }
705            _ => {
706                e.remove_child(child);
707            }
708        }
709    }
710
711    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
712    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
713        // intent-literal comes from testing intent
714        // FIX: we have a problem -- what should happen???
715        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
716        if !single_text.trim_matches(WHITESPACE).is_empty() {
717            error!(
718                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
719            );
720        }
721        return;
722    }
723    if e.children().is_empty() && !single_text.is_empty() {
724        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
725        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
726    }
727
728    fn make_leaf_element(mathml_leaf: Element) {
729        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
730        // Here, we convert them to leaves by grabbing up all the text and making that the content
731        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
732        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
733        let children = mathml_leaf.children();
734        if children.is_empty() {
735            return;
736        }
737
738        if rewrite_and_flatten_embedded_mathml(mathml_leaf) {
739            return;
740        }
741
742        // gather up the text
743        let mut text = "".to_string();
744        for child in children {
745            let child_text = match child {
746                ChildOfElement::Element(child) => {
747                    if name(child) == "mglyph" {
748                        child.attribute_value("alt").unwrap_or("").to_string()
749                    } else {
750                        gather_text(child)
751                    }
752                }
753                ChildOfElement::Text(t) => {
754                    // debug!("ChildOfElement::Text: '{}'", t.text());
755                    t.text().to_string()
756                }
757                _ => "".to_string(),
758            };
759            if !child_text.is_empty() {
760                text += &child_text;
761            }
762        }
763
764        // get rid of the old children and replace with the text we just built
765        mathml_leaf.clear_children();
766        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
767        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));
768
769        /// gather up all the contents of the element and return them with a leading space
770        fn gather_text(html: Element) -> String {
771            let mut text = "".to_string(); // since we are throwing out the element tag, add a space between the contents
772            for child in html.children() {
773                match child {
774                    ChildOfElement::Element(child) => {
775                        text += &gather_text(child);
776                    }
777                    ChildOfElement::Text(t) => text += t.text(),
778                    _ => (),
779                }
780            }
781            // debug!("gather_text: '{}'", text);
782            return text;
783        }
784    }
785
786    fn rewrite_and_flatten_embedded_mathml(mathml_leaf: Element) -> bool {
787        // first see if it can or needs to be rewritten
788        // this is likely rare, so we do a check and if true, to a second pass building the result
789        let mut needs_rewrite = false;
790        for child in mathml_leaf.children() {
791            if let Some(element) = child.element() {
792                if name(element) != "math" {
793                    return false; // something other than MathML as a child -- can't rewrite
794                }
795                needs_rewrite = true;
796            }
797        };
798
799        if !needs_rewrite {
800            return false;
801        }
802
803        // now do the rewrite, flatting out the mathml and returning an mrow with the children
804        let leaf_name = name(mathml_leaf);
805        let doc = mathml_leaf.document();
806        let mut new_children = Vec::new();
807        let mut is_last_mtext = false;
808        for child in mathml_leaf.children() {
809            if let Some(element) = child.element() {
810                trim_element(element, true);
811                new_children.append(&mut element.children());   // don't want 'math' wrapper
812                is_last_mtext = false;
813            } else if let Some(text) = child.text() {
814                // combine adjacent text nodes into single nodes
815                if is_last_mtext {
816                    let last_child = new_children.last_mut().unwrap().element().unwrap();
817                    let new_text = as_text(last_child).to_string() + text.text();
818                    last_child.set_text(&new_text);
819                } else {
820                    let new_leaf_node = create_mathml_element(&doc, leaf_name);
821                    new_leaf_node.set_text(text.text());
822                    new_children.push(ChildOfElement::Element(new_leaf_node));
823                    is_last_mtext = true;
824                }
825            }
826        };
827
828        crate::canonicalize::set_mathml_name(mathml_leaf, "mrow");
829        mathml_leaf.clear_children();
830        mathml_leaf.append_children(new_children);
831
832        // debug!("rewrite_and_flatten_embedded_mathml: flattened\n'{}'", mml_to_string(mathml_leaf));
833        return true;
834    }
835}
836
837// used for testing trim
838/// returns Ok() if two Documents are equal or some info where they differ in the Err
839#[allow(dead_code)]
840fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
841    // assume 'e' doesn't have element children until proven otherwise
842    // this means we keep Text children until we are proven they aren't needed
843    if doc1.root().children().len() != doc2.root().children().len() {
844        bail!(
845            "Children of docs have {} != {} children",
846            doc1.root().children().len(),
847            doc2.root().children().len()
848        );
849    }
850
851    for (i, (c1, c2)) in doc1
852        .root()
853        .children()
854        .iter()
855        .zip(doc2.root().children().iter())
856        .enumerate()
857    {
858        match c1 {
859            ChildOfRoot::Element(e1) => {
860                if let ChildOfRoot::Element(e2) = c2 {
861                    is_same_element(*e1, *e2)?;
862                } else {
863                    bail!("child #{}, first is element, second is something else", i);
864                }
865            }
866            ChildOfRoot::Comment(com1) => {
867                if let ChildOfRoot::Comment(com2) = c2 {
868                    if com1.text() != com2.text() {
869                        bail!("child #{} -- comment text differs", i);
870                    }
871                } else {
872                    bail!("child #{}, first is comment, second is something else", i);
873                }
874            }
875            ChildOfRoot::ProcessingInstruction(p1) => {
876                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
877                    if p1.target() != p2.target() || p1.value() != p2.value() {
878                        bail!("child #{} -- processing instruction differs", i);
879                    }
880                } else {
881                    bail!(
882                        "child #{}, first is processing instruction, second is something else",
883                        i
884                    );
885                }
886            }
887        }
888    }
889    return Ok(());
890}
891
892/// returns Ok() if two Documents are equal or some info where they differ in the Err
893// Not really meant to be public -- used by tests in some packages
894#[allow(dead_code)]
895pub fn is_same_element(e1: Element, e2: Element) -> Result<()> {
896    enable_logs();
897    if name(e1) != name(e2) {
898        bail!("Names not the same: {}, {}", name(e1), name(e2));
899    }
900
901    // assume 'e' doesn't have element children until proven otherwise
902    // this means we keep Text children until we are proven they aren't needed
903    if e1.children().len() != e2.children().len() {
904        bail!(
905            "Children of {} have {} != {} children",
906            name(e1),
907            e1.children().len(),
908            e2.children().len()
909        );
910    }
911
912    if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes()) {
913        bail!("In element {}, {}", name(e1), e);
914    }
915
916    for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
917        match c1 {
918            ChildOfElement::Element(child1) => {
919                if let ChildOfElement::Element(child2) = c2 {
920                    is_same_element(*child1, *child2)?;
921                } else {
922                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
923                }
924            }
925            ChildOfElement::Comment(com1) => {
926                if let ChildOfElement::Comment(com2) = c2 {
927                    if com1.text() != com2.text() {
928                        bail!("{} child #{} -- comment text differs", name(e1), i);
929                    }
930                } else {
931                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
932                }
933            }
934            ChildOfElement::ProcessingInstruction(p1) => {
935                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
936                    if p1.target() != p2.target() || p1.value() != p2.value() {
937                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
938                    }
939                } else {
940                    bail!(
941                        "{} child #{}, first is processing instruction, second is something else",
942                        name(e1),
943                        i
944                    );
945                }
946            }
947            ChildOfElement::Text(t1) => {
948                if let ChildOfElement::Text(t2) = c2 {
949                    if t1.text() != t2.text() {
950                        bail!("{} child #{} --  text differs", name(e1), i);
951                    }
952                } else {
953                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
954                }
955            }
956        }
957    }
958    return Ok(());
959
960    /// compares attributes -- '==' didn't seems to work
961    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>) -> Result<()> {
962        if attrs1.len() != attrs2.len() {
963            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
964        }
965        // can't guarantee attrs are in the same order
966        for attr1 in attrs1 {
967            if let Some(found_attr2) = attrs2
968                .iter()
969                .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
970            {
971                if attr1.value() == found_attr2.value() {
972                    continue;
973                } else {
974                    bail!(
975                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
976                        attr1.name().local_part(),
977                        attr1.value(),
978                        found_attr2.value()
979                    );
980                }
981            } else {
982                bail!(
983                    "Attribute name {} not in [{}]",
984                    print_attr(&attr1),
985                    print_attrs(&attrs2)
986                );
987            }
988        }
989        return Ok(());
990
991        fn print_attr(attr: &Attribute) -> String {
992            return format!("@{}='{}'", attr.name().local_part(), attr.value());
993        }
994        fn print_attrs(attrs: &[Attribute]) -> String {
995            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
996        }
997    }
998}
999
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses and trims both strings and compares the resulting documents.
    /// Returns true if they are the same; panics with a description of the difference otherwise.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let test_package = parser::parse(test).expect("Failed to parse input");
        let test_doc = test_package.as_document();
        trim_doc(&test_doc);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        let target_package = parser::parse(target).expect("Failed to parse input");
        let target_doc = target_package.as_document();
        trim_doc(&target_doc);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        if let Err(e) = is_same_doc(&test_doc, &target_doc) {
            panic!("{}", e);
        }
        return true;
    }

    // a string compared against itself
    #[test]
    fn trim_same() {
        let trimmed_str = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(trimmed_str, trimmed_str));
    }

    // whitespace between elements and inside of tags shouldn't matter
    #[test]
    fn trim_whitespace() {
        let trimmed_str = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
    }

    // same as trim_whitespace, but the leaf contains a non-breaking space
    #[test]
    fn no_trim_whitespace_nbsp() {
        let trimmed_str = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let whitespace_str = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(trimmed_str, whitespace_str));
    }

    // comments between elements are removed
    #[test]
    fn trim_comment() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let comment_str = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(comment_str, whitespace_str));
    }

    // an mglyph inside of a leaf is replaced by the value of its 'alt' attribute
    #[test]
    fn replace_mglyph() {
        let mglyph_str = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let result_str = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
        assert!(are_parsed_strs_equal(mglyph_str, result_str));
    }

    // documents with different leaf text must be reported as different
    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        // need to manually do this since failure shouldn't be a panic
        let package1 = parser::parse(whitespace_str).expect("Failed to parse input");
        let doc1 = package1.as_document();
        trim_doc(&doc1);
        debug!("doc1:\n{}", mml_to_string(get_element(&package1)));

        let package2 = parser::parse(different_str).expect("Failed to parse input");
        let doc2 = package2.as_document();
        trim_doc(&doc2);
        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));

        assert!(is_same_doc(&doc1, &doc2).is_err());
    }

    // named entities should give the same result as the corresponding numeric character references
    #[test]
    fn test_entities() {
        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // need to remove unique ids
        lazy_static! {
            static ref ID_MATCH: Regex = Regex::new(r#"id='.+?' "#).unwrap();
        }
        let without_ids = |mathml: &str| ID_MATCH.replace_all(mathml, "").into_owned();

        let entity_str = set_mathml("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>".to_string()).unwrap();
        let converted_str =
            set_mathml("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>".to_string()).unwrap();
        assert_eq!(
            without_ids(&entity_str),
            without_ids(&converted_str),
            "normal entity test failed"
        );

        let entity_str = set_mathml(
            "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>".to_string(),
        )
        .unwrap();
        let converted_str =
            set_mathml("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>".to_string()).unwrap();
        assert_eq!(
            without_ids(&entity_str),
            without_ids(&converted_str),
            "special entities quote test failed"
        );

        let entity_str =
            set_mathml("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>".to_string()).unwrap();
        let converted_str =
            set_mathml("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>".to_string())
                .unwrap();
        assert_eq!(
            without_ids(&entity_str),
            without_ids(&converted_str),
            "special entities <,>,& test failed"
        );
    }

    // a failed set_rules_dir shouldn't prevent a later, valid call from working
    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        use std::env;
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
        env::set_var("MathCATRulesDir", "MathCATRulesDir");
        assert!(set_rules_dir("someInvalidRulesDir".to_string()).is_err());

        let rules_dir = super::super::abs_rules_dir_path();
        assert!(
            set_rules_dir(rules_dir).is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );
        assert!(set_mathml("<math><mn>1</mn></math>".to_string()).is_ok());
    }

    // the text of an HTML element inside of a leaf is merged into the leaf's text
    #[test]
    fn single_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    // several HTML elements inside of a leaf
    #[test]
    fn multiple_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    // nested HTML elements are flattened without anything being added between their text
    #[test]
    fn nested_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    // an HTML element without content contributes nothing
    #[test]
    fn empty_html_in_mtext() {
        let test = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let target = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    // 'math' inside of a leaf turns the leaf into an mrow with the text wrapped in leaves
    #[test]
    fn mathml_in_mtext() {
        let test = "<math><mtext>if&#xa0;<math> <msup><mi>n</mi><mn>2</mn></msup></math>&#xa0;is real</mtext></math>";
        let target = "<math><mrow><mtext>if&#xa0;</mtext><msup><mi>n</mi><mn>2</mn></msup><mtext>&#xa0;is real</mtext></mrow></math>";
        assert!(are_parsed_strs_equal(test, target));
    }
}