libmathcat/
interface.rs

1//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2//!
3#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6
7use crate::canonicalize::{as_text, create_mathml_element};
8use crate::errors::*;
9use phf::phf_map;
10use regex::{Captures, Regex};
11use sxd_document::dom::*;
12use sxd_document::parser;
13use sxd_document::Package;
14
15use crate::canonicalize::{as_element, name};
16use crate::shim_filesystem::{find_all_dirs_shim, find_files_in_dir_that_ends_with_shim};
17
18use crate::navigate::*;
19use crate::pretty_print::mml_to_string;
20use crate::xpath_functions::{is_leaf, IsNode};
21
22#[cfg(feature = "enable-logs")]
23use std::sync::Once;
24#[cfg(feature = "enable-logs")]
25static INIT: Once = Once::new();
26
/// Initialize logging the first time any API entry point is called.
///
/// The whole body is compiled only when the `enable-logs` feature is enabled; without it
/// this function does nothing. With the feature on, `INIT` (a `Once`) guarantees the
/// initializer closure runs a single time no matter how often this function is called.
/// The closure itself only does work on Android, where it installs `android_logger`
/// at `Trace` level with the tag "MathCat".
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(||{
        // Only Android needs an explicit logger installed here.
        #[cfg(target_os = "android")]
        {
            extern crate log;
            extern crate android_logger;
            
            use log::*;
            use android_logger::*;
        
            android_logger::init_once(
                Config::default()
                .with_max_level(LevelFilter::Trace)
                .with_tag("MathCat")
            );    
            trace!("Activated Android logger!");  
        }    
    });
}
47
48// wrap up some common functionality between the call from 'main' and AT
49fn cleanup_mathml(mathml: Element) -> Result<Element> {
50    trim_element(mathml, false);
51    let mathml = crate::canonicalize::canonicalize(mathml)?;
52    let mathml = add_ids(mathml);
53    return Ok(mathml);
54}
55
thread_local! {
    /// The current node being navigated (also spoken and brailled) is stored in `MATHML_INSTANCE`.
    ///
    /// Being a `thread_local!`, each thread has its own instance. It starts out as the value
    /// returned by `init_mathml_instance()` and is accessed through `MATHML_INSTANCE.with(...)`.
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
}
60
61fn init_mathml_instance() -> RefCell<Package> {
62    let package = parser::parse("<math></math>")
63        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
64    return RefCell::new(package);
65}
66
67/// Set the Rules directory
68/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
69pub fn set_rules_dir(dir: String) -> Result<()> {
70    enable_logs();
71    use std::path::PathBuf;
72    let dir = if dir.is_empty() {
73        std::env::var_os("MathCATRulesDir")
74            .unwrap_or_default()
75            .to_str()
76            .unwrap()
77            .to_string()
78    } else {
79        dir
80    };
81    let pref_manager = crate::prefs::PreferenceManager::get();
82    return pref_manager.borrow_mut().initialize(PathBuf::from(dir));
83}
84
85/// Returns the version number (from Cargo.toml) of the build
86pub fn get_version() -> String {
87    enable_logs();
88    const VERSION: &str = env!("CARGO_PKG_VERSION");
89    return VERSION.to_string();
90}
91
92/// This will override any previous MathML that was set.
93/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
94/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
95pub fn set_mathml(mathml_str: String) -> Result<String> {
96    enable_logs();
97    lazy_static! {
98        // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
99        static ref MATHJAX_V2: Regex = Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap();
100        static ref MATHJAX_V3: Regex = Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap();
101        static ref NAMESPACE_DECL: Regex = Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap();     // very limited namespace prefix match
102        static ref PREFIX: Regex = Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap();     // very limited namespace prefix match
103        static ref HTML_ENTITIES: Regex = Regex::new(r#"&([a-zA-Z]+?);"#).unwrap();
104    }
105
106    NAVIGATION_STATE.with(|nav_stack| {
107        nav_stack.borrow_mut().reset();
108    });
109
110    // We need the main definitions files to be read in so canonicalize can work.
111    // This call reads all of them for the current preferences, but that's ok since they will likely be used
112    crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
113
114    return MATHML_INSTANCE.with(|old_package| {
115        static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
116
117        let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
118                                                // need to deal with character data and convert to something the parser knows
119        let mathml_str =
120            HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
121                None => {
122                    error_message = format!("No entity named '{}'", &cap[0]);
123                    cap[0].to_string()
124                }
125                Some(&ch) => ch.to_string(),
126            });
127
128        if !error_message.is_empty() {
129            bail!(error_message);
130        }
131        let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
132        let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
133
134        // the speech rules use the xpath "name" function and that includes the prefix
135        // getting rid of the prefix properly probably involves a recursive replacement in the tree
136        // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
137        let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
138        let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
139
140        let new_package = parser::parse(&mathml_str);
141        if let Err(e) = new_package {
142            bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
143        }
144
145        let new_package = new_package.unwrap();
146        let mathml = get_element(&new_package);
147        let mathml = cleanup_mathml(mathml)?;
148        let mathml_string = mml_to_string(mathml);
149        old_package.replace(new_package);
150
151        return Ok(mathml_string);
152    });
153}
154
155/// Get the spoken text of the MathML that was set.
156/// The speech takes into account any AT or user preferences.
157pub fn get_spoken_text() -> Result<String> {
158    enable_logs();
159    // use std::time::{Instant};
160    // let instant = Instant::now();
161    return MATHML_INSTANCE.with(|package_instance| {
162        let package_instance = package_instance.borrow();
163        let mathml = get_element(&package_instance);
164        let new_package = Package::new();
165        let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
166        debug!("Intent tree:\n{}", mml_to_string(intent));
167        let speech = crate::speech::speak_mathml(intent, "")?;
168        // info!("Time taken: {}ms", instant.elapsed().as_millis());
169        return Ok(speech);
170    });
171}
172
173/// Get the spoken text for an overview of the MathML that was set.
174/// The speech takes into account any AT or user preferences.
175/// Note: this implementation for is currently minimal and should not be used.
176pub fn get_overview_text() -> Result<String> {
177    enable_logs();
178    // use std::time::{Instant};
179    // let instant = Instant::now();
180    return MATHML_INSTANCE.with(|package_instance| {
181        let package_instance = package_instance.borrow();
182        let mathml = get_element(&package_instance);
183        let speech = crate::speech::overview_mathml(mathml, "")?;
184        // info!("Time taken: {}ms", instant.elapsed().as_millis());
185        return Ok(speech);
186    });
187}
188
189/// Get the value of the named preference.
190/// None is returned if `name` is not a known preference.
191pub fn get_preference(name: String) -> Result<String> {
192    enable_logs();
193    use crate::prefs::NO_PREFERENCE;
194    return crate::speech::SPEECH_RULES.with(|rules| {
195        let rules = rules.borrow();
196        let pref_manager = rules.pref_manager.borrow();
197        let mut value = pref_manager.pref_to_string(&name);
198        if value == NO_PREFERENCE {
199            value = pref_manager.pref_to_string(&name);
200        }
201        if value == NO_PREFERENCE {
202            bail!("No preference named '{}'", &name);
203        } else {
204            return Ok(value);
205        }
206    });
207}
208
209/// Set a MathCAT preference. The preference name should be a known preference name.
210/// The value should either be a string or a number (depending upon the preference being set)
211/// The list of known user preferences is in the MathCAT user documentation.
212/// Here are common preferences set by programs (not settable by the user):
213/// * TTS -- SSML, SAPI5, None
214/// * Pitch -- normalized at '1.0'
215/// * Rate -- words per minute (should match current speech rate).
216///   There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
217/// * Volume -- default 100
218/// * Voice -- set a voice to use (not implemented)
219/// * Gender -- set pick any voice of the given gender (not implemented)
220/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
221///
222/// Important: both the preference name and value are case-sensitive
223///
224/// This function can be called multiple times to set different values.
225/// The values are persistent and extend beyond calls to [`set_mathml`].
226/// A value can be overwritten by calling this function again with a different value.
227///
228/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
229pub fn set_preference(name: String, value: String) -> Result<()> {
230    enable_logs();
231    // "LanguageAuto" allows setting the language dir without actually changing the value of "Language" from Auto
232    let mut value = value;
233    if name == "Language" || name == "LanguageAuto" {
234        // check the format
235        if value != "Auto" {
236            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
237            let mut lang_country_split = value.split('-');
238            let language = lang_country_split.next().unwrap_or("");
239            let country = lang_country_split.next().unwrap_or("");
240            if language.len() != 2 {
241                bail!(
242                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
243                    value
244                );
245            }
246            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
247            if !country.is_empty() {
248                new_lang_country.push('-');
249                new_lang_country.push_str(country);
250            }
251            value = new_lang_country;
252        }
253        if name == "LanguageAuto" && value == "Auto" {
254            bail!("'LanguageAuto' can not have the value 'Auto'");
255        }
256    }
257
258    crate::speech::SPEECH_RULES.with(|rules| {
259        let rules = rules.borrow_mut();
260        if let Some(error_string) = rules.get_error() {
261            bail!("{}", error_string);
262        }
263
264        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
265        let mut pref_manager = rules.pref_manager.borrow_mut();
266        if name == "LanguageAuto" {
267            let language_pref = pref_manager.pref_to_string("Language");
268            if language_pref != "Auto" {
269                bail!(
270                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
271                    language_pref
272                );
273            }
274        }
275        let lower_case_value = value.to_lowercase();
276        if lower_case_value == "true" || lower_case_value == "false" {
277            pref_manager.set_api_boolean_pref(&name, value.to_lowercase() == "true");
278        } else {
279            match name.as_str() {
280                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
281                    pref_manager.set_api_float_pref(&name, to_float(&name, &value)?)
282                }
283                _ => {
284                    pref_manager.set_string_pref(&name, &value)?;
285                }
286            }
287        };
288        return Ok::<(), Error>(());
289    })?;
290
291    return Ok(());
292
293    fn to_float(name: &str, value: &str) -> Result<f64> {
294        return match value.parse::<f64>() {
295            Ok(val) => Ok(val),
296            Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
297        };
298    }
299}
300
301/// Get the braille associated with the MathML that was set by [`set_mathml`].
302/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
303/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
304pub fn get_braille(nav_node_id: String) -> Result<String> {
305    enable_logs();
306    // use std::time::{Instant};
307    // let instant = Instant::now();
308    return MATHML_INSTANCE.with(|package_instance| {
309        let package_instance = package_instance.borrow();
310        let mathml = get_element(&package_instance);
311        let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
312        // info!("Time taken: {}ms", instant.elapsed().as_millis());
313        return Ok(braille);
314    });
315}
316
/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
///
/// # Errors
/// An error is returned if the navigation node can't be determined, if a non-zero offset is
/// reported for a non-leaf element or points past the leaf's text, or if braille generation fails.
pub fn get_navigation_braille() -> Result<String> {
    enable_logs();
    return MATHML_INSTANCE.with(|package_instance| {
        let package_instance = package_instance.borrow();
        let mathml = get_element(&package_instance);
        let new_package = Package::new(); // used if we need to create a new tree
        let new_doc = new_package.as_document();
        // Find the navigation node and make sure what gets brailled is rooted in a <math> element
        let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
                Err(e) => Err(e),
                Ok((found, offset)) => {
                    // get the MathML node and wrap it inside of a <math> element
                    // if the offset is given, we need to get the character it references
                    if offset == 0 {
                        if name(found) == "math" {
                            // already the root -- use it as is
                            Ok(found)
                        } else {
                            // wrap a copy so the stored MathML isn't restructured
                            let new_mathml = create_mathml_element(&new_doc, "math");
                            new_mathml.append_child(copy_mathml(found));
                            new_doc.root().append_child(new_mathml);
                            Ok(new_mathml)
                        }
                    } else if !is_leaf(found) {
                        bail!(
                            "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
                            offset,
                            name(found)
                        );
                    } else if let Some(ch) = as_text(found).chars().nth(offset) {
                        // build a leaf with the same name as 'found' that holds just the referenced character
                        let internal_mathml = create_mathml_element(&new_doc, name(found));
                        internal_mathml.set_text(&ch.to_string());
                        let new_mathml = create_mathml_element(&new_doc, "math");
                        new_mathml.append_child(internal_mathml);
                        new_doc.root().append_child(new_mathml);
                        Ok(new_mathml)
                    } else {
                        bail!(
                            "Internal error: offset '{}' on leaf element '{}' doesn't exist",
                            offset,
                            mml_to_string(found)
                        );
                    }
                }
            };
        })?;

        // no node is highlighted (empty id); only the braille string of the result is used
        let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
        return Ok(braille);
    });
}
370
371/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
372/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
373/// The spoken text for the new current node is returned.
374pub fn do_navigate_keypress(
375    key: usize,
376    shift_key: bool,
377    control_key: bool,
378    alt_key: bool,
379    meta_key: bool,
380) -> Result<String> {
381    return MATHML_INSTANCE.with(|package_instance| {
382        let package_instance = package_instance.borrow();
383        let mathml = get_element(&package_instance);
384        return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
385    });
386}
387
388/// Given a navigation command, the current node is moved accordingly.
389/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
390/// The valid commands are:
391/// * Standard move commands:
392///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
393/// * Movement in a table or elementary math:
394///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
395/// * Moving into children or out to parents:
396///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
397/// * Undo the last movement command:
398///   `MoveLastLocation`
399/// * Read commands (standard speech):
400///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
401/// * Describe commands (overview):
402///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
403/// * Location information:
404///   `WhereAmI`, `WhereAmIAll`
405/// * Change navigation modes (circle up/down):
406///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
407/// * Speak the current navigation mode
408///   `ToggleSpeakMode`
409///
410/// There are 10 place markers that can be set/read/described or moved to.
411/// * Setting:
412///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
413/// * Reading:
414///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
415/// * Describing:
416///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
417/// * Moving:
418///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
419///
420/// When done with Navigation, call with `Exit`
421pub fn do_navigate_command(command: String) -> Result<String> {
422    enable_logs();
423    let command = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
424    if command.is_none() {
425        bail!("Unknown command in call to DoNavigateCommand()");
426    };
427    let command = *command.unwrap();
428    return MATHML_INSTANCE.with(|package_instance| {
429        let package_instance = package_instance.borrow();
430        let mathml = get_element(&package_instance);
431        return do_navigate_command_string(mathml, command);
432    });
433}
434
435/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
436/// An error is returned if the 'id' doesn't exist
437pub fn set_navigation_node(id: String, offset: usize) -> Result<()> {
438    enable_logs();
439    return MATHML_INSTANCE.with(|package_instance| {
440        let package_instance = package_instance.borrow();
441        let mathml = get_element(&package_instance);
442        return set_navigation_node_from_id(mathml, id, offset);
443    });
444}
445
446/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
447/// The offset is needed for token elements that have multiple characters.
448pub fn get_navigation_mathml() -> Result<(String, usize)> {
449    return MATHML_INSTANCE.with(|package_instance| {
450        let package_instance = package_instance.borrow();
451        let mathml = get_element(&package_instance);
452        return NAVIGATION_STATE.with(|nav_stack| {
453            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
454                Err(e) => Err(e),
455                Ok((found, offset)) => Ok((mml_to_string(found), offset)),
456            };
457        });
458    });
459}
460
461/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
462/// `offset` (not yet implemented)
463/// The offset is needed for token elements that have multiple characters.
464pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
465    enable_logs();
466    return MATHML_INSTANCE.with(|package_instance| {
467        let package_instance = package_instance.borrow();
468        let mathml = get_element(&package_instance);
469        return Ok(NAVIGATION_STATE.with(|nav_stack| {
470            return nav_stack.borrow().get_navigation_mathml_id(mathml);
471        }));
472    });
473}
474
475/// Return the start and end braille character positions associated with the current (navigation) node.
476pub fn get_braille_position() -> Result<(usize, usize)> {
477    enable_logs();
478    return MATHML_INSTANCE.with(|package_instance| {
479        let package_instance = package_instance.borrow();
480        let mathml = get_element(&package_instance);
481        let nav_node = get_navigation_mathml_id()?;
482        let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
483        return Ok((start, end));
484    });
485}
486
487/// Given a 0-based braille position, return the smallest MathML node enclosing it.
488/// This node might be a leaf with an offset.
489pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
490    enable_logs();
491    return MATHML_INSTANCE.with(|package_instance| {
492        let package_instance = package_instance.borrow();
493        let mathml = get_element(&package_instance);
494        return crate::braille::get_navigation_node_from_braille_position(mathml, position);
495    });
496}
497
498pub fn get_supported_braille_codes() -> Vec<String> {
499    enable_logs();
500    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
501    let braille_dir = rules_dir.join("Braille");
502    let mut braille_code_paths = Vec::new();
503
504    find_all_dirs_shim(&braille_dir, &mut braille_code_paths);
505    let mut braille_code_paths = braille_code_paths.iter()
506                    .map(|path| path.strip_prefix(&braille_dir).unwrap().to_string_lossy().to_string())
507                    .filter(|string_path| !string_path.is_empty() )
508                    .collect::<Vec<String>>();
509    braille_code_paths.sort();
510
511    return braille_code_paths;
512 }
513
514/// Returns a Vec of all supported languages ("en", "es", ...)
515pub fn get_supported_languages() -> Vec<String> {
516    enable_logs();
517    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
518    let lang_dir = rules_dir.join("Languages");
519    let mut lang_paths = Vec::new();
520
521    find_all_dirs_shim(&lang_dir, &mut lang_paths);
522    let mut language_paths = lang_paths.iter()
523                    .map(|path| path.strip_prefix(&lang_dir).unwrap()
524                                              .to_string_lossy()
525                                              .replace(std::path::MAIN_SEPARATOR, "-")
526                                              .to_string())
527                    .filter(|string_path| !string_path.is_empty() )
528                    .collect::<Vec<String>>();
529
530    // make sure the 'zz' test dir isn't included (build.rs removes it, but for debugging is there)
531    language_paths.retain(|s| !s.starts_with("zz"));
532    language_paths.sort();
533    return language_paths;
534 }
535
536 pub fn get_supported_speech_styles(lang: String) -> Vec<String> {
537    enable_logs();
538    let rules_dir = crate::prefs::PreferenceManager::get().borrow().get_rules_dir();
539    let lang_dir = rules_dir.join("Languages").join(lang);
540    let mut speech_styles = find_files_in_dir_that_ends_with_shim(&lang_dir, "_Rules.yaml");
541    for file_name in &mut speech_styles {
542        file_name.truncate(file_name.len() - "_Rules.yaml".len())
543    }
544    speech_styles.sort();
545    // remove duplicates -- shouldn't be any, but just in case
546    let mut i = 1;
547    while i < speech_styles.len() {
548        if speech_styles[i-1] == speech_styles[i] {
549            speech_styles.remove(i);
550        } else {
551            i += 1;
552        }
553    }
554    return speech_styles;
555 }
556
557// utility functions
558
559/// Copy (recursively) the (MathML) element and return the new one.
560/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
561/// Convert the returned error from set_mathml, etc., to a useful string for display
562pub fn copy_mathml(mathml: Element) -> Element {
563    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
564    let children = mathml.children();
565    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
566    mathml.attributes().iter().for_each(|attr| {
567        new_mathml.set_attribute_value(attr.name(), attr.value());
568    });
569
570    // can't use is_leaf/as_text because this is also used with the intent tree
571    if children.len() == 1 {
572        if let Some(text) = children[0].text() {
573        new_mathml.set_text(text.text());
574        return new_mathml;
575        }
576    }
577
578    let mut new_children = Vec::with_capacity(children.len());
579    for child in children {
580        let child = as_element(child);
581        let new_child = copy_mathml(child);
582        new_children.push(new_child);
583    }
584    new_mathml.append_children(new_children);
585    return new_mathml;
586}
587
588pub fn errors_to_string(e: &Error) -> String {
589    enable_logs();
590    let mut result = String::default();
591    let mut first_time = true;
592    for e in e.iter() {
593        if first_time {
594            result = format!("{e}\n");
595            first_time = false;
596        } else {
597            result += &format!("caused by: {e}\n");
598        }
599    }
600    return result;
601}
602
603fn add_ids(mathml: Element) -> Element {
604    use std::time::SystemTime;
605    let time = if cfg!(target_family = "wasm") {
606        fastrand::usize(..)
607    } else {
608        SystemTime::now()
609            .duration_since(SystemTime::UNIX_EPOCH)
610            .unwrap()
611            .as_millis() as usize
612    };
613    let mut time_part = radix_fmt::radix(time, 36).to_string();
614    if time_part.len() < 3 {
615        time_part.push_str("a2c");      // needs to be at least three chars
616    }
617    let mut random_part = radix_fmt::radix(fastrand::u32(..), 36).to_string();
618    if random_part.len() < 4 {
619        random_part.push_str("a1b2");      // needs to be at least four chars
620    }
621    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
622    add_ids_to_all(mathml, &prefix, 0);
623    return mathml;
624
625    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
626        let mut count = count;
627        if mathml.attribute("id").is_none() {
628            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
629            mathml.set_attribute_value("data-id-added", "true");
630            count += 1;
631        };
632
633        if crate::xpath_functions::is_leaf(mathml) {
634            return count;
635        }
636
637        for child in mathml.children() {
638            let child = as_element(child);
639            count = add_ids_to_all(child, id_prefix, count);
640        }
641        return count;
642    }
643}
644
645pub fn get_element(package: &Package) -> Element<'_> {
646    enable_logs();
647    let doc = package.as_document();
648    let mut result = None;
649    for root_child in doc.root().children() {
650        if let ChildOfRoot::Element(e) = root_child {
651            assert!(result.is_none());
652            result = Some(e);
653        }
654    }
655    return result.unwrap();
656}
657
658/// Get the intent after setting the MathML
659/// Used in testing
660#[allow(dead_code)]
661pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
662    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
663    let mathml = cleanup_mathml(mathml)?;
664    return crate::speech::intent_from_mathml(mathml, doc);
665}
666
667#[allow(dead_code)]
668fn trim_doc(doc: &Document) {
669    for root_child in doc.root().children() {
670        if let ChildOfRoot::Element(e) = root_child {
671            trim_element(e, false);
672        } else {
673            doc.root().remove_child(root_child); // comment or processing instruction
674        }
675    }
676}
677
678/// Not really meant to be public -- used by tests in some packages
679pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
680    // "<mtext>this is text</mtext" results in 3 text children
681    // these are combined into one child as it makes code downstream simpler
682
683    // space, tab, newline, carriage return all get collapsed to a single space
684    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}', '\u{000D}'];
685    lazy_static! {
686        static ref WHITESPACE_MATCH: Regex = Regex::new(r#"[ \u{0009}\u{000A}\u{000D}]+"#).unwrap();
687    }
688
689    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
690        // Assume it is HTML inside of the leaf -- turn the HTML into a string
691        make_leaf_element(e);
692        return;
693    }
694
695    let mut single_text = "".to_string();
696    for child in e.children() {
697        match child {
698            ChildOfElement::Element(c) => {
699                trim_element(c, allow_structure_in_leaves);
700            }
701            ChildOfElement::Text(t) => {
702                single_text += t.text();
703                e.remove_child(child);
704            }
705            _ => {
706                e.remove_child(child);
707            }
708        }
709    }
710
711    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
712    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
713        // intent-literal comes from testing intent
714        // FIX: we have a problem -- what should happen???
715        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
716        if !single_text.trim_matches(WHITESPACE).is_empty() {
717            error!(
718                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{single_text}'"
719            );
720        }
721        return;
722    }
723    if e.children().is_empty() && !single_text.is_empty() {
724        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
725        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
726    }
727
728    fn make_leaf_element(mathml_leaf: Element) {
729        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
730        // Here, we take convert them to leaves by grabbing up all the text and making that the content
731        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
732        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
733        let children = mathml_leaf.children();
734        if children.is_empty() {
735            return;
736        }
737
738        // gather up the text
739        let mut text = "".to_string();
740        for child in children {
741            let child_text = match child {
742                ChildOfElement::Element(child) => {
743                    if name(child) == "mglyph" {
744                        child.attribute_value("alt").unwrap_or("").to_string()
745                    } else {
746                        gather_text(child)
747                    }
748                }
749                ChildOfElement::Text(t) => {
750                    // debug!("ChildOfElement::Text: '{}'", t.text());
751                    t.text().to_string()
752                }
753                _ => "".to_string(),
754            };
755            if !child_text.is_empty() {
756                text += &child_text;
757            }
758        }
759
760        // get rid of the old children and replace with the text we just built
761        mathml_leaf.clear_children();
762        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
763        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));
764
765        /// gather up all the contents of the element and return them with a leading space
766        fn gather_text(html: Element) -> String {
767            let mut text = "".to_string(); // since we are throwing out the element tag, add a space between the contents
768            for child in html.children() {
769                match child {
770                    ChildOfElement::Element(child) => {
771                        text += &gather_text(child);
772                    }
773                    ChildOfElement::Text(t) => text += t.text(),
774                    _ => (),
775                }
776            }
777            // debug!("gather_text: '{}'", text);
778            return text;
779        }
780    }
781}
782
783// used for testing trim
784/// returns Ok() if two Documents are equal or some info where they differ in the Err
785#[allow(dead_code)]
786fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
787    // assume 'e' doesn't have element children until proven otherwise
788    // this means we keep Text children until we are proven they aren't needed
789    if doc1.root().children().len() != doc2.root().children().len() {
790        bail!(
791            "Children of docs have {} != {} children",
792            doc1.root().children().len(),
793            doc2.root().children().len()
794        );
795    }
796
797    for (i, (c1, c2)) in doc1
798        .root()
799        .children()
800        .iter()
801        .zip(doc2.root().children().iter())
802        .enumerate()
803    {
804        match c1 {
805            ChildOfRoot::Element(e1) => {
806                if let ChildOfRoot::Element(e2) = c2 {
807                    is_same_element(*e1, *e2)?;
808                } else {
809                    bail!("child #{}, first is element, second is something else", i);
810                }
811            }
812            ChildOfRoot::Comment(com1) => {
813                if let ChildOfRoot::Comment(com2) = c2 {
814                    if com1.text() != com2.text() {
815                        bail!("child #{} -- comment text differs", i);
816                    }
817                } else {
818                    bail!("child #{}, first is comment, second is something else", i);
819                }
820            }
821            ChildOfRoot::ProcessingInstruction(p1) => {
822                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
823                    if p1.target() != p2.target() || p1.value() != p2.value() {
824                        bail!("child #{} -- processing instruction differs", i);
825                    }
826                } else {
827                    bail!(
828                        "child #{}, first is processing instruction, second is something else",
829                        i
830                    );
831                }
832            }
833        }
834    }
835    return Ok(());
836}
837
838/// returns Ok() if two Documents are equal or some info where they differ in the Err
839// Not really meant to be public -- used by tests in some packages
840#[allow(dead_code)]
841pub fn is_same_element(e1: Element, e2: Element) -> Result<()> {
842    enable_logs();
843    if name(e1) != name(e2) {
844        bail!("Names not the same: {}, {}", name(e1), name(e2));
845    }
846
847    // assume 'e' doesn't have element children until proven otherwise
848    // this means we keep Text children until we are proven they aren't needed
849    if e1.children().len() != e2.children().len() {
850        bail!(
851            "Children of {} have {} != {} children",
852            name(e1),
853            e1.children().len(),
854            e2.children().len()
855        );
856    }
857
858    if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes()) {
859        bail!("In element {}, {}", name(e1), e);
860    }
861
862    for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
863        match c1 {
864            ChildOfElement::Element(child1) => {
865                if let ChildOfElement::Element(child2) = c2 {
866                    is_same_element(*child1, *child2)?;
867                } else {
868                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
869                }
870            }
871            ChildOfElement::Comment(com1) => {
872                if let ChildOfElement::Comment(com2) = c2 {
873                    if com1.text() != com2.text() {
874                        bail!("{} child #{} -- comment text differs", name(e1), i);
875                    }
876                } else {
877                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
878                }
879            }
880            ChildOfElement::ProcessingInstruction(p1) => {
881                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
882                    if p1.target() != p2.target() || p1.value() != p2.value() {
883                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
884                    }
885                } else {
886                    bail!(
887                        "{} child #{}, first is processing instruction, second is something else",
888                        name(e1),
889                        i
890                    );
891                }
892            }
893            ChildOfElement::Text(t1) => {
894                if let ChildOfElement::Text(t2) = c2 {
895                    if t1.text() != t2.text() {
896                        bail!("{} child #{} --  text differs", name(e1), i);
897                    }
898                } else {
899                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
900                }
901            }
902        }
903    }
904    return Ok(());
905
906    /// compares attributes -- '==' didn't seems to work
907    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>) -> Result<()> {
908        if attrs1.len() != attrs2.len() {
909            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
910        }
911        // can't guarantee attrs are in the same order
912        for attr1 in attrs1 {
913            if let Some(found_attr2) = attrs2
914                .iter()
915                .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
916            {
917                if attr1.value() == found_attr2.value() {
918                    continue;
919                } else {
920                    bail!(
921                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
922                        attr1.name().local_part(),
923                        attr1.value(),
924                        found_attr2.value()
925                    );
926                }
927            } else {
928                bail!(
929                    "Attribute name {} not in [{}]",
930                    print_attr(&attr1),
931                    print_attrs(&attrs2)
932                );
933            }
934        }
935        return Ok(());
936
937        fn print_attr(attr: &Attribute) -> String {
938            return format!("@{}='{}'", attr.name().local_part(), attr.value());
939        }
940        fn print_attrs(attrs: &[Attribute]) -> String {
941            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
942        }
943    }
944}
945
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses `xml` (panicking if it isn't well formed), trims the resulting document,
    /// and returns the package that owns it.
    fn parse_and_trim(xml: &str) -> Package {
        let package = parser::parse(xml).expect("Failed to parse input");
        trim_doc(&package.as_document());
        package
    }

    /// Parses and trims both strings and compares the resulting documents.
    /// Returns `true` when they match; panics with the reported difference when they don't.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let target_package = parse_and_trim(target);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        let test_package = parse_and_trim(test);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        if let Err(e) = is_same_doc(&test_package.as_document(), &target_package.as_document()) {
            panic!("{}", e);
        }
        true
    }

    /// A string compared against itself is equal
    #[test]
    fn trim_same() {
        let mathml = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(mathml, mathml));
    }

    /// Whitespace between elements and inside tags doesn't matter
    #[test]
    fn trim_whitespace() {
        let compact = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let spaced = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(compact, spaced));
    }

    /// Same as `trim_whitespace`, but the leaf contains a non-breaking space
    #[test]
    fn no_trim_whitespace_nbsp() {
        let compact = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let spaced = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(compact, spaced));
    }

    /// Comments are removed by trimming
    #[test]
    fn trim_comment() {
        let spaced = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let commented = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        assert!(are_parsed_strs_equal(commented, spaced));
    }

    /// An 'mglyph' inside a leaf is replaced by the value of its 'alt' attribute
    #[test]
    fn replace_mglyph() {
        let with_glyphs = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let expected = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
        assert!(are_parsed_strs_equal(with_glyphs, expected));
    }

    /// Documents with different leaf content must be reported as different
    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        // `are_parsed_strs_equal` panics on a mismatch, so do the comparison by hand
        let first = parse_and_trim(whitespace_str);
        debug!("doc1:\n{}", mml_to_string(get_element(&first)));

        let second = parse_and_trim(different_str);
        debug!("doc2:\n{}", mml_to_string(get_element(&second)));

        let comparison = is_same_doc(&first.as_document(), &second.as_document());
        assert!(comparison.is_err());
    }

    /// Named entities and their numeric character references give the same result from `set_mathml`
    #[test]
    fn test_entities() {
        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // the generated ids are unique, so they have to be removed before comparing
        lazy_static! {
            static ref ID_MATCH: Regex = Regex::new(r#"id='.+?' "#).unwrap();
        }
        let without_ids = |mathml: &str| -> String {
            let annotated = set_mathml(mathml.to_string()).unwrap();
            ID_MATCH.replace_all(&annotated, "").into_owned()
        };

        assert_eq!(
            without_ids("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>"),
            without_ids("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>"),
            "normal entity test failed"
        );

        assert_eq!(
            without_ids("<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>"),
            without_ids("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>"),
            "special entities quote test failed"
        );

        assert_eq!(
            without_ids("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>"),
            without_ids("<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>"),
            "special entities <,>,& test failed"
        );
    }

    /// A failed `set_rules_dir` must not prevent a later, valid call from succeeding
    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
        std::env::set_var("MathCATRulesDir", "MathCATRulesDir");

        let invalid = set_rules_dir("someInvalidRulesDir".to_string());
        assert!(invalid.is_err());

        let recovered = set_rules_dir(super::super::abs_rules_dir_path());
        assert!(
            recovered.is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );

        let parsed = set_mathml("<math><mn>1</mn></math>".to_string());
        assert!(parsed.is_ok());
    }

    /// One HTML element inside an 'mtext' is reduced to its text
    #[test]
    fn single_html_in_mtext() {
        let html = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let plain = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html, plain));
    }

    /// Several sibling HTML elements inside an 'mtext' are reduced to their text
    #[test]
    fn multiple_html_in_mtext() {
        let html = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let plain = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html, plain));
    }

    /// Nested HTML elements inside an 'mtext' are reduced to their text (nothing is added between them)
    #[test]
    fn nested_html_in_mtext() {
        let html = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let plain = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html, plain));
    }

    /// An empty HTML element inside an 'mtext' contributes nothing
    #[test]
    fn empty_html_in_mtext() {
        let html = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let plain = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html, plain));
    }
}