libmathcat/
interface.rs

1//! The interface module provides functionality both for calling from an API and also running the code from `main`.
2//!
3#![allow(non_snake_case)]
4#![allow(clippy::needless_return)]
5use std::cell::RefCell;
6
7use crate::canonicalize::{as_text, create_mathml_element};
8use crate::errors::*;
9use phf::phf_map;
10use regex::{Captures, Regex};
11use sxd_document::dom::*;
12use sxd_document::parser;
13use sxd_document::Package;
14
15use crate::canonicalize::{as_element, name};
16
17use crate::navigate::*;
18use crate::pretty_print::mml_to_string;
19use crate::xpath_functions::{is_leaf, IsNode};
20
21#[cfg(feature = "enable-logs")]
22use std::sync::Once;
23#[cfg(feature = "enable-logs")]
24static INIT: Once = Once::new();
25
/// Turn on logging the first time this is called; later calls do nothing.
///
/// Nothing happens unless the crate is built with the `enable-logs` feature, and within
/// that feature the only logger set up here is the Android one.
fn enable_logs() {
    #[cfg(feature = "enable-logs")]
    INIT.call_once(|| {
        #[cfg(target_os = "android")]
        {
            extern crate android_logger;
            extern crate log;

            use android_logger::*;
            use log::*;

            // log everything (down to trace level) under the "MathCat" tag
            let config = Config::default()
                .with_max_level(LevelFilter::Trace)
                .with_tag("MathCat");
            android_logger::init_once(config);
            trace!("Activated Android logger!");
        }
    });
}
46
47// wrap up some common functionality between the call from 'main' and AT
48fn cleanup_mathml(mathml: Element) -> Result<Element> {
49    trim_element(mathml, false);
50    let mathml = crate::canonicalize::canonicalize(mathml)?;
51    let mathml = add_ids(mathml);
52    return Ok(mathml);
53}
54
thread_local! {
    /// Per-thread [`Package`] holding the MathML that is currently being navigated (and also spoken and brailled).
    ///
    /// It starts out as the parse of an empty `<math></math>` (see `init_mathml_instance`)
    /// and is replaced by `set_mathml` each time new MathML is set.
    pub static MATHML_INSTANCE: RefCell<Package> = init_mathml_instance();
}
59
60fn init_mathml_instance() -> RefCell<Package> {
61    let package = parser::parse("<math></math>")
62        .expect("Internal error in 'init_mathml_instance;: didn't parse initializer string");
63    return RefCell::new(package);
64}
65
66/// Set the Rules directory
67/// IMPORTANT: this should be the very first call to MathCAT. If 'dir' is an empty string, the environment var 'MathCATRulesDir' is tried.
68pub fn set_rules_dir(dir: String) -> Result<()> {
69    enable_logs();
70    use std::path::PathBuf;
71    let dir = if dir.is_empty() {
72        std::env::var_os("MathCATRulesDir")
73            .unwrap_or_default()
74            .to_str()
75            .unwrap()
76            .to_string()
77    } else {
78        dir
79    };
80    let pref_manager = crate::prefs::PreferenceManager::get();
81    return pref_manager.borrow_mut().initialize(PathBuf::from(dir));
82}
83
84/// Returns the version number (from Cargo.toml) of the build
85pub fn get_version() -> String {
86    enable_logs();
87    const VERSION: &str = env!("CARGO_PKG_VERSION");
88    return VERSION.to_string();
89}
90
91/// This will override any previous MathML that was set.
92/// This returns canonical MathML with 'id's set on any node that doesn't have an id.
93/// The ids can be used for sync highlighting if the `Bookmark` API preference is true.
94pub fn set_mathml(mathml_str: String) -> Result<String> {
95    enable_logs();
96    lazy_static! {
97        // if these are present when resent to MathJaX, MathJaX crashes (https://github.com/mathjax/MathJax/issues/2822)
98        static ref MATHJAX_V2: Regex = Regex::new(r#"class *= *['"]MJX-.*?['"]"#).unwrap();
99        static ref MATHJAX_V3: Regex = Regex::new(r#"class *= *['"]data-mjx-.*?['"]"#).unwrap();
100        static ref NAMESPACE_DECL: Regex = Regex::new(r#"xmlns:[[:alpha:]]+"#).unwrap();     // very limited namespace prefix match
101        static ref PREFIX: Regex = Regex::new(r#"(</?)[[:alpha:]]+:"#).unwrap();     // very limited namespace prefix match
102        static ref HTML_ENTITIES: Regex = Regex::new(r#"&([a-zA-Z]+?);"#).unwrap();
103    }
104
105    NAVIGATION_STATE.with(|nav_stack| {
106        nav_stack.borrow_mut().reset();
107    });
108
109    // We need the main definitions files to be read in so canonicalize can work.
110    // This call reads all of them for the current preferences, but that's ok since they will likely be used
111    crate::speech::SPEECH_RULES.with(|rules| rules.borrow_mut().read_files())?;
112
113    return MATHML_INSTANCE.with(|old_package| {
114        static HTML_ENTITIES_MAPPING: phf::Map<&str, &str> = include!("entities.in");
115
116        let mut error_message = "".to_string(); // can't return a result inside the replace_all, so we do this hack of setting the message and then returning the error
117                                                // need to deal with character data and convert to something the parser knows
118        let mathml_str =
119            HTML_ENTITIES.replace_all(&mathml_str, |cap: &Captures| match HTML_ENTITIES_MAPPING.get(&cap[1]) {
120                None => {
121                    error_message = format!("No entity named '{}'", &cap[0]);
122                    cap[0].to_string()
123                }
124                Some(&ch) => ch.to_string(),
125            });
126
127        if !error_message.is_empty() {
128            bail!(error_message);
129        }
130        let mathml_str = MATHJAX_V2.replace_all(&mathml_str, "");
131        let mathml_str = MATHJAX_V3.replace_all(&mathml_str, "");
132
133        // the speech rules use the xpath "name" function and that includes the prefix
134        // getting rid of the prefix properly probably involves a recursive replacement in the tree
135        // if the prefix is used, it is almost certainly something like "m" or "mml", so this cheat will work.
136        let mathml_str = NAMESPACE_DECL.replace(&mathml_str, "xmlns"); // do this before the PREFIX replace!
137        let mathml_str = PREFIX.replace_all(&mathml_str, "$1");
138
139        let new_package = parser::parse(&mathml_str);
140        if let Err(e) = new_package {
141            bail!("Invalid MathML input:\n{}\nError is: {}", &mathml_str, &e.to_string());
142        }
143
144        let new_package = new_package.unwrap();
145        let mathml = get_element(&new_package);
146        let mathml = cleanup_mathml(mathml)?;
147        let mathml_string = mml_to_string(mathml);
148        old_package.replace(new_package);
149
150        return Ok(mathml_string);
151    });
152}
153
154/// Get the spoken text of the MathML that was set.
155/// The speech takes into account any AT or user preferences.
156pub fn get_spoken_text() -> Result<String> {
157    enable_logs();
158    // use std::time::{Instant};
159    // let instant = Instant::now();
160    return MATHML_INSTANCE.with(|package_instance| {
161        let package_instance = package_instance.borrow();
162        let mathml = get_element(&package_instance);
163        let new_package = Package::new();
164        let intent = crate::speech::intent_from_mathml(mathml, new_package.as_document())?;
165        debug!("Intent tree:\n{}", mml_to_string(intent));
166        let speech = crate::speech::speak_mathml(intent, "")?;
167        // info!("Time taken: {}ms", instant.elapsed().as_millis());
168        return Ok(speech);
169    });
170}
171
172/// Get the spoken text for an overview of the MathML that was set.
173/// The speech takes into account any AT or user preferences.
174/// Note: this implementation for is currently minimal and should not be used.
175pub fn get_overview_text() -> Result<String> {
176    enable_logs();
177    // use std::time::{Instant};
178    // let instant = Instant::now();
179    return MATHML_INSTANCE.with(|package_instance| {
180        let package_instance = package_instance.borrow();
181        let mathml = get_element(&package_instance);
182        let speech = crate::speech::overview_mathml(mathml, "")?;
183        // info!("Time taken: {}ms", instant.elapsed().as_millis());
184        return Ok(speech);
185    });
186}
187
188/// Get the value of the named preference.
189/// None is returned if `name` is not a known preference.
190pub fn get_preference(name: String) -> Result<String> {
191    enable_logs();
192    use crate::prefs::NO_PREFERENCE;
193    return crate::speech::SPEECH_RULES.with(|rules| {
194        let rules = rules.borrow();
195        let pref_manager = rules.pref_manager.borrow();
196        let mut value = pref_manager.pref_to_string(&name);
197        if value == NO_PREFERENCE {
198            value = pref_manager.pref_to_string(&name);
199        }
200        if value == NO_PREFERENCE {
201            bail!("No preference named '{}'", &name);
202        } else {
203            return Ok(value);
204        }
205    });
206}
207
208/// Set a MathCAT preference. The preference name should be a known preference name.
209/// The value should either be a string or a number (depending upon the preference being set)
210/// The list of known user preferences is in the MathCAT user documentation.
211/// Here are common preferences set by programs (not settable by the user):
212/// * TTS -- SSML, SAPI5, None
213/// * Pitch -- normalized at '1.0'
214/// * Rate -- words per minute (should match current speech rate).
215///       There is a separate "MathRate" that is user settable that causes a relative percentage change from this rate.
216/// * Volume -- default 100
217/// * Voice -- set a voice to use (not implemented)
218/// * Gender -- set pick any voice of the given gender (not implemented)
219/// * Bookmark -- set to `true` if a `mark`/`bookmark` should be part of the returned speech (used for sync highlighting)
220///
221/// Important: both the preference name and value are case-sensitive
222///
223/// This function can be called multiple times to set different values.
224/// The values are persistent and extend beyond calls to [`set_mathml`].
225/// A value can be overwritten by calling this function again with a different value.
226///
227/// Be careful setting preferences -- these potentially override user settings, so only preferences that really need setting should be set.
228pub fn set_preference(name: String, value: String) -> Result<()> {
229    enable_logs();
230    // "LanguageAuto" allows setting the language dir without actually changing the value of "Language" from Auto
231    let mut value = value;
232    if name == "Language" || name == "LanguageAuto" {
233        // check the format
234        if value != "Auto" {
235            // could get es, es-419, or en-us-nyc ...  we only care about the first two parts so we clean it up a little
236            let mut lang_country_split = value.split('-');
237            let language = lang_country_split.next().unwrap_or("");
238            let country = lang_country_split.next().unwrap_or("");
239            if language.len() != 2 {
240                bail!(
241                    "Improper format for 'Language' preference '{}'. Should be of form 'en' or 'en-gb'",
242                    value
243                );
244            }
245            let mut new_lang_country = language.to_string(); // need a temp value because 'country' is borrowed from 'value' above
246            if !country.is_empty() {
247                new_lang_country.push('-');
248                new_lang_country.push_str(country);
249            }
250            value = new_lang_country;
251        }
252        if name == "LanguageAuto" && value == "Auto" {
253            bail!("'LanguageAuto' can not have the value 'Auto'");
254        }
255    }
256
257    crate::speech::SPEECH_RULES.with(|rules| {
258        let rules = rules.borrow_mut();
259        if let Some(error_string) = rules.get_error() {
260            bail!("{}", error_string);
261        }
262
263        // we set the value even if it was the same as the old value because this might override a potentially changed future user value
264        let mut pref_manager = rules.pref_manager.borrow_mut();
265        if name == "LanguageAuto" {
266            let language_pref = pref_manager.pref_to_string("Language");
267            if language_pref != "Auto" {
268                bail!(
269                    "'LanguageAuto' can only be used when 'Language' has the value 'Auto'; Language={}",
270                    language_pref
271                );
272            }
273        }
274        let lower_case_value = value.to_lowercase();
275        if lower_case_value == "true" || lower_case_value == "false" {
276            pref_manager.set_api_boolean_pref(&name, value.to_lowercase() == "true");
277        } else {
278            match name.as_str() {
279                "Pitch" | "Rate" | "Volume" | "CapitalLetters_Pitch" | "MathRate" | "PauseFactor" => {
280                    pref_manager.set_api_float_pref(&name, to_float(&name, &value)?)
281                }
282                _ => {
283                    pref_manager.set_string_pref(&name, &value)?;
284                }
285            }
286        };
287        return Ok::<(), Error>(());
288    })?;
289
290    return Ok(());
291
292    fn to_float(name: &str, value: &str) -> Result<f64> {
293        return match value.parse::<f64>() {
294            Ok(val) => Ok(val),
295            Err(_) => bail!("SetPreference: preference'{}'s value '{}' must be a float", name, value),
296        };
297    }
298}
299
300/// Get the braille associated with the MathML that was set by [`set_mathml`].
301/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
302/// If 'nav_node_id' is given, it is highlighted based on the value of `BrailleNavHighlight` (default: `EndPoints`)
303pub fn get_braille(nav_node_id: String) -> Result<String> {
304    enable_logs();
305    // use std::time::{Instant};
306    // let instant = Instant::now();
307    return MATHML_INSTANCE.with(|package_instance| {
308        let package_instance = package_instance.borrow();
309        let mathml = get_element(&package_instance);
310        let braille = crate::braille::braille_mathml(mathml, &nav_node_id)?.0;
311        // info!("Time taken: {}ms", instant.elapsed().as_millis());
312        return Ok(braille);
313    });
314}
315
316/// Get the braille associated with the current navigation focus of the MathML that was set by [`set_mathml`].
317/// The braille returned depends upon the preference for the `code` preference (default `Nemeth`).
318/// The returned braille is brailled as if the current navigation focus is the entire expression to be brailled.
319pub fn get_navigation_braille() -> Result<String> {
320    enable_logs();
321    return MATHML_INSTANCE.with(|package_instance| {
322        let package_instance = package_instance.borrow();
323        let mathml = get_element(&package_instance);
324        let new_package = Package::new(); // used if we need to create a new tree
325        let new_doc = new_package.as_document();
326        let nav_mathml = NAVIGATION_STATE.with(|nav_stack| {
327            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
328                Err(e) => Err(e),
329                Ok((found, offset)) => {
330                    // get the MathML node and wrap it inside of a <math> element
331                    // if the offset is given, we need to get the character it references
332                    if offset == 0 {
333                        if name(found) == "math" {
334                            Ok(found)
335                        } else {
336                            let new_mathml = create_mathml_element(&new_doc, "math");
337                            new_mathml.append_child(copy_mathml(found));
338                            new_doc.root().append_child(new_mathml);
339                            Ok(new_mathml)
340                        }
341                    } else if !is_leaf(found) {
342                        bail!(
343                            "Internal error: non-zero offset '{}' on a non-leaf element '{}'",
344                            offset,
345                            name(found)
346                        );
347                    } else if let Some(ch) = as_text(found).chars().nth(offset) {
348                        let internal_mathml = create_mathml_element(&new_doc, name(found));
349                        internal_mathml.set_text(&ch.to_string());
350                        let new_mathml = create_mathml_element(&new_doc, "math");
351                        new_mathml.append_child(internal_mathml);
352                        new_doc.root().append_child(new_mathml);
353                        Ok(new_mathml)
354                    } else {
355                        bail!(
356                            "Internal error: offset '{}' on leaf element '{}' doesn't exist",
357                            offset,
358                            mml_to_string(found)
359                        );
360                    }
361                }
362            };
363        })?;
364
365        let braille = crate::braille::braille_mathml(nav_mathml, "")?.0;
366        return Ok(braille);
367    });
368}
369
370/// Given a key code along with the modifier keys, the current node is moved accordingly (or value reported in some cases).
371/// `key` is the [keycode](https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent/keyCode#constants_for_keycode_value) for the key (in JavaScript, `ev.key_code`)
372/// The spoken text for the new current node is returned.
373pub fn do_navigate_keypress(
374    key: usize,
375    shift_key: bool,
376    control_key: bool,
377    alt_key: bool,
378    meta_key: bool,
379) -> Result<String> {
380    return MATHML_INSTANCE.with(|package_instance| {
381        let package_instance = package_instance.borrow();
382        let mathml = get_element(&package_instance);
383        return do_mathml_navigate_key_press(mathml, key, shift_key, control_key, alt_key, meta_key);
384    });
385}
386
387/// Given a navigation command, the current node is moved accordingly.
388/// This is a higher level interface than `do_navigate_keypress` for applications that want to interpret the keys themselves.
389/// The valid commands are:
390/// * Standard move commands:
391///   `MovePrevious`, `MoveNext`, `MoveStart`, `MoveEnd`, `MoveLineStart`, `MoveLineEnd`
392/// * Movement in a table or elementary math:
393///   `MoveCellPrevious`, `MoveCellNext`, `MoveCellUp`, `MoveCellDown`, `MoveColumnStart`, `MoveColumnEnd`
394/// * Moving into children or out to parents:
395///   `ZoomIn`, `ZoomOut`, `ZoomOutAll`, `ZoomInAll`
396/// * Undo the last movement command:
397///   `MoveLastLocation`
398/// * Read commands (standard speech):
399///   `ReadPrevious`, `ReadNext`, `ReadCurrent`, `ReadCellCurrent`, `ReadStart`, `ReadEnd`, `ReadLineStart`, `ReadLineEnd`
400/// * Describe commands (overview):
401///   `DescribePrevious`, `DescribeNext`, `DescribeCurrent`
402/// * Location information:
403///   `WhereAmI`, `WhereAmIAll`
404/// * Change navigation modes (circle up/down):
405///   `ToggleZoomLockUp`, `ToggleZoomLockDown`
406/// * Speak the current navigation mode
407///   `ToggleSpeakMode`
408///
409/// There are 10 place markers that can be set/read/described or moved to.
410/// * Setting:
411///   `SetPlacemarker0`, `SetPlacemarker1`, `SetPlacemarker2`, `SetPlacemarker3`, `SetPlacemarker4`, `SetPlacemarker5`, `SetPlacemarker6`, `SetPlacemarker7`, `SetPlacemarker8`, `SetPlacemarker9`
412/// * Reading:
413///   `Read0`, `Read1`, `Read2`, `Read3`, `Read4`, `Read5`, `Read6`, `Read7`, `Read8`, `Read9`
414/// * Describing:
415///   `Describe0`, `Describe1`, `Describe2`, `Describe3`, `Describe4`, `Describe5`, `Describe6`, `Describe7`, `Describe8`, `Describe9`
416/// * Moving:
417///   `MoveTo0`, `MoveTo1`, `MoveTo2`, `MoveTo3`, `MoveTo4`, `MoveTo5`, `MoveTo6`, `MoveTo7`, `MoveTo8`, `MoveTo9`
418///
419/// When done with Navigation, call with `Exit`
420pub fn do_navigate_command(command: String) -> Result<String> {
421    enable_logs();
422    let command = NAV_COMMANDS.get_key(&command); // gets a &'static version of the command
423    if command.is_none() {
424        bail!("Unknown command in call to DoNavigateCommand()");
425    };
426    let command = *command.unwrap();
427    return MATHML_INSTANCE.with(|package_instance| {
428        let package_instance = package_instance.borrow();
429        let mathml = get_element(&package_instance);
430        return do_navigate_command_string(mathml, command);
431    });
432}
433
434/// Given an 'id' and an offset (for tokens), set the navigation node to that id.
435/// An error is returned if the 'id' doesn't exist
436pub fn set_navigation_node(id: String, offset: usize) -> Result<()> {
437    enable_logs();
438    return MATHML_INSTANCE.with(|package_instance| {
439        let package_instance = package_instance.borrow();
440        let mathml = get_element(&package_instance);
441        return set_navigation_node_from_id(mathml, id, offset);
442    });
443}
444
445/// Return the MathML associated with the current (navigation) node and the offset (0-based) from that mathml (not yet implemented)
446/// The offset is needed for token elements that have multiple characters.
447pub fn get_navigation_mathml() -> Result<(String, usize)> {
448    return MATHML_INSTANCE.with(|package_instance| {
449        let package_instance = package_instance.borrow();
450        let mathml = get_element(&package_instance);
451        return NAVIGATION_STATE.with(|nav_stack| {
452            return match nav_stack.borrow_mut().get_navigation_mathml(mathml) {
453                Err(e) => Err(e),
454                Ok((found, offset)) => Ok((mml_to_string(found), offset)),
455            };
456        });
457    });
458}
459
460/// Return the `id` and `offset` (0-based) associated with the current (navigation) node.
461/// `offset` (not yet implemented)
462/// The offset is needed for token elements that have multiple characters.
463pub fn get_navigation_mathml_id() -> Result<(String, usize)> {
464    enable_logs();
465    return MATHML_INSTANCE.with(|package_instance| {
466        let package_instance = package_instance.borrow();
467        let mathml = get_element(&package_instance);
468        return Ok(NAVIGATION_STATE.with(|nav_stack| {
469            return nav_stack.borrow().get_navigation_mathml_id(mathml);
470        }));
471    });
472}
473
474/// Return the start and end braille character positions associated with the current (navigation) node.
475pub fn get_braille_position() -> Result<(usize, usize)> {
476    enable_logs();
477    return MATHML_INSTANCE.with(|package_instance| {
478        let package_instance = package_instance.borrow();
479        let mathml = get_element(&package_instance);
480        let nav_node = get_navigation_mathml_id()?;
481        let (_, start, end) = crate::braille::braille_mathml(mathml, &nav_node.0)?;
482        return Ok((start, end));
483    });
484}
485
486/// Given a 0-based braille position, return the smallest MathML node enclosing it.
487/// This node might be a leaf with an offset.
488pub fn get_navigation_node_from_braille_position(position: usize) -> Result<(String, usize)> {
489    enable_logs();
490    return MATHML_INSTANCE.with(|package_instance| {
491        let package_instance = package_instance.borrow();
492        let mathml = get_element(&package_instance);
493        return crate::braille::get_navigation_node_from_braille_position(mathml, position);
494    });
495}
496
497// utility functions
498
499/// Copy (recursively) the (MathML) element and return the new one.
500/// The Element type does not copy and modifying the structure of an element's child will modify the element, so we need a copy
501/// Convert the returned error from set_mathml, etc., to a useful string for display
502pub fn copy_mathml(mathml: Element) -> Element {
503    // If it represents MathML, the 'Element' can only have Text and Element children along with attributes
504    let children = mathml.children();
505    let new_mathml = create_mathml_element(&mathml.document(), name(mathml));
506    mathml.attributes().iter().for_each(|attr| {
507        new_mathml.set_attribute_value(attr.name(), attr.value());
508    });
509
510    // can't use is_leaf/as_text because this is also used with the intent tree
511    if children.len() == 1 {
512        if let Some(text) = children[0].text() {
513        new_mathml.set_text(text.text());
514        return new_mathml;
515        }
516    }
517
518    let mut new_children = Vec::with_capacity(children.len());
519    for child in children {
520        let child = as_element(child);
521        let new_child = copy_mathml(child);
522        new_children.push(new_child);
523    }
524    new_mathml.append_children(new_children);
525    return new_mathml;
526}
527
528pub fn errors_to_string(e: &Error) -> String {
529    enable_logs();
530    let mut result = String::default();
531    let mut first_time = true;
532    for e in e.iter() {
533        if first_time {
534            result = format!("{}\n", e);
535            first_time = false;
536        } else {
537            result += &format!("caused by: {}\n", e);
538        }
539    }
540    return result;
541}
542
543fn add_ids(mathml: Element) -> Element {
544    use std::time::SystemTime;
545    let time = if cfg!(target_family = "wasm") {
546        rand::random::<usize>()
547    } else {
548        SystemTime::now()
549            .duration_since(SystemTime::UNIX_EPOCH)
550            .unwrap()
551            .as_millis() as usize
552    };
553    let time_part = radix_fmt::radix(time, 36).to_string();
554    let random_part = radix_fmt::radix(rand::random::<usize>(), 36).to_string();
555    let prefix = "M".to_string() + &time_part[time_part.len() - 3..] + &random_part[random_part.len() - 4..] + "-"; // begin with letter
556    add_ids_to_all(mathml, &prefix, 0);
557    return mathml;
558
559    fn add_ids_to_all(mathml: Element, id_prefix: &str, count: usize) -> usize {
560        let mut count = count;
561        if mathml.attribute("id").is_none() {
562            mathml.set_attribute_value("id", (id_prefix.to_string() + &count.to_string()).as_str());
563            mathml.set_attribute_value("data-id-added", "true");
564            count += 1;
565        };
566
567        if crate::xpath_functions::is_leaf(mathml) {
568            return count;
569        }
570
571        for child in mathml.children() {
572            let child = as_element(child);
573            count = add_ids_to_all(child, id_prefix, count);
574        }
575        return count;
576    }
577}
578
579pub fn get_element(package: &Package) -> Element {
580    enable_logs();
581    let doc = package.as_document();
582    let mut result = None;
583    for root_child in doc.root().children() {
584        if let ChildOfRoot::Element(e) = root_child {
585            assert!(result.is_none());
586            result = Some(e);
587        }
588    }
589    return result.unwrap();
590}
591
592/// Get the intent after setting the MathML
593/// Used in testing
594#[allow(dead_code)]
595pub fn get_intent<'a>(mathml: Element<'a>, doc: Document<'a>) -> Result<Element<'a>> {
596    crate::speech::SPEECH_RULES.with(|rules|  rules.borrow_mut().read_files().unwrap());
597    let mathml = cleanup_mathml(mathml)?;
598    return crate::speech::intent_from_mathml(mathml, doc);
599}
600
601#[allow(dead_code)]
602fn trim_doc(doc: &Document) {
603    for root_child in doc.root().children() {
604        if let ChildOfRoot::Element(e) = root_child {
605            trim_element(e, false);
606        } else {
607            doc.root().remove_child(root_child); // comment or processing instruction
608        }
609    }
610}
611
612/// Not really meant to be public -- used by tests in some packages
613pub fn trim_element(e: Element, allow_structure_in_leaves: bool) {
614    // "<mtext>this is text</mtext" results in 3 text children
615    // these are combined into one child as it makes code downstream simpler
616
617    // space, tab, newline, carriage return all get collapsed to a single space
618    const WHITESPACE: &[char] = &[' ', '\u{0009}', '\u{000A}', '\u{000D}'];
619    lazy_static! {
620        static ref WHITESPACE_MATCH: Regex = Regex::new(r#"[ \u{0009}\u{000A}\u{000D}]+"#).unwrap();
621    }
622
623    if is_leaf(e) && (!allow_structure_in_leaves || IsNode::is_mathml(e)) {
624        // Assume it is HTML inside of the leaf -- turn the HTML into a string
625        make_leaf_element(e);
626        return;
627    }
628
629    let mut single_text = "".to_string();
630    for child in e.children() {
631        match child {
632            ChildOfElement::Element(c) => {
633                trim_element(c, allow_structure_in_leaves);
634            }
635            ChildOfElement::Text(t) => {
636                single_text += t.text();
637                e.remove_child(child);
638            }
639            _ => {
640                e.remove_child(child);
641            }
642        }
643    }
644
645    // CSS considers only space, tab, linefeed, and carriage return as collapsable whitespace
646    if !(is_leaf(e) || name(e) == "intent-literal" || single_text.is_empty()) {
647        // intent-literal comes from testing intent
648        // FIX: we have a problem -- what should happen???
649        // FIX: For now, just keep the children and ignore the text and log an error -- shouldn't panic/crash
650        if !single_text.trim_matches(WHITESPACE).is_empty() {
651            error!(
652                "trim_element: both element and textual children which shouldn't happen -- ignoring text '{}'",
653                single_text
654            );
655        }
656        return;
657    }
658    if e.children().is_empty() && !single_text.is_empty() {
659        // debug!("Combining text in {}: '{}' -> '{}'", e.name().local_part(), single_text, trimmed_text);
660        e.set_text(&WHITESPACE_MATCH.replace_all(&single_text, " "));
661    }
662
663    fn make_leaf_element(mathml_leaf: Element) {
664        // MathML leaves like <mn> really shouldn't have non-textual content, but you could have embedded HTML
665        // Here, we take convert them to leaves by grabbing up all the text and making that the content
666        // Potentially, we leave them and let (default) rules do something, but it makes other parts of the code
667        //   messier because checking the text of a leaf becomes Option<&str> rather than just &str
668        let children = mathml_leaf.children();
669        if children.is_empty() {
670            return;
671        }
672
673        // gather up the text
674        let mut text = "".to_string();
675        for child in children {
676            let child_text = match child {
677                ChildOfElement::Element(child) => {
678                    if name(child) == "mglyph" {
679                        child.attribute_value("alt").unwrap_or("").to_string()
680                    } else {
681                        gather_text(child)
682                    }
683                }
684                ChildOfElement::Text(t) => {
685                    // debug!("ChildOfElement::Text: '{}'", t.text());
686                    t.text().to_string()
687                }
688                _ => "".to_string(),
689            };
690            if !child_text.is_empty() {
691                text += &child_text;
692            }
693        }
694
695        // get rid of the old children and replace with the text we just built
696        mathml_leaf.clear_children();
697        mathml_leaf.set_text(WHITESPACE_MATCH.replace_all(&text, " ").trim_matches(WHITESPACE));
698        // debug!("make_leaf_element: text is '{}'", crate::canonicalize::as_text(mathml_leaf));
699
700        /// gather up all the contents of the element and return them with a leading space
701        fn gather_text(html: Element) -> String {
702            let mut text = "".to_string(); // since we are throwing out the element tag, add a space between the contents
703            for child in html.children() {
704                match child {
705                    ChildOfElement::Element(child) => {
706                        text += &gather_text(child);
707                    }
708                    ChildOfElement::Text(t) => text += t.text(),
709                    _ => (),
710                }
711            }
712            // debug!("gather_text: '{}'", text);
713            return text;
714        }
715    }
716}
717
718// used for testing trim
719/// returns Ok() if two Documents are equal or some info where they differ in the Err
720#[allow(dead_code)]
721fn is_same_doc(doc1: &Document, doc2: &Document) -> Result<()> {
722    // assume 'e' doesn't have element children until proven otherwise
723    // this means we keep Text children until we are proven they aren't needed
724    if doc1.root().children().len() != doc2.root().children().len() {
725        bail!(
726            "Children of docs have {} != {} children",
727            doc1.root().children().len(),
728            doc2.root().children().len()
729        );
730    }
731
732    for (i, (c1, c2)) in doc1
733        .root()
734        .children()
735        .iter()
736        .zip(doc2.root().children().iter())
737        .enumerate()
738    {
739        match c1 {
740            ChildOfRoot::Element(e1) => {
741                if let ChildOfRoot::Element(e2) = c2 {
742                    is_same_element(*e1, *e2)?;
743                } else {
744                    bail!("child #{}, first is element, second is something else", i);
745                }
746            }
747            ChildOfRoot::Comment(com1) => {
748                if let ChildOfRoot::Comment(com2) = c2 {
749                    if com1.text() != com2.text() {
750                        bail!("child #{} -- comment text differs", i);
751                    }
752                } else {
753                    bail!("child #{}, first is comment, second is something else", i);
754                }
755            }
756            ChildOfRoot::ProcessingInstruction(p1) => {
757                if let ChildOfRoot::ProcessingInstruction(p2) = c2 {
758                    if p1.target() != p2.target() || p1.value() != p2.value() {
759                        bail!("child #{} -- processing instruction differs", i);
760                    }
761                } else {
762                    bail!(
763                        "child #{}, first is processing instruction, second is something else",
764                        i
765                    );
766                }
767            }
768        }
769    }
770    return Ok(());
771}
772
773/// returns Ok() if two Documents are equal or some info where they differ in the Err
774// Not really meant to be public -- used by tests in some packages
775#[allow(dead_code)]
776pub fn is_same_element(e1: Element, e2: Element) -> Result<()> {
777    enable_logs();
778    if name(e1) != name(e2) {
779        bail!("Names not the same: {}, {}", name(e1), name(e2));
780    }
781
782    // assume 'e' doesn't have element children until proven otherwise
783    // this means we keep Text children until we are proven they aren't needed
784    if e1.children().len() != e2.children().len() {
785        bail!(
786            "Children of {} have {} != {} children",
787            name(e1),
788            e1.children().len(),
789            e2.children().len()
790        );
791    }
792
793    if let Err(e) = attrs_are_same(e1.attributes(), e2.attributes()) {
794        bail!("In element {}, {}", name(e1), e);
795    }
796
797    for (i, (c1, c2)) in e1.children().iter().zip(e2.children().iter()).enumerate() {
798        match c1 {
799            ChildOfElement::Element(child1) => {
800                if let ChildOfElement::Element(child2) = c2 {
801                    is_same_element(*child1, *child2)?;
802                } else {
803                    bail!("{} child #{}, first is element, second is something else", name(e1), i);
804                }
805            }
806            ChildOfElement::Comment(com1) => {
807                if let ChildOfElement::Comment(com2) = c2 {
808                    if com1.text() != com2.text() {
809                        bail!("{} child #{} -- comment text differs", name(e1), i);
810                    }
811                } else {
812                    bail!("{} child #{}, first is comment, second is something else", name(e1), i);
813                }
814            }
815            ChildOfElement::ProcessingInstruction(p1) => {
816                if let ChildOfElement::ProcessingInstruction(p2) = c2 {
817                    if p1.target() != p2.target() || p1.value() != p2.value() {
818                        bail!("{} child #{} -- processing instruction differs", name(e1), i);
819                    }
820                } else {
821                    bail!(
822                        "{} child #{}, first is processing instruction, second is something else",
823                        name(e1),
824                        i
825                    );
826                }
827            }
828            ChildOfElement::Text(t1) => {
829                if let ChildOfElement::Text(t2) = c2 {
830                    if t1.text() != t2.text() {
831                        bail!("{} child #{} --  text differs", name(e1), i);
832                    }
833                } else {
834                    bail!("{} child #{}, first is text, second is something else", name(e1), i);
835                }
836            }
837        }
838    }
839    return Ok(());
840
841    /// compares attributes -- '==' didn't seems to work
842    fn attrs_are_same(attrs1: Vec<Attribute>, attrs2: Vec<Attribute>) -> Result<()> {
843        if attrs1.len() != attrs2.len() {
844            bail!("Attributes have different length: {:?} != {:?}", attrs1, attrs2);
845        }
846        // can't guarantee attrs are in the same order
847        for attr1 in attrs1 {
848            if let Some(found_attr2) = attrs2
849                .iter()
850                .find(|&attr2| attr1.name().local_part() == attr2.name().local_part())
851            {
852                if attr1.value() == found_attr2.value() {
853                    continue;
854                } else {
855                    bail!(
856                        "Attribute named {} has differing values:\n  '{}'\n  '{}'",
857                        attr1.name().local_part(),
858                        attr1.value(),
859                        found_attr2.value()
860                    );
861                }
862            } else {
863                bail!(
864                    "Attribute name {} not in [{}]",
865                    print_attr(&attr1),
866                    print_attrs(&attrs2)
867                );
868            }
869        }
870        return Ok(());
871
872        fn print_attr(attr: &Attribute) -> String {
873            return format!("@{}='{}'", attr.name().local_part(), attr.value());
874        }
875        fn print_attrs(attrs: &[Attribute]) -> String {
876            return attrs.iter().map(print_attr).collect::<Vec<String>>().join(", ");
877        }
878    }
879}
880
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use super::super::init_logger;
    use super::*;

    /// Parses `mathml` and runs `trim_doc` on the resulting document.
    /// Panics if the string can't be parsed.
    fn parse_and_trim(mathml: &str) -> Package {
        let package = parser::parse(mathml).expect("Failed to parse input");
        trim_doc(&package.as_document());
        return package;
    }

    /// Parses and trims both strings and compares the resulting documents with `is_same_doc`.
    /// Returns true if they are the same and panics with the reported difference otherwise.
    fn are_parsed_strs_equal(test: &str, target: &str) -> bool {
        let target_package = parse_and_trim(target);
        debug!("target:\n{}", mml_to_string(get_element(&target_package)));

        let test_package = parse_and_trim(test);
        debug!("test:\n{}", mml_to_string(get_element(&test_package)));

        if let Err(e) = is_same_doc(&test_package.as_document(), &target_package.as_document()) {
            panic!("{}", e);
        }
        return true;
    }

    #[test]
    fn trim_same() {
        let mathml = "<math><mrow><mo>-</mo><mi>a</mi></mrow></math>";
        assert!(are_parsed_strs_equal(mathml, mathml));
    }

    #[test]
    fn trim_whitespace() {
        let test = "<math><mrow><mo>-</mo><mi> a </mi></mrow></math>";
        let target = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn no_trim_whitespace_nbsp() {
        let test = "<math><mrow><mo>-</mo><mtext> &#x00A0;a </mtext></mrow></math>";
        let target = "<math> <mrow ><mo>-</mo><mtext> &#x00A0;a </mtext></mrow ></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn trim_comment() {
        let test = "<math><mrow><mo>-</mo><!--a comment --><mi> a </mi></mrow></math>";
        let target = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn replace_mglyph() {
        // each mglyph should be replaced by the value of its 'alt' attribute
        let test = "<math>
                <mrow>
                    <mi>X<mglyph fontfamily='my-braid-font' index='2' alt='23braid' /></mi>
                    <mo>+</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='5' alt='132braid' />Y
                    </mi>
                    <mo>=</mo>
                    <mi>
                        <mglyph fontfamily='my-braid-font' index='3' alt='13braid' />
                    </mi>
                </mrow>
            </math>";
        let target = "<math>
            <mrow>
                <mi>X23braid</mi>
                <mo>+</mo>
                <mi>132braidY</mi>
                <mo>=</mo>
                <mi>13braid</mi>
            </mrow>
        </math>";
        assert!(are_parsed_strs_equal(test, target));
    }

    #[test]
    fn trim_differs() {
        let whitespace_str = "<math> <mrow ><mo>-</mo><mi> a </mi></mrow ></math>";
        let different_str = "<math> <mrow ><mo>-</mo><mi> b </mi></mrow ></math>";

        // 'are_parsed_strs_equal' panics on a mismatch, so do the comparison by hand
        let package1 = parse_and_trim(whitespace_str);
        debug!("doc1:\n{}", mml_to_string(get_element(&package1)));

        let package2 = parse_and_trim(different_str);
        debug!("doc2:\n{}", mml_to_string(get_element(&package2)));

        assert!(is_same_doc(&package1.as_document(), &package2.as_document()).is_err());
    }

    #[test]
    fn test_entities() {
        // this forces initialization
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();

        // need to remove unique ids before the results can be compared
        lazy_static! {
            static ref ID_MATCH: Regex = Regex::new(r#"id='.+?' "#).unwrap();
        }
        let set_mathml_without_ids = |mathml: &str| -> String {
            let result = set_mathml(mathml.to_string()).unwrap();
            return ID_MATCH.replace_all(&result, "").into_owned();
        };

        assert_eq!(
            set_mathml_without_ids("<math><mrow><mo>&minus;</mo><mi>&mopf;</mi></mrow></math>"),
            set_mathml_without_ids("<math><mrow><mo>&#x02212;</mo><mi>&#x1D55E;</mi></mrow></math>"),
            "normal entity test failed"
        );

        assert_eq!(
            set_mathml_without_ids(
                "<math data-quot=\"&quot;value&quot;\" data-apos='&apos;value&apos;'><mi>XXX</mi></math>"
            ),
            set_mathml_without_ids("<math data-quot='\"value\"' data-apos=\"'value'\"><mi>XXX</mi></math>"),
            "special entities quote test failed"
        );

        assert_eq!(
            set_mathml_without_ids("<math><mo>&lt;</mo><mo>&gt;</mo><mtext>&amp;lt;</mtext></math>"),
            set_mathml_without_ids(
                "<math><mo>&#x003C;</mo><mo>&#x003E;</mo><mtext>&#x0026;lt;</mtext></math>"
            ),
            "special entities <,>,& test failed"
        );
    }

    #[test]
    fn can_recover_from_invalid_set_rules_dir() {
        use std::env;
        // MathCAT will check the env var "MathCATRulesDir" as an override, so the following test might succeed if we don't override the env var
        env::set_var("MathCATRulesDir", "MathCATRulesDir");
        let invalid_result = set_rules_dir("someInvalidRulesDir".to_string());
        assert!(invalid_result.is_err());

        let valid_result = set_rules_dir(super::super::abs_rules_dir_path());
        assert!(
            valid_result.is_ok(),
            "\nset_rules_dir to '{}' failed",
            super::super::abs_rules_dir_path()
        );

        let mathml_result = set_mathml("<math><mn>1</mn></math>".to_string());
        assert!(mathml_result.is_ok());
    }

    #[test]
    fn single_html_in_mtext() {
        let html_str = "<math><mn>1</mn> <mtext>a<p> para  1</p>bc</mtext> <mi>y</mi></math>";
        let text_only_str = "<math><mn>1</mn> <mtext>a para 1bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html_str, text_only_str));
    }

    #[test]
    fn multiple_html_in_mtext() {
        let html_str = "<math><mn>1</mn> <mtext>a<p>para 1</p> <p>para 2</p>bc  </mtext> <mi>y</mi></math>";
        let text_only_str = "<math><mn>1</mn> <mtext>apara 1 para 2bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html_str, text_only_str));
    }

    #[test]
    fn nested_html_in_mtext() {
        let html_str = "<math><mn>1</mn> <mtext>a <ol><li>first</li><li>second</li></ol> bc</mtext> <mi>y</mi></math>";
        let text_only_str = "<math><mn>1</mn> <mtext>a firstsecond bc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html_str, text_only_str));
    }

    #[test]
    fn empty_html_in_mtext() {
        let html_str = "<math><mn>1</mn> <mtext>a<br/>bc</mtext> <mi>y</mi></math>";
        let text_only_str = "<math><mn>1</mn> <mtext>abc</mtext> <mi>y</mi></math>";
        assert!(are_parsed_strs_equal(html_str, text_only_str));
    }
}