Skip to main content

libmathcat/
speech.rs

1//! The speech module is where the speech rules are read in and speech generated.
2//!
3//! The speech rules call out to the preferences and tts modules and the dividing line is not always clean.
4//! A number of useful utility functions used by other modules are defined here.
5#![allow(clippy::needless_return)]
6use std::path::PathBuf;
7use std::collections::HashMap;
8use std::cell::{RefCell, RefMut};
9use std::sync::LazyLock;
10use sxd_document::dom::{ChildOfElement, Document, Element};
11use sxd_document::{Package, QName};
12use sxd_xpath::context::Evaluation;
13use sxd_xpath::{Factory, Value, XPath};
14use sxd_xpath::nodeset::Node;
15use std::fmt;
16use std::time::SystemTime;
17use crate::definitions::read_definitions_file;
18use crate::errors::*;
19use crate::prefs::*;
20use crate::xpath_functions::is_leaf;
21use yaml_rust::{YamlLoader, Yaml, yaml::Hash};
22use crate::tts::*;
23use crate::infer_intent::*;
24use crate::pretty_print::{mml_to_string, yaml_to_string};
25use std::path::Path;
26use std::rc::Rc;
27use crate::shim_filesystem::{read_to_string_shim, canonicalize_shim};
28use crate::canonicalize::{as_element, create_mathml_element, set_mathml_name, name, MATHML_FROM_NAME_ATTR};
29use regex::Regex;
30use log::{debug, error, info};
31
32
/// Error text used (via `bail!`) when no speech could be found for the requested navigation node.
/// Callers test for this exact string to recognize that case.
pub const NAV_NODE_SPEECH_NOT_FOUND: &str = "NAV_NODE_NOT_FOUND";
34
/// Like lisp's ' (quote foo), this is used to block "replace_chars" being called.
///   Unlike lisp, this is appended to the end of a string (more efficient).
/// At the moment, the only use is BrailleChars(...) -- internally, it calls replace_chars and we don't want it called again.
/// Note: an alternative to this hack is to add "xq" (execute but don't eval the result), but that's heavy-handed for the current need
const NO_EVAL_QUOTE_CHAR: char = '\u{efff}';            // a private use area char
/// The UTF-8 encoding of `NO_EVAL_QUOTE_CHAR`, used to test the tail bytes of a string.
const NO_EVAL_QUOTE_CHAR_AS_BYTES: [u8;3] = [0xee,0xbf,0xbf];
/// The number of bytes `NO_EVAL_QUOTE_CHAR` occupies when encoded as UTF-8.
const N_BYTES_NO_EVAL_QUOTE_CHAR: usize = NO_EVAL_QUOTE_CHAR.len_utf8();
42
43/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
44pub fn make_quoted_string(mut string: String) -> String {
45    string.push(NO_EVAL_QUOTE_CHAR);
46    return string;
47}
48
49/// Checks the string to see if it is "quoted"
50pub fn is_quoted_string(str: &str) -> bool {
51    if str.len() < N_BYTES_NO_EVAL_QUOTE_CHAR {
52        return false;
53    }
54    let bytes = str.as_bytes();
55    return bytes[bytes.len()-N_BYTES_NO_EVAL_QUOTE_CHAR..] == NO_EVAL_QUOTE_CHAR_AS_BYTES;
56}
57
58/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
59/// IMPORTANT: this assumes the string is quoted -- no check is made
60pub fn unquote_string(str: &str) -> &str {
61    return &str[..str.len()-N_BYTES_NO_EVAL_QUOTE_CHAR];
62}
63
64
65/// The main external call, `intent_from_mathml` returns a string for the speech associated with the `mathml`.
66///   It matches against the rules that are computed by user prefs such as "Language" and "SpeechStyle".
67///
68/// The speech rules assume `mathml` has been "cleaned" via the canonicalization step.
69///
70/// If the preferences change (and hence the speech rules to use change), or if the rule file changes,
71///   `intent_from_mathml` will detect that and (re)load the proper rules.
72///
73/// A string is returned in call cases.
74/// If there is an error, the speech string will indicate an error.
75pub fn intent_from_mathml<'m>(mathml: Element, doc: Document<'m>) -> Result<Element<'m>> {
76    let intent_tree = intent_rules(&INTENT_RULES, doc, mathml, "")?;
77    doc.root().append_child(intent_tree);
78    return Ok(intent_tree);
79}
80
81pub fn speak_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
82    return speak_rules(&SPEECH_RULES, mathml, nav_node_id, nav_node_offset);
83}
84
85pub fn overview_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
86    return speak_rules(&OVERVIEW_RULES, mathml, nav_node_id, nav_node_offset);
87}
88
89
90fn intent_rules<'m>(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, doc: Document<'m>, mathml: Element, nav_node_id: &'m str) -> Result<Element<'m>> {
91    rules.with(|rules| {
92        rules.borrow_mut().read_files()?;
93        let rules = rules.borrow();
94        // debug!("intent_rules:\n{}", mml_to_string(mathml));
95        let should_set_literal_intent = rules.pref_manager.borrow().pref_to_string("SpeechStyle").as_str() == "LiteralSpeak";
96        let original_intent = mathml.attribute_value("intent");
97        if should_set_literal_intent {
98            if let Some(intent) = original_intent {
99                let intent = if intent.contains('(') {intent.replace('(', ":literal(")} else {intent.to_string() + ":literal"};
100                mathml.set_attribute_value("intent", &intent);
101            } else {
102                mathml.set_attribute_value("intent", ":literal");
103            };
104        }
105        let mut rules_with_context = SpeechRulesWithContext::new(&rules, doc, nav_node_id, 0);
106        let intent =  rules_with_context.match_pattern::<Element<'m>>(mathml)
107                    .context("Pattern match/replacement failure!")?;
108        let answer = if name(intent) == "TEMP_NAME" {   // unneeded extra layer
109            assert_eq!(intent.children().len(), 1);
110            as_element(intent.children()[0])
111        } else {
112            intent
113        };
114        if should_set_literal_intent {
115            if let Some(original_intent) = original_intent {
116                mathml.set_attribute_value("intent", original_intent);
117            } else {
118                mathml.remove_attribute("intent");
119            }
120        }
121        return Ok(answer);
122    })
123}
124
125/// Speak the MathML
126/// If 'nav_node_id' is not an empty string, then the element with that id will have [[...]] around it
127fn speak_rules(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
128    return rules.with(|rules| {
129        rules.borrow_mut().read_files()?;
130        let rules = rules.borrow();
131        // debug!("speak_rules:\n{}", mml_to_string(mathml));
132        let new_package = Package::new();
133        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), nav_node_id, nav_node_offset);
134        let speech_string = nestable_speak_rules(& mut rules_with_context, mathml)?;
135        
136        return Ok( rules.pref_manager.borrow().get_tts()
137            .merge_pauses(remove_optional_indicators(
138                &speech_string.replace(CONCAT_STRING, "")
139                                   .replace(CONCAT_INDICATOR, "") 
140                                   .replace(POSTFIX_CONCAT_STRING, "")
141                                   .replace(POSTFIX_CONCAT_INDICATOR, "")                           
142                            )
143            .trim_start().trim_end_matches([' ', ',', ';'])) );
144    });
145
146    fn nestable_speak_rules<'c, 's:'c, 'm:'c>(rules_with_context: &mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
147        let mut speech_string = rules_with_context.match_pattern::<String>(mathml)
148                    .context("Pattern match/replacement failure!")?;
149        // Note: [[...]] is added around a matching child, but if the "id" is on 'mathml', the whole string is used
150        if !rules_with_context.nav_node_id.is_empty() {
151            // See https://github.com/NSoiffer/MathCAT/issues/174 for why we can just start the speech at the nav node
152            let intent_attr = mathml.attribute_value("data-intent-property").unwrap_or_default();
153            if let Some(start) = speech_string.find("[[") {
154                match speech_string[start+2..].find("]]") {
155                    None => bail!("Internal error: looking for '[[...]]' during navigation -- only found '[[' in '{}'", speech_string),
156                    Some(end) => speech_string = speech_string[start+2..start+2+end].to_string(),
157                }
158            } else if !intent_attr.contains(":literal:") {
159                // try again with LiteralSpeak -- some parts might have been elided in other SpeechStyles
160                mathml.set_attribute_value("data-intent-property", (":literal:".to_string() + intent_attr).as_str());
161                let speech = nestable_speak_rules(rules_with_context, mathml);
162                mathml.set_attribute_value("data-intent-property", intent_attr);
163                return speech;
164            } else {
165                bail!(NAV_NODE_SPEECH_NOT_FOUND); //  NAV_NODE_SPEECH_NOT_FOUND is tested for later
166            }
167        }
168        return Ok(speech_string);
169    }
170}
171
172/// Converts its argument to a string that can be used in a debugging message.
173pub fn yaml_to_type(yaml: &Yaml) -> String {
174    return match yaml {
175        Yaml::Real(v)=> format!("real='{v:#}'"),
176        Yaml::Integer(v)=> format!("integer='{v:#}'"),
177        Yaml::String(v)=> format!("string='{v:#}'"),
178        Yaml::Boolean(v)=> format!("boolean='{v:#}'"),
179        Yaml::Array(v)=> match v.len() {
180            0 => "array with no entries".to_string(),
181            1 => format!("array with the entry: {}", yaml_to_type(&v[0])),
182            _ => format!("array with {} entries. First entry: {}", v.len(), yaml_to_type(&v[0])),
183        }
184        Yaml::Hash(h)=> {
185            let first_pair = 
186                if h.is_empty() {
187                    "no pairs".to_string()
188                } else {
189                    let (key, val) = h.iter().next().unwrap();
190                    format!("({}, {})", yaml_to_type(key), yaml_to_type(val))
191                };
192            format!("dictionary with {} pair{}. A pair: {}", h.len(), if h.len()==1 {""} else {"s"}, first_pair)
193        }
194        Yaml::Alias(_)=> "Alias".to_string(),
195        Yaml::Null=> "Null".to_string(),
196        Yaml::BadValue=> "BadValue".to_string(),       
197    }
198}
199
200fn yaml_type_err(yaml: &Yaml, str: &str) -> Error {
201    anyhow!("Expected {}, found {}", str, yaml_to_type(yaml))
202}
203
204// fn yaml_key_err(dict: &Yaml, key: &str, yaml_type: &str) -> String {
205//     if dict.as_hash().is_none() {
206//        return format!("Expected dictionary with key '{}', found\n{}", key, yaml_to_string(dict, 1));
207//     }
208//     let str = &dict[key];
209//     if str.is_badvalue() {
210//         return format!("Did not find '{}' in\n{}", key,  yaml_to_string(dict, 1));
211//     }
212//     return format!("Type of '{}' is not a {}.\nIt is a {}. YAML value is\n{}", 
213//             key, yaml_type, yaml_to_type(str), yaml_to_string(dict, 0));
214// }
215
216fn find_str<'a>(dict: &'a Yaml, key: &'a str) -> Option<&'a str> {
217    return dict[key].as_str();
218}
219
220/// Returns the Yaml as a `Hash` or an error if it isn't.
221pub fn as_hash_checked(value: &Yaml) -> Result<&Hash> {
222    let result = value.as_hash();
223    let result = result.ok_or_else(|| yaml_type_err(value, "hashmap"))?;
224    return Ok( result );
225}
226
227/// Returns the Yaml as a `Vec` or an error if it isn't.
228pub fn as_vec_checked(value: &Yaml) -> Result<&Vec<Yaml>> {
229    let result = value.as_vec();
230    let result = result.ok_or_else(|| yaml_type_err(value, "array"))?;
231    return Ok( result );
232}
233
234/// Returns the Yaml as a `&str` or an error if it isn't.
235pub fn as_str_checked(yaml: &Yaml) -> Result<&str> {
236    return yaml.as_str().ok_or_else(|| yaml_type_err(yaml, "string"));
237}
238
239
/// A bit of a hack to concatenate replacements (without a ' ').
/// The CONCAT_INDICATOR is added by a "ct:" (instead of 't:') in the speech rules
/// and checked for by the tts code.
pub const CONCAT_INDICATOR: &str = "\u{F8FE}";

/// This is the pattern that needs to be matched (and deleted): a space followed by CONCAT_INDICATOR
pub const CONCAT_STRING: &str = " \u{F8FE}";

/// A similar hack to delete a space afterward.
/// The POSTFIX_CONCAT_INDICATOR is added by a "tc:" in the speech rules.
pub const POSTFIX_CONCAT_INDICATOR: &str = "\u{F8FF}";

/// This is the pattern that needs to be matched (and deleted): POSTFIX_CONCAT_INDICATOR followed by a space
pub const POSTFIX_CONCAT_STRING: &str = "\u{F8FF} ";

// A similar hack to potentially delete (repetitive) optional replacements.
// The OPTIONAL_INDICATOR is added by "ot:" before and after the optional string.
const OPTIONAL_INDICATOR: &str  = "\u{F8FD}";
// The length in bytes of OPTIONAL_INDICATOR
const OPTIONAL_INDICATOR_LEN: usize = OPTIONAL_INDICATOR.len();
258
259pub fn remove_optional_indicators(str: &str) -> String {
260    return str.replace(OPTIONAL_INDICATOR, "");
261}
262
263/// Given a string that should be Yaml, it calls `build_fn` with that string.
264/// The build function/closure should process the Yaml as appropriate and capture any errors and write them to `std_err`.
265/// The returned value should be a Vector containing the paths of all the files that were included.
266pub fn compile_rule<F>(str: &str, mut build_fn: F) -> Result<Vec<PathBuf>> where
267            F: FnMut(&Yaml) -> Result<Vec<PathBuf>> {
268    let docs = YamlLoader::load_from_str(str);
269    match docs {
270        Err(e) => {
271            bail!("Parse error!!: {}", e);
272        },
273        Ok(docs) => {
274            if docs.len() != 1 {
275                bail!("Didn't find rules!");
276            }
277            return build_fn(&docs[0]);
278        }
279    }
280}
281
282pub fn process_include<F>(current_file: &Path, new_file_name: &str, mut read_new_file: F) -> Result<Vec<PathBuf>>
283                    where F: FnMut(&Path) -> Result<Vec<PathBuf>> {
284    let parent_path = current_file.parent();
285    if parent_path.is_none() {
286        bail!("Internal error: {:?} is not a valid file name", current_file);
287    }
288    let mut new_file = match canonicalize_shim(parent_path.unwrap()) {
289        Ok(path) => path,
290        Err(e) => bail!("process_include: canonicalize failed for {} with message {}", parent_path.unwrap().display(), e),
291    };
292
293    // the referenced file might be in a directory that hasn't been zipped up -- find the dir and call the unzip function
294    for unzip_dir in new_file.ancestors() {
295        if unzip_dir.ends_with("Rules") {
296            break;      // nothing to unzip
297        }
298        if unzip_dir.ends_with("Languages") || unzip_dir.ends_with("Braille") {
299            // get the subdir ...Rules/Braille/en/...
300            // could have ...Rules/Braille/definitions.yaml, so 'next()' doesn't exist in this case, but the file wasn't zipped up
301            if let Some(subdir) = new_file.strip_prefix(unzip_dir).unwrap().iter().next() {
302                let default_lang = if unzip_dir.ends_with("Languages") {"en"} else {"UEB;"};
303                PreferenceManager::unzip_files(unzip_dir, subdir.to_str().unwrap(), Some(default_lang)).unwrap_or_default();
304            }
305        }
306    }
307    new_file.push(new_file_name);
308    info!("...processing include: {new_file_name}...");
309    let new_file = match crate::shim_filesystem::canonicalize_shim(new_file.as_path()) {
310        Ok(buf) => buf,
311        Err(msg) => bail!("-include: constructed file name '{}' causes error '{}'",
312                                 new_file.to_str().unwrap(), msg),
313    };
314
315    let mut included_files = read_new_file(new_file.as_path())?;
316    let mut files_read = vec![new_file];
317    files_read.append(&mut included_files);
318    return Ok(files_read);
319}
320
/// As the name says, TreeOrString is either a Tree (Element) or a String
/// It is used to share code during pattern matching
/// Two implementations exist in this file: one for `String` and one for `Element`.
pub trait TreeOrString<'c, 'm:'c, T> {
    /// Converts an element into a `T` (the `String` implementation returns an error).
    fn from_element(e: Element<'m>) -> Result<T>;
    /// Converts a string into a `T` (the `Element` implementation creates an 'mi' leaf in `doc` holding `s`).
    fn from_string(s: String, doc: Document<'m>) -> Result<T>;
    /// Applies a TTS command (the `Element` implementation returns an error).
    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
    /// Applies the replacements in `ra` to `mathml`.
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
    /// Does the replacement for `nodes`.
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T>;
    /// Highlights `braille` (the `Element` implementation panics).
    fn highlight_braille(braille: T, highlight_style: String) -> T;
    /// Marks `speech` as being the speech for the nav node (the `Element` implementation panics).
    fn mark_nav_speech(speech: T) -> T;
}
332
333impl<'c, 'm:'c> TreeOrString<'c, 'm, String> for String {
334    fn from_element(_e: Element<'m>) -> Result<String> {
335         bail!("from_element not allowed for strings");
336    }
337
338    fn from_string(s: String, _doc: Document<'m>) -> Result<String> {
339        return Ok(s);
340    }
341
342    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
343        return tts.replace_string(command, prefs, rules_with_context, mathml);
344    }
345
346    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
347        return ra.replace_array_string(rules_with_context, mathml);
348    }
349
350    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
351        return rules.replace_nodes_string(nodes, mathml);
352    }
353
354    fn highlight_braille(braille: String, highlight_style: String) -> String {
355        return SpeechRulesWithContext::highlight_braille_string(braille, highlight_style);
356    }
357
358    fn mark_nav_speech(speech: String) -> String {
359        return SpeechRulesWithContext::mark_nav_speech(speech);
360    }
361}
362
363impl<'c, 'm:'c> TreeOrString<'c, 'm, Element<'m>> for Element<'m> {
364    fn from_element(e: Element<'m>) -> Result<Element<'m>> {
365         return Ok(e);
366    }
367
368    fn from_string(s: String, doc: Document<'m>) -> Result<Element<'m>> {
369        // FIX: is 'mi' really ok?  Don't want to use TEMP_NAME because this name needs to move to the outside world
370        let leaf = create_mathml_element(&doc, "mi");
371        leaf.set_text(&s);
372        return Ok(leaf);
373}
374
375    fn replace_tts<'s:'c, 'r>(_tts: &TTS, _command: &TTSCommandRule, _prefs: &PreferenceManager, _rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, _mathml: Element<'c>) -> Result<Element<'m>> {
376        bail!("Internal error: applying a TTS rule to a tree");
377    }
378
379    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
380        return ra.replace_array_tree(rules_with_context, mathml);
381    }
382
383    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<Element<'m>> {
384        return rules.replace_nodes_tree(nodes, mathml);
385    }
386
387    fn highlight_braille(_braille: Element<'c>, _highlight_style: String) -> Element<'m> {
388        panic!("Internal error: highlight_braille called on a tree");
389    }
390
391    fn mark_nav_speech(_speech: Element<'c>) -> Element<'m> {
392        panic!("Internal error: mark_nav_speech called on a tree");
393    }
394}
395
/// 'Replacement' is an enum that contains all the potential replacement types/structs
/// Hence there are fields 'Test' ("test:"), 'Text' ("t:"), 'XPath' ("x:"), etc
/// See `Replacement::build` for the rule-file key that creates each variant.
#[derive(Debug, Clone)]
#[allow(clippy::upper_case_acronyms)]
enum Replacement {
    // Note: all of these are pointer types
    Text(String),                       // "t:", "ct:", "tc:", "ot:" (indicator chars are added for all but "t:")
    XPath(MyXPath),                     // "x:"
    Intent(Box<Intent>),                // "intent:"
    Test(Box<TestArray>),               // "test:"
    TTS(Box<TTSCommandRule>),           // "pause:", "rate:", "pitch:", etc.
    With(Box<With>),                    // "with:"
    SetVariables(Box<SetVariables>),    // "set_variables:"
    Insert(Box<InsertChildren>),        // "insert:"
    Translate(TranslateExpression),     // "translate:"
}
412
413impl fmt::Display for Replacement {
414    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
415        return write!(f, "{}",
416            match self {
417                Replacement::Test(c) => c.to_string(),
418                Replacement::Text(t) => format!("t: \"{t}\""),
419                Replacement::XPath(x) => x.to_string(),
420                Replacement::Intent(i) => i.to_string(),
421                Replacement::TTS(t) => t.to_string(),
422                Replacement::With(w) => w.to_string(),
423                Replacement::SetVariables(v) => v.to_string(),
424                Replacement::Insert(ic) => ic.to_string(),
425                Replacement::Translate(x) => x.to_string(),
426            }
427        );
428    }
429}
430
431impl Replacement {   
432    fn build(replacement: &Yaml) -> Result<Replacement> {
433        // Replacement -- single key/value (see below for allowed values)
434        let dictionary = replacement.as_hash();
435        if dictionary.is_none() {
436            bail!("  expected a key/value pair. Found {}.",  yaml_to_string(replacement, 0));
437        };
438        let dictionary = dictionary.unwrap();
439        if dictionary.is_empty() { 
440            bail!("No key/value pairs found for key 'replace'.\n\
441                Suggestion: are the following lines indented properly?");
442        }
443        if dictionary.len() > 1 { 
444            bail!("Should only be one key/value pair for the replacement.\n    \
445                    Suggestion: are the following lines indented properly?\n    \
446                    The key/value pairs found are\n{}", yaml_to_string(replacement, 2));
447        }
448
449        // get the single value
450        let (key, value) = dictionary.iter().next().unwrap();
451        let key = key.as_str().ok_or_else(|| anyhow!("replacement key(e.g, 't') is not a string"))?;
452        match key {
453            "t" | "T" => {
454                return Ok( Replacement::Text( as_str_checked(value)?.to_string() ) );
455            },
456            "ct" | "CT" => {
457                return Ok( Replacement::Text( CONCAT_INDICATOR.to_string() + as_str_checked(value)? ) );
458            },
459            "tc" | "TC" => {
460                return Ok( Replacement::Text( as_str_checked(value)?.to_string() + POSTFIX_CONCAT_INDICATOR ) );
461            },
462            "ot" | "OT" => {
463                return Ok( Replacement::Text( OPTIONAL_INDICATOR.to_string() + as_str_checked(value)? + OPTIONAL_INDICATOR ) );
464            },
465            "x" => {
466                return Ok( Replacement::XPath( MyXPath::build(value)
467                    .context("while trying to evaluate value of 'x:'")? ) );
468            },
469            "pause" | "rate" | "pitch" | "volume" | "audio" | "gender" | "voice" | "spell" | "SPELL" | "bookmark" | "pronounce" | "PRONOUNCE" => {
470                return Ok( Replacement::TTS( TTS::build(&key.to_ascii_lowercase(), value)? ) );
471            },
472            "intent" => {
473                return Ok( Replacement::Intent( Intent::build(value)? ) );
474            },
475            "test" => {
476                return Ok( Replacement::Test( Box::new( TestArray::build(value)? ) ) );
477            },
478            "with" => {
479                return Ok( Replacement::With( With::build(value)? ) );
480            },
481            "set_variables" => {
482                return Ok( Replacement::SetVariables( SetVariables::build(value)? ) );
483            },
484            "insert" => {
485                return Ok( Replacement::Insert( InsertChildren::build(value)? ) );
486            },
487            "translate" => {
488                return Ok( Replacement::Translate( TranslateExpression::build(value)
489                    .context("while trying to evaluate value of 'speak:'")? ) );
490            },
491            _ => {
492                bail!("Unknown 'replace' command ({}) with value: {}", key, yaml_to_string(value, 0));
493            }
494        }
495    }
496}
497
/// Structure used when "insert:" is encountered in a rule.
/// The 'replacements' are inserted between each node in the 'xpath'
/// (nothing is inserted before the first node or after the last one).
#[derive(Debug, Clone)]
struct InsertChildren {
    xpath: MyXPath,                     // the replacement nodes
    replacements: ReplacementArray,     // what is inserted between each node
}
505
506#[cfg_attr(coverage, coverage(off))]
507impl fmt::Display for InsertChildren {
508    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
509        return write!(f, "InsertChildren:\n  nodes {}\n  replacements {}", self.xpath, &self.replacements);
510    }
511}
512
513
514impl InsertChildren {
515    fn build(insert: &Yaml) -> Result<Box<InsertChildren>> {
516        // 'insert:' -- 'nodes': xxx 'replace': xxx
517        if insert.as_hash().is_none() {
518            bail!("")
519        }
520        let nodes = &insert["nodes"];
521        if nodes.is_badvalue() { 
522            bail!("Missing 'nodes' as part of 'insert'.\n    \
523                  Suggestion: add 'nodes:' or if present, indent so it is contained in 'insert'");
524        }
525        let nodes = as_str_checked(nodes)?;
526        let replace = &insert["replace"];
527        if replace.is_badvalue() { 
528            bail!("Missing 'replace' as part of 'insert'.\n    \
529                  Suggestion: add 'replace:' or if present, indent so it is contained in 'insert'");
530        }
531        return Ok( Box::new( InsertChildren {
532            xpath: MyXPath::new(nodes.to_string())?,
533            replacements: ReplacementArray::build(replace).context("'replace:'")?,
534        } ) );
535    }
536    
537    // It would be most efficient to do an xpath eval, get the nodes (type: NodeSet) and then intersperse the node_replace()
538    //   calls with replacements for the ReplacementArray parts. But that causes problems with the "pause: auto" calculation because
539    //   the replacements are segmented (can't look to neighbors for the calculation there)
540    // An alternative is to introduce another Replacement enum value, but that's a lot of complication for not that much
541    //    gain (and Node's have contagious lifetimes)
542    // The solution adopted is to find out the number of nodes and build up MyXPaths with each node selected (e.g, "*" => "*[3]")
543    //    and put those nodes into a flat ReplacementArray and then do a standard replace on that.
544    //    This is slower than the alternatives, but reuses a bunch of code and hence is less complicated.
545    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
546        let result = self.xpath.evaluate(&rules_with_context.context_stack.base, mathml)
547                .with_context(||format!("in '{}' replacing after pattern match", &self.xpath.rc.string) )?;
548        match result {
549            Value::Nodeset(nodes) => {
550                if nodes.size() == 0 {
551                    bail!("During replacement, no matching element found");
552                };
553                let nodes = nodes.document_order();
554                let n_nodes = nodes.len();
555                let mut expanded_result = Vec::with_capacity(n_nodes + (n_nodes+1)*self.replacements.replacements.len());
556                expanded_result.push(
557                    Replacement::XPath(
558                        MyXPath::new(format!("{}[{}]", self.xpath.rc.string , 1))?
559                    )
560                );
561                for i in 2..n_nodes+1 {
562                    expanded_result.extend_from_slice(&self.replacements.replacements);
563                    expanded_result.push(
564                        Replacement::XPath(
565                            MyXPath::new(format!("{}[{}]", self.xpath.rc.string , i))?
566                        )
567                    );
568                }
569                let replacements = ReplacementArray{ replacements: expanded_result };
570                return replacements.replace(rules_with_context, mathml);
571            },
572
573            // FIX: should the options be errors???
574            Value::String(t) => { return T::from_string(rules_with_context.replace_chars(&t, mathml)?, rules_with_context.doc); },
575            Value::Number(num)  => { return T::from_string( num.to_string(), rules_with_context.doc ); },
576            Value::Boolean(b)  => { return T::from_string( b.to_string(), rules_with_context.doc ); },          // FIX: is this right???
577        }
578        
579    }    
580}
581
582
/// Regular expression (compiled on first use) that matches one `name='value'` or `name="value"` pair.
/// The named capture groups are `name`, `value` (single-quoted value), and `dqvalue` (double-quoted value);
/// whitespace is allowed around the '=' and the value must not be empty.
static ATTR_NAME_VALUE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        // match name='value', where name is sort of an NCNAME (see CONCEPT_OR_LITERAL in infer_intent.rs)
        // The quotes can be either single or double quotes
        r#"(?P<name>[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*)\s*=\s*('(?P<value>[^']+)'|"(?P<dqvalue>[^"]+)")"#
    ).unwrap()
});
590
/// Structure used when "intent:" is encountered in a rule.
/// The name is either a string or an xpath that needs evaluation. 99% of the time it is a string.
/// `Intent::build` guarantees that at least one of `name` and `xpath` is set.
#[derive(Debug, Clone)]
struct Intent {
    name: Option<String>,           // name of node (from 'name:')
    xpath: Option<MyXPath>,         // alternative to directly using the string (from 'xpath-name:')
    attrs: String,                  // optional attrs -- format "attr1='val1' [attr2='val2'...]"
    children: ReplacementArray,     // children of node
}
600
601impl fmt::Display for Intent {
602    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
603        let name = if let Some(name) = &self.name {
604            name.to_string()
605        } else {
606            self.xpath.as_ref().unwrap().to_string()
607        };
608        return write!(f, "intent: {}: {},  attrs='{}'>\n      children: {}",
609                        if self.name.is_some() {"name"} else {"xpath-name"}, name,
610                        self.attrs,
611                        &self.children);
612    }
613}
614
615impl Intent {
616    fn build(yaml_dict: &Yaml) -> Result<Box<Intent>> {
617        // 'intent:' -- 'name': xxx 'children': xxx
618        if yaml_dict.as_hash().is_none() {
619            bail!("Array found for contents of 'intent' -- should be dictionary with keys 'name' and 'children'")
620        }
621        let name = &yaml_dict["name"];
622        let xpath_name = &yaml_dict["xpath-name"];
623        if name.is_badvalue() && xpath_name.is_badvalue(){ 
624            bail!("Missing 'name' or 'xpath-name' as part of 'intent'.\n    \
625                  Suggestion: add 'name:' or if present, indent so it is contained in 'intent'");
626        }
627        let attrs = &yaml_dict["attrs"];
628        let replace = &yaml_dict["children"];
629        if replace.is_badvalue() {
630            bail!("Missing 'children' as part of 'intent'.\n    \
631                  Suggestion: add 'children:' or if present, indent so it is contained in 'intent'");
632        }
633        return Ok( Box::new( Intent {
634            name: if name.is_badvalue() {None} else {Some(as_str_checked(name).context("'name'")?.to_string())},
635            xpath: if xpath_name.is_badvalue() {None} else {Some(MyXPath::build(xpath_name).context("'intent'")?)},
636            attrs: if attrs.is_badvalue() {"".to_string()} else {as_str_checked(attrs).context("'attrs'")?.to_string()},
637            children: ReplacementArray::build(replace).context("'children:'")?,
638        } ) );
639    }
640        
641    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
642        let result = self.children.replace::<Element<'m>>(rules_with_context, mathml)
643                    .context("replacing inside 'intent'")?;
644        let mut result = lift_children(result);
645        if name(result) != "TEMP_NAME" && name(result) != "Unknown" {
646            // this case happens when you have an 'intent' replacement as a direct child of an 'intent' replacement
647            let temp = create_mathml_element(&result.document(), "TEMP_NAME");
648            temp.append_child(result);
649            result = temp;
650        }
651        if let Some(intent_name) = &self.name {
652            result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
653            set_mathml_name(result, intent_name.as_str());
654        }
655        if let Some(my_xpath) = &self.xpath{    // self.xpath_name must be != None
656            let xpath_value = my_xpath.evaluate(rules_with_context.get_context(), mathml)?;
657            match xpath_value {
658                Value::String(intent_name) => {
659                    result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
660                    set_mathml_name(result, intent_name.as_str())
661                },
662                _ => bail!("'xpath-name' value '{}' was not a string", &my_xpath),
663            }
664        }
665        if self.name.is_none() && self.xpath.is_none() {
666            bail!("Intent::replace: internal error -- neither 'name' nor 'xpath' is set");
667        };
668        
669        for attr in mathml.attributes() {
670            result.set_attribute_value(attr.name(), attr.value());
671        }
672
673        // can't test against name == "math" because intent might a new element
674        if mathml.parent().is_some() && mathml.parent().unwrap().element().is_some() &&
675           result.attribute_value("id") == crate::canonicalize::get_parent(mathml).attribute_value("id") {
676            // avoid duplicate ids -- it's a bug if it does, but this helps in that case
677            result.remove_attribute("id");
678        }
679
680        if !self.attrs.is_empty() {
681            // debug!("MathML after children, before attr processing:\n{}", mml_to_string(mathml));
682            // debug!("Result after children, before attr processing:\n{}", mml_to_string(result));
683            // debug!("Intent::replace attrs = \"{}\"", &self.attrs);
684            for cap in ATTR_NAME_VALUE.captures_iter(&self.attrs) {
685                let matched_value = if cap["value"].is_empty() {&cap["dqvalue"]} else {&cap["value"]};
686                let value_as_xpath = MyXPath::new(matched_value.to_string()).context("attr value inside 'intent'")?;
687                let value = value_as_xpath.evaluate(rules_with_context.get_context(), result)
688                        .context("attr xpath evaluation value inside 'intent'")?;
689                let mut value = value.into_string();
690                if &cap["name"] == INTENT_PROPERTY {
691                    value = simplify_fixity_properties(&value);
692                }
693                // debug!("Intent::replace match\n  name={}\n  value={}\n  xpath value={}", &cap["name"], &cap["value"], &value);
694                if &cap["name"] == INTENT_PROPERTY && value == ":" {
695                    // should have been an empty string, so remove the attribute
696                    result.remove_attribute(INTENT_PROPERTY);
697                } else {
698                    result.set_attribute_value(&cap["name"], &value);
699                }
700            };
701        }
702
703        // debug!("Result from 'intent:'\n{}", mml_to_string(result));
704        return T::from_element(result);
705
706
707        /// "lift" up the children any "TEMP_NAME" child -- could short circuit when only one child
708        fn lift_children(result: Element) -> Element {
709            // debug!("lift_children:\n{}", mml_to_string(result));
710            // most likely there will be the same number of new children as result has, but there could be more
711            let mut new_children = Vec::with_capacity(2*result.children().len());
712            for child_of_element in result.children() {
713                match child_of_element {
714                    ChildOfElement::Element(child) => {
715                        if name(child) == "TEMP_NAME" {
716                            new_children.append(&mut child.children());  // almost always just one
717                        } else {
718                            new_children.push(child_of_element);
719                        }
720                    },
721                    _ => new_children.push(child_of_element),      // text()
722                }
723            }
724            result.replace_children(new_children);
725            return result;
726        }
727    }    
728}
729
// Structure used when "with:" is encountered in a rule.
// The variables are pushed onto a variable stack before the replacement and popped off after it.
#[derive(Debug, Clone)]
struct With {
    variables: VariableDefinitions,     // variables and values
    replacements: ReplacementArray,     // what to do with these vars
}
737
738#[cfg_attr(coverage, coverage(off))]
739impl fmt::Display for With {
740    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
741        return write!(f, "with:\n      variables: {}\n      replace: {}", &self.variables, &self.replacements);
742    }
743}
744
745
746impl With {
747    fn build(vars_replacements: &Yaml) -> Result<Box<With>> {
748        // 'with:' -- 'variables': xxx 'replace': xxx
749        if vars_replacements.as_hash().is_none() {
750            bail!("Array found for contents of 'with' -- should be dictionary with keys 'variables' and 'replace'")
751        }
752        let var_defs = &vars_replacements["variables"];
753        if var_defs.is_badvalue() { 
754            bail!("Missing 'variables' as part of 'with'.\n    \
755                  Suggestion: add 'variables:' or if present, indent so it is contained in 'with'");
756        }
757        let replace = &vars_replacements["replace"];
758        if replace.is_badvalue() { 
759            bail!("Missing 'replace' as part of 'with'.\n    \
760                  Suggestion: add 'replace:' or if present, indent so it is contained in 'with'");
761        }
762        return Ok( Box::new( With {
763            variables: VariableDefinitions::build(var_defs).context("'variables'")?,
764            replacements: ReplacementArray::build(replace).context("'replace:'")?,
765        } ) );
766    }
767
768    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
769        rules_with_context.context_stack.push(self.variables.clone(), mathml)?;
770        let result = self.replacements.replace(rules_with_context, mathml)
771                    .context("replacing inside 'with'")?;
772        rules_with_context.context_stack.pop();
773        return Ok( result );
774    }    
775}
776
// Structure used when "set_variables:" is encountered in a rule.
// The variables are global: they are placed in the base context and never popped off.
#[derive(Debug, Clone)]
struct SetVariables {
    variables: VariableDefinitions,     // variables and values
}
783
784#[cfg_attr(coverage, coverage(off))]
785impl fmt::Display for SetVariables {
786    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
787        return write!(f, "SetVariables: variables {}", &self.variables);
788    }
789}
790
791
792impl SetVariables {
793    fn build(vars: &Yaml) -> Result<Box<SetVariables>> {
794        // 'set_variables:' -- 'variables': xxx (array)
795        if vars.as_vec().is_none() {
796            bail!("'set_variables' -- should be an array of variable name, xpath value");
797        }
798        return Ok( Box::new( SetVariables {
799            variables: VariableDefinitions::build(vars).context("'set_variables'")?
800        } ) );
801    }
802        
803    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
804        rules_with_context.context_stack.set_globals(self.variables.clone(), mathml)?;
805        return T::from_string( "".to_string(), rules_with_context.doc );
806    }    
807}
808
809
/// Allow speech of an expression in the middle of a rule (used by "WhereAmI" for navigation)
#[derive(Debug, Clone)]
struct TranslateExpression {
    xpath: MyXPath,     // the value of 'translate:' -- should evaluate to an id
}
815
816#[cfg_attr(coverage, coverage(off))]
817impl fmt::Display for TranslateExpression {
818    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
819        return write!(f, "speak: {}", &self.xpath);
820    }
821}
822
823
824impl TranslateExpression {
825    fn build(vars: &Yaml) -> Result<TranslateExpression> {
826        // 'translate:' -- xpath (should evaluate to an id)
827        return Ok( TranslateExpression { xpath: MyXPath::build(vars).context("'translate'")? } );
828    }
829        
830    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
831        if self.xpath.rc.string.starts_with('@') {
832            let xpath_value = self.xpath.evaluate(rules_with_context.get_context(), mathml)?;
833            let id = match xpath_value {
834                Value::String(s) => Some(s),
835                Value::Nodeset(nodes) => {
836                    if nodes.size() == 1 {
837                        nodes.document_order_first().unwrap().attribute().map(|attr| attr.value().to_string())
838                    } else {
839                        None
840                    }
841                },
842                _ => None,
843            };
844            match id {
845                None => bail!("'translate' value '{}' is not a string or an attribute value (correct by using '@id'??):\n", self.xpath),
846                Some(id) => {
847                    let speech = speak_mathml(mathml, &id, 0)?;
848                    return T::from_string(speech, rules_with_context.doc);
849                }
850            }
851        } else {
852            return T::from_string(
853                self.xpath.replace(rules_with_context, mathml).context("'translate'")?,
854                rules_with_context.doc
855            );
856        }  
857    } 
858}
859
860
/// An array of rule `Replacement`s (text, xpath, tts commands, etc)
#[derive(Debug, Clone)]
pub struct ReplacementArray {
    // the individual replacements; they are applied in order when replacing
    replacements: Vec<Replacement>
}
866
867impl fmt::Display for ReplacementArray {
868    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
869        return write!(f, "{}", self.pretty_print_replacements());
870    }
871}
872
873impl ReplacementArray {
874    /// Return an empty `ReplacementArray`
875    pub fn build_empty() -> ReplacementArray {
876        return ReplacementArray {
877            replacements: vec![]
878        }
879    }
880
881    /// Convert a Yaml input into a [`ReplacementArray`].
882    /// Any errors are passed back out.
883    pub fn build(replacements: &Yaml) -> Result<ReplacementArray> {
884        // replacements is either a single replacement or an array of replacements
885        let result= if replacements.is_array() {
886            let replacements = replacements.as_vec().unwrap();
887            replacements
888                .iter()
889                .enumerate()    // useful for errors
890                .map(|(i, r)| Replacement::build(r)
891                            .with_context(|| format!("replacement #{} of {}", i+1, replacements.len())))
892                .collect::<Result<Vec<Replacement>>>()?
893        } else {
894            vec![ Replacement::build(replacements)?]
895        };
896
897        return Ok( ReplacementArray{ replacements: result } );
898    }
899
900    /// Do all the replacements in `mathml` using `rules`.
901    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
902        return T::replace(self, rules_with_context, mathml);
903    }
904
905    pub fn replace_array_string<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
906        // loop over the replacements and build up a vector of strings, excluding empty ones.
907        // * eliminate any redundance
908        // * add/replace auto-pauses
909        // * join the remaining vector together
910        let mut replacement_strings = Vec::with_capacity(self.replacements.len());   // probably conservative guess
911        for replacement in self.replacements.iter() {
912            let string: String = rules_with_context.replace(replacement, mathml)?;
913            if !string.is_empty() {
914                replacement_strings.push(string);
915            }
916        }
917
918        if replacement_strings.is_empty() {
919            return Ok( "".to_string() );
920        }
921        // delete an optional text that is repetitive
922        // we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
923        // if repetitive, we delete the optional string
924        // if not, we leave the markers because the repetition might happen several "levels" up
925        // this could also be done in a final cleanup of the entire string (where we remove any markers),
926        //   but the match is harder (rust regex lacks look behind pattern match) and it is less efficient
927        // Note: we skip the first string since it can't be repetitive of something at this level
928        for i in 1..replacement_strings.len()-1 {
929            if let Some(bytes) = is_repetitive(&replacement_strings[i-1], &replacement_strings[i])  {
930                replacement_strings[i] = bytes.to_string();
931            } 
932        }
933                        
934        for i in 0..replacement_strings.len() {
935            if replacement_strings[i].contains(PAUSE_AUTO_STR) {
936                let before = if i == 0 {""} else {&replacement_strings[i-1]};
937                let after = if i+1 == replacement_strings.len() {""} else {&replacement_strings[i+1]};
938                replacement_strings[i] = replacement_strings[i].replace(
939                    PAUSE_AUTO_STR,
940                    &rules_with_context.speech_rules.pref_manager.borrow().get_tts().compute_auto_pause(&rules_with_context.speech_rules.pref_manager.borrow(), before, after));
941            }
942        }
943
944        // join the strings together with spaces in between
945        // concatenation (removal of spaces) is saved for the top level because they otherwise are stripped at the wrong sometimes
946        return Ok( replacement_strings.join(" ") );
947
948        /// delete an optional text (in 'next') that is repetitive at the end of 'prev'
949        /// we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
950        /// if repetitive, we delete the optional string
951        fn is_repetitive<'a>(prev: &str, next: &'a str) -> Option<&'a str> {
952            // OPTIONAL_INDICATOR optionally surrounds the end of 'prev'(ignoring trailing whitespace)
953            // OPTIONAL_INDICATOR surrounds the start of 'next'
954            // minor optimization -- lots of short strings and the OPTIONAL_INDICATOR takes a few bytes, so skip the check for those strings
955            if next.len() <=  2 * OPTIONAL_INDICATOR_LEN {
956                return None;
957            }
958
959            // should be exactly one match -- ignore more than one for now
960            let i_start = next.find(OPTIONAL_INDICATOR)?;
961            let start_repeat_word_in_next = &next[i_start + OPTIONAL_INDICATOR_LEN..];
962            let i_end = start_repeat_word_in_next.find(OPTIONAL_INDICATOR)
963                .unwrap_or_else(|| panic!("Internal error: missing end optional char -- text handling is corrupted!"));
964            let repeat_word = &start_repeat_word_in_next[..i_end];
965            // debug!("check if '{}' is repetitive, end_index={}", repeat_word, i_end);
966            // debug!("   prev: '{}', next '{}'", prev, next);
967
968            let prev_trimmed = prev.trim_end();
969            let ends_with_word = prev_trimmed.len() > repeat_word.len() && prev_trimmed.ends_with(repeat_word);
970            let ends_with_wrapped_word =
971                prev_trimmed
972                    .strip_suffix(OPTIONAL_INDICATOR)
973                    .and_then(|s| s.strip_suffix(repeat_word))
974                    .and_then(|s| s.strip_suffix(OPTIONAL_INDICATOR))
975                    .is_some();
976            if ends_with_word || ends_with_wrapped_word {
977                // debug!("  is repetitive");
978                Some(start_repeat_word_in_next[i_end + OPTIONAL_INDICATOR_LEN..].trim_start())  // remove repeat word and OPTIONAL_INDICATOR
979            } else {
980                None
981            }
982        }
983    }
984
985    pub fn replace_array_tree<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
986        // shortcut for common case (don't build a new tree node)
987        if self.replacements.len() == 1 {
988            return rules_with_context.replace::<Element<'m>>(&self.replacements[0], mathml);
989        }
990
991        let new_element = create_mathml_element(&rules_with_context.doc, "Unknown");  // Hopefully set later (in Intent::Replace())
992        let mut new_children = Vec::with_capacity(self.replacements.len());
993        for child in self.replacements.iter() {
994            let child = rules_with_context.replace::<Element<'m>>(child, mathml)?;
995            new_children.push(ChildOfElement::Element(child));
996        };
997        new_element.append_children(new_children);
998        return Ok(new_element);
999    }
1000
1001
1002    /// Return true if there are no replacements.
1003    pub fn is_empty(&self) -> bool {
1004        return self.replacements.is_empty();
1005    }
1006    
1007    fn pretty_print_replacements(&self) -> String {
1008        let mut group_string = String::with_capacity(128);
1009        if self.replacements.len() == 1 {
1010            group_string += &format!("[{}]", self.replacements[0]);
1011        } else {
1012            group_string += &self.replacements.iter()
1013                    .map(|replacement| format!("\n  - {replacement}"))
1014                    .collect::<Vec<String>>()
1015                    .join("");
1016            group_string += "\n";
1017        }
1018        return group_string;
1019    }
1020}
1021
1022
1023
// MyXPath is a wrapper around an 'XPath' that keeps around the original xpath expr (as a string) so it can be used in error reporting.
// Because we want to be able to clone them and XPath doesn't support clone(), MyXPath is a wrapper around an internal (reference counted) RCMyXPath.
// It supports the standard SpeechRule functionality of building and replacing.
#[derive(Debug)]
struct RCMyXPath {
    xpath: XPath,          // the compiled xpath
    string: String,        // store for error reporting
}
1032
// Cloning a MyXPath only clones the Rc, so the compiled xpath and its string are shared between the clones.
#[derive(Debug, Clone)]
pub struct MyXPath {
    rc: Rc<RCMyXPath>        // rather than putting Rc around both 'xpath' and 'string', just use one and indirect to internal RCMyXPath
}
1037
1038
1039impl fmt::Display for MyXPath {
1040    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1041        return write!(f, "\"{}\"", self.rc.string);
1042    }
1043}
1044
// pub fn xpath_count() -> (usize, usize) {
//     return (XPATH_CACHE.with( |cache| cache.borrow().len()), unsafe{XPATH_CACHE_HITS} );
// }

// Per-thread cache of compiled xpaths. The key is the xpath's source string.
// MyXPath::new() looks here first and only compiles (and then caches) the xpath if it isn't found.
thread_local!{
    static XPATH_CACHE: RefCell<HashMap<String, MyXPath>> = RefCell::new( HashMap::with_capacity(2047) );
}
// static mut XPATH_CACHE_HITS: usize = 0;
1052
1053impl MyXPath {
1054    fn new(xpath: String) -> Result<MyXPath> {
1055        return XPATH_CACHE.with( |cache|  {
1056            let mut cache = cache.borrow_mut();
1057            return Ok(
1058                match cache.get(&xpath) {
1059                    Some(compiled_xpath) => {
1060                        // unsafe{ XPATH_CACHE_HITS += 1;};
1061                        compiled_xpath.clone()
1062                    },
1063                    None => {
1064                        let new_xpath = MyXPath {
1065                            rc: Rc::new( RCMyXPath {
1066                                xpath: MyXPath::compile_xpath(&xpath)?,
1067                                string: xpath.clone()
1068                            })};
1069                        cache.insert(xpath.clone(), new_xpath.clone());
1070                        new_xpath
1071                    },
1072                }
1073            )
1074        });
1075    }
1076
1077    pub fn build(xpath: &Yaml) -> Result<MyXPath> {
1078        let xpath = match xpath {
1079            Yaml::String(s) => s.to_string(),
1080            Yaml::Integer(i) => i.to_string(),
1081            Yaml::Real(s) => s.to_string(),
1082            Yaml::Boolean(s) => s.to_string(),
1083            Yaml::Array(v) =>
1084                // array of strings -- concatenate them together
1085                v.iter()
1086                    .map(as_str_checked)
1087                    .collect::<Result<Vec<&str>>>()?
1088                    .join(" "),
1089            _ => bail!("Bad value when trying to create an xpath: {}", yaml_to_string(xpath, 1)),
1090        };
1091        return MyXPath::new(xpath);
1092    }
1093
1094    fn compile_xpath(xpath: &str) -> Result<XPath> {
1095        let factory = Factory::new();
1096        let xpath_with_debug_info = MyXPath::add_debug_string_arg(xpath)?;
1097        let compiled_xpath = factory.build(&xpath_with_debug_info)
1098                        .with_context(|| format!(
1099                            "Could not compile XPath for pattern:\n{}{}",
1100                            &xpath, more_details(xpath)))?;
1101        return match compiled_xpath {
1102            Some(xpath) => Ok(xpath),
1103            None => bail!("Problem compiling Xpath for pattern:\n{}{}",
1104                            &xpath, more_details(xpath)),
1105        };
1106
1107        
1108        fn more_details(xpath: &str) -> String {
1109            // try to give a better error message by counting [], (), 's, and "s
1110            if xpath.is_empty() {
1111                return "xpath is empty string".to_string();
1112            }
1113            let as_bytes = xpath.trim().as_bytes();
1114            if as_bytes[0] == b'\'' && as_bytes[as_bytes.len()-1] != b'\'' {
1115                return "\nmissing \"'\"".to_string();
1116            }
1117            if (as_bytes[0] == b'"' && as_bytes[as_bytes.len()-1] != b'"') ||
1118               (as_bytes[0] != b'"' && as_bytes[as_bytes.len()-1] == b'"'){
1119                return "\nmissing '\"'".to_string();
1120            }
1121
1122            let mut i_bytes = 0;      // keep track of # of bytes into string for error reporting
1123            let mut paren_count = 0;    // counter to make sure they are balanced
1124            let mut i_paren = 0;      // position of the outermost open paren
1125            let mut bracket_count = 0;
1126            let mut i_bracket = 0;
1127            for ch in xpath.chars() {
1128                if ch == '(' {
1129                    if paren_count == 0 {
1130                        i_paren = i_bytes;
1131                    }
1132                    paren_count += 1;
1133                } else if ch == '[' {
1134                    if bracket_count == 0 {
1135                        i_bracket = i_bytes;
1136                    }
1137                    bracket_count += 1;
1138                } else if ch == ')' {
1139                    if paren_count == 0 {
1140                        return format!("\nExtra ')' found after '{}'", &xpath[i_paren..i_bytes]);
1141                    }
1142                    paren_count -= 1;
1143                    if paren_count == 0 && bracket_count > 0 && i_bracket > i_paren {
1144                        return format!("\nUnclosed brackets found at '{}'", &xpath[i_paren..i_bytes]);
1145                    }
1146                } else if ch == ']' {
1147                    if bracket_count == 0 {
1148                        return format!("\nExtra ']' found after '{}'", &xpath[i_bracket..i_bytes]);
1149                    }
1150                    bracket_count -= 1;
1151                    if bracket_count == 0 && paren_count > 0 && i_paren > i_bracket {
1152                        return format!("\nUnclosed parens found at '{}'", &xpath[i_bracket..i_bytes]);
1153                    }
1154                }
1155                i_bytes += ch.len_utf8();
1156            }
1157            return "".to_string();
1158        }
1159    }
1160
1161    /// Convert DEBUG(...) input to the internal function which is DEBUG(arg, arg_as_string)
1162    fn add_debug_string_arg(xpath: &str) -> Result<String> {
1163        // do a quick check to see if "DEBUG" is in the string -- this is the common case
1164        let debug_start = xpath.find("DEBUG(");
1165        if debug_start.is_none() {
1166            return Ok( xpath.to_string() );
1167        }
1168
1169        let debug_start = debug_start.unwrap();
1170        let mut before_paren = xpath[..debug_start+5].to_string();   // includes "DEBUG"
1171        let chars = xpath[debug_start+5..].chars().collect::<Vec<char>>();     // begins at '('
1172        before_paren.push_str(&chars_add_debug_string_arg(&chars).with_context(|| format!("In xpath='{xpath}'"))?);
1173        // debug!("add_debug_string_arg: {}", before_paren);
1174        return Ok(before_paren);
1175
1176        fn chars_add_debug_string_arg(chars: &[char]) -> Result<String>  {
1177            // Find all the DEBUG(...) commands in 'xpath' and adds a string argument.
1178            // The DEBUG function that is used internally takes two arguments, the second one being a string version of the DEBUG arg.
1179            //   Being a string, any quotes need to be escaped, and DEBUGs inside of DEBUGs need more escaping.
1180            //   This is done via recursive calls to this function.
1181            assert_eq!(chars[0], '(', "{} does not start with ')'", chars.iter().collect::<String>());
1182            let mut count = 1;  // open/close count
1183            let mut i = 1;
1184            let mut inside_quote = false;
1185            while i < chars.len() {
1186                let ch = chars[i];
1187                match ch {
1188                    '\\' => {
1189                        if i+1 == chars.len() {
1190                            bail!("Syntax error in DEBUG: last char is escape char\nDebug string: '{}'", chars.iter().collect::<String>());
1191                        }
1192                        i += 1;
1193                    },
1194                    '\'' => inside_quote = !inside_quote,
1195                    '(' if !inside_quote => {
1196                        count += 1;
1197                        // FIX: it would be more efficient to spot "DEBUG" preceding this and recurse rather than matching the whole string and recursing
1198                    },
1199                    '(' => (),
1200                    ')' if !inside_quote => {
1201                        count -= 1;
1202                        if count == 0 {
1203                            let arg = &chars[1..i].iter().collect::<String>();
1204                            let escaped_arg = arg.replace('"', "\\\"");
1205                            // DEBUG(...) may be inside 'arg' -- recurse
1206                            let processed_arg = MyXPath::add_debug_string_arg(arg)?;
1207
1208                            // DEBUG(...) may be in the remainder of the string -- recurse
1209                            let processed_rest = MyXPath::add_debug_string_arg(&chars[i+1..].iter().collect::<String>())?;
1210                            return Ok( format!("({processed_arg}, \"{escaped_arg}\"){processed_rest}") );
1211                        }
1212                    },
1213                    ')' => (),
1214                    _ => (),
1215                }
1216                i += 1;
1217            }
1218            bail!("Syntax error in DEBUG: didn't find matching closing paren\nDEBUG{}", chars.iter().collect::<String>());
1219        }
1220    }
1221
1222    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1223        // return true if there is no condition or if the condition evaluates to true
1224        return Ok(
1225            match self.evaluate(context, mathml)? {
1226                Value::Boolean(b) => b,
1227                Value::Nodeset(nodes) => nodes.size() > 0,
1228                _                      => false,      
1229            }
1230        )
1231    }
1232
1233    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1234        if self.rc.string == "process-intent(.)" {
1235            return T::from_element( infer_intent(rules_with_context, mathml)? );
1236        }
1237        
1238        let result = self.evaluate(&rules_with_context.context_stack.base, mathml)
1239                .with_context(|| format!("in '{}' replacing after pattern match", &self.rc.string) )?;
1240        let string = match result {
1241                Value::Nodeset(nodes) => {
1242                    if nodes.size() == 0 {
1243                        bail!("During replacement, no matching element found");
1244                    }
1245                    return rules_with_context.replace_nodes(nodes.document_order(), mathml);
1246                },
1247                Value::String(s) => s,
1248                Value::Number(num) => num.to_string(),
1249                Value::Boolean(b) => b.to_string(),          // FIX: is this right???
1250        };
1251        // Hack!: this test for input that starts with a '$' (defined variable), avoids a double evaluate;
1252        // We don't need NO_EVAL_QUOTE_CHAR here, but the more general solution of a quoted execute (- xq:) would avoid this hack
1253        let result = if self.rc.string.starts_with('$') {string} else {rules_with_context.replace_chars(&string, mathml)?};
1254        return T::from_string(result, rules_with_context.doc );
1255    }
1256    
1257    pub fn evaluate<'c>(&self, context: &sxd_xpath::Context<'c>, mathml: Element<'c>) -> Result<Value<'c>> {
1258        // debug!("evaluate: {}", self);
1259        let result = self.rc.xpath.evaluate(context, mathml);
1260        return match result {
1261            Ok(val) => Ok( val ),
1262            Err(e) => {
1263                // debug!("MyXPath::trying to evaluate:\n  '{}'\n caused the error\n'{}'", self, e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", ""));
1264                bail!( "{}\n\n",
1265                     // remove confusing parts of error message from xpath
1266                    e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", "") );
1267            }
1268        };
1269    }
1270
1271    pub fn test_input<F>(self, f: F) -> bool where F: Fn(&str) -> bool {
1272        return f(self.rc.string.as_ref());
1273    }
1274}
1275
// 'SpeechPattern' holds a single pattern.
// Some info is not needed beyond converting the Yaml to the SpeechPattern, but is useful for error reporting.
// The two main parts are the pattern to be matched and the replacements to do if there is a match.
// Any variables/prefs that are defined/set are also stored.
#[derive(Debug)]
struct SpeechPattern {
    pattern_name: String,
    tag_name: String,
    file_name: String,
    pattern: MyXPath,                     // the xpath expr to attempt to match
    match_uses_var_defs: bool,            // include var_defs in context for matching
    var_defs: VariableDefinitions,        // any variable definitions [can be and probably is an empty vector most of the time]
    replacements: ReplacementArray,       // the replacements in case there is a match
}
1290
1291impl fmt::Display for SpeechPattern {
1292    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1293        return write!(f, "[name: {}, tag: {},\n  variables: {:?}, pattern: {},\n  replacement: {}]",
1294                self.pattern_name, self.tag_name, self.var_defs, self.pattern,
1295                self.replacements.pretty_print_replacements());
1296    }
1297}
1298
1299impl SpeechPattern  {
1300    fn build(dict: &Yaml, file: &Path, rules: &mut SpeechRules) -> Result<Option<Vec<PathBuf>>> {
1301        // Rule::SpeechPattern
1302        //   build { "pattern_name", "tag_name", "pattern", "replacement" }
1303        // or recurse via include: file_name
1304
1305        // debug!("\nbuild_speech_pattern: dict:\n{}", yaml_to_string(dict, 0));
1306        if let Some(include_file_name) = find_str(dict, "include") {
1307            let do_include_fn = |new_file: &Path| {
1308                rules.read_patterns(new_file)
1309            };
1310
1311            return Ok( Some(process_include(file, include_file_name, do_include_fn)?) );
1312        }
1313
1314        let pattern_name = find_str(dict, "name");
1315
1316        // tag_named can be either a string (most common) or an array of strings
1317        let mut tag_names: Vec<&str> = Vec::new();
1318        match find_str(dict, "tag") {
1319            Some(str) => tag_names.push(str),
1320            None => {
1321                // check for array
1322                let tag_array  = &dict["tag"];
1323                tag_names = vec![];
1324                if tag_array.is_array() {
1325                    for (i, name) in tag_array.as_vec().unwrap().iter().enumerate() {
1326                        match as_str_checked(name) {
1327                            Err(e) => return Err(
1328                                e.context(
1329                                    format!("tag name '{}' is not a string in:\n{}",
1330                                        &yaml_to_string(&tag_array.as_vec().unwrap()[i], 0),
1331                                        &yaml_to_string(dict, 1)))
1332                            ),
1333                            Ok(str) => tag_names.push(str),
1334                        };
1335                    }
1336                } else {
1337                    bail!("Errors trying to find 'tag' in:\n{}", &yaml_to_string(dict, 1));
1338                }
1339            }
1340        }
1341
1342        if pattern_name.is_none() {
1343            if dict.is_null() {
1344                bail!("Error trying to find 'name': empty value (two consecutive '-'s?");
1345            } else {
1346                bail!("Errors trying to find 'name' in:\n{}", &yaml_to_string(dict, 1));
1347            };
1348        };
1349        let pattern_name = pattern_name.unwrap().to_string();
1350
1351        // FIX: add check to make sure tag_name is a valid MathML tag name
1352        if dict["match"].is_badvalue() {
1353            bail!("Did not find 'match' in\n{}", yaml_to_string(dict, 1));
1354        }
1355        if dict["replace"].is_badvalue() {
1356            bail!("Did not find 'replace' in\n{}", yaml_to_string(dict, 1));
1357        }
1358    
1359        // xpath's can't be cloned, so we need to do a 'build_xxx' for each tag name
1360        for tag_name in tag_names {
1361            let tag_name = tag_name.to_string();
1362            let pattern_xpath = MyXPath::build(&dict["match"])
1363                    .with_context(|| {
1364                        format!("value for 'match' in rule ({}: {}):\n{}",
1365                                tag_name, pattern_name, yaml_to_string(dict, 1))
1366                    })?;
1367            let speech_pattern =
1368                Box::new( SpeechPattern{
1369                    pattern_name: pattern_name.clone(),
1370                    tag_name: tag_name.clone(),
1371                    file_name: file.to_str().unwrap().to_string(),
1372                    match_uses_var_defs: dict["variables"].is_array() && pattern_xpath.rc.string.contains('$'),    // FIX: should look at var_defs for actual name
1373                    pattern: pattern_xpath,
1374                    var_defs: VariableDefinitions::build(&dict["variables"])
1375                        .with_context(|| {
1376                            format!("value for 'variables' in rule ({}: {}):\n{}",
1377                                    tag_name, pattern_name, yaml_to_string(dict, 1))
1378                        })?,
1379                    replacements: ReplacementArray::build(&dict["replace"])
1380                        .with_context(|| {
1381                            format!("value for 'replace' in rule ({}: {}). Replacements:\n{}",
1382                                    tag_name, pattern_name, yaml_to_string(&dict["replace"], 1))
1383                    })?
1384                } );
1385            // get the array of rules for the tag name
1386            let rule_value = rules.rules.entry(tag_name).or_default();
1387
1388            // if the name exists, replace it. Otherwise add the new rule
1389            match rule_value.iter().enumerate().find(|&pattern| pattern.1.pattern_name == speech_pattern.pattern_name) {
1390                None => rule_value.push(speech_pattern),
1391                Some((i, _old_pattern)) => {
1392                    let old_rule = &rule_value[i];
1393                    info!("\n\n***WARNING***: replacing {}/'{}' in {} with rule from {}\n",
1394                            old_rule.tag_name, old_rule.pattern_name, old_rule.file_name, speech_pattern.file_name);
1395                    rule_value[i] = speech_pattern;
1396                },
1397            }
1398        }
1399
1400        return Ok(None);
1401    }
1402
1403    fn is_match(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1404        if self.tag_name != mathml.name().local_part() && self.tag_name != "*" && self.tag_name != "!*" {
1405            return Ok( false );
1406        }
1407
1408        // debug!("\nis_match: pattern='{}'", self.pattern_name);
1409        // debug!("    pattern_expr {:?}", self.pattern);
1410        // debug!("is_match: mathml is\n{}", mml_to_string(mathml));
1411        return Ok(
1412            match self.pattern.evaluate(context, mathml)? {
1413                Value::Boolean(b)       => b,
1414                Value::Nodeset(nodes) => nodes.size() > 0,
1415                _                             => false,
1416            }
1417        );
1418    }
1419}
1420
1421
// 'Test' holds information used if the replacement is a "test:" clause.
// The condition is an xpath expr and the "else:" part is optional.
// 'TestArray' is the ordered list of those tests (if / else_if ... / else) that make up one "test:" clause.

#[derive(Debug, Clone)]
struct TestArray {
    tests: Vec<Test>    // tried in order by replace(); never empty when created by build()
}
1429
1430impl fmt::Display for TestArray {
1431    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1432        for test in &self.tests {
1433            writeln!(f, "{test}")?;
1434        }
1435        return Ok( () );
1436    }
1437}
1438
1439impl TestArray {
1440    fn build(test: &Yaml) -> Result<TestArray> {
1441        // 'test:' for convenience takes either a dictionary with keys if/else_if/then/then_test/else/else_test or
1442        //      or an array of those values (there should be at most one else/else_test)
1443
1444        // if 'test' is a dictionary ('Hash'), we convert it to an array with one entry and proceed
1445        let tests = if test.as_hash().is_some() {
1446            vec![test]
1447        } else if let Some(vec) = test.as_vec() {
1448            vec.iter().collect()
1449        } else {
1450            bail!("Value for 'test:' is neither a dictionary or an array.")
1451        };
1452
1453        // each entry in 'tests' should be a dictionary with keys if/then/then_test/else/else_test
1454        // a valid entry is one of:
1455        //   if:/else_if:, then:/then_test: and optional else:/else_test:
1456        //   else:/else_test: -- if this case, it should be the last entry in 'tests'
1457        // 'if:' should only be the first entry in the array; 'else_if' should never be the first entry. Otherwise, they are the same
1458        let mut test_array = vec![];
1459        for test in tests {
1460            if test.as_hash().is_none() {
1461                bail!("Value for array entry in 'test:' must be a dictionary/contain keys");
1462            }
1463            let if_part = &test[if test_array.is_empty() {"if"} else {"else_if"}];
1464            if !if_part.is_badvalue() {
1465                // first case: if:, then:, optional else:
1466                let condition = Some( MyXPath::build(if_part)? );
1467                let then_part = TestOrReplacements::build(test, "then", "then_test", true)?;
1468                let else_part = TestOrReplacements::build(test, "else", "else_test", false)?;
1469                let n_keys = if else_part.is_none() {2} else {3};
1470                if test.as_hash().unwrap().len() > n_keys {
1471                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found in the 'then' clause of 'test'");
1472                };
1473                test_array.push(
1474                    Test { condition, then_part, else_part }
1475                );
1476            } else {
1477                // second case: should be else/else_test
1478                let else_part = TestOrReplacements::build(test, "else", "else_test", true)?;
1479                if test.as_hash().unwrap().len() > 1 {
1480                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found the 'else' clause of 'test'");
1481                };
1482                test_array.push(
1483                    Test { condition: None, then_part: None, else_part }
1484                );
1485                
1486                // there shouldn't be any trailing tests
1487                if test_array.len() < test.as_hash().unwrap().len() {
1488                    bail!("'else'/'else_test' key is not last key in 'test:'");
1489                }
1490            }
1491        };
1492
1493        if test_array.is_empty() {
1494            bail!("No entries for 'test:'");
1495        }
1496
1497        return Ok( TestArray { tests: test_array } );
1498    }
1499
1500    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1501        for test in &self.tests {
1502            if test.is_true(&rules_with_context.context_stack.base, mathml)? {
1503                assert!(test.then_part.is_some());
1504                return test.then_part.as_ref().unwrap().replace(rules_with_context, mathml);
1505            } else if let Some(else_part) = test.else_part.as_ref() {
1506                return else_part.replace(rules_with_context, mathml);
1507            }
1508        }
1509        return T::from_string("".to_string(), rules_with_context.doc);
1510    }
1511}
1512
#[derive(Debug, Clone)]
// Used to hold then/then_test and also else/else_test -- only one of these can be present at a time
// ('then'/'else' build the Replacements variant; 'then_test'/'else_test' build the Test variant)
enum TestOrReplacements {
    Replacements(ReplacementArray),     // replacements to use when a test is true
    Test(TestArray),                    // the array of if/then/else tests
}
1519
1520impl fmt::Display for TestOrReplacements {
1521    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1522        if let TestOrReplacements::Test(_) = self {
1523            write!(f, "  _test")?;
1524        }
1525        write!(f, ":")?;
1526        return match self {
1527            TestOrReplacements::Test(t) => write!(f, "{t}"),
1528            TestOrReplacements::Replacements(r) => write!(f, "{r}"),
1529        };
1530    }
1531}
1532
1533impl TestOrReplacements {
1534    fn build(test: &Yaml, replace_key: &str, test_key: &str, key_required: bool) -> Result<Option<TestOrReplacements>> {
1535        let part = &test[replace_key];
1536        let test_part = &test[test_key];
1537        if !part.is_badvalue() && !test_part.is_badvalue() { 
1538            bail!(format!("Only one of '{}' or '{}' is allowed as part of 'test'.\n{}\n    \
1539                  Suggestion: delete one or adjust indentation",
1540                    replace_key, test_key, yaml_to_string(test, 2)));
1541        }
1542        if part.is_badvalue() && test_part.is_badvalue() {
1543            if key_required {
1544                bail!(format!("Missing one of '{}'/'{}:' as part of 'test:'\n{}\n   \
1545                    Suggestion: add the missing key or indent so it is contained in 'test'",
1546                    replace_key, test_key, yaml_to_string(test, 2)))
1547            } else {
1548                return Ok( None );
1549            }
1550        }
1551        // at this point, we have only one of the two options
1552        if test_part.is_badvalue() {
1553            return Ok( Some( TestOrReplacements::Replacements( ReplacementArray::build(part)? ) ) );
1554        } else {
1555            return Ok( Some( TestOrReplacements::Test( TestArray::build(test_part)? ) ) );
1556        }
1557    }
1558
1559    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1560        return match self {
1561            TestOrReplacements::Replacements(r) => r.replace(rules_with_context, mathml),
1562            TestOrReplacements::Test(t) => t.replace(rules_with_context, mathml),
1563        }
1564    }
1565}
1566
// A single entry of a "test:" clause.
// TestArray::build creates either {condition, then_part, optional else_part} or
// a pure "else" entry where only else_part is set.
#[derive(Debug, Clone)]
struct Test {
    condition: Option<MyXPath>,                 // the if:/else_if: xpath; None for a pure "else" entry
    then_part: Option<TestOrReplacements>,      // what to do when the condition is true
    else_part: Option<TestOrReplacements>,      // what to do when the condition is false (optional)
}
1573impl fmt::Display for Test {
1574    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1575        write!(f, "test: [ ")?;
1576        if let Some(if_part) = &self.condition {
1577            write!(f, " if: '{if_part}'")?;
1578        }
1579        if let Some(then_part) = &self.then_part {
1580            write!(f, " then{then_part}")?;
1581        }
1582        if let Some(else_part) = &self.else_part {
1583            write!(f, " else{else_part}")?;
1584        }
1585        return write!(f, "]");
1586    }
1587}
1588
1589impl Test {
1590    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1591        return match self.condition.as_ref() {
1592            None => Ok( false ),     // trivially false -- want to do else part
1593            Some(condition) => condition.is_true(context, mathml)
1594                                .context("Failure in conditional test"),
1595        }
1596    }
1597}
1598
// Used for speech rules with "variables: ..."
// This is the unevaluated form: the value is kept as an xpath and evaluated when the definition is applied
// (see ContextStack::push / ContextStack::set_globals).
#[derive(Debug, Clone)]
struct VariableDefinition {
    name: String,     // name of variable
    value: MyXPath,   // xpath value, typically a constant like "true" or "0", but could be "*/*[1]" to store some nodes   
}
1605
1606impl fmt::Display for VariableDefinition {
1607    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1608        return write!(f, "[name: {}={}]", self.name, self.value);
1609    }   
1610}
1611
// Used for speech rules with "variables: ..."
// This is the evaluated form: ContextStack::push stores one of these per variable to remember
// the value the variable had before it was (re)defined.
#[derive(Debug)]
struct VariableValue<'v> {
    name: String,       // name of variable
    value: Option<Value<'v>>,   // the saved xpath value; None if the variable had no value
}
1618
1619impl fmt::Display for VariableValue<'_> {
1620    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1621        let value = match &self.value {
1622            None => "unset".to_string(),
1623            Some(val) => format!("{val:?}")
1624        };
1625        return write!(f, "[name: {}, value: {}]", self.name, value);
1626    }   
1627}
1628
1629impl VariableDefinition {
1630    fn build(name_value_def: &Yaml) -> Result<VariableDefinition> {
1631        match name_value_def.as_hash() {
1632            Some(map) => {
1633                if map.len() != 1 {
1634                    bail!("definition is not a key/value pair. Found {}",
1635                            yaml_to_string(name_value_def, 1) );
1636                }
1637                let (name, value) = map.iter().next().unwrap();
1638                let name = as_str_checked( name)
1639                    .with_context(|| format!( "definition name is not a string: {}",
1640                            yaml_to_string(name, 1) ))?.to_string();
1641                match value {
1642                    Yaml::Boolean(_) | Yaml::String(_)  | Yaml::Integer(_) | Yaml::Real(_) => (),
1643                    _ => bail!("definition value is not a string, boolean, or number. Found {}",
1644                            yaml_to_string(value, 1) )
1645                };
1646                return Ok(
1647                    VariableDefinition{
1648                        name,
1649                        value: MyXPath::build(value)?
1650                    }
1651                );
1652            },
1653            None => bail!("definition is not a key/value pair. Found {}",
1654                            yaml_to_string(name_value_def, 1) )
1655        }
1656    }
1657}
1658
1659
// The (possibly empty) list of variable definitions given by a rule's "variables:" key, in file order.
#[derive(Debug, Clone)]
struct VariableDefinitions {
    defs: Vec<VariableDefinition>
}
1664
1665impl fmt::Display for VariableDefinitions {
1666    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1667        for def in &self.defs {
1668            write!(f, "{def},")?;
1669        }
1670        return Ok( () );
1671    }
1672}
1673
// The saved values for one ContextStack::push -- one entry per variable that push (re)defined.
struct VariableValues<'v> {
    defs: Vec<VariableValue<'v>>
}
1677
1678impl fmt::Display for VariableValues<'_> {
1679    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1680        for value in &self.defs {
1681            write!(f, "{value}")?;
1682        }
1683        return writeln!(f);
1684    }
1685}
1686
1687impl VariableDefinitions {
1688    fn new(len: usize) -> VariableDefinitions {
1689        return VariableDefinitions{ defs: Vec::with_capacity(len) };
1690    }
1691
1692    fn build(defs: &Yaml) -> Result<VariableDefinitions> {
1693        if defs.is_badvalue() {
1694            return Ok( VariableDefinitions::new(0) );
1695        };
1696        if defs.is_array() {
1697            let defs = defs.as_vec().unwrap();
1698            let mut definitions = VariableDefinitions::new(defs.len());
1699            for def in defs {
1700                let variable_def = VariableDefinition::build(def)
1701                        .context("definition of 'variables'")?;
1702                definitions.push( variable_def);
1703            };
1704            return Ok (definitions );
1705        }
1706        bail!( "'variables' is not an array of {{name: xpath-value}} definitions. Found {}'",
1707                yaml_to_string(defs, 1) );
1708    }
1709
1710    fn push(&mut self, var_def: VariableDefinition) {
1711        self.defs.push(var_def);
1712    }
1713
1714    fn len(&self) -> usize {
1715        return self.defs.len();
1716    }
1717}
1718
// The xpath context used for evaluation together with a stack of saved variable values:
// push() saves the current values of the variables it (re)defines and pop() restores them.
struct ContextStack<'c> {
    // Note: values are generated by calling value_of on an Evaluation -- that makes the two lifetimes the same
    old_values: Vec<VariableValues<'c>>,   // store old values so they can be set on pop 
    base: sxd_xpath::Context<'c>                      // initial context -- contains all the function defs and pref variables
}
1724
1725impl fmt::Display for ContextStack<'_> {
1726    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1727        writeln!(f, " {} old_values", self.old_values.len())?;
1728        for values in &self.old_values {
1729            writeln!(f, "  {values}")?;
1730        }
1731        return writeln!(f);
1732    }
1733}
1734
1735impl<'c, 'r> ContextStack<'c> {
1736    fn new<'a,>(pref_manager: &'a PreferenceManager) -> ContextStack<'c> {
1737        let prefs = pref_manager.merge_prefs();
1738        let mut context_stack = ContextStack {
1739            base: ContextStack::base_context(prefs),
1740            old_values: Vec::with_capacity(31)      // should avoid allocations
1741        };
1742        // FIX: the list of variables to set should come from definitions.yaml
1743        // These can't be set on the <math> tag because of the "translate" command which starts speech at an 'id'
1744        context_stack.base.set_variable("MatchingPause", Value::Boolean(false));
1745        context_stack.base.set_variable("IsColumnSilent", Value::Boolean(false));
1746
1747
1748        return context_stack;
1749    }
1750
1751    fn base_context(var_defs: PreferenceHashMap) -> sxd_xpath::Context<'c> {
1752        let mut context  = sxd_xpath::Context::new();
1753        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
1754        crate::xpath_functions::add_builtin_functions(&mut context);
1755        for (key, value) in var_defs {
1756            context.set_variable(key.as_str(), yaml_to_value(&value));
1757            // if let Some(str_value) = value.as_str() {
1758            //     if str_value != "Auto" {
1759            //         debug!("Set {}='{}'", key.as_str(), str_value);
1760            //     }
1761            // }
1762        };
1763        return context;
1764    }
1765
1766    fn set_globals(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1767        // for each var/value pair, evaluate the value and add the var/value to the base context
1768        for def in &new_vars.defs {
1769            // set the new value
1770            let new_value = match def.value.evaluate(&self.base, mathml) {
1771                Ok(val) => val,
1772                Err(_) => bail!(format!("Can't evaluate variable def for {}", def)),
1773            };
1774            let qname = QName::new(def.name.as_str());
1775            self.base.set_variable(qname, new_value);
1776        }
1777        return Ok( () );
1778    }
1779
1780    fn push(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1781        // store the old value and set the new one 
1782        let mut old_values = VariableValues {defs: Vec::with_capacity(new_vars.defs.len()) };
1783        let evaluation = Evaluation::new(&self.base, Node::Element(mathml));
1784        for def in &new_vars.defs {
1785            // get the old value (might not be defined)
1786            let qname = QName::new(def.name.as_str());
1787            let old_value = evaluation.value_of(qname).cloned();
1788            old_values.defs.push( VariableValue{ name: def.name.clone(), value: old_value} );
1789        }
1790
1791        // use a second loop because of borrow problem with self.base and 'evaluation'
1792        for def in &new_vars.defs {
1793            // set the new value
1794            let new_value = match def.value.evaluate(&self.base, mathml) {
1795                Ok(val) => val,
1796                Err(_) => Value::Nodeset(sxd_xpath::nodeset::Nodeset::new()),
1797            };
1798            let qname = QName::new(def.name.as_str());
1799            self.base.set_variable(qname, new_value);
1800        }
1801        self.old_values.push(old_values);
1802        return Ok( () );
1803    }
1804
1805    fn pop(&mut self) {
1806        const MISSING_VALUE: &str = "-- unset value --";     // can't remove a variable from context, so use this value
1807        let old_values = self.old_values.pop().unwrap();
1808        for variable in old_values.defs {
1809            let qname = QName::new(&variable.name);
1810            let old_value = match variable.value {
1811                None => Value::String(MISSING_VALUE.to_string()),
1812                Some(val) => val,
1813            };
1814            self.base.set_variable(qname, old_value);
1815        }
1816    }
1817}
1818
1819
1820fn yaml_to_value<'b>(yaml: &Yaml) -> Value<'b> {
1821    return match yaml {
1822        Yaml::String(s) => Value::String(s.clone()),
1823        Yaml::Boolean(b)  => Value::Boolean(*b),
1824        Yaml::Integer(i)   => Value::Number(*i as f64),
1825        Yaml::Real(s)   => Value::Number(s.parse::<f64>().unwrap()),
1826        _  => {
1827            error!("yaml_to_value: illegal type found in Yaml value: {}", yaml_to_string(yaml, 1));
1828            Value::String("".to_string())
1829        },
1830    }
1831}
1832
1833
// Information for matching a Unicode char (defined in unicode.yaml) and building its replacement
struct UnicodeDef {
    ch: u32,                    // the character's code point
    speech: ReplacementArray    // the replacements to use for the character
}
1839
1840impl  fmt::Display for UnicodeDef {
1841    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1842        return write!(f, "UnicodeDef{{ch: {}, speech: {:?}}}", self.ch, self.speech);
1843    }
1844}
1845
1846impl UnicodeDef {
1847    fn build(unicode_def: &Yaml, file_name: &Path, speech_rules: &SpeechRules, use_short: bool) -> Result<Option<Vec<PathBuf>>> {
1848        if let Some(include_file_name) = find_str(unicode_def, "include") {
1849            let do_include_fn = |new_file: &Path| {
1850                speech_rules.read_unicode(Some(new_file.to_path_buf()), use_short)
1851            };
1852            return Ok( Some(process_include(file_name, include_file_name, do_include_fn)?) );
1853        }
1854        // key: char, value is replacement or array of replacements
1855        let dictionary = unicode_def.as_hash();
1856        if dictionary.is_none() {
1857            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1858        }
1859
1860        let dictionary = dictionary.unwrap();
1861        if dictionary.len() != 1 {
1862            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1863        }
1864
1865        let (ch, replacements) = dictionary.iter().next().ok_or_else(|| anyhow!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0)))?;
1866        let mut unicode_table = if use_short {
1867            speech_rules.unicode_short.borrow_mut()
1868        } else {
1869            speech_rules.unicode_full.borrow_mut()
1870        };
1871        if let Some(str) = ch.as_str() {
1872            if str.is_empty() {
1873                bail!("Empty character definition. Replacement is {}", replacements.as_str().unwrap());
1874            }
1875            let mut chars = str.chars();
1876            let first_ch = chars.next().unwrap();       // non-empty string, so a char exists
1877            if chars.next().is_some() {                       // more than one char
1878                if str.contains('-')  {
1879                    return process_range(str, replacements, unicode_table);
1880                } else if first_ch != '0' {     // exclude 0xDDDD
1881                    for ch in str.chars() {     // restart the iterator
1882                        let ch_as_str = ch.to_string();
1883                        if unicode_table.insert(ch as u32, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1884                                            .with_context(|| format!("In definition of char: '{str}'"))?.replacements).is_some() {
1885                            error!("*** Character '{}' (0x{:X}) is repeated", ch, ch as u32);
1886                        }
1887                    }
1888                    return Ok(None);
1889                }
1890            }
1891        }
1892
1893        let ch = UnicodeDef::get_unicode_char(ch)?;
1894        if unicode_table.insert(ch, ReplacementArray::build(replacements)
1895                                        .with_context(|| format!("In definition of char: '{}' (0x{})",
1896                                                                        char::from_u32(ch).unwrap(), ch))?.replacements).is_some() {
1897            error!("*** Character '{}' (0x{:X}) is repeated", char::from_u32(ch).unwrap(), ch);
1898        }
1899        return Ok(None);
1900
1901        fn process_range(def_range: &str, replacements: &Yaml, mut unicode_table: RefMut<HashMap<u32,Vec<Replacement>>>) -> Result<Option<Vec<PathBuf>>> {
1902            // should be a character range (e.g., "A-Z")
1903            // iterate over that range and also substitute the char for '.' in the 
1904            let mut range = def_range.split('-');
1905            let first = range.next().unwrap().chars().next().unwrap() as u32;
1906            let last = range.next().unwrap().chars().next().unwrap() as u32;
1907            if range.next().is_some() {
1908                bail!("Character range definition has more than one '-': '{}'", def_range);
1909            }
1910
1911            for ch in first..last+1 {
1912                let ch_as_str = char::from_u32(ch).unwrap().to_string();
1913                unicode_table.insert(ch, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1914                                        .with_context(|| format!("In definition of char: '{def_range}'"))?.replacements);
1915            };
1916
1917            return Ok(None)
1918        }
1919
1920        fn substitute_ch(yaml: &Yaml, ch: &str) -> Yaml {
1921            return match yaml {
1922                Yaml::Array(v) => {
1923                    Yaml::Array(
1924                        v.iter()
1925                         .map(|e| substitute_ch(e, ch))
1926                         .collect::<Vec<Yaml>>()
1927                    )
1928                },
1929                Yaml::Hash(h) => {
1930                    Yaml::Hash(
1931                        h.iter()
1932                         .map(|(key,val)| (key.clone(), substitute_ch(val, ch)) )
1933                         .collect::<Hash>()
1934                    )
1935                },
1936                Yaml::String(s) => Yaml::String( s.replace('.', ch) ),
1937                _ => yaml.clone(),
1938            }
1939        }
1940    }
1941    
1942    fn get_unicode_char(ch: &Yaml) -> Result<u32> {
1943        // either "a" or 0x1234 (number)
1944        if let Some(ch) = ch.as_str() {
1945            let mut ch_iter = ch.chars();
1946            let unicode_ch = ch_iter.next();
1947            if unicode_ch.is_none() || ch_iter.next().is_some() {
1948                bail!("Wanted unicode char, found string '{}')", ch);
1949            };
1950            return Ok( unicode_ch.unwrap() as u32 );
1951        }
1952    
1953        if let Some(num) = ch.as_i64() {
1954            return Ok( num as u32 );
1955        }
1956        bail!("Unicode character '{}' can't be converted to an code point", yaml_to_string(ch, 0));
1957    }    
1958}
1959
1960// Fix: there should be a cache so subsequent library calls don't have to read in the same speech rules
1961//   likely a cache of size 1 is fine
1962// Fix: all statics should be gathered together into one structure that is a Mutex
1963//   for each library call, we should grab a lock on the Mutex in case others try to call
1964//   at the same time.
1965//   If this turns out to be something that others actually do, then a cache > 1 would be good
1966
 // Maps a tag name to the patterns defined for it -- presumably the type of `SpeechRules::rules` (see SpeechPattern::build); TODO confirm
 type RuleTable = HashMap<String, Vec<Box<SpeechPattern>>>;
 // Shared, mutable table from a character's code point to its replacements (see UnicodeDef::build)
 type UnicodeTable = Rc<RefCell<HashMap<u32,Vec<Replacement>>>>;
 // Shared, mutable `FilesAndTimes`
 type FilesAndTimesShared = Rc<RefCell<FilesAndTimes>>;
1970
1971 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
1972 pub enum RulesFor {
1973     Intent,
1974     Speech,
1975     OverView,
1976     Navigation,
1977     Braille,
1978 }
1979
1980 impl fmt::Display for RulesFor {
1981    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1982        let name = match self {
1983            RulesFor::Intent => "Intent",
1984            RulesFor::Speech => "Speech",
1985            RulesFor::OverView => "OverView",
1986            RulesFor::Navigation => "Navigation",
1987            RulesFor::Braille => "Braille",
1988        };
1989       return write!(f, "{name}");
1990    }
1991 }
1992
1993 
/// A file path paired with the modification time that was recorded for it.
#[derive(Debug, Clone)]
pub struct FileAndTime {
    file: PathBuf,
    time: SystemTime,
}

impl FileAndTime {
    /// Records `file` with its time left at `SystemTime::UNIX_EPOCH` (the file system is not consulted).
    fn new(file: PathBuf) -> FileAndTime {
        FileAndTime { file, time: SystemTime::UNIX_EPOCH }
    }

    /// Returns the path as a `&str` (`None` if it is not valid UTF-8).
    // used for debugging preference settings
    pub fn debug_get_file(&self) -> Option<&str> {
        self.file.to_str()
    }

    /// Records `file` together with the modification time reported by `get_metadata`.
    pub fn new_with_time(file: PathBuf) -> FileAndTime {
        let time = FileAndTime::get_metadata(&file);    // read the time before `file` is moved
        FileAndTime { file, time }
    }

    /// Returns true if the recorded time is at least as recent as the file's current modification time.
    pub fn is_up_to_date(&self) -> bool {
        let current_mod_time = FileAndTime::get_metadata(&self.file);
        current_mod_time <= self.time
    }

    /// Returns the modification time of `path`.
    ///
    /// `SystemTime::UNIX_EPOCH` is returned for wasm targets and whenever the metadata
    /// or the modification time can't be read.
    fn get_metadata(path: &Path) -> SystemTime {
        use std::fs;
        if cfg!(target_family = "wasm") {
            return SystemTime::UNIX_EPOCH;
        }
        fs::metadata(path)
            .and_then(|metadata| metadata.modified())
            .unwrap_or(SystemTime::UNIX_EPOCH)
    }

}
2038#[derive(Debug, Default)]
2039pub struct FilesAndTimes {
2040    // ft[0] is the main file -- other files are included by it (or recursively)
2041    // We could be a little smarter about invalidation by tracking what file is the parent (including file),
2042    // but it seems more complicated than it is worth
2043    ft: Vec<FileAndTime>
2044}
2045
2046impl FilesAndTimes {
2047    pub fn new(start_path: PathBuf) -> FilesAndTimes {
2048        let mut ft = Vec::with_capacity(8);
2049        ft.push( FileAndTime::new(start_path) );
2050        return FilesAndTimes{ ft };
2051    }
2052
2053    /// Returns true if the main file matches the corresponding preference location and files' times are all current
2054    pub fn is_file_up_to_date(&self, pref_path: &Path, should_ignore_file_time: bool) -> bool {
2055
2056        // if the time isn't set or the path is different from the preference (which might have changed), return false
2057        if self.ft.is_empty() || self.as_path() != pref_path {
2058            return false;
2059        }
2060        if should_ignore_file_time || cfg!(target_family = "wasm") {
2061            return true;
2062        }
2063        if  self.ft[0].time == SystemTime::UNIX_EPOCH {
2064            return false;
2065        }
2066
2067
2068        // check the time stamp on the included files -- if the head file hasn't changed, the paths for the included files will be the same
2069        for file in &self.ft {
2070            if !file.is_up_to_date() {
2071                return false;
2072            }
2073        }
2074        return true;
2075    }
2076
2077    fn set_files_and_times(&mut self, new_files: Vec<PathBuf>)  {
2078        self.ft.clear();
2079        for path in new_files {
2080            let time = FileAndTime::get_metadata(&path);      // do before move below
2081            self.ft.push( FileAndTime{ file: path, time })
2082        }
2083    }
2084
2085    /// Mark cached files as stale so the next `read_files()` reloads them.
2086    pub fn invalidate(&mut self) {
2087        self.ft.clear();
2088    }
2089
2090    pub fn is_valid(&self) -> bool {
2091        self.ft.is_empty()
2092    }
2093
2094    pub fn as_path(&self) -> &Path {
2095        assert!(!self.ft.is_empty());
2096        return &self.ft[0].file;
2097    }
2098
2099    pub fn paths(&self) -> Vec<PathBuf> {
2100        return self.ft.iter().map(|ft| ft.file.clone()).collect::<Vec<PathBuf>>();
2101    }
2102
2103}
2104
2105
/// `SpeechRules` encapsulates a named group of speech rules (e.g, "ClearSpeak")
/// along with the preferences to be used for speech.
///
/// The Unicode tables and the Unicode/definitions file records are reference counted
/// (see `SpeechRules::new`, which clones them from thread-local values).
// Note: if we can't read the files, an error message is stored in the structure and needs to be checked.
// I tried using Result<SpeechRules>, but it was a mess with all the unwrapping.
// Important: the code needs to be careful to check this at the top level calls
pub struct SpeechRules {
    error: String,                                 // empty when there is no error (see `get_error()`)
    name: RulesFor,                                // which family of rules this is
    pub pref_manager: Rc<RefCell<PreferenceManager>>,
    rules: RuleTable,                              // the speech rules used (partitioned into MathML tags in hashmap, then linearly searched)
    rule_files: FilesAndTimes,                     // files that were read
    translate_single_chars_only: bool,             // strings like "half" don't want 'a's translated, but braille does
    unicode_short: UnicodeTable,                   // the short list of rules used for Unicode characters
    unicode_short_files: FilesAndTimesShared,     // files that were read
    unicode_full:  UnicodeTable,                   // the long remaining rules used for Unicode characters
    unicode_full_files: FilesAndTimesShared,      // files that were read
    definitions_files: FilesAndTimesShared,       // files that were read
}
2124
2125impl fmt::Display for SpeechRules {
2126    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2127        writeln!(f, "SpeechRules '{}'\n{})", self.name, self.pref_manager.borrow())?;
2128        let mut rules_vec: Vec<(&String, &Vec<Box<SpeechPattern>>)> = self.rules.iter().collect();
2129        rules_vec.sort_by_key(|(tag_name, _)| tag_name.as_str());
2130        for (tag_name, rules) in rules_vec {
2131            writeln!(f, "   {}: #patterns {}", tag_name, rules.len())?;
2132        };
2133        return writeln!(f, "   {}+{} unicode entries", &self.unicode_short.borrow().len(), &self.unicode_full.borrow().len());
2134    }
2135}
2136
2137
2138/// `SpeechRulesWithContext` encapsulates a named group of speech rules (e.g, "ClearSpeak")
2139/// along with the preferences to be used for speech.
2140/// Because speech rules can define variables, there is also a context that is carried with them
2141pub struct SpeechRulesWithContext<'c, 's:'c, 'm:'c> {
2142    speech_rules: &'s SpeechRules,
2143    context_stack: ContextStack<'c>,   // current value of (context) variables
2144    doc: Document<'m>,
2145    nav_node_id: &'m str,
2146    nav_node_offset: usize,
2147    pub inside_spell: bool,     // hack to allow 'spell' to avoid infinite loop (see 'spell' implementation in tts.rs)
2148    pub translate_count: usize, // hack to avoid 'translate' infinite loop (see 'spell' implementation in tts.rs)
2149}
2150
2151impl<'c, 's:'c, 'm:'c> fmt::Display for SpeechRulesWithContext<'c, 's,'m> {
2152    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2153        writeln!(f, "SpeechRulesWithContext \n{})", self.speech_rules)?;
2154        return writeln!(f, "   {} context entries, nav node id '({}, {})'", &self.context_stack, self.nav_node_id, self.nav_node_offset);
2155    }
2156}
2157
// Per-thread state for the rule sets.
// `SpeechRules::new` hands the BRAILLE_* values to the braille rules and the SPEECH_* values to every
// other rule set, which is why these are reference counted ("Rc").
thread_local!{
    /// Short Unicode table shared by all non-braille rule sets, so "RC" is used
    static SPEECH_UNICODE_SHORT: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
        
    /// Full Unicode table shared by all non-braille rule sets, so "RC" is used
    static SPEECH_UNICODE_FULL: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(6500) ) );
        
    /// Short Unicode table used by the braille rules; "RC" is used to match the speech tables
    static BRAILLE_UNICODE_SHORT: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
        
    /// Full Unicode table used by the braille rules; "RC" is used to match the speech tables
    static BRAILLE_UNICODE_FULL: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(5000) ) );

    /// Record of the definitions files read for the non-braille rule sets (shared, so "RC" is used)
    static SPEECH_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// Record of the definitions files read for the braille rules
    static BRAILLE_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// Record of the files read into SPEECH_UNICODE_SHORT (shared, so "RC" is used)
    static SPEECH_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// Record of the files read into SPEECH_UNICODE_FULL (shared, so "RC" is used)
    static SPEECH_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// Record of the files read into BRAILLE_UNICODE_SHORT
    static BRAILLE_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// Record of the files read into BRAILLE_UNICODE_FULL
    static BRAILLE_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// The current set of intent rules
    // maybe this should be a small cache of rules in case people switch rules/prefs?
    pub static INTENT_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Intent, true) );

    /// The current set of speech rules
    pub static SPEECH_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Speech, true) );

    /// The current set of overview rules
    pub static OVERVIEW_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::OverView, true) );

    /// The current set of navigation rules
    pub static NAVIGATION_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Navigation, true) );

    /// The current set of braille rules (created with `translate_single_chars_only` set to false, unlike the others)
    pub static BRAILLE_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Braille, false) );
}
2216
2217/// Invalidate speech caches whose paths change when `Language` changes.
2218pub fn invalidate_speech_language_caches() {
2219    SPEECH_DEFINITION_FILES_AND_TIMES.with(|files| files.borrow_mut().invalidate());
2220    SPEECH_UNICODE_SHORT_FILES_AND_TIMES.with(|files| files.borrow_mut().invalidate());
2221    SPEECH_UNICODE_FULL_FILES_AND_TIMES.with(|files| files.borrow_mut().invalidate());
2222    INTENT_RULES.with(|rules| rules.borrow_mut().rule_files.invalidate());
2223    SPEECH_RULES.with(|rules| rules.borrow_mut().rule_files.invalidate());
2224    OVERVIEW_RULES.with(|rules| rules.borrow_mut().rule_files.invalidate());
2225    NAVIGATION_RULES.with(|rules| rules.borrow_mut().rule_files.invalidate());
2226}
2227
2228/// Invalidate caches whose paths change when `SpeechStyle` changes.
2229pub fn invalidate_speech_style_caches() {
2230    SPEECH_RULES.with(|rules| rules.borrow_mut().rule_files.invalidate());
2231}
2232
2233/// Invalidate braille caches whose paths change when `BrailleCode` changes.
2234pub fn invalidate_braille_caches() {
2235    BRAILLE_DEFINITION_FILES_AND_TIMES.with(|files| files.borrow_mut().invalidate());
2236    BRAILLE_UNICODE_SHORT_FILES_AND_TIMES.with(|files| files.borrow_mut().invalidate());
2237    BRAILLE_UNICODE_FULL_FILES_AND_TIMES.with(|files| files.borrow_mut().invalidate());
2238    BRAILLE_RULES.with(|rules| rules.borrow_mut().rule_files.invalidate());
2239}
2240
#[cfg(test)]
// Used for testing the cache is invalidated when the language changes in prefs.rs
impl SpeechRules {
    /// True when no rule files are recorded for this rule set.
    pub(crate) fn rule_files_cache_is_empty(&self) -> bool {
        self.rule_files.ft.is_empty()
    }

    /// True when no definitions files are recorded.
    pub(crate) fn definitions_files_cache_is_empty(&self) -> bool {
        self.definitions_files.borrow().ft.is_empty()
    }

    /// Returns a copy of the main definitions file path (panics if no definitions files are recorded).
    pub(crate) fn definitions_files_cache_path(&self) -> PathBuf {
        let definitions_files = self.definitions_files.borrow();
        definitions_files.as_path().to_path_buf()
    }
}
2256
2257impl SpeechRules {
2258    pub fn new(name: RulesFor, translate_single_chars_only: bool) -> SpeechRules {
2259        let globals = if name == RulesFor::Braille {
2260            (
2261                (BRAILLE_UNICODE_SHORT.with(Rc::clone), BRAILLE_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2262                (BRAILLE_UNICODE_FULL. with(Rc::clone), BRAILLE_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2263                BRAILLE_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2264            )
2265        } else {
2266            (
2267                (SPEECH_UNICODE_SHORT.with(Rc::clone), SPEECH_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2268                (SPEECH_UNICODE_FULL. with(Rc::clone), SPEECH_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2269                SPEECH_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2270            )
2271        };
2272
2273        return SpeechRules {
2274            error: Default::default(),
2275            name,
2276            rules: HashMap::with_capacity(if name == RulesFor::Intent || name == RulesFor::Speech {500} else {50}),                       // lazy load them
2277            rule_files: FilesAndTimes::default(),
2278            unicode_short: globals.0.0,       // lazy load them
2279            unicode_short_files: globals.0.1,
2280            unicode_full: globals.1.0,        // lazy load them
2281            unicode_full_files: globals.1.1,
2282            definitions_files: globals.2,
2283            translate_single_chars_only,
2284            pref_manager: PreferenceManager::get(),
2285        };
2286}
2287
2288    pub fn get_error(&self) -> Option<&str> {
2289        return if self.error.is_empty() {
2290             None
2291        } else {
2292            Some(&self.error)
2293        }
2294    }
2295
    /// (Re)reads whatever is missing or out of date: the rule file for this rule set,
    /// the short Unicode file, and the definitions file.
    ///
    /// The "CheckRuleFiles" preference controls how much checking is done:
    /// unless it is "None", the preference files are set first; only when it is "All" are
    /// file modification times compared.
    ///
    /// # Errors
    /// Propagates any error from setting the preference files or from reading/compiling a file.
    pub fn read_files(&mut self) -> Result<()> {
        let check_rule_files = self.pref_manager.borrow().pref_to_string("CheckRuleFiles");
        if check_rule_files != "None" {  // "Prefs" or "All" are other values
            self.pref_manager.borrow_mut().set_preference_files()?;
        }
        // the preference is read again here (after the preference files may have been set)
        let should_ignore_file_time = self.pref_manager.borrow().pref_to_string("CheckRuleFiles") != "All";     // ignore for "None", "Prefs"
        let rule_file = self.pref_manager.borrow().get_rule_file(&self.name).to_path_buf();     // need to create PathBuf to avoid a move/use problem
        // reload the rules if none are loaded, or if the recorded files don't match the preference / are stale
        if self.rules.is_empty() || !self.rule_files.is_file_up_to_date(&rule_file, should_ignore_file_time) {
            self.rules.clear();
            let files_read = self.read_patterns(&rule_file)?;
            self.rule_files.set_files_and_times(files_read);
        }

        // this immutable borrow of the preferences is held until the end of the function
        let pref_manager = self.pref_manager.borrow();
        let unicode_pref_files = if self.name == RulesFor::Braille {pref_manager.get_braille_unicode_file()} else {pref_manager.get_speech_unicode_file()};

        // only the short Unicode table is (re)read here; `unicode_pref_files.0` is the short file
        if !self.unicode_short_files.borrow().is_file_up_to_date(unicode_pref_files.0, should_ignore_file_time) {
            self.unicode_short.borrow_mut().clear();
            self.unicode_short_files.borrow_mut().set_files_and_times(self.read_unicode(None, true)?);
        }

        // the argument to the definitions functions is true for everything except braille
        if self.definitions_files.borrow().ft.is_empty() || !self.definitions_files.borrow().is_file_up_to_date(
                            pref_manager.get_definitions_file(self.name != RulesFor::Braille),
                            should_ignore_file_time
        ) {
            self.definitions_files.borrow_mut().set_files_and_times(read_definitions_file(self.name != RulesFor::Braille)?);
        }
        return Ok( () );
    }
2325
2326    fn read_patterns(&mut self, path: &Path) -> Result<Vec<PathBuf>> {
2327        // info!("Reading rule file: {}", p.to_str().unwrap());
2328        let rule_file_contents = read_to_string_shim(path).with_context(|| format!("cannot read file '{}'", path.to_str().unwrap()))?;
2329        let rules_build_fn = |pattern: &Yaml| {
2330            self.build_speech_patterns(pattern, path)
2331                .with_context(||format!("in file {:?}", path.to_str().unwrap()))
2332        };
2333        return compile_rule(&rule_file_contents, rules_build_fn)
2334                .with_context(||format!("in file {:?}", path.to_str().unwrap()));
2335    }
2336
2337    fn build_speech_patterns(&mut self, patterns: &Yaml, file_name: &Path) -> Result<Vec<PathBuf>> {
2338        // Rule::SpeechPatternList
2339        let patterns_vec = patterns.as_vec();
2340        if patterns_vec.is_none() {
2341            bail!(yaml_type_err(patterns, "array"));
2342        }
2343        let patterns_vec = patterns.as_vec().unwrap();
2344        let mut files_read = vec![file_name.to_path_buf()];
2345        for entry in patterns_vec.iter() {
2346            if let Some(mut added_files) = SpeechPattern::build(entry, file_name, self)? {
2347                files_read.append(&mut added_files);
2348            }
2349        }
2350        return Ok(files_read)
2351    }
2352    
2353    fn read_unicode(&self, path: Option<PathBuf>, use_short: bool) -> Result<Vec<PathBuf>> {
2354        let path = match path {
2355            Some(p) => p,
2356            None => {
2357                // get the path to either the short or long unicode file
2358                let pref_manager = self.pref_manager.borrow();
2359                let unicode_files = if self.name == RulesFor::Braille {
2360                    pref_manager.get_braille_unicode_file()
2361                } else {
2362                    pref_manager.get_speech_unicode_file()
2363                };
2364                let unicode_files = if use_short {unicode_files.0} else {unicode_files.1};
2365                unicode_files.to_path_buf()
2366            }
2367        };
2368
2369        // FIX: should read first (lang), then supplement with second (region)
2370        // info!("Reading unicode file {}", path.to_str().unwrap());
2371        let unicode_file_contents = read_to_string_shim(&path)?;
2372        let unicode_build_fn = |unicode_def_list: &Yaml| {
2373            let unicode_defs = unicode_def_list.as_vec();
2374            if unicode_defs.is_none() {
2375                bail!("File '{}' does not begin with an array", yaml_to_type(unicode_def_list));
2376            };
2377            let mut files_read = vec![path.to_path_buf()];
2378            for unicode_def in unicode_defs.unwrap() {
2379                if let Some(mut added_files) = UnicodeDef::build(unicode_def, &path, self, use_short)
2380                                                                .with_context(|| {format!("In file {:?}", path.to_str())})? {
2381                    files_read.append(&mut added_files);
2382                }
2383            };
2384            return Ok(files_read)
2385        };
2386
2387        return compile_rule(&unicode_file_contents, unicode_build_fn)
2388                    .with_context(||format!("in file {:?}", path.to_str().unwrap()));
2389    }
2390
2391    pub fn print_sizes() -> String {
2392        // let _ = &SPEECH_RULES.with_borrow(|rules| {
2393        //     debug!("SPEECH RULES entries\n");
2394        //     let rules = &rules.rules;
2395        //     for (key, _) in rules.iter() {
2396        //         debug!("key: {}", key);
2397        //     }
2398        // });
2399        let mut answer = rule_size(&SPEECH_RULES, "SPEECH_RULES");
2400        answer += &rule_size(&INTENT_RULES, "INTENT_RULES");
2401        answer += &rule_size(&BRAILLE_RULES, "BRAILLE_RULES");
2402        answer += &rule_size(&NAVIGATION_RULES, "NAVIGATION_RULES");
2403        answer += &rule_size(&OVERVIEW_RULES, "OVERVIEW_RULES");
2404        SPEECH_RULES.with_borrow(|rule| {
2405            answer += &format!("Speech Unicode tables: short={}/{}, long={}/{}\n",
2406                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2407                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2408        });
2409        BRAILLE_RULES.with_borrow(|rule| {
2410            answer += &format!("Braille Unicode tables: short={}/{}, long={}/{}\n",
2411                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2412                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2413        });
2414        return answer;
2415
2416        fn rule_size(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, name: &str) -> String {
2417            rules.with_borrow(|rule| {
2418                let hash_map = &rule.rules;
2419                return format!("{}: {}/{}\n", name, hash_map.len(), hash_map.capacity());
2420            })
2421        }
2422    }
2423}
2424
2425
/// We track four different lifetimes:
///   'c -- the lifetime of the context and mathml
///   's -- the lifetime of the speech rules (which is static)
///   'm -- the lifetime of the document (`Document<'m>`) and of the nav node id; it outlives 'c
///   'r -- the lifetime of the reference (this seems to be key to keep the rust memory checker happy)
2430impl<'c, 's:'c, 'r, 'm:'c> SpeechRulesWithContext<'c, 's,'m> {
2431    pub fn new(speech_rules: &'s SpeechRules, doc: Document<'m>, nav_node_id: &'m str, nav_node_offset: usize) -> SpeechRulesWithContext<'c, 's, 'm> {
2432        return SpeechRulesWithContext {
2433            speech_rules,
2434            context_stack: ContextStack::new(&speech_rules.pref_manager.borrow()),
2435            doc,
2436            nav_node_id,
2437            nav_node_offset,
2438            inside_spell: false,
2439            translate_count: 0,
2440        }
2441    }
2442
2443    pub fn get_rules(&mut self) -> &SpeechRules {
2444        return self.speech_rules;
2445    }
2446
2447    pub fn get_context(&mut self) -> &mut sxd_xpath::Context<'c> {
2448        return &mut self.context_stack.base;
2449    }
2450
2451    pub fn get_document(&mut self) -> Document<'m> {
2452        return self.doc;
2453    }
2454
2455    pub fn set_nav_node_offset(&mut self, offset: usize) {
2456        // debug!("Setting nav node offset to {}", offset);
2457        self.nav_node_offset = offset;
2458    }
2459
2460    pub fn match_pattern<T:TreeOrString<'c, 'm, T>>(&'r mut self, mathml: Element<'c>) -> Result<T> {
2461        // debug!("Looking for a match for: \n{}", mml_to_string(mathml));
2462        let tag_name = mathml.name().local_part();
2463        let rules = &self.speech_rules.rules;
2464
2465        // start with priority rules that apply to any node (should be a very small number)
2466        if let Some(rule_vector) = rules.get("!*") &&
2467           let Some(result) = self.find_match(rule_vector, mathml)? {
2468                return Ok(result);      // found a match
2469            }
2470        
2471        if let Some(rule_vector) = rules.get(tag_name) &&
2472           let Some(result) = self.find_match(rule_vector, mathml)? {
2473                return Ok(result);      // found a match
2474            }
2475
2476        // no rules for specific element, fall back to rules for "*" which *should* be present in all rule files as fallback
2477        if let Some(rule_vector) = rules.get("*") &&
2478           let Some(result) = self.find_match(rule_vector, mathml)? {
2479                return Ok(result);      // found a match
2480            }
2481
2482        // no rules matched -- poorly written rule file -- let flow through to default error
2483        // report error message with file name
2484        let speech_manager = self.speech_rules.pref_manager.borrow();
2485        let file_name = speech_manager.get_rule_file(&self.speech_rules.name);
2486        // FIX: handle error appropriately 
2487        bail!("\nNo match found!\nMissing patterns in {} for MathML.\n{}", file_name.to_string_lossy(), mml_to_string(mathml));
2488    }
2489
    /// Tries the patterns in `rule_vector` in order and applies the first one that matches `mathml`.
    ///
    /// Returns `Ok(None)` if no pattern matches. On a match, the pattern's replacements are applied
    /// (with the pattern's variable definitions pushed on the context stack while they run).
    ///
    /// # Errors
    /// Errors from the match test or from the replacements are returned with added context that
    /// names the pattern, the MathML, and the file the pattern came from.
    fn find_match<T:TreeOrString<'c, 'm, T>>(&'r mut self, rule_vector: &[Box<SpeechPattern>], mathml: Element<'c>) -> Result<Option<T>> {
        for pattern in rule_vector {
            // debug!("Pattern name: {}", pattern.pattern_name);
            // always pushing and popping around the is_match would be a little cleaner, but push/pop is relatively expensive,
            //   so we optimize and only push first if the variables are needed to do the match
            if pattern.match_uses_var_defs {
                self.context_stack.push(pattern.var_defs.clone(), mathml)?;
            }
            // NOTE(review): if `is_match` returns an error after the push above, the `?` returns
            //   without a matching pop -- confirm callers don't reuse the context stack after an error
            if pattern.is_match(&self.context_stack.base, mathml)
                    .with_context(|| error_string(pattern, mathml) )? {
                // debug!("  find_match: FOUND!!!");
                if !pattern.match_uses_var_defs && pattern.var_defs.len() > 0 { // don't push them on twice
                    self.context_stack.push(pattern.var_defs.clone(), mathml)?;
                }
                // when a nonzero nav offset points into this (leaf) nav node, only the character at that offset
                //   is translated; otherwise the pattern's replacements are used
                let result = if self.nav_node_offset > 0 &&
                            self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() && is_leaf(mathml) {
                    // the offset is 1-based here; an offset past the end of the text yields the default char
                    let ch = crate::canonicalize::as_text(mathml).chars().nth(self.nav_node_offset-1).unwrap_or_default();
                    let ch = self.replace_single_char(ch, mathml)?;
                    // debug!("find_match: ch={} from '{}'; matched pattern name/tag: {}/{} with nav_node_offset={}",
                    //     ch, crate::canonicalize::as_text(mathml),
                    //     pattern.pattern_name, pattern.tag_name, self.nav_node_offset);
                    T::from_string(ch.to_string(), self.doc)
                } else {
                    pattern.replacements.replace(self, mathml)
                };
                // the variable definitions were pushed (before or after the match test) whenever there are any
                if pattern.var_defs.len() > 0 {
                    self.context_stack.pop();
                }
                return match result {
                    Ok(s) => {
                        // for all except braille and navigation, nav_node_id will be an empty string and will not match
                        if self.nav_node_id.is_empty() {
                            Ok( Some(s) )
                        } else {
                            if self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() {debug!("Matched pattern name/tag: {}/{}", pattern.pattern_name, pattern.tag_name)};
                            Ok ( Some(self.nav_node_adjust(s, mathml)) )
                        }
                    },
                    Err(e) => Err( e.context(
                        format!(
                            "attempting replacement pattern: \"{}\" for \"{}\".\n\
                            Replacement\n{}\n...due to matching the MathML\n{} with the pattern\n\
                            {}\n\
                            The patterns are in {}.\n",
                            pattern.pattern_name, pattern.tag_name,
                            pattern.replacements.pretty_print_replacements(),
                            mml_to_string(mathml), pattern.pattern,
                            pattern.file_name
                        )
                    ))
                }
            } else if pattern.match_uses_var_defs {
                // no match: undo the push that was done for the match test
                self.context_stack.pop();
            }
        };
        return Ok(None);    // no matches

        /// Builds the context message used when the match test itself fails.
        fn error_string(pattern: &SpeechPattern, mathml: Element) -> String {
            return format!(
                "error during pattern match using: \"{}\" for \"{}\".\n\
                Pattern is \n{}\nMathML for the match:\n\
                {}\
                The patterns are in {}.\n",
                pattern.pattern_name, pattern.tag_name,
                pattern.pattern,
                mml_to_string(mathml),
                pattern.file_name
            );
        }

    }
2561
2562    fn nav_node_adjust<T:TreeOrString<'c, 'm, T>>(&self, speech: T, mathml: Element<'c>) -> T {
2563      if let Some(id) = mathml.attribute_value("id") &&
2564         self.nav_node_id == id {
2565        let offset = mathml.attribute_value(crate::navigate::ID_OFFSET).unwrap_or("0");
2566        debug!("nav_node_adjust: id/name='{}/{}' offset?='{}'", id, name(mathml),
2567               self.nav_node_offset.to_string().as_str() == offset
2568        );
2569        if is_leaf(mathml) || self.nav_node_offset.to_string().as_str() == offset {
2570          if self.speech_rules.name == RulesFor::Braille {
2571            let highlight_style =  self.speech_rules.pref_manager.borrow().pref_to_string("BrailleNavHighlight");
2572            return T::highlight_braille(speech, highlight_style);
2573          } else {
2574            debug!("nav_node_adjust: id='{}' offset='{}/{}'", id, self.nav_node_offset, offset);
2575            return T::mark_nav_speech(speech)
2576          }
2577        }
2578      }
2579      return speech;
2580    }
2581    
2582    fn highlight_braille_string(braille: String, highlight_style: String) -> String {
2583        // add dots 7 & 8 to the Unicode braille (28xx)
2584        if &highlight_style == "Off" || braille.is_empty() {
2585            return braille;
2586        }
2587        
2588        // FIX: this seems needlessly complex. It is much simpler if the char can be changed in place...
2589        // find first char that can get the dots and add them
2590        let mut chars = braille.chars().collect::<Vec<char>>();
2591
2592        // the 'b' for baseline indicator is really part of the previous token, so it needs to be highlighted but isn't because it is not Unicode braille
2593        let baseline_indicator_hack = PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth";
2594        // debug!("highlight_braille_string: highlight_style={}\n braille={}", highlight_style, braille);
2595        let mut i_first_modified = 0;
2596        for (i, ch) in chars.iter_mut().enumerate() {
2597            let modified_ch = add_dots_to_braille_char(*ch, baseline_indicator_hack);
2598            if *ch != modified_ch {
2599                *ch = modified_ch; 
2600                i_first_modified = i;
2601                break;
2602            };
2603        };
2604
2605        let mut i_last_modified = i_first_modified;
2606        if &highlight_style != "FirstChar" {
2607            // find last char so that we know when to modify the char
2608            for i in (i_first_modified..chars.len()).rev(){
2609                let ch = chars[i];
2610                let modified_ch = add_dots_to_braille_char(ch, baseline_indicator_hack);
2611                chars[i] = modified_ch;
2612                if ch !=  modified_ch {
2613                    i_last_modified = i;
2614                    break;
2615                }
2616            }
2617        }
2618
2619        if &highlight_style == "All" {
2620            // finish going through the string
2621			#[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
2622            for i in i_first_modified+1..i_last_modified {
2623                chars[i] = add_dots_to_braille_char(chars[i], baseline_indicator_hack);
2624            };
2625        }
2626
2627        let result = chars.into_iter().collect::<String>(); 
2628        // debug!("    result={}", result);
2629        return result;
2630
2631        fn add_dots_to_braille_char(ch: char, baseline_indicator_hack: bool) -> char {
2632            let as_u32 = ch as u32;
2633            if (0x2800..0x28FF).contains(&as_u32) {
2634                return unsafe {char::from_u32_unchecked(as_u32 | 0xC0)};  // safe because we have checked the range
2635            } else if baseline_indicator_hack && ch == 'b' {
2636                return '𝑏'
2637            } else {
2638                return ch;
2639            }
2640        }
2641    }
2642
2643    fn mark_nav_speech(speech: String) -> String {
2644        // add unique markers (since speech is mostly ascii letters and digits, most any symbol will do)
2645        // it's a bug (but happened during intent generation), we might have identical id's, choose innermost one
2646        debug!("mark_nav_speech: adding [[ {} ]] ", &speech);
2647        if !speech.contains("[[") {
2648            return "[[".to_string() + &speech + "]]";
2649        } else {
2650            return speech
2651        }
2652    }
2653
2654    fn replace<T:TreeOrString<'c, 'm, T>>(&'r mut self, replacement: &Replacement, mathml: Element<'c>) -> Result<T> {
2655        return Ok(
2656            match replacement {
2657                Replacement::Text(t) => T::from_string(t.clone(), self.doc)?,
2658                Replacement::XPath(xpath) => xpath.replace(self, mathml)?,
2659                Replacement::TTS(tts) => {
2660                    T::from_string(
2661                        self.speech_rules.pref_manager.borrow().get_tts().replace(tts, &self.speech_rules.pref_manager.borrow(), self, mathml)?,
2662                        self.doc
2663                    )?
2664                },
2665                Replacement::Intent(intent) => {
2666                    intent.replace(self, mathml)?                     
2667                },
2668                Replacement::Test(test) => {
2669                    test.replace(self, mathml)?                     
2670                },
2671                Replacement::With(with) => {
2672                    with.replace(self, mathml)?                     
2673                },
2674                Replacement::SetVariables(vars) => {
2675                    vars.replace(self, mathml)?                     
2676                },
2677                Replacement::Insert(ic) => {
2678                    ic.replace(self, mathml)?                     
2679                },
2680                Replacement::Translate(id) => {
2681                    id.replace(self, mathml)?                     
2682                },
2683            }
2684        )
2685    }
2686
2687    /// Iterate over all the nodes, concatenating the result strings together with a ' ' between them
2688    /// If the node is an element, pattern match it
2689    /// For 'Text' and 'Attribute' nodes, convert them to strings
2690    fn replace_nodes<T:TreeOrString<'c, 'm, T>>(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T> {
2691        return T::replace_nodes(self, nodes, mathml);
2692    }
2693
2694    /// Iterate over all the nodes finding matches for the elements
2695    /// For this case of returning MathML, everything else is an error
2696    fn replace_nodes_tree(&'r mut self, nodes: Vec<Node<'c>>, _mathml: Element<'c>) -> Result<Element<'m>> {
2697        let mut children = Vec::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2698        for node in nodes {
2699            let matched = match node {
2700                Node::Element(n) => self.match_pattern::<Element<'m>>(n)?,
2701                Node::Text(t) =>  {
2702                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2703                    leaf.set_text(t.text());
2704                    leaf
2705                },
2706                Node::Attribute(attr) => {
2707                    // debug!("  from attr with text '{}'", attr.value());
2708                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2709                    leaf.set_text(attr.value());
2710                    leaf
2711                },
2712                _ => {
2713                    bail!("replace_nodes: found unexpected node type!!!");
2714                },
2715            };
2716            children.push(matched);
2717        }
2718
2719        let result = create_mathml_element(&self.doc, "TEMP_NAME");    // FIX: what name should be used?
2720        result.append_children(children);
2721        // debug!("replace_nodes_tree\n{}\n====>>>>>\n", mml_to_string(result));
2722        return Ok( result );
2723    }
2724
2725    fn replace_nodes_string(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
2726        // debug!("replace_nodes: working on {} nodes", nodes.len());
2727        let mut result = String::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2728        let mut first_time = true;
2729        for node in nodes {
2730            if first_time {
2731                first_time = false;
2732            } else {
2733                result.push(' ');
2734            };
2735            let matched = match node {
2736                Node::Element(n) => self.match_pattern::<String>(n)?,
2737                Node::Text(t) =>  self.replace_chars(t.text(), mathml)?,
2738                Node::Attribute(attr) => self.replace_chars(attr.value(), mathml)?,
2739                _ => bail!("replace_nodes: found unexpected node type!!!"),
2740            };
2741            result += &matched;
2742        }
2743        return Ok( result );
2744    }
2745
2746    /// Lookup unicode "pronunciation" of char.
2747    /// Note: TTS is not supported here (not needed and a little less efficient)
2748    pub fn replace_chars(&'r mut self, str: &str, mathml: Element<'c>) -> Result<String> {
2749        let chars = str.chars().collect::<Vec<char>>();
2750        let rules = self.speech_rules;
2751        // handled in match_pattern -- temporarily leaving as comments in case something is missed and needed here
2752        // if self.nav_node_offset > 0 && chars.len() > 1 {
2753        //     if self.nav_node_offset > chars.len() {
2754        //         debug!("replace_chars: nav_node_offset {} is larger than string length {}", self.nav_node_offset, chars.len());
2755        //         self.nav_node_offset = chars.len();
2756        //     }
2757        //     let ch = chars[self.nav_node_offset-1];
2758        //     debug!("replace_chars: adjusted string to '{}' based on nav_node_offset {}", ch, self.nav_node_offset);
2759        //     if rules.translate_single_chars_only {
2760        //         return self.replace_single_char(ch, mathml);
2761        //     } else {
2762        //         return Ok( ch.to_string() );
2763        //     }
2764        // }
2765        if is_quoted_string(str) {  // quoted string -- already translated (set in get_braille_chars)
2766            return Ok(unquote_string(str).to_string());
2767        }
2768        // in a string, avoid "a" -> "eigh", "." -> "point", etc
2769        if rules.translate_single_chars_only {
2770            if chars.len() == 1 {
2771                return self.replace_single_char(chars[0], mathml)
2772            } else {
2773                // more than one char -- fix up non-breaking space
2774                return Ok(str.replace('\u{00A0}', " ").replace(['\u{2061}', '\u{2062}', '\u{2063}', '\u{2064}'], ""))
2775            }
2776        };
2777
2778        let result = chars.iter()
2779            .map(|&ch| self.replace_single_char(ch, mathml))
2780            .collect::<Result<Vec<String>>>()?
2781            .join("");
2782        return Ok( result );
2783    }
2784
2785    fn replace_single_char(&'r mut self, ch: char, mathml: Element<'c>) -> Result<String> {
2786        let ch_as_u32 = ch as u32;
2787        let rules =  self.speech_rules;
2788        let mut unicode = rules.unicode_short.borrow();
2789        let mut replacements = unicode.get( &ch_as_u32 );
2790        // debug!("replace_single_char: looking for unicode {} for char '{}'/{:#06x}, found: {:?}", rules.name, ch, ch_as_u32, replacements);
2791        if replacements.is_none() {
2792            // see if it in the full unicode table (if it isn't loaded already)
2793            let pref_manager = rules.pref_manager.borrow();
2794            let unicode_pref_files = if rules.name == RulesFor::Braille {pref_manager.get_braille_unicode_file()} else {pref_manager.get_speech_unicode_file()};
2795            let should_ignore_file_time = pref_manager.pref_to_string("CheckRuleFiles") == "All";
2796            if rules.unicode_full.borrow().is_empty() || !rules.unicode_full_files.borrow().is_file_up_to_date(unicode_pref_files.1, should_ignore_file_time) {
2797                info!("*** Loading full unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2798                rules.unicode_full.borrow_mut().clear();
2799                rules.unicode_full_files.borrow_mut().set_files_and_times(rules.read_unicode(None, false)?);
2800                info!("# Unicode defs = {}/{}", rules.unicode_short.borrow().len(), rules.unicode_full.borrow().len());
2801            }
2802            unicode = rules.unicode_full.borrow();
2803            replacements = unicode.get( &ch_as_u32 );
2804            if replacements.is_none() {
2805              self.translate_count = 0;     // not in loop
2806              // debug!("*** Did not find unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2807              if rules.translate_single_chars_only || ch.is_ascii() {  // speech or if braille, avoid loop (ASCII remains ASCII if not found)
2808                return Ok(String::from(ch));   // no replacement, so just return the char and hope for the best
2809              } else { // braille -- must turn into braille dots
2810                // Emulate what NVDA does: generate (including single quotes) '\xhhhh' or '\yhhhhhh'
2811                let ch_as_int = ch as u32;
2812                let prefix_indicator = if ch_as_int < 1<<16 {'x'} else {'y'};
2813                return self.replace_chars( &format!("'\\{prefix_indicator}{:06x}'", ch_as_int), mathml);
2814              }
2815            }
2816        };
2817
2818        // map across all the parts of the replacement, collect them up into a Vec, and then concat them together
2819        let result = replacements.unwrap()
2820                    .iter()
2821                    .map(|replacement|
2822                         self.replace(replacement, mathml)
2823                                .with_context(|| format!("Unicode replacement error: {replacement}")) )
2824                    .collect::<Result<Vec<String>>>()?
2825                    .join(" ");
2826         self.translate_count = 0;     // found a replacement, so not in a loop
2827        return Ok(result);
2828    }
2829}
2830
2831/// Hack to allow replacement of `str` with braille chars.
2832pub fn braille_replace_chars(str: &str, mathml: Element) -> Result<String> {
2833    return BRAILLE_RULES.with(|rules| {
2834        let rules = rules.borrow();
2835        let new_package = Package::new();
2836        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), "", 0);
2837        return match rules_with_context.replace_chars(str, mathml) {
2838            Ok(s) => Ok(
2839                s.replace(CONCAT_STRING, "")
2840                 .replace(CONCAT_INDICATOR, "") 
2841                 .replace(POSTFIX_CONCAT_STRING, "")
2842                 .replace(POSTFIX_CONCAT_INDICATOR, "")
2843            ),
2844            Err(e) => Err(e),
2845        }                   
2846
2847
2848    })
2849}
2850
2851
2852
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use crate::init_logger;

    use super::*;

    /// A rule named "default" for `math` with a single xpath replacement.
    const DEFAULT_RULE: &str = r#"---
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;

    /// Same name and tag as [`DEFAULT_RULE`] (so it should replace it), but with two replacements.
    const DEFAULT_RULE_TWO_REPLACEMENTS: &str = r#"---
        {name: default, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;

    /// Same tag as [`DEFAULT_RULE`] but a different name (so it should be added, not replace it).
    const ANOTHER_RULE: &str = r#"---
        {name: another-rule, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;

    /// Parses `yaml` (which must hold exactly one YAML document) and builds the rule it describes into `rules`.
    fn build_rule(yaml: &str, rules: &mut SpeechRules) {
        let docs = YamlLoader::load_from_str(yaml).unwrap();
        assert_eq!(docs.len(), 1);
        SpeechPattern::build(&docs[0], Path::new("testing"), rules).unwrap();
    }

    /// Checks that the first `math` rule is the "default" rule matching "." and that it has `n_replacements` replacements.
    fn assert_first_math_rule_is_default(rules: &SpeechRules, n_replacements: usize) {
        let speech_pattern = &rules.rules["math"][0];
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
        assert_eq!(speech_pattern.replacements.replacements.len(), n_replacements, "\nreplacement failure");
    }

    /// Checks that `MyXPath::add_debug_string_arg(xpath)` succeeds and produces `expected`.
    fn assert_debug_string_arg(xpath: &str, expected: &str) {
        let result = MyXPath::add_debug_string_arg(xpath);
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), expected);
    }

    #[test]
    fn test_read_statement() {
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
        build_rule(DEFAULT_RULE, &mut rules);
        assert_eq!(rules.rules["math"].len(), 1, "\nshould only be one rule");

        assert_first_math_rule_is_default(&rules, 1);
        let speech_pattern = &rules.rules["math"][0];
        assert_eq!(speech_pattern.replacements.replacements[0].to_string(), r#""./*""#, "\nreplacement failure");
    }

    #[test]
    fn test_read_statements_with_replace() {
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
        build_rule(DEFAULT_RULE, &mut rules);
        build_rule(DEFAULT_RULE_TWO_REPLACEMENTS, &mut rules);
        assert_eq!(rules.rules["math"].len(), 1, "\nfirst rule not replaced");

        assert_first_math_rule_is_default(&rules, 2);
    }

    #[test]
    fn test_read_statements_with_add() {
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
        build_rule(DEFAULT_RULE, &mut rules);
        build_rule(ANOTHER_RULE, &mut rules);
        assert_eq!(rules.rules["math"].len(), 2, "\nsecond rule not added");

        assert_first_math_rule_is_default(&rules, 1);
    }

    #[test]
    fn test_debug_no_debug() {
        let str = r#"*[2]/*[3][text()='3']"#;
        assert_debug_string_arg(str, str);
    }

    #[test]
    fn test_debug_no_debug_with_quote() {
        let str = r#"*[2]/*[3][text()='(']"#;
        assert_debug_string_arg(str, str);
    }

    #[test]
    fn test_debug_no_quoted_paren() {
        assert_debug_string_arg(
            r#"DEBUG(*[2]/*[3][text()='3'])"#,
            r#"DEBUG(*[2]/*[3][text()='3'], "*[2]/*[3][text()='3']")"#,
        );
    }

    #[test]
    fn test_debug_quoted_paren() {
        assert_debug_string_arg(
            r#"DEBUG(*[2]/*[3][text()='('])"#,
            r#"DEBUG(*[2]/*[3][text()='('], "*[2]/*[3][text()='(']")"#,
        );
    }

    #[test]
    fn test_debug_quoted_paren_before_paren() {
        assert_debug_string_arg(
            r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics') and IsBracketed(., '(', ')')"#,
            r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics', "ClearSpeak_Matrix = 'Combinatorics'") and IsBracketed(., '(', ')')"#,
        );
    }


// zipped files do NOT include "zz", hence we need to exclude this test
cfg_if::cfg_if! {if #[cfg(not(feature = "include-zip"))] {
    #[test]
    fn test_up_to_date() {
        use crate::interface::*;
        // initialize and move to a directory where making a time change doesn't really matter
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        set_preference("Language", "zz-aa").unwrap();
        // not much is supported in zz
        if let Err(e) = set_mathml("<math><mi>x</mi></math>") {
            error!("{}", crate::errors_to_string(&e));
            panic!("Should not be an error in setting MathML")
        }

        set_preference("CheckRuleFiles", "All").unwrap();
        assert!(!is_file_time_same(), "file's time did not get updated");
        set_preference("CheckRuleFiles", "None").unwrap();
        assert!(is_file_time_same(), "file's time was wrongly updated (preference 'CheckRuleFiles' should have prevented updating)");

        // change a file, cause read_files to be called, and return if MathCAT noticed the change and updated its time
        fn is_file_time_same() -> bool {
            // read and write a unicode file in a test dir
            // files are read in due to setting the MathML

            use std::time::Duration;
            return SPEECH_RULES.with(|rules| {
                let start_main_file = rules.borrow().unicode_short_files.borrow().ft[0].clone();

                // open the file, read all the contents, then write them back so the time changes
                // (the panic message is only built if the read actually fails)
                let contents = std::fs::read(&start_main_file.file).unwrap_or_else(|e|
                    panic!("Failed to read file {} during test: {e}", start_main_file.file.to_string_lossy())
                );
                std::fs::write(start_main_file.file, contents).unwrap();
                std::thread::sleep(Duration::from_millis(5));       // pause a little to make sure the time changes

                // speak should cause the file stored to have a new time
                if let Err(e) = get_spoken_text() {
                    error!("{}", crate::errors_to_string(&e));
                    panic!("Should not be an error in speech")
                }
                return rules.borrow().unicode_short_files.borrow().ft[0].time == start_main_file.time;
            });
        }
    }
}}

    // Disabled: nested DEBUG(...) calls
    // #[test]
    // fn test_nested_debug_quoted_paren() {
    //     let str = r#"DEBUG(*[2]/*[3][DEBUG(text()='(')])"#;
    //     let result = MyXPath::add_debug_string_arg(str);
    //     assert!(result.is_ok());
    //     assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][DEBUG(text()='(')], "DEBUG(*[2]/*[3][DEBUG(text()='(')], \"text()='(')]\")"#);
    // }

}