libmathcat/
speech.rs

1//! The speech module is where the speech rules are read in and speech generated.
2//!
3//! The speech rules call out to the preferences and tts modules and the dividing line is not always clean.
4//! A number of useful utility functions used by other modules are defined here.
5#![allow(clippy::needless_return)]
6use std::path::PathBuf;
7use std::collections::HashMap;
8use std::cell::{RefCell, RefMut};
9use std::sync::LazyLock;
10use sxd_document::dom::{ChildOfElement, Document, Element};
11use sxd_document::{Package, QName};
12use sxd_xpath::context::Evaluation;
13use sxd_xpath::{Factory, Value, XPath};
14use sxd_xpath::nodeset::Node;
15use std::fmt;
16use std::time::SystemTime;
17use crate::definitions::read_definitions_file;
18use crate::errors::*;
19use crate::prefs::*;
20use crate::xpath_functions::is_leaf;
21use yaml_rust::{YamlLoader, Yaml, yaml::Hash};
22use crate::tts::*;
23use crate::infer_intent::*;
24use crate::pretty_print::{mml_to_string, yaml_to_string};
25use std::path::Path;
26use std::rc::Rc;
27use crate::shim_filesystem::{read_to_string_shim, canonicalize_shim};
28use crate::canonicalize::{as_element, create_mathml_element, set_mathml_name, name, MATHML_FROM_NAME_ATTR};
29use regex::Regex;
30use log::{debug, error, info};
31
32
/// Error text used to signal that no speech could be found for the navigation node.
/// `speak_rules` bails with exactly this message when a navigation node id was given
/// but no `[[...]]`-marked span was produced (even after the ":literal:" retry).
pub const NAV_NODE_SPEECH_NOT_FOUND: &str = "NAV_NODE_NOT_FOUND";
34
/// Like lisp's ' (quote foo), this is used to block "replace_chars" being called.
///   Unlike lisp, this is appended to the end of a string (more efficient)
/// At the moment, the only use is BrailleChars(...) -- internally, it calls replace_chars and we don't want it called again.
/// Note: an alternative to this hack is to add "xq" (execute but don't eval the result), but that's heavy-handed for the current need
const NO_EVAL_QUOTE_CHAR: char = '\u{efff}';            // a private space char
/// The UTF-8 encoding of `NO_EVAL_QUOTE_CHAR`.
const NO_EVAL_QUOTE_CHAR_AS_BYTES: [u8;3] = [0xee,0xbf,0xbf];
/// Number of bytes `NO_EVAL_QUOTE_CHAR` occupies at the end of a quoted string.
const N_BYTES_NO_EVAL_QUOTE_CHAR: usize = NO_EVAL_QUOTE_CHAR.len_utf8();

// Compile-time guard: the hand-written byte array must stay the same length as the char's UTF-8 encoding.
const _: () = assert!(N_BYTES_NO_EVAL_QUOTE_CHAR == NO_EVAL_QUOTE_CHAR_AS_BYTES.len());

/// Converts 'string' into a "quoted" string -- use is_quoted_string and unquote_string
///
/// The quote marker is appended to the end of `string`, which is then returned.
pub fn make_quoted_string(mut string: String) -> String {
    string.push(NO_EVAL_QUOTE_CHAR);
    return string;
}

/// Checks the string to see if it is "quoted", i.e., whether it ends with the quote marker.
///
/// Strings shorter than the marker (including the empty string) are never quoted.
pub fn is_quoted_string(str: &str) -> bool {
    return str.as_bytes().ends_with(&NO_EVAL_QUOTE_CHAR_AS_BYTES);
}

/// Removes the quote marker added by `make_quoted_string` and returns the original contents.
///
/// Callers are expected to pass a quoted string (see `is_quoted_string`).
/// If the string is not quoted, it is returned unchanged rather than having its
/// last bytes chopped off (which could panic on short strings or on a char boundary).
pub fn unquote_string(str: &str) -> &str {
    return str.strip_suffix(NO_EVAL_QUOTE_CHAR).unwrap_or(str);
}
63
64
65/// The main external call, `intent_from_mathml` returns a string for the speech associated with the `mathml`.
66///   It matches against the rules that are computed by user prefs such as "Language" and "SpeechStyle".
67///
68/// The speech rules assume `mathml` has been "cleaned" via the canonicalization step.
69///
70/// If the preferences change (and hence the speech rules to use change), or if the rule file changes,
71///   `intent_from_mathml` will detect that and (re)load the proper rules.
72///
73/// A string is returned in call cases.
74/// If there is an error, the speech string will indicate an error.
75pub fn intent_from_mathml<'m>(mathml: Element, doc: Document<'m>) -> Result<Element<'m>> {
76    let intent_tree = intent_rules(&INTENT_RULES, doc, mathml, "")?;
77    doc.root().append_child(intent_tree);
78    return Ok(intent_tree);
79}
80
81pub fn speak_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
82    return speak_rules(&SPEECH_RULES, mathml, nav_node_id, nav_node_offset);
83}
84
85pub fn overview_mathml(mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
86    return speak_rules(&OVERVIEW_RULES, mathml, nav_node_id, nav_node_offset);
87}
88
89
90fn intent_rules<'m>(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, doc: Document<'m>, mathml: Element, nav_node_id: &'m str) -> Result<Element<'m>> {
91    rules.with(|rules| {
92        rules.borrow_mut().read_files()?;
93        let rules = rules.borrow();
94        // debug!("intent_rules:\n{}", mml_to_string(mathml));
95        let should_set_literal_intent = rules.pref_manager.borrow().pref_to_string("SpeechStyle").as_str() == "LiteralSpeak";
96        let original_intent = mathml.attribute_value("intent");
97        if should_set_literal_intent {
98            if let Some(intent) = original_intent {
99                let intent = if intent.contains('(') {intent.replace('(', ":literal(")} else {intent.to_string() + ":literal"};
100                mathml.set_attribute_value("intent", &intent);
101            } else {
102                mathml.set_attribute_value("intent", ":literal");
103            };
104        }
105        let mut rules_with_context = SpeechRulesWithContext::new(&rules, doc, nav_node_id, 0);
106        let intent =  rules_with_context.match_pattern::<Element<'m>>(mathml)
107                    .context("Pattern match/replacement failure!")?;
108        let answer = if name(intent) == "TEMP_NAME" {   // unneeded extra layer
109            assert_eq!(intent.children().len(), 1);
110            as_element(intent.children()[0])
111        } else {
112            intent
113        };
114        if should_set_literal_intent {
115            if let Some(original_intent) = original_intent {
116                mathml.set_attribute_value("intent", original_intent);
117            } else {
118                mathml.remove_attribute("intent");
119            }
120        }
121        return Ok(answer);
122    })
123}
124
/// Speak the MathML
/// If 'nav_node_id' is not an empty string, then the element with that id will have [[...]] around it
///
/// The rule files are (re)read via `read_files()` before matching.
/// The raw string from the rules is post-processed before being returned:
/// `CONCAT_STRING` and `CONCAT_INDICATOR` are removed, optional indicators are removed,
/// leading whitespace and trailing ' ', ',' and ';' are trimmed, and the TTS engine's `merge_pauses` is applied.
///
/// Errors: failures from reading the rules or from matching are returned; when a nav node id is given
/// and no speech for it can be found, the error message is `NAV_NODE_SPEECH_NOT_FOUND`.
fn speak_rules(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, mathml: Element, nav_node_id: &str, nav_node_offset: usize) -> Result<String> {
    return rules.with(|rules| {
        rules.borrow_mut().read_files()?;
        let rules = rules.borrow();
        // debug!("speak_rules:\n{}", mml_to_string(mathml));
        // a fresh (empty) document is handed to the context
        let new_package = Package::new();
        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), nav_node_id, nav_node_offset);
        let speech_string = nestable_speak_rules(& mut rules_with_context, mathml)?;
        // clean up the internal marker characters and stray leading/trailing punctuation, then merge pauses
        return Ok( rules.pref_manager.borrow().get_tts()
            .merge_pauses(remove_optional_indicators(
                &speech_string.replace(CONCAT_STRING, "")
                                    .replace(CONCAT_INDICATOR, "")                            
                            )
            .trim_start().trim_end_matches([' ', ',', ';'])) );
    });

    /// Matches the rules against `mathml` and, when navigating (non-empty `nav_node_id`),
    /// narrows the result to the text between the first "[[" and the following "]]".
    /// If no "[[" is present, it retries once with ":literal:" prefixed to the
    /// `data-intent-property` attribute; if that was already present, it gives up with `NAV_NODE_SPEECH_NOT_FOUND`.
    fn nestable_speak_rules<'c, 's:'c, 'm:'c>(rules_with_context: &mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
        let mut speech_string = rules_with_context.match_pattern::<String>(mathml)
                    .context("Pattern match/replacement failure!")?;
        // Note: [[...]] is added around a matching child, but if the "id" is on 'mathml', the whole string is used
        if !rules_with_context.nav_node_id.is_empty() {
            // See https://github.com/NSoiffer/MathCAT/issues/174 for why we can just start the speech at the nav node
            let intent_attr = mathml.attribute_value("data-intent-property").unwrap_or_default();
            if let Some(start) = speech_string.find("[[") {
                // keep only what is between "[[" and the first "]]" after it
                match speech_string[start+2..].find("]]") {
                    None => bail!("Internal error: looking for '[[...]]' during navigation -- only found '[[' in '{}'", speech_string),
                    Some(end) => speech_string = speech_string[start+2..start+2+end].to_string(),
                }
            } else if !intent_attr.contains(":literal:") {
                // try again with LiteralSpeak -- some parts might have been elided in other SpeechStyles
                mathml.set_attribute_value("data-intent-property", (":literal:".to_string() + intent_attr).as_str());
                // the result is not unwrapped here so that the attribute is put back even if the retry fails
                let speech = nestable_speak_rules(rules_with_context, mathml);
                // NOTE(review): if the attribute was originally absent, this leaves it present with an empty value
                mathml.set_attribute_value("data-intent-property", intent_attr);
                return speech;
            } else {
                bail!(NAV_NODE_SPEECH_NOT_FOUND); //  NAV_NODE_SPEECH_NOT_FOUND is tested for later
            }
        }
        return Ok(speech_string);
    }
}
168
169/// Converts its argument to a string that can be used in a debugging message.
170pub fn yaml_to_type(yaml: &Yaml) -> String {
171    return match yaml {
172        Yaml::Real(v)=> format!("real='{v:#}'"),
173        Yaml::Integer(v)=> format!("integer='{v:#}'"),
174        Yaml::String(v)=> format!("string='{v:#}'"),
175        Yaml::Boolean(v)=> format!("boolean='{v:#}'"),
176        Yaml::Array(v)=> match v.len() {
177            0 => "array with no entries".to_string(),
178            1 => format!("array with the entry: {}", yaml_to_type(&v[0])),
179            _ => format!("array with {} entries. First entry: {}", v.len(), yaml_to_type(&v[0])),
180        }
181        Yaml::Hash(h)=> {
182            let first_pair = 
183                if h.is_empty() {
184                    "no pairs".to_string()
185                } else {
186                    let (key, val) = h.iter().next().unwrap();
187                    format!("({}, {})", yaml_to_type(key), yaml_to_type(val))
188                };
189            format!("dictionary with {} pair{}. A pair: {}", h.len(), if h.len()==1 {""} else {"s"}, first_pair)
190        }
191        Yaml::Alias(_)=> "Alias".to_string(),
192        Yaml::Null=> "Null".to_string(),
193        Yaml::BadValue=> "BadValue".to_string(),       
194    }
195}
196
197fn yaml_type_err(yaml: &Yaml, str: &str) -> Error {
198    anyhow!("Expected {}, found {}", str, yaml_to_type(yaml))
199}
200
201// fn yaml_key_err(dict: &Yaml, key: &str, yaml_type: &str) -> String {
202//     if dict.as_hash().is_none() {
203//        return format!("Expected dictionary with key '{}', found\n{}", key, yaml_to_string(dict, 1));
204//     }
205//     let str = &dict[key];
206//     if str.is_badvalue() {
207//         return format!("Did not find '{}' in\n{}", key,  yaml_to_string(dict, 1));
208//     }
209//     return format!("Type of '{}' is not a {}.\nIt is a {}. YAML value is\n{}", 
210//             key, yaml_type, yaml_to_type(str), yaml_to_string(dict, 0));
211// }
212
213fn find_str<'a>(dict: &'a Yaml, key: &'a str) -> Option<&'a str> {
214    return dict[key].as_str();
215}
216
217/// Returns the Yaml as a `Hash` or an error if it isn't.
218pub fn as_hash_checked(value: &Yaml) -> Result<&Hash> {
219    let result = value.as_hash();
220    let result = result.ok_or_else(|| yaml_type_err(value, "hashmap"))?;
221    return Ok( result );
222}
223
224/// Returns the Yaml as a `Vec` or an error if it isn't.
225pub fn as_vec_checked(value: &Yaml) -> Result<&Vec<Yaml>> {
226    let result = value.as_vec();
227    let result = result.ok_or_else(|| yaml_type_err(value, "array"))?;
228    return Ok( result );
229}
230
231/// Returns the Yaml as a `&str` or an error if it isn't.
232pub fn as_str_checked(yaml: &Yaml) -> Result<&str> {
233    return yaml.as_str().ok_or_else(|| yaml_type_err(yaml, "string"));
234}
235
236
/// A bit of a hack to concatenate replacements (without a ' ').
/// The CONCAT_INDICATOR is added by a "ct:" (instead of 't:') in the speech rules
/// and checked for by the tts code.
/// `speak_rules` removes any that remain from the final speech string.
pub const CONCAT_INDICATOR: &str = "\u{F8FE}";

/// This is the pattern that needs to be matched (and deleted):
/// a space followed by `CONCAT_INDICATOR`. Deleting it joins the text on either side without a space.
pub const CONCAT_STRING: &str = " \u{F8FE}";
244
// A hack (similar to the concatenation indicator) to potentially delete (repetitive) optional replacements.
// The OPTIONAL_INDICATOR is added by "ot:" both before and after the optional string.
const OPTIONAL_INDICATOR: &str  = "\u{F8FD}";
const OPTIONAL_INDICATOR_LEN: usize = OPTIONAL_INDICATOR.len();

/// Returns a copy of `str` with every `OPTIONAL_INDICATOR` removed (the optional text itself is kept).
pub fn remove_optional_indicators(str: &str) -> String {
    // splitting on the indicator and gluing the pieces back together drops just the indicators
    str.split(OPTIONAL_INDICATOR).collect()
}
253
254/// Given a string that should be Yaml, it calls `build_fn` with that string.
255/// The build function/closure should process the Yaml as appropriate and capture any errors and write them to `std_err`.
256/// The returned value should be a Vector containing the paths of all the files that were included.
257pub fn compile_rule<F>(str: &str, mut build_fn: F) -> Result<Vec<PathBuf>> where
258            F: FnMut(&Yaml) -> Result<Vec<PathBuf>> {
259    let docs = YamlLoader::load_from_str(str);
260    match docs {
261        Err(e) => {
262            bail!("Parse error!!: {}", e);
263        },
264        Ok(docs) => {
265            if docs.len() != 1 {
266                bail!("Didn't find rules!");
267            }
268            return build_fn(&docs[0]);
269        }
270    }
271}
272
273pub fn process_include<F>(current_file: &Path, new_file_name: &str, mut read_new_file: F) -> Result<Vec<PathBuf>>
274                    where F: FnMut(&Path) -> Result<Vec<PathBuf>> {
275    let parent_path = current_file.parent();
276    if parent_path.is_none() {
277        bail!("Internal error: {:?} is not a valid file name", current_file);
278    }
279    let mut new_file = match canonicalize_shim(parent_path.unwrap()) {
280        Ok(path) => path,
281        Err(e) => bail!("process_include: canonicalize failed for {} with message {}", parent_path.unwrap().display(), e),
282    };
283
284    // the referenced file might be in a directory that hasn't been zipped up -- find the dir and call the unzip function
285    for unzip_dir in new_file.ancestors() {
286        if unzip_dir.ends_with("Rules") {
287            break;      // nothing to unzip
288        }
289        if unzip_dir.ends_with("Languages") || unzip_dir.ends_with("Braille") {
290            // get the subdir ...Rules/Braille/en/...
291            // could have ...Rules/Braille/definitions.yaml, so 'next()' doesn't exist in this case, but the file wasn't zipped up
292            if let Some(subdir) = new_file.strip_prefix(unzip_dir).unwrap().iter().next() {
293                let default_lang = if unzip_dir.ends_with("Languages") {"en"} else {"UEB;"};
294                PreferenceManager::unzip_files(unzip_dir, subdir.to_str().unwrap(), Some(default_lang)).unwrap_or_default();
295            }
296        }
297    }
298    new_file.push(new_file_name);
299    info!("...processing include: {new_file_name}...");
300    let new_file = match crate::shim_filesystem::canonicalize_shim(new_file.as_path()) {
301        Ok(buf) => buf,
302        Err(msg) => bail!("-include: constructed file name '{}' causes error '{}'",
303                                 new_file.to_str().unwrap(), msg),
304    };
305
306    let mut included_files = read_new_file(new_file.as_path())?;
307    let mut files_read = vec![new_file];
308    files_read.append(&mut included_files);
309    return Ok(files_read);
310}
311
/// As the name says, TreeOrString is either a Tree (Element) or a String
/// It is used to share code during pattern matching
///
/// The trait is implemented for `String` and for `Element<'m>` (with `T` being the implementing type),
/// so generic matching/replacement code can produce either one.
pub trait TreeOrString<'c, 'm:'c, T> {
    /// Turns an element into a `T`. The `String` implementation always returns an error.
    fn from_element(e: Element<'m>) -> Result<T>;
    /// Turns a string into a `T`. The `Element` implementation creates an 'mi' leaf in `doc` holding the text.
    fn from_string(s: String, doc: Document<'m>) -> Result<T>;
    /// Applies a TTS command. The `Element` implementation always returns an error (TTS rules only apply to strings).
    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
    /// Applies the replacements in `ra` to `mathml` (dispatches to the string or tree version on `ReplacementArray`).
    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T>;
    /// Produces the replacement for `nodes` (dispatches to the string or tree version on `SpeechRulesWithContext`).
    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T>;
    /// Highlights braille. Only meaningful for strings; the `Element` implementation panics.
    fn highlight_braille(braille: T, highlight_style: String) -> T;
    /// Marks the speech of the navigation node. Only meaningful for strings; the `Element` implementation panics.
    fn mark_nav_speech(speech: T) -> T;
}
323
324impl<'c, 'm:'c> TreeOrString<'c, 'm, String> for String {
325    fn from_element(_e: Element<'m>) -> Result<String> {
326         bail!("from_element not allowed for strings");
327    }
328
329    fn from_string(s: String, _doc: Document<'m>) -> Result<String> {
330        return Ok(s);
331    }
332
333    fn replace_tts<'s:'c, 'r>(tts: &TTS, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
334        return tts.replace_string(command, prefs, rules_with_context, mathml);
335    }
336
337    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
338        return ra.replace_array_string(rules_with_context, mathml);
339    }
340
341    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
342        return rules.replace_nodes_string(nodes, mathml);
343    }
344
345    fn highlight_braille(braille: String, highlight_style: String) -> String {
346        return SpeechRulesWithContext::highlight_braille_string(braille, highlight_style);
347    }
348
349    fn mark_nav_speech(speech: String) -> String {
350        return SpeechRulesWithContext::mark_nav_speech(speech);
351    }
352}
353
354impl<'c, 'm:'c> TreeOrString<'c, 'm, Element<'m>> for Element<'m> {
355    fn from_element(e: Element<'m>) -> Result<Element<'m>> {
356         return Ok(e);
357    }
358
359    fn from_string(s: String, doc: Document<'m>) -> Result<Element<'m>> {
360        // FIX: is 'mi' really ok?  Don't want to use TEMP_NAME because this name needs to move to the outside world
361        let leaf = create_mathml_element(&doc, "mi");
362        leaf.set_text(&s);
363        return Ok(leaf);
364}
365
366    fn replace_tts<'s:'c, 'r>(_tts: &TTS, _command: &TTSCommandRule, _prefs: &PreferenceManager, _rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, _mathml: Element<'c>) -> Result<Element<'m>> {
367        bail!("Internal error: applying a TTS rule to a tree");
368    }
369
370    fn replace<'s:'c, 'r>(ra: &ReplacementArray, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
371        return ra.replace_array_tree(rules_with_context, mathml);
372    }
373
374    fn replace_nodes<'s:'c, 'r>(rules: &'r mut SpeechRulesWithContext<'c, 's,'m>, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<Element<'m>> {
375        return rules.replace_nodes_tree(nodes, mathml);
376    }
377
378    fn highlight_braille(_braille: Element<'c>, _highlight_style: String) -> Element<'m> {
379        panic!("Internal error: highlight_braille called on a tree");
380    }
381
382    fn mark_nav_speech(_speech: Element<'c>) -> Element<'m> {
383        panic!("Internal error: mark_nav_speech called on a tree");
384    }
385}
386
/// 'Replacement' is an enum that contains all the potential replacement types/structs
/// Hence there are fields 'Test' ("test:"), 'Text" ("t:"), "XPath", etc
///
/// Values are created from a single key/value Yaml pair by `Replacement::build`;
/// the key that produces each variant is noted next to it.
#[derive(Debug, Clone)]
#[allow(clippy::upper_case_acronyms)]
enum Replacement {
    // Note: all of these are pointer types
    Text(String),                       // "t:", "ct:" (prefixed with CONCAT_INDICATOR), "ot:" (wrapped in OPTIONAL_INDICATOR)
    XPath(MyXPath),                     // "x:"
    Intent(Box<Intent>),                // "intent:"
    Test(Box<TestArray>),               // "test:"
    TTS(Box<TTSCommandRule>),           // "pause:", "rate:", "pitch:", "volume:", "audio:", "gender:", "voice:", "spell:", "bookmark:", "pronounce:"
    With(Box<With>),                    // "with:"
    SetVariables(Box<SetVariables>),    // "set_variables:"
    Insert(Box<InsertChildren>),        // "insert:"
    Translate(TranslateExpression),     // "translate:"
}
403
404impl fmt::Display for Replacement {
405    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
406        return write!(f, "{}",
407            match self {
408                Replacement::Test(c) => c.to_string(),
409                Replacement::Text(t) => format!("t: \"{t}\""),
410                Replacement::XPath(x) => x.to_string(),
411                Replacement::Intent(i) => i.to_string(),
412                Replacement::TTS(t) => t.to_string(),
413                Replacement::With(w) => w.to_string(),
414                Replacement::SetVariables(v) => v.to_string(),
415                Replacement::Insert(ic) => ic.to_string(),
416                Replacement::Translate(x) => x.to_string(),
417            }
418        );
419    }
420}
421
422impl Replacement {   
423    fn build(replacement: &Yaml) -> Result<Replacement> {
424        // Replacement -- single key/value (see below for allowed values)
425        let dictionary = replacement.as_hash();
426        if dictionary.is_none() {
427            bail!("  expected a key/value pair. Found {}.",  yaml_to_string(replacement, 0));
428        };
429        let dictionary = dictionary.unwrap();
430        if dictionary.is_empty() { 
431            bail!("No key/value pairs found for key 'replace'.\n\
432                Suggestion: are the following lines indented properly?");
433        }
434        if dictionary.len() > 1 { 
435            bail!("Should only be one key/value pair for the replacement.\n    \
436                    Suggestion: are the following lines indented properly?\n    \
437                    The key/value pairs found are\n{}", yaml_to_string(replacement, 2));
438        }
439
440        // get the single value
441        let (key, value) = dictionary.iter().next().unwrap();
442        let key = key.as_str().ok_or_else(|| anyhow!("replacement key(e.g, 't') is not a string"))?;
443        match key {
444            "t" | "T" => {
445                return Ok( Replacement::Text( as_str_checked(value)?.to_string() ) );
446            },
447            "ct" | "CT" => {
448                return Ok( Replacement::Text( CONCAT_INDICATOR.to_string() + as_str_checked(value)? ) );
449            },
450            "ot" | "OT" => {
451                return Ok( Replacement::Text( OPTIONAL_INDICATOR.to_string() + as_str_checked(value)? + OPTIONAL_INDICATOR ) );
452            },
453            "x" => {
454                return Ok( Replacement::XPath( MyXPath::build(value)
455                    .context("while trying to evaluate value of 'x:'")? ) );
456            },
457            "pause" | "rate" | "pitch" | "volume" | "audio" | "gender" | "voice" | "spell" | "SPELL" | "bookmark" | "pronounce" | "PRONOUNCE" => {
458                return Ok( Replacement::TTS( TTS::build(&key.to_ascii_lowercase(), value)? ) );
459            },
460            "intent" => {
461                return Ok( Replacement::Intent( Intent::build(value)? ) );
462            },
463            "test" => {
464                return Ok( Replacement::Test( Box::new( TestArray::build(value)? ) ) );
465            },
466            "with" => {
467                return Ok( Replacement::With( With::build(value)? ) );
468            },
469            "set_variables" => {
470                return Ok( Replacement::SetVariables( SetVariables::build(value)? ) );
471            },
472            "insert" => {
473                return Ok( Replacement::Insert( InsertChildren::build(value)? ) );
474            },
475            "translate" => {
476                return Ok( Replacement::Translate( TranslateExpression::build(value)
477                    .context("while trying to evaluate value of 'speak:'")? ) );
478            },
479            _ => {
480                bail!("Unknown 'replace' command ({}) with value: {}", key, yaml_to_string(value, 0));
481            }
482        }
483    }
484}
485
// structure used when "insert:" is encountered in a rule
// the 'replacements' are inserted between each node in the 'xpath'
// (built from the 'nodes:' and 'replace:' keys of the "insert:" dictionary)
#[derive(Debug, Clone)]
struct InsertChildren {
    xpath: MyXPath,                     // the replacement nodes
    replacements: ReplacementArray,     // what is inserted between each node
}
493
494impl fmt::Display for InsertChildren {
495    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
496        return write!(f, "InsertChildren:\n  nodes {}\n  replacements {}", self.xpath, &self.replacements);
497    }
498}
499
500impl InsertChildren {
501    fn build(insert: &Yaml) -> Result<Box<InsertChildren>> {
502        // 'insert:' -- 'nodes': xxx 'replace': xxx
503        if insert.as_hash().is_none() {
504            bail!("")
505        }
506        let nodes = &insert["nodes"];
507        if nodes.is_badvalue() { 
508            bail!("Missing 'nodes' as part of 'insert'.\n    \
509                  Suggestion: add 'nodes:' or if present, indent so it is contained in 'insert'");
510        }
511        let nodes = as_str_checked(nodes)?;
512        let replace = &insert["replace"];
513        if replace.is_badvalue() { 
514            bail!("Missing 'replace' as part of 'insert'.\n    \
515                  Suggestion: add 'replace:' or if present, indent so it is contained in 'insert'");
516        }
517        return Ok( Box::new( InsertChildren {
518            xpath: MyXPath::new(nodes.to_string())?,
519            replacements: ReplacementArray::build(replace).context("'replace:'")?,
520        } ) );
521    }
522    
523    // It would be most efficient to do an xpath eval, get the nodes (type: NodeSet) and then intersperse the node_replace()
524    //   calls with replacements for the ReplacementArray parts. But that causes problems with the "pause: auto" calculation because
525    //   the replacements are segmented (can't look to neighbors for the calculation there)
526    // An alternative is to introduce another Replacement enum value, but that's a lot of complication for not that much
527    //    gain (and Node's have contagious lifetimes)
528    // The solution adopted is to find out the number of nodes and build up MyXPaths with each node selected (e.g, "*" => "*[3]")
529    //    and put those nodes into a flat ReplacementArray and then do a standard replace on that.
530    //    This is slower than the alternatives, but reuses a bunch of code and hence is less complicated.
531    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
532        let result = self.xpath.evaluate(&rules_with_context.context_stack.base, mathml)
533                .with_context(||format!("in '{}' replacing after pattern match", &self.xpath.rc.string) )?;
534        match result {
535            Value::Nodeset(nodes) => {
536                if nodes.size() == 0 {
537                    bail!("During replacement, no matching element found");
538                };
539                let nodes = nodes.document_order();
540                let n_nodes = nodes.len();
541                let mut expanded_result = Vec::with_capacity(n_nodes + (n_nodes+1)*self.replacements.replacements.len());
542                expanded_result.push(
543                    Replacement::XPath(
544                        MyXPath::new(format!("{}[{}]", self.xpath.rc.string , 1))?
545                    )
546                );
547                for i in 2..n_nodes+1 {
548                    expanded_result.extend_from_slice(&self.replacements.replacements);
549                    expanded_result.push(
550                        Replacement::XPath(
551                            MyXPath::new(format!("{}[{}]", self.xpath.rc.string , i))?
552                        )
553                    );
554                }
555                let replacements = ReplacementArray{ replacements: expanded_result };
556                return replacements.replace(rules_with_context, mathml);
557            },
558
559            // FIX: should the options be errors???
560            Value::String(t) => { return T::from_string(rules_with_context.replace_chars(&t, mathml)?, rules_with_context.doc); },
561            Value::Number(num)  => { return T::from_string( num.to_string(), rules_with_context.doc ); },
562            Value::Boolean(b)  => { return T::from_string( b.to_string(), rules_with_context.doc ); },          // FIX: is this right???
563        }
564        
565    }    
566}
567
568
/// Regex (compiled once, on first use) for pulling `name='value'` / `name="value"` pairs out of an 'attrs' string.
/// Named capture groups: `name` (the attribute name), `value` (contents of a single-quoted value),
/// and `dqvalue` (contents of a double-quoted value). Only one of `value`/`dqvalue` takes part in a given match.
static ATTR_NAME_VALUE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        // match name='value', where name is sort of an NCNAME (see CONCEPT_OR_LITERAL in infer_intent.rs)
        // The quotes can be either single or double quotes
        // Whitespace is allowed on either side of the '='; the value must be non-empty
        r#"(?P<name>[^\s\u{0}-\u{40}\[\\\]^`\u{7B}-\u{BF}][^\s\u{0}-\u{2C}/:;<=>?@\[\\\]^`\u{7B}-\u{BF}]*)\s*=\s*('(?P<value>[^']+)'|"(?P<dqvalue>[^"]+)")"#
    ).unwrap()
});
576
// structure used when "intent:" is encountered in a rule
// the name is either a string or an xpath that needs evaluation. 99% of the time it is a string
// 'Intent::build' requires that at least one of 'name:' and 'xpath-name:' is given
#[derive(Debug, Clone)]
struct Intent {
    name: Option<String>,           // name of node (from 'name:')
    xpath: Option<MyXPath>,         // alternative to directly using the string (from 'xpath-name:')
    attrs: String,                  // optional attrs -- format "attr1='val1' [attr2='val2'...]" (empty if 'attrs:' is not given)
    children: ReplacementArray,     // children of node (from 'children:')
}
586
587impl fmt::Display for Intent {
588    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
589        let name = if self.name.is_some() {
590            self.name.as_ref().unwrap().to_string()
591        } else {
592            self.xpath.as_ref().unwrap().to_string()
593        };
594        return write!(f, "intent: {}: {},  attrs='{}'>\n      children: {}",
595                        if self.name.is_some() {"name"} else {"xpath-name"}, name,
596                        self.attrs,
597                        &self.children);
598    }
599}
600
601impl Intent {
602    fn build(yaml_dict: &Yaml) -> Result<Box<Intent>> {
603        // 'intent:' -- 'name': xxx 'children': xxx
604        if yaml_dict.as_hash().is_none() {
605            bail!("Array found for contents of 'intent' -- should be dictionary with keys 'name' and 'children'")
606        }
607        let name = &yaml_dict["name"];
608        let xpath_name = &yaml_dict["xpath-name"];
609        if name.is_badvalue() && xpath_name.is_badvalue(){ 
610            bail!("Missing 'name' or 'xpath-name' as part of 'intent'.\n    \
611                  Suggestion: add 'name:' or if present, indent so it is contained in 'intent'");
612        }
613        let attrs = &yaml_dict["attrs"];
614        let replace = &yaml_dict["children"];
615        if replace.is_badvalue() {
616            bail!("Missing 'children' as part of 'intent'.\n    \
617                  Suggestion: add 'children:' or if present, indent so it is contained in 'intent'");
618        }
619        return Ok( Box::new( Intent {
620            name: if name.is_badvalue() {None} else {Some(as_str_checked(name).context("'name'")?.to_string())},
621            xpath: if xpath_name.is_badvalue() {None} else {Some(MyXPath::build(xpath_name).context("'intent'")?)},
622            attrs: if attrs.is_badvalue() {"".to_string()} else {as_str_checked(attrs).context("'attrs'")?.to_string()},
623            children: ReplacementArray::build(replace).context("'children:'")?,
624        } ) );
625    }
626        
627    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
628        let result = self.children.replace::<Element<'m>>(rules_with_context, mathml)
629                    .context("replacing inside 'intent'")?;
630        let mut result = lift_children(result);
631        if name(result) != "TEMP_NAME" && name(result) != "Unknown" {
632            // this case happens when you have an 'intent' replacement as a direct child of an 'intent' replacement
633            let temp = create_mathml_element(&result.document(), "TEMP_NAME");
634            temp.append_child(result);
635            result = temp;
636        }
637        if let Some(intent_name) = &self.name {
638            result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
639            set_mathml_name(result, intent_name.as_str());
640        }
641        if let Some(my_xpath) = &self.xpath{    // self.xpath_name must be != None
642            let xpath_value = my_xpath.evaluate(rules_with_context.get_context(), mathml)?;
643            match xpath_value {
644                Value::String(intent_name) => {
645                    result.set_attribute_value(MATHML_FROM_NAME_ATTR, name(mathml));
646                    set_mathml_name(result, intent_name.as_str())
647                },
648                _ => bail!("'xpath-name' value '{}' was not a string", &my_xpath),
649            }
650        }
651        if self.name.is_none() && self.xpath.is_none() {
652            panic!("Intent::replace: internal error -- neither 'name' nor 'xpath' is set");
653        };
654        
655        for attr in mathml.attributes() {
656            result.set_attribute_value(attr.name(), attr.value());
657        }
658
659        // can't test against name == "math" because intent might a new element
660        if mathml.parent().is_some() && mathml.parent().unwrap().element().is_some() &&
661           result.attribute_value("id") == crate::canonicalize::get_parent(mathml).attribute_value("id") {
662            // avoid duplicate ids -- it's a bug if it does, but this helps in that case
663            result.remove_attribute("id");
664        }
665
666        if !self.attrs.is_empty() {
667            // debug!("MathML after children, before attr processing:\n{}", mml_to_string(mathml));
668            // debug!("Result after children, before attr processing:\n{}", mml_to_string(result));
669            // debug!("Intent::replace attrs = \"{}\"", &self.attrs);
670            for cap in ATTR_NAME_VALUE.captures_iter(&self.attrs) {
671                let matched_value = if cap["value"].is_empty() {&cap["dqvalue"]} else {&cap["value"]};
672                let value_as_xpath = MyXPath::new(matched_value.to_string()).context("attr value inside 'intent'")?;
673                let value = value_as_xpath.evaluate(rules_with_context.get_context(), result)
674                        .context("attr xpath evaluation value inside 'intent'")?;
675                let mut value = value.into_string();
676                if &cap["name"] == INTENT_PROPERTY {
677                    value = simplify_fixity_properties(&value);
678                }
679                // debug!("Intent::replace match\n  name={}\n  value={}\n  xpath value={}", &cap["name"], &cap["value"], &value);
680                if &cap["name"] == INTENT_PROPERTY && value == ":" {
681                    // should have been an empty string, so remove the attribute
682                    result.remove_attribute(INTENT_PROPERTY);
683                } else {
684                    result.set_attribute_value(&cap["name"], &value);
685                }
686            };
687        }
688
689        // debug!("Result from 'intent:'\n{}", mml_to_string(result));
690        return T::from_element(result);
691
692
693        /// "lift" up the children any "TEMP_NAME" child -- could short circuit when only one child
694        fn lift_children(result: Element) -> Element {
695            // debug!("lift_children:\n{}", mml_to_string(result));
696            // most likely there will be the same number of new children as result has, but there could be more
697            let mut new_children = Vec::with_capacity(2*result.children().len());
698            for child_of_element in result.children() {
699                match child_of_element {
700                    ChildOfElement::Element(child) => {
701                        if name(child) == "TEMP_NAME" {
702                            new_children.append(&mut child.children());  // almost always just one
703                        } else {
704                            new_children.push(child_of_element);
705                        }
706                    },
707                    _ => new_children.push(child_of_element),      // text()
708                }
709            }
710            result.replace_children(new_children);
711            return result;
712        }
713    }    
714}
715
// Structure used when "with:" is encountered in a rule.
// The variables are pushed onto a variable stack before the replacements are done
// and popped off it afterwards (see `With::replace`).
#[derive(Debug, Clone)]
struct With {
    variables: VariableDefinitions,     // variables and values
    replacements: ReplacementArray,     // what to do with these vars
}
723
724impl fmt::Display for With {
725    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
726        return write!(f, "with:\n      variables: {}\n      replace: {}", &self.variables, &self.replacements);
727    }
728}
729
730impl With {
731    fn build(vars_replacements: &Yaml) -> Result<Box<With>> {
732        // 'with:' -- 'variables': xxx 'replace': xxx
733        if vars_replacements.as_hash().is_none() {
734            bail!("Array found for contents of 'with' -- should be dictionary with keys 'variables' and 'replace'")
735        }
736        let var_defs = &vars_replacements["variables"];
737        if var_defs.is_badvalue() { 
738            bail!("Missing 'variables' as part of 'with'.\n    \
739                  Suggestion: add 'variables:' or if present, indent so it is contained in 'with'");
740        }
741        let replace = &vars_replacements["replace"];
742        if replace.is_badvalue() { 
743            bail!("Missing 'replace' as part of 'with'.\n    \
744                  Suggestion: add 'replace:' or if present, indent so it is contained in 'with'");
745        }
746        return Ok( Box::new( With {
747            variables: VariableDefinitions::build(var_defs).context("'variables'")?,
748            replacements: ReplacementArray::build(replace).context("'replace:'")?,
749        } ) );
750    }
751
752    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
753        rules_with_context.context_stack.push(self.variables.clone(), mathml)?;
754        let result = self.replacements.replace(rules_with_context, mathml)
755                    .context("replacing inside 'with'")?;
756        rules_with_context.context_stack.pop();
757        return Ok( result );
758    }    
759}
760
// Structure used when "set_variables:" is encountered in a rule.
// The variables are global: they are handed to the context stack's `set_globals`
// (see `SetVariables::replace`) and are never popped off.
#[derive(Debug, Clone)]
struct SetVariables {
    variables: VariableDefinitions,     // variables and values
}
767
768impl fmt::Display for SetVariables {
769    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
770        return write!(f, "SetVariables: variables {}", &self.variables);
771    }
772}
773
774impl SetVariables {
775    fn build(vars: &Yaml) -> Result<Box<SetVariables>> {
776        // 'set_variables:' -- 'variables': xxx (array)
777        if vars.as_vec().is_none() {
778            bail!("'set_variables' -- should be an array of variable name, xpath value");
779        }
780        return Ok( Box::new( SetVariables {
781            variables: VariableDefinitions::build(vars).context("'set_variables'")?
782        } ) );
783    }
784        
785    fn replace<'c, 's:'c, 'm: 'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
786        rules_with_context.context_stack.set_globals(self.variables.clone(), mathml)?;
787        return T::from_string( "".to_string(), rules_with_context.doc );
788    }    
789}
790
791
/// Allow speech of an expression in the middle of a rule (used by "WhereAmI" for navigation)
#[derive(Debug, Clone)]
struct TranslateExpression {
    xpath: MyXPath,     // what to speak: an id when the xpath text starts with '@', otherwise an ordinary xpath replacement
}
797
798impl fmt::Display for TranslateExpression {
799    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
800        return write!(f, "speak: {}", &self.xpath);
801    }
802}
803impl TranslateExpression {
804    fn build(vars: &Yaml) -> Result<TranslateExpression> {
805        // 'translate:' -- xpath (should evaluate to an id)
806        return Ok( TranslateExpression { xpath: MyXPath::build(vars).context("'translate'")? } );
807    }
808        
809    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
810        if self.xpath.rc.string.starts_with('@') {
811            let xpath_value = self.xpath.evaluate(rules_with_context.get_context(), mathml)?;
812            let id = match xpath_value {
813                Value::String(s) => Some(s),
814                Value::Nodeset(nodes) => {
815                    if nodes.size() == 1 {
816                        nodes.document_order_first().unwrap().attribute().map(|attr| attr.value().to_string())
817                    } else {
818                        None
819                    }
820                },
821                _ => None,
822            };
823            match id {
824                None => bail!("'translate' value '{}' is not a string or an attribute value (correct by using '@id'??):\n", self.xpath),
825                Some(id) => {
826                    let speech = speak_mathml(mathml, &id, 0)?;
827                    return T::from_string(speech, rules_with_context.doc);
828                }
829            }
830        } else {
831            return T::from_string(
832                self.xpath.replace(rules_with_context, mathml).context("'translate'")?,
833                rules_with_context.doc
834            );
835        }  
836    } 
837}
838
839
/// An array of rule `Replacement`s (text, xpath, tts commands, etc)
///
/// The replacements are applied in order by the `replace*` methods.
#[derive(Debug, Clone)]
pub struct ReplacementArray {
    replacements: Vec<Replacement>
}
845
846impl fmt::Display for ReplacementArray {
847    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
848        return write!(f, "{}", self.pretty_print_replacements());
849    }
850}
851
852impl ReplacementArray {
853    /// Return an empty `ReplacementArray`
854    pub fn build_empty() -> ReplacementArray {
855        return ReplacementArray {
856            replacements: vec![]
857        }
858    }
859
860    /// Convert a Yaml input into a [`ReplacementArray`].
861    /// Any errors are passed back out.
862    pub fn build(replacements: &Yaml) -> Result<ReplacementArray> {
863        // replacements is either a single replacement or an array of replacements
864        let result= if replacements.is_array() {
865            let replacements = replacements.as_vec().unwrap();
866            replacements
867                .iter()
868                .enumerate()    // useful for errors
869                .map(|(i, r)| Replacement::build(r)
870                            .with_context(|| format!("replacement #{} of {}", i+1, replacements.len())))
871                .collect::<Result<Vec<Replacement>>>()?
872        } else {
873            vec![ Replacement::build(replacements)?]
874        };
875
876        return Ok( ReplacementArray{ replacements: result } );
877    }
878
879    /// Do all the replacements in `mathml` using `rules`.
880    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
881        return T::replace(self, rules_with_context, mathml);
882    }
883
884    pub fn replace_array_string<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<String> {
885        // loop over the replacements and build up a vector of strings, excluding empty ones.
886        // * eliminate any redundance
887        // * add/replace auto-pauses
888        // * join the remaining vector together
889        let mut replacement_strings = Vec::with_capacity(self.replacements.len());   // probably conservative guess
890        for replacement in self.replacements.iter() {
891            let string: String = rules_with_context.replace(replacement, mathml)?;
892            if !string.is_empty() {
893                replacement_strings.push(string);
894            }
895        }
896
897        if replacement_strings.is_empty() {
898            return Ok( "".to_string() );
899        }
900        // delete an optional text that is repetitive
901        // we do this by looking for the optional text marker, and if present, check for repetition at end of previous string
902        // if repetitive, we delete the optional string
903        // if not, we leave the markers because the repetition might happen several "levels" up
904        // this could also be done in a final cleanup of the entire string (where we remove any markers),
905        //   but the match is harder (rust regex lacks look behind pattern match) and it is less efficient
906        // Note: we skip the first string since it can't be repetitive of something at this level
907        for i in 1..replacement_strings.len()-1 {
908            if let Some(bytes) = is_repetitive(&replacement_strings[i-1], &replacement_strings[i])  {
909                replacement_strings[i] = bytes.to_string();
910            } 
911        }
912                        
913        for i in 0..replacement_strings.len() {
914            if replacement_strings[i].contains(PAUSE_AUTO_STR) {
915                let before = if i == 0 {""} else {&replacement_strings[i-1]};
916                let after = if i+1 == replacement_strings.len() {""} else {&replacement_strings[i+1]};
917                replacement_strings[i] = replacement_strings[i].replace(
918                    PAUSE_AUTO_STR,
919                    &rules_with_context.speech_rules.pref_manager.borrow().get_tts().compute_auto_pause(&rules_with_context.speech_rules.pref_manager.borrow(), before, after));
920            }
921        }
922
923        // join the strings together with spaces in between
924        // concatenation (removal of spaces) is saved for the top level because they otherwise are stripped at the wrong sometimes
925        return Ok( replacement_strings.join(" ") );
926
927        fn is_repetitive<'a>(prev: &str, optional: &'a str) -> Option<&'a str> {
928            // OPTIONAL_INDICATOR surrounds the optional text
929            // minor optimization -- lots of short strings and the OPTIONAL_INDICATOR takes a few bytes, so skip the check for those strings
930            if optional.len() <=  2 * OPTIONAL_INDICATOR_LEN {
931                return None;
932            }
933            
934            // should be exactly one match -- ignore more than one for now
935            match optional.find(OPTIONAL_INDICATOR) {
936                None => return None,
937                Some(start_index) => {
938                    let optional_word_start_slice = &optional[start_index + OPTIONAL_INDICATOR_LEN..];
939                    // now find the end
940                    match optional_word_start_slice.find(OPTIONAL_INDICATOR) {
941                        None => panic!("Internal error: missing end optional char -- text handling is corrupted!"),
942                        Some(end_index) => {
943                            let optional_word = &optional_word_start_slice[..end_index];
944                            // debug!("check if '{}' is repetitive",  optional_word);
945                            // debug!("   prev: '{}', next '{}'", prev, optional);
946                            let prev = prev.trim_end().as_bytes();
947                            if prev.len() > optional_word.len() &&
948                               &prev[prev.len()-optional_word.len()..] == optional_word.as_bytes() {
949                                return Some( optional_word_start_slice[optional_word.len() + OPTIONAL_INDICATOR_LEN..].trim_start() );
950                            } else {
951                                return None;
952                            }
953                        }
954                    }
955                }
956            }
957        }
958    }
959
960    pub fn replace_array_tree<'c, 's:'c, 'm:'c>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<Element<'m>> {
961        // shortcut for common case (don't build a new tree node)
962        if self.replacements.len() == 1 {
963            return rules_with_context.replace::<Element<'m>>(&self.replacements[0], mathml);
964        }
965
966        let new_element = create_mathml_element(&rules_with_context.doc, "Unknown");  // Hopefully set later (in Intent::Replace())
967        let mut new_children = Vec::with_capacity(self.replacements.len());
968        for child in self.replacements.iter() {
969            let child = rules_with_context.replace::<Element<'m>>(child, mathml)?;
970            new_children.push(ChildOfElement::Element(child));
971        };
972        new_element.append_children(new_children);
973        return Ok(new_element);
974    }
975
976
977    /// Return true if there are no replacements.
978    pub fn is_empty(&self) -> bool {
979        return self.replacements.is_empty();
980    }
981    
982    fn pretty_print_replacements(&self) -> String {
983        let mut group_string = String::with_capacity(128);
984        if self.replacements.len() == 1 {
985            group_string += &format!("[{}]", self.replacements[0]);
986        } else {
987            group_string += &self.replacements.iter()
988                    .map(|replacement| format!("\n  - {replacement}"))
989                    .collect::<Vec<String>>()
990                    .join("");
991            group_string += "\n";
992        }
993        return group_string;
994    }
995}
996
997
998
// MyXPath is a wrapper around an 'XPath' that keeps around the original xpath expr (as a string) so it can be used in error reporting.
// Because we want to be able to clone them and XPath doesn't support clone(), MyXPath is a wrapper around an internal RCMyXPath
// that is shared via an Rc.
// It supports the standard SpeechRule functionality of building and replacing.
#[derive(Debug)]
struct RCMyXPath {
    xpath: XPath,          // the compiled xpath
    string: String,        // store for error reporting
}
1007
// Cloneable handle to a compiled xpath along with its source text; cloning only clones the Rc.
#[derive(Debug, Clone)]
pub struct MyXPath {
    rc: Rc<RCMyXPath>        // rather than putting Rc around both 'xpath' and 'string', just use one and indirect to internal RCMyXPath
}
1012
1013
1014impl fmt::Display for MyXPath {
1015    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1016        return write!(f, "\"{}\"", self.rc.string);
1017    }
1018}
1019
1020// pub fn xpath_count() -> (usize, usize) {
1021//     return (XPATH_CACHE.with( |cache| cache.borrow().len()), unsafe{XPATH_CACHE_HITS} );
1022// }
// Per-thread cache of already built `MyXPath`s, keyed by the xpath's source text.
// `MyXPath::new` looks here first and only compiles (and then stores) an xpath that is not yet present.
thread_local!{
    static XPATH_CACHE: RefCell<HashMap<String, MyXPath>> = RefCell::new( HashMap::with_capacity(2047) );
}
1026// static mut XPATH_CACHE_HITS: usize = 0;
1027
1028impl MyXPath {
1029    fn new(xpath: String) -> Result<MyXPath> {
1030        return XPATH_CACHE.with( |cache|  {
1031            let mut cache = cache.borrow_mut();
1032            return Ok(
1033                match cache.get(&xpath) {
1034                    Some(compiled_xpath) => {
1035                        // unsafe{ XPATH_CACHE_HITS += 1;};
1036                        compiled_xpath.clone()
1037                    },
1038                    None => {
1039                        let new_xpath = MyXPath {
1040                            rc: Rc::new( RCMyXPath {
1041                                xpath: MyXPath::compile_xpath(&xpath)?,
1042                                string: xpath.clone()
1043                            })};
1044                        cache.insert(xpath.clone(), new_xpath.clone());
1045                        new_xpath
1046                    },
1047                }
1048            )
1049        });
1050    }
1051
1052    pub fn build(xpath: &Yaml) -> Result<MyXPath> {
1053        let xpath = match xpath {
1054            Yaml::String(s) => s.to_string(),
1055            Yaml::Integer(i) => i.to_string(),
1056            Yaml::Real(s) => s.to_string(),
1057            Yaml::Boolean(s) => s.to_string(),
1058            Yaml::Array(v) =>
1059                // array of strings -- concatenate them together
1060                v.iter()
1061                    .map(as_str_checked)
1062                    .collect::<Result<Vec<&str>>>()?
1063                    .join(" "),
1064            _ => bail!("Bad value when trying to create an xpath: {}", yaml_to_string(xpath, 1)),
1065        };
1066        return MyXPath::new(xpath);
1067    }
1068
1069    fn compile_xpath(xpath: &str) -> Result<XPath> {
1070        let factory = Factory::new();
1071        let xpath_with_debug_info = MyXPath::add_debug_string_arg(xpath)?;
1072        let compiled_xpath = factory.build(&xpath_with_debug_info)
1073                        .with_context(|| format!(
1074                            "Could not compile XPath for pattern:\n{}{}",
1075                            &xpath, more_details(xpath)))?;
1076        return match compiled_xpath {
1077            Some(xpath) => Ok(xpath),
1078            None => bail!("Problem compiling Xpath for pattern:\n{}{}",
1079                            &xpath, more_details(xpath)),
1080        };
1081
1082        
1083        fn more_details(xpath: &str) -> String {
1084            // try to give a better error message by counting [], (), 's, and "s
1085            if xpath.is_empty() {
1086                return "xpath is empty string".to_string();
1087            }
1088            let as_bytes = xpath.trim().as_bytes();
1089            if as_bytes[0] == b'\'' && as_bytes[as_bytes.len()-1] != b'\'' {
1090                return "\nmissing \"'\"".to_string();
1091            }
1092            if (as_bytes[0] == b'"' && as_bytes[as_bytes.len()-1] != b'"') ||
1093               (as_bytes[0] != b'"' && as_bytes[as_bytes.len()-1] == b'"'){
1094                return "\nmissing '\"'".to_string();
1095            }
1096
1097            let mut i_bytes = 0;      // keep track of # of bytes into string for error reporting
1098            let mut paren_count = 0;    // counter to make sure they are balanced
1099            let mut i_paren = 0;      // position of the outermost open paren
1100            let mut bracket_count = 0;
1101            let mut i_bracket = 0;
1102            for ch in xpath.chars() {
1103                if ch == '(' {
1104                    if paren_count == 0 {
1105                        i_paren = i_bytes;
1106                    }
1107                    paren_count += 1;
1108                } else if ch == '[' {
1109                    if bracket_count == 0 {
1110                        i_bracket = i_bytes;
1111                    }
1112                    bracket_count += 1;
1113                } else if ch == ')' {
1114                    if paren_count == 0 {
1115                        return format!("\nExtra ')' found after '{}'", &xpath[i_paren..i_bytes]);
1116                    }
1117                    paren_count -= 1;
1118                    if paren_count == 0 && bracket_count > 0 && i_bracket > i_paren {
1119                        return format!("\nUnclosed brackets found at '{}'", &xpath[i_paren..i_bytes]);
1120                    }
1121                } else if ch == ']' {
1122                    if bracket_count == 0 {
1123                        return format!("\nExtra ']' found after '{}'", &xpath[i_bracket..i_bytes]);
1124                    }
1125                    bracket_count -= 1;
1126                    if bracket_count == 0 && paren_count > 0 && i_paren > i_bracket {
1127                        return format!("\nUnclosed parens found at '{}'", &xpath[i_bracket..i_bytes]);
1128                    }
1129                }
1130                i_bytes += ch.len_utf8();
1131            }
1132            return "".to_string();
1133        }
1134    }
1135
1136    /// Convert DEBUG(...) input to the internal function which is DEBUG(arg, arg_as_string)
1137    fn add_debug_string_arg(xpath: &str) -> Result<String> {
1138        // do a quick check to see if "DEBUG" is in the string -- this is the common case
1139        let debug_start = xpath.find("DEBUG(");
1140        if debug_start.is_none() {
1141            return Ok( xpath.to_string() );
1142        }
1143
1144        let debug_start = debug_start.unwrap();
1145        let mut before_paren = xpath[..debug_start+5].to_string();   // includes "DEBUG"
1146        let chars = xpath[debug_start+5..].chars().collect::<Vec<char>>();     // begins at '('
1147        before_paren.push_str(&chars_add_debug_string_arg(&chars).with_context(|| format!("In xpath='{xpath}'"))?);
1148        // debug!("add_debug_string_arg: {}", before_paren);
1149        return Ok(before_paren);
1150
1151        fn chars_add_debug_string_arg(chars: &[char]) -> Result<String>  {
1152            // Find all the DEBUG(...) commands in 'xpath' and adds a string argument.
1153            // The DEBUG function that is used internally takes two arguments, the second one being a string version of the DEBUG arg.
1154            //   Being a string, any quotes need to be escaped, and DEBUGs inside of DEBUGs need more escaping.
1155            //   This is done via recursive calls to this function.
1156            assert_eq!(chars[0], '(', "{} does not start with ')'", chars.iter().collect::<String>());
1157            let mut count = 1;  // open/close count
1158            let mut i = 1;
1159            let mut inside_quote = false;
1160            while i < chars.len() {
1161                let ch = chars[i];
1162                match ch {
1163                    '\\' => {
1164                        if i+1 == chars.len() {
1165                            bail!("Syntax error in DEBUG: last char is escape char\nDebug string: '{}'", chars.iter().collect::<String>());
1166                        }
1167                        i += 1;
1168                    },
1169                    '\'' => inside_quote = !inside_quote,
1170                    '(' => {
1171                        if !inside_quote {
1172                            count += 1;
1173                        }
1174                        // FIX: it would be more efficient to spot "DEBUG" preceding this and recurse rather than matching the whole string and recursing
1175                    },
1176                    ')' => {
1177                        if !inside_quote {
1178                            count -= 1;
1179                            if count == 0 {
1180                                let arg = &chars[1..i].iter().collect::<String>();
1181                                let escaped_arg = arg.replace('"', "\\\"");
1182                                // DEBUG(...) may be inside 'arg' -- recurse
1183                                let processed_arg = MyXPath::add_debug_string_arg(arg)?;
1184
1185                                // DEBUG(...) may be in the remainder of the string -- recurse
1186                                let processed_rest = MyXPath::add_debug_string_arg(&chars[i+1..].iter().collect::<String>())?;
1187                                return Ok( format!("({processed_arg}, \"{escaped_arg}\"){processed_rest}") );
1188                            }
1189                        }
1190                    },
1191                    _ => (),
1192                }
1193                i += 1;
1194            }
1195            bail!("Syntax error in DEBUG: didn't find matching closing paren\nDEBUG{}", chars.iter().collect::<String>());
1196        }
1197    }
1198
1199    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1200        // return true if there is no condition or if the condition evaluates to true
1201        return Ok(
1202            match self.evaluate(context, mathml)? {
1203                Value::Boolean(b) => b,
1204                Value::Nodeset(nodes) => nodes.size() > 0,
1205                _                      => false,      
1206            }
1207        )
1208    }
1209
1210    pub fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1211        if self.rc.string == "process-intent(.)" {
1212            return T::from_element( infer_intent(rules_with_context, mathml)? );
1213        }
1214        
1215        let result = self.evaluate(&rules_with_context.context_stack.base, mathml)
1216                .with_context(|| format!("in '{}' replacing after pattern match", &self.rc.string) )?;
1217        let string = match result {
1218                Value::Nodeset(nodes) => {
1219                    if nodes.size() == 0 {
1220                        bail!("During replacement, no matching element found");
1221                    }
1222                    return rules_with_context.replace_nodes(nodes.document_order(), mathml);
1223                },
1224                Value::String(s) => s,
1225                Value::Number(num) => num.to_string(),
1226                Value::Boolean(b) => b.to_string(),          // FIX: is this right???
1227        };
1228        // Hack!: this test for input that starts with a '$' (defined variable), avoids a double evaluate;
1229        // We don't need NO_EVAL_QUOTE_CHAR here, but the more general solution of a quoted execute (- xq:) would avoid this hack
1230        let result = if self.rc.string.starts_with('$') {string} else {rules_with_context.replace_chars(&string, mathml)?};
1231        return T::from_string(result, rules_with_context.doc );
1232    }
1233    
1234    pub fn evaluate<'c>(&self, context: &sxd_xpath::Context<'c>, mathml: Element<'c>) -> Result<Value<'c>> {
1235        // debug!("evaluate: {}", self);
1236        let result = self.rc.xpath.evaluate(context, mathml);
1237        return match result {
1238            Ok(val) => Ok( val ),
1239            Err(e) => {
1240                // debug!("MyXPath::trying to evaluate:\n  '{}'\n caused the error\n'{}'", self, e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", ""));
1241                bail!( "{}\n\n",
1242                     // remove confusing parts of error message from xpath
1243                    e.to_string().replace("OwnedPrefixedName { prefix: None, local_part:", "").replace(" }", "") );
1244            }
1245        };
1246    }
1247
1248    pub fn test_input<F>(self, f: F) -> bool where F: Fn(&str) -> bool {
1249        return f(self.rc.string.as_ref());
1250    }
1251}
1252
// 'SpeechPattern' holds a single pattern.
// Some info is not needed beyond converting the Yaml to the SpeechPattern, but is useful for error reporting.
// The two main parts are the pattern to be matched and the replacements to do if there is a match.
// Any variables/prefs that are defined/set are also stored.
#[derive(Debug)]
struct SpeechPattern {
    pattern_name: String,                 // shown as "name" when the pattern is displayed
    tag_name: String,                     // shown as "tag" when the pattern is displayed
    file_name: String,                    // presumably the rule file this pattern came from -- TODO confirm
    pattern: MyXPath,                     // the xpath expr to attempt to match
    match_uses_var_defs: bool,            // include var_defs in context for matching
    var_defs: VariableDefinitions,        // any variable definitions [can be and probably is an empty vector most of the time]
    replacements: ReplacementArray,       // the replacements in case there is a match
}
1267
1268impl fmt::Display for SpeechPattern {
1269    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1270        return write!(f, "[name: {}, tag: {},\n  variables: {:?}, pattern: {},\n  replacement: {}]",
1271                self.pattern_name, self.tag_name, self.var_defs, self.pattern,
1272                self.replacements.pretty_print_replacements());
1273    }
1274}
1275
1276impl SpeechPattern  {
1277    fn build(dict: &Yaml, file: &Path, rules: &mut SpeechRules) -> Result<Option<Vec<PathBuf>>> {
1278        // Rule::SpeechPattern
1279        //   build { "pattern_name", "tag_name", "pattern", "replacement" }
1280        // or recurse via include: file_name
1281
1282        // debug!("\nbuild_speech_pattern: dict:\n{}", yaml_to_string(dict, 0));
1283        if let Some(include_file_name) = find_str(dict, "include") {
1284            let do_include_fn = |new_file: &Path| {
1285                rules.read_patterns(new_file)
1286            };
1287
1288            return Ok( Some(process_include(file, include_file_name, do_include_fn)?) );
1289        }
1290
1291        let pattern_name = find_str(dict, "name");
1292
1293        // tag_named can be either a string (most common) or an array of strings
1294        let mut tag_names: Vec<&str> = Vec::new();
1295        match find_str(dict, "tag") {
1296            Some(str) => tag_names.push(str),
1297            None => {
1298                // check for array
1299                let tag_array  = &dict["tag"];
1300                tag_names = vec![];
1301                if tag_array.is_array() {
1302                    for (i, name) in tag_array.as_vec().unwrap().iter().enumerate() {
1303                        match as_str_checked(name) {
1304                            Err(e) => return Err(
1305                                e.context(
1306                                    format!("tag name '{}' is not a string in:\n{}",
1307                                        &yaml_to_string(&tag_array.as_vec().unwrap()[i], 0),
1308                                        &yaml_to_string(dict, 1)))
1309                            ),
1310                            Ok(str) => tag_names.push(str),
1311                        };
1312                    }
1313                } else {
1314                    bail!("Errors trying to find 'tag' in:\n{}", &yaml_to_string(dict, 1));
1315                }
1316            }
1317        }
1318
1319        if pattern_name.is_none() {
1320            if dict.is_null() {
1321                bail!("Error trying to find 'name': empty value (two consecutive '-'s?");
1322            } else {
1323                bail!("Errors trying to find 'name' in:\n{}", &yaml_to_string(dict, 1));
1324            };
1325        };
1326        let pattern_name = pattern_name.unwrap().to_string();
1327
1328        // FIX: add check to make sure tag_name is a valid MathML tag name
1329        if dict["match"].is_badvalue() {
1330            bail!("Did not find 'match' in\n{}", yaml_to_string(dict, 1));
1331        }
1332        if dict["replace"].is_badvalue() {
1333            bail!("Did not find 'replace' in\n{}", yaml_to_string(dict, 1));
1334        }
1335    
1336        // xpath's can't be cloned, so we need to do a 'build_xxx' for each tag name
1337        for tag_name in tag_names {
1338            let tag_name = tag_name.to_string();
1339            let pattern_xpath = MyXPath::build(&dict["match"])
1340                    .with_context(|| {
1341                        format!("value for 'match' in rule ({}: {}):\n{}",
1342                                tag_name, pattern_name, yaml_to_string(dict, 1))
1343                    })?;
1344            let speech_pattern =
1345                Box::new( SpeechPattern{
1346                    pattern_name: pattern_name.clone(),
1347                    tag_name: tag_name.clone(),
1348                    file_name: file.to_str().unwrap().to_string(),
1349                    match_uses_var_defs: dict["variables"].is_array() && pattern_xpath.rc.string.contains('$'),    // FIX: should look at var_defs for actual name
1350                    pattern: pattern_xpath,
1351                    var_defs: VariableDefinitions::build(&dict["variables"])
1352                        .with_context(|| {
1353                            format!("value for 'variables' in rule ({}: {}):\n{}",
1354                                    tag_name, pattern_name, yaml_to_string(dict, 1))
1355                        })?,
1356                    replacements: ReplacementArray::build(&dict["replace"])
1357                        .with_context(|| {
1358                            format!("value for 'replace' in rule ({}: {}). Replacements:\n{}",
1359                                    tag_name, pattern_name, yaml_to_string(&dict["replace"], 1))
1360                    })?
1361                } );
1362            // get the array of rules for the tag name
1363            let rule_value = rules.rules.entry(tag_name).or_default();
1364
1365            // if the name exists, replace it. Otherwise add the new rule
1366            match rule_value.iter().enumerate().find(|&pattern| pattern.1.pattern_name == speech_pattern.pattern_name) {
1367                None => rule_value.push(speech_pattern),
1368                Some((i, _old_pattern)) => {
1369                    let old_rule = &rule_value[i];
1370                    info!("\n\n***WARNING***: replacing {}/'{}' in {} with rule from {}\n",
1371                            old_rule.tag_name, old_rule.pattern_name, old_rule.file_name, speech_pattern.file_name);
1372                    rule_value[i] = speech_pattern;
1373                },
1374            }
1375        }
1376
1377        return Ok(None);
1378    }
1379
1380    fn is_match(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1381        if self.tag_name != mathml.name().local_part() && self.tag_name != "*" && self.tag_name != "!*" {
1382            return Ok( false );
1383        }
1384
1385        // debug!("\nis_match: pattern='{}'", self.pattern_name);
1386        // debug!("    pattern_expr {:?}", self.pattern);
1387        // debug!("is_match: mathml is\n{}", mml_to_string(mathml));
1388        return Ok(
1389            match self.pattern.evaluate(context, mathml)? {
1390                Value::Boolean(b)       => b,
1391                Value::Nodeset(nodes) => nodes.size() > 0,
1392                _                             => false,
1393            }
1394        );
1395    }
1396}
1397
1398
// 'Test' holds information used if the replacement is a "test:" clause.
// The condition is an xpath expr and the "else:" part is optional.

/// The entries of a "test:" clause, in the order they were written.
/// `TestArray::replace` tries them in that order.
#[derive(Debug, Clone)]
struct TestArray {
    tests: Vec<Test>
}
1406
1407impl fmt::Display for TestArray {
1408    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1409        for test in &self.tests {
1410            writeln!(f, "{test}")?;
1411        }
1412        return Ok( () );
1413    }
1414}
1415
1416impl TestArray {
1417    fn build(test: &Yaml) -> Result<TestArray> {
1418        // 'test:' for convenience takes either a dictionary with keys if/else_if/then/then_test/else/else_test or
1419        //      or an array of those values (there should be at most one else/else_test)
1420
1421        // if 'test' is a dictionary ('Hash'), we convert it to an array with one entry and proceed
1422        let tests = if test.as_hash().is_some() {
1423            vec![test]
1424        } else if let Some(vec) = test.as_vec() {
1425            vec.iter().collect()
1426        } else {
1427            bail!("Value for 'test:' is neither a dictionary or an array.")
1428        };
1429
1430        // each entry in 'tests' should be a dictionary with keys if/then/then_test/else/else_test
1431        // a valid entry is one of:
1432        //   if:/else_if:, then:/then_test: and optional else:/else_test:
1433        //   else:/else_test: -- if this case, it should be the last entry in 'tests'
1434        // 'if:' should only be the first entry in the array; 'else_if' should never be the first entry. Otherwise, they are the same
1435        let mut test_array = vec![];
1436        for test in tests {
1437            if test.as_hash().is_none() {
1438                bail!("Value for array entry in 'test:' must be a dictionary/contain keys");
1439            }
1440            let if_part = &test[if test_array.is_empty() {"if"} else {"else_if"}];
1441            if !if_part.is_badvalue() {
1442                // first case: if:, then:, optional else:
1443                let condition = Some( MyXPath::build(if_part)? );
1444                let then_part = TestOrReplacements::build(test, "then", "then_test", true)?;
1445                let else_part = TestOrReplacements::build(test, "else", "else_test", false)?;
1446                let n_keys = if else_part.is_none() {2} else {3};
1447                if test.as_hash().unwrap().len() > n_keys {
1448                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found in the 'then' clause of 'test'");
1449                };
1450                test_array.push(
1451                    Test { condition, then_part, else_part }
1452                );
1453            } else {
1454                // second case: should be else/else_test
1455                let else_part = TestOrReplacements::build(test, "else", "else_test", true)?;
1456                if test.as_hash().unwrap().len() > 1 {
1457                    bail!("A key other than 'if', 'else_if', 'then', 'then_test', 'else', or 'else_test' was found the 'else' clause of 'test'");
1458                };
1459                test_array.push(
1460                    Test { condition: None, then_part: None, else_part }
1461                );
1462                
1463                // there shouldn't be any trailing tests
1464                if test_array.len() < test.as_hash().unwrap().len() {
1465                    bail!("'else'/'else_test' key is not last key in 'test:'");
1466                }
1467            }
1468        };
1469
1470        if test_array.is_empty() {
1471            bail!("No entries for 'test:'");
1472        }
1473
1474        return Ok( TestArray { tests: test_array } );
1475    }
1476
1477    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1478        for test in &self.tests {
1479            if test.is_true(&rules_with_context.context_stack.base, mathml)? {
1480                assert!(test.then_part.is_some());
1481                return test.then_part.as_ref().unwrap().replace(rules_with_context, mathml);
1482            } else if let Some(else_part) = test.else_part.as_ref() {
1483                return else_part.replace(rules_with_context, mathml);
1484            }
1485        }
1486        return T::from_string("".to_string(), rules_with_context.doc);
1487    }
1488}
1489
#[derive(Debug, Clone)]
// Used to hold then/then_test and also else/else_test -- only one of these can be present at a time
// ('then'/'else' hold replacements; 'then_test'/'else_test' hold a nested set of tests -- see 'TestOrReplacements::build')
enum TestOrReplacements {
    Replacements(ReplacementArray),     // replacements to use when a test is true
    Test(TestArray),                    // the array of if/then/else tests
}
1496
1497impl fmt::Display for TestOrReplacements {
1498    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1499        if let TestOrReplacements::Test(_) = self {
1500            write!(f, "  _test")?;
1501        }
1502        write!(f, ":")?;
1503        return match self {
1504            TestOrReplacements::Test(t) => write!(f, "{t}"),
1505            TestOrReplacements::Replacements(r) => write!(f, "{r}"),
1506        };
1507    }
1508}
1509
1510impl TestOrReplacements {
1511    fn build(test: &Yaml, replace_key: &str, test_key: &str, key_required: bool) -> Result<Option<TestOrReplacements>> {
1512        let part = &test[replace_key];
1513        let test_part = &test[test_key];
1514        if !part.is_badvalue() && !test_part.is_badvalue() { 
1515            bail!(format!("Only one of '{}' or '{}' is allowed as part of 'test'.\n{}\n    \
1516                  Suggestion: delete one or adjust indentation",
1517                    replace_key, test_key, yaml_to_string(test, 2)));
1518        }
1519        if part.is_badvalue() && test_part.is_badvalue() {
1520            if key_required {
1521                bail!(format!("Missing one of '{}'/'{}:' as part of 'test:'\n{}\n   \
1522                    Suggestion: add the missing key or indent so it is contained in 'test'",
1523                    replace_key, test_key, yaml_to_string(test, 2)))
1524            } else {
1525                return Ok( None );
1526            }
1527        }
1528        // at this point, we have only one of the two options
1529        if test_part.is_badvalue() {
1530            return Ok( Some( TestOrReplacements::Replacements( ReplacementArray::build(part)? ) ) );
1531        } else {
1532            return Ok( Some( TestOrReplacements::Test( TestArray::build(test_part)? ) ) );
1533        }
1534    }
1535
1536    fn replace<'c, 's:'c, 'm:'c, T:TreeOrString<'c, 'm, T>>(&self, rules_with_context: &mut SpeechRulesWithContext<'c, 's,'m>, mathml: Element<'c>) -> Result<T> {
1537        return match self {
1538            TestOrReplacements::Replacements(r) => r.replace(rules_with_context, mathml),
1539            TestOrReplacements::Test(t) => t.replace(rules_with_context, mathml),
1540        }
1541    }
1542}
1543
/// One entry of a "test:" clause (see `TestArray::build`).
#[derive(Debug, Clone)]
struct Test {
    condition: Option<MyXPath>,                 // the if/else_if expression; None for an entry that only has else/else_test
    then_part: Option<TestOrReplacements>,      // then/then_test; None for an entry that only has else/else_test
    else_part: Option<TestOrReplacements>,      // else/else_test; optional when there is a condition
}
1550impl fmt::Display for Test {
1551    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1552        write!(f, "test: [ ")?;
1553        if let Some(if_part) = &self.condition {
1554            write!(f, " if: '{if_part}'")?;
1555        }
1556        if let Some(then_part) = &self.then_part {
1557            write!(f, " then{then_part}")?;
1558        }
1559        if let Some(else_part) = &self.else_part {
1560            write!(f, " else{else_part}")?;
1561        }
1562        return write!(f, "]");
1563    }
1564}
1565
1566impl Test {
1567    fn is_true(&self, context: &sxd_xpath::Context, mathml: Element) -> Result<bool> {
1568        return match self.condition.as_ref() {
1569            None => Ok( false ),     // trivially false -- want to do else part
1570            Some(condition) => condition.is_true(context, mathml)
1571                                .context("Failure in conditional test"),
1572        }
1573    }
1574}
1575
// Used for speech rules with "variables: ..."
// This is the unevaluated definition: the value is an xpath expression that is evaluated when the
// definition is applied (see 'ContextStack::push' and 'ContextStack::set_globals').
#[derive(Debug, Clone)]
struct VariableDefinition {
    name: String,     // name of variable
    value: MyXPath,   // xpath value, typically a constant like "true" or "0", but could be "*/*[1]" to store some nodes   
}
1582
1583impl fmt::Display for VariableDefinition {
1584    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1585        return write!(f, "[name: {}={}]", self.name, self.value);
1586    }   
1587}
1588
// Used for speech rules with "variables: ..."
// Unlike 'VariableDefinition', this holds an already evaluated value: 'ContextStack::push' saves the
// value a variable had before it is redefined so that 'ContextStack::pop' can restore it.
#[derive(Debug)]
struct VariableValue<'v> {
    name: String,       // name of variable
    value: Option<Value<'v>>,   // the saved xpath value; None if the variable was not defined (displayed as "unset")
}
1595
1596impl fmt::Display for VariableValue<'_> {
1597    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1598        let value = match &self.value {
1599            None => "unset".to_string(),
1600            Some(val) => format!("{val:?}")
1601        };
1602        return write!(f, "[name: {}, value: {}]", self.name, value);
1603    }   
1604}
1605
1606impl VariableDefinition {
1607    fn build(name_value_def: &Yaml) -> Result<VariableDefinition> {
1608        match name_value_def.as_hash() {
1609            Some(map) => {
1610                if map.len() != 1 {
1611                    bail!("definition is not a key/value pair. Found {}",
1612                            yaml_to_string(name_value_def, 1) );
1613                }
1614                let (name, value) = map.iter().next().unwrap();
1615                let name = as_str_checked( name)
1616                    .with_context(|| format!( "definition name is not a string: {}",
1617                            yaml_to_string(name, 1) ))?.to_string();
1618                match value {
1619                    Yaml::Boolean(_) | Yaml::String(_)  | Yaml::Integer(_) | Yaml::Real(_) => (),
1620                    _ => bail!("definition value is not a string, boolean, or number. Found {}",
1621                            yaml_to_string(value, 1) )
1622                };
1623                return Ok(
1624                    VariableDefinition{
1625                        name,
1626                        value: MyXPath::build(value)?
1627                    }
1628                );
1629            },
1630            None => bail!("definition is not a key/value pair. Found {}",
1631                            yaml_to_string(name_value_def, 1) )
1632        }
1633    }
1634}
1635
1636
/// The variable definitions of a rule, in the order they appear in the rule's 'variables' array.
/// Empty when the rule has no 'variables' key.
#[derive(Debug, Clone)]
struct VariableDefinitions {
    defs: Vec<VariableDefinition>
}
1641
1642impl fmt::Display for VariableDefinitions {
1643    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1644        for def in &self.defs {
1645            write!(f, "{def},")?;
1646        }
1647        return Ok( () );
1648    }
1649}
1650
// The values saved by one call to 'ContextStack::push' (one entry per variable that was defined by that push).
struct VariableValues<'v> {
    defs: Vec<VariableValue<'v>>
}
1654
1655impl fmt::Display for VariableValues<'_> {
1656    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1657        for value in &self.defs {
1658            write!(f, "{value}")?;
1659        }
1660        return writeln!(f);
1661    }
1662}
1663
1664impl VariableDefinitions {
1665    fn new(len: usize) -> VariableDefinitions {
1666        return VariableDefinitions{ defs: Vec::with_capacity(len) };
1667    }
1668
1669    fn build(defs: &Yaml) -> Result<VariableDefinitions> {
1670        if defs.is_badvalue() {
1671            return Ok( VariableDefinitions::new(0) );
1672        };
1673        if defs.is_array() {
1674            let defs = defs.as_vec().unwrap();
1675            let mut definitions = VariableDefinitions::new(defs.len());
1676            for def in defs {
1677                let variable_def = VariableDefinition::build(def)
1678                        .context("definition of 'variables'")?;
1679                definitions.push( variable_def);
1680            };
1681            return Ok (definitions );
1682        }
1683        bail!( "'variables' is not an array of {{name: xpath-value}} definitions. Found {}'",
1684                yaml_to_string(defs, 1) );
1685    }
1686
1687    fn push(&mut self, var_def: VariableDefinition) {
1688        self.defs.push(var_def);
1689    }
1690
1691    fn len(&self) -> usize {
1692        return self.defs.len();
1693    }
1694}
1695
// Holds the xpath context used for evaluation along with a stack of saved variable values.
// 'push' redefines variables in 'base' and saves their previous values; 'pop' restores them.
struct ContextStack<'c> {
    // Note: values are generated by calling value_of on an Evaluation -- that makes the two lifetimes the same
    old_values: Vec<VariableValues<'c>>,   // store old values so they can be set on pop 
    base: sxd_xpath::Context<'c>                      // initial context -- contains all the function defs and pref variables
}
1701
1702impl fmt::Display for ContextStack<'_> {
1703    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1704        writeln!(f, " {} old_values", self.old_values.len())?;
1705        for values in &self.old_values {
1706            writeln!(f, "  {values}")?;
1707        }
1708        return writeln!(f);
1709    }
1710}
1711
1712impl<'c, 'r> ContextStack<'c> {
1713    fn new<'a,>(pref_manager: &'a PreferenceManager) -> ContextStack<'c> {
1714        let prefs = pref_manager.merge_prefs();
1715        let mut context_stack = ContextStack {
1716            base: ContextStack::base_context(prefs),
1717            old_values: Vec::with_capacity(31)      // should avoid allocations
1718        };
1719        // FIX: the list of variables to set should come from definitions.yaml
1720        // These can't be set on the <math> tag because of the "translate" command which starts speech at an 'id'
1721        context_stack.base.set_variable("MatchingPause", Value::Boolean(false));
1722        context_stack.base.set_variable("IsColumnSilent", Value::Boolean(false));
1723
1724
1725        return context_stack;
1726    }
1727
1728    fn base_context(var_defs: PreferenceHashMap) -> sxd_xpath::Context<'c> {
1729        let mut context  = sxd_xpath::Context::new();
1730        context.set_namespace("m", "http://www.w3.org/1998/Math/MathML");
1731        crate::xpath_functions::add_builtin_functions(&mut context);
1732        for (key, value) in var_defs {
1733            context.set_variable(key.as_str(), yaml_to_value(&value));
1734            // if let Some(str_value) = value.as_str() {
1735            //     if str_value != "Auto" {
1736            //         debug!("Set {}='{}'", key.as_str(), str_value);
1737            //     }
1738            // }
1739        };
1740        return context;
1741    }
1742
1743    fn set_globals(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1744        // for each var/value pair, evaluate the value and add the var/value to the base context
1745        for def in &new_vars.defs {
1746            // set the new value
1747            let new_value = match def.value.evaluate(&self.base, mathml) {
1748                Ok(val) => val,
1749                Err(_) => bail!(format!("Can't evaluate variable def for {}", def)),
1750            };
1751            let qname = QName::new(def.name.as_str());
1752            self.base.set_variable(qname, new_value);
1753        }
1754        return Ok( () );
1755    }
1756
1757    fn push(&'r mut self, new_vars: VariableDefinitions, mathml: Element<'c>) -> Result<()> {
1758        // store the old value and set the new one 
1759        let mut old_values = VariableValues {defs: Vec::with_capacity(new_vars.defs.len()) };
1760        let evaluation = Evaluation::new(&self.base, Node::Element(mathml));
1761        for def in &new_vars.defs {
1762            // get the old value (might not be defined)
1763            let qname = QName::new(def.name.as_str());
1764            let old_value = evaluation.value_of(qname).cloned();
1765            old_values.defs.push( VariableValue{ name: def.name.clone(), value: old_value} );
1766        }
1767
1768        // use a second loop because of borrow problem with self.base and 'evaluation'
1769        for def in &new_vars.defs {
1770            // set the new value
1771            let new_value = match def.value.evaluate(&self.base, mathml) {
1772                Ok(val) => val,
1773                Err(_) => Value::Nodeset(sxd_xpath::nodeset::Nodeset::new()),
1774            };
1775            let qname = QName::new(def.name.as_str());
1776            self.base.set_variable(qname, new_value);
1777        }
1778        self.old_values.push(old_values);
1779        return Ok( () );
1780    }
1781
1782    fn pop(&mut self) {
1783        const MISSING_VALUE: &str = "-- unset value --";     // can't remove a variable from context, so use this value
1784        let old_values = self.old_values.pop().unwrap();
1785        for variable in old_values.defs {
1786            let qname = QName::new(&variable.name);
1787            let old_value = match variable.value {
1788                None => Value::String(MISSING_VALUE.to_string()),
1789                Some(val) => val,
1790            };
1791            self.base.set_variable(qname, old_value);
1792        }
1793    }
1794}
1795
1796
1797fn yaml_to_value<'b>(yaml: &Yaml) -> Value<'b> {
1798    return match yaml {
1799        Yaml::String(s) => Value::String(s.clone()),
1800        Yaml::Boolean(b)  => Value::Boolean(*b),
1801        Yaml::Integer(i)   => Value::Number(*i as f64),
1802        Yaml::Real(s)   => Value::Number(s.parse::<f64>().unwrap()),
1803        _  => {
1804            error!("yaml_to_value: illegal type found in Yaml value: {}", yaml_to_string(yaml, 1));
1805            Value::String("".to_string())
1806        },
1807    }
1808}
1809
1810
// Information for matching a Unicode char (defined in unicode.yaml) and building its replacement
struct UnicodeDef {
    ch: u32,                    // the character, stored as a number (the unicode tables are keyed by u32)
    speech: ReplacementArray    // the replacements for the character
}
1816
1817impl  fmt::Display for UnicodeDef {
1818    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1819        return write!(f, "UnicodeDef{{ch: {}, speech: {:?}}}", self.ch, self.speech);
1820    }
1821}
1822
1823impl UnicodeDef {
1824    fn build(unicode_def: &Yaml, file_name: &Path, speech_rules: &SpeechRules, use_short: bool) -> Result<Option<Vec<PathBuf>>> {
1825        if let Some(include_file_name) = find_str(unicode_def, "include") {
1826            let do_include_fn = |new_file: &Path| {
1827                speech_rules.read_unicode(Some(new_file.to_path_buf()), use_short)
1828            };
1829            return Ok( Some(process_include(file_name, include_file_name, do_include_fn)?) );
1830        }
1831        // key: char, value is replacement or array of replacements
1832        let dictionary = unicode_def.as_hash();
1833        if dictionary.is_none() {
1834            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1835        }
1836
1837        let dictionary = dictionary.unwrap();
1838        if dictionary.len() != 1 {
1839            bail!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0));
1840        }
1841
1842        let (ch, replacements) = dictionary.iter().next().ok_or_else(|| anyhow!("Expected a unicode definition (e.g, '+':[t: \"plus\"]'), found {}", yaml_to_string(unicode_def, 0)))?;
1843        let mut unicode_table = if use_short {
1844            speech_rules.unicode_short.borrow_mut()
1845        } else {
1846            speech_rules.unicode_full.borrow_mut()
1847        };
1848        if let Some(str) = ch.as_str() {
1849            if str.is_empty() {
1850                bail!("Empty character definition. Replacement is {}", replacements.as_str().unwrap());
1851            }
1852            let mut chars = str.chars();
1853            let first_ch = chars.next().unwrap();       // non-empty string, so a char exists
1854            if chars.next().is_some() {                       // more than one char
1855                if str.contains('-')  {
1856                    return process_range(str, replacements, unicode_table);
1857                } else if first_ch != '0' {     // exclude 0xDDDD
1858                    for ch in str.chars() {     // restart the iterator
1859                        let ch_as_str = ch.to_string();
1860                        if unicode_table.insert(ch as u32, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1861                                            .with_context(|| format!("In definition of char: '{str}'"))?.replacements).is_some() {
1862                            error!("*** Character '{}' (0x{:X}) is repeated", ch, ch as u32);
1863                        }
1864                    }
1865                    return Ok(None);
1866                }
1867            }
1868        }
1869
1870        let ch = UnicodeDef::get_unicode_char(ch)?;
1871        if unicode_table.insert(ch, ReplacementArray::build(replacements)
1872                                        .with_context(|| format!("In definition of char: '{}' (0x{})",
1873                                                                        char::from_u32(ch).unwrap(), ch))?.replacements).is_some() {
1874            error!("*** Character '{}' (0x{:X}) is repeated", char::from_u32(ch).unwrap(), ch);
1875        }
1876        return Ok(None);
1877
1878        fn process_range(def_range: &str, replacements: &Yaml, mut unicode_table: RefMut<HashMap<u32,Vec<Replacement>>>) -> Result<Option<Vec<PathBuf>>> {
1879            // should be a character range (e.g., "A-Z")
1880            // iterate over that range and also substitute the char for '.' in the 
1881            let mut range = def_range.split('-');
1882            let first = range.next().unwrap().chars().next().unwrap() as u32;
1883            let last = range.next().unwrap().chars().next().unwrap() as u32;
1884            if range.next().is_some() {
1885                bail!("Character range definition has more than one '-': '{}'", def_range);
1886            }
1887
1888            for ch in first..last+1 {
1889                let ch_as_str = char::from_u32(ch).unwrap().to_string();
1890                unicode_table.insert(ch, ReplacementArray::build(&substitute_ch(replacements, &ch_as_str))
1891                                        .with_context(|| format!("In definition of char: '{def_range}'"))?.replacements);
1892            };
1893
1894            return Ok(None)
1895        }
1896
1897        fn substitute_ch(yaml: &Yaml, ch: &str) -> Yaml {
1898            return match yaml {
1899                Yaml::Array(v) => {
1900                    Yaml::Array(
1901                        v.iter()
1902                         .map(|e| substitute_ch(e, ch))
1903                         .collect::<Vec<Yaml>>()
1904                    )
1905                },
1906                Yaml::Hash(h) => {
1907                    Yaml::Hash(
1908                        h.iter()
1909                         .map(|(key,val)| (key.clone(), substitute_ch(val, ch)) )
1910                         .collect::<Hash>()
1911                    )
1912                },
1913                Yaml::String(s) => Yaml::String( s.replace('.', ch) ),
1914                _ => yaml.clone(),
1915            }
1916        }
1917    }
1918    
1919    fn get_unicode_char(ch: &Yaml) -> Result<u32> {
1920        // either "a" or 0x1234 (number)
1921        if let Some(ch) = ch.as_str() {
1922            let mut ch_iter = ch.chars();
1923            let unicode_ch = ch_iter.next();
1924            if unicode_ch.is_none() || ch_iter.next().is_some() {
1925                bail!("Wanted unicode char, found string '{}')", ch);
1926            };
1927            return Ok( unicode_ch.unwrap() as u32 );
1928        }
1929    
1930        if let Some(num) = ch.as_i64() {
1931            return Ok( num as u32 );
1932        }
1933        bail!("Unicode character '{}' can't be converted to an code point", yaml_to_string(ch, 0));
1934    }    
1935}
1936
1937// Fix: there should be a cache so subsequent library calls don't have to read in the same speech rules
1938//   likely a cache of size 1 is fine
1939// Fix: all statics should be gathered together into one structure that is a Mutex
1940//   for each library call, we should grab a lock on the Mutex in case others try to call
1941//   at the same time.
1942//   If this turns out to be something that others actually do, then a cache > 1 would be good
1943
 // Maps a key to the patterns stored under it ('SpeechPattern::build' stores patterns by tag name).
 type RuleTable = HashMap<String, Vec<Box<SpeechPattern>>>;
 // Shared, mutable map from a character (as a u32) to its replacements (filled in by 'UnicodeDef::build').
 type UnicodeTable = Rc<RefCell<HashMap<u32,Vec<Replacement>>>>;
 // Shared, mutable 'FilesAndTimes'.
 type FilesAndTimesShared = Rc<RefCell<FilesAndTimes>>;
1947
1948 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
1949 pub enum RulesFor {
1950     Intent,
1951     Speech,
1952     OverView,
1953     Navigation,
1954     Braille,
1955 }
1956
1957 impl fmt::Display for RulesFor {
1958    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1959        let name = match self {
1960            RulesFor::Intent => "Intent",
1961            RulesFor::Speech => "Speech",
1962            RulesFor::OverView => "OverView",
1963            RulesFor::Navigation => "Navigation",
1964            RulesFor::Braille => "Braille",
1965        };
1966       return write!(f, "{name}");
1967    }
1968 }
1969
1970 
/// A file path paired with a time that is compared against the file's modification time.
#[derive(Debug, Clone)]
pub struct FileAndTime {
    file: PathBuf,
    time: SystemTime,
}

impl FileAndTime {
    /// Creates an entry for `file` whose time is the Unix epoch.
    fn new(file: PathBuf) -> FileAndTime {
        FileAndTime { file, time: SystemTime::UNIX_EPOCH }
    }

    /// Returns the path as a string if it is valid UTF-8 (used for debugging preference settings).
    pub fn debug_get_file(&self) -> Option<&str> {
        self.file.to_str()
    }

    /// Creates an entry for `file` whose time is the file's current modification time
    /// (the Unix epoch if that can't be determined).
    pub fn new_with_time(file: PathBuf) -> FileAndTime {
        let time = FileAndTime::get_metadata(&file);
        FileAndTime { file, time }
    }

    /// Returns true if the file's current modification time is not later than the stored time.
    pub fn is_up_to_date(&self) -> bool {
        let current_mod_time = FileAndTime::get_metadata(&self.file);
        current_mod_time <= self.time
    }

    /// Returns the modification time of `path`.
    /// The Unix epoch is returned on wasm targets and whenever the time can't be read.
    fn get_metadata(path: &Path) -> SystemTime {
        use std::fs;
        if cfg!(target_family = "wasm") {
            return SystemTime::UNIX_EPOCH;
        }
        fs::metadata(path)
            .and_then(|metadata| metadata.modified())
            .unwrap_or(SystemTime::UNIX_EPOCH)
    }

}
2015#[derive(Debug, Default)]
2016pub struct FilesAndTimes {
2017    // ft[0] is the main file -- other files are included by it (or recursively)
2018    // We could be a little smarter about invalidation by tracking what file is the parent (including file),
2019    // but it seems more complicated than it is worth
2020    ft: Vec<FileAndTime>
2021}
2022
2023impl FilesAndTimes {
2024    pub fn new(start_path: PathBuf) -> FilesAndTimes {
2025        let mut ft = Vec::with_capacity(8);
2026        ft.push( FileAndTime::new(start_path) );
2027        return FilesAndTimes{ ft };
2028    }
2029
2030    /// Returns true if the main file matches the corresponding preference location and files' times are all current
2031    pub fn is_file_up_to_date(&self, pref_path: &Path, should_ignore_file_time: bool) -> bool {
2032
2033        // if the time isn't set or the path is different from the preference (which might have changed), return false
2034        if self.ft.is_empty() || self.as_path() != pref_path {
2035            return false;
2036        }
2037        if should_ignore_file_time || cfg!(target_family = "wasm") {
2038            return true;
2039        }
2040        if  self.ft[0].time == SystemTime::UNIX_EPOCH {
2041            return false;
2042        }
2043
2044
2045        // check the time stamp on the included files -- if the head file hasn't changed, the the paths for the included files will the same
2046        for file in &self.ft {
2047            if !file.is_up_to_date() {
2048                return false;
2049            }
2050        }
2051        return true;
2052    }
2053
2054    fn set_files_and_times(&mut self, new_files: Vec<PathBuf>)  {
2055        self.ft.clear();
2056        for path in new_files {
2057            let time = FileAndTime::get_metadata(&path);      // do before move below
2058            self.ft.push( FileAndTime{ file: path, time })
2059        }
2060    }
2061
2062    pub fn as_path(&self) -> &Path {
2063        assert!(!self.ft.is_empty());
2064        return &self.ft[0].file;
2065    }
2066
2067    pub fn paths(&self) -> Vec<PathBuf> {
2068        return self.ft.iter().map(|ft| ft.file.clone()).collect::<Vec<PathBuf>>();
2069    }
2070
2071}
2072
2073
/// `SpeechRules` encapsulates a named group of speech rules (e.g, "ClearSpeak")
/// along with the preferences to be used for speech.
// Note: if we can't read the files, an error message is stored in the structure and needs to be checked.
// I tried using Result<SpeechRules>, but it was a mess with all the unwrapping.
// Important: the code needs to be careful to check this at the top level calls
pub struct SpeechRules {
    // error message; an empty string means "no error" (see `get_error`)
    error: String,
    // which family of rules this is (intent, speech, overview, navigation, braille)
    name: RulesFor,
    pub pref_manager: Rc<RefCell<PreferenceManager>>,
    rules: RuleTable,                              // the speech rules used (partitioned into MathML tags in hashmap, then linearly searched)
    rule_files: FilesAndTimes,                     // files that were read
    translate_single_chars_only: bool,             // strings like "half" don't want 'a's translated, but braille does
    unicode_short: UnicodeTable,                   // the short list of rules used for Unicode characters
    unicode_short_files: FilesAndTimesShared,     // files that were read
    unicode_full:  UnicodeTable,                   // the long remaining rules used for Unicode characters
    unicode_full_files: FilesAndTimesShared,      // files that were read
    definitions_files: FilesAndTimesShared,       // files that were read
}
2092
2093impl fmt::Display for SpeechRules {
2094    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2095        writeln!(f, "SpeechRules '{}'\n{})", self.name, self.pref_manager.borrow())?;
2096        let mut rules_vec: Vec<(&String, &Vec<Box<SpeechPattern>>)> = self.rules.iter().collect();
2097        rules_vec.sort_by(|(tag_name1, _), (tag_name2, _)| tag_name1.cmp(tag_name2));
2098        for (tag_name, rules) in rules_vec {
2099            writeln!(f, "   {}: #patterns {}", tag_name, rules.len())?;
2100        };
2101        return writeln!(f, "   {}+{} unicode entries", &self.unicode_short.borrow().len(), &self.unicode_full.borrow().len());
2102    }
2103}
2104
2105
/// `SpeechRulesWithContext` encapsulates a named group of speech rules (e.g, "ClearSpeak")
/// along with the preferences to be used for speech.
/// Because speech rules can define variables, there is also a context that is carried with them
///
/// Lifetimes: `'s` is the lifetime of the borrowed rules, `'c` that of the context stack, and
/// `'m` that of the document and the nav node id; both `'s` and `'m` outlive `'c`.
pub struct SpeechRulesWithContext<'c, 's:'c, 'm:'c> {
    speech_rules: &'s SpeechRules,
    context_stack: ContextStack<'c>,   // current value of (context) variables
    doc: Document<'m>,
    // id of the node being navigated to; compared against each matched node's "id" attribute
    nav_node_id: &'m str,
    // when > 0 and the nav node is a leaf, selects (1-based) a single character of the leaf's text (see `find_match`)
    nav_node_offset: usize,
    pub inside_spell: bool,     // hack to allow 'spell' to avoid infinite loop (see 'spell' implementation in tts.rs)
    pub translate_count: usize, // hack to avoid 'translate' infinite loop (see 'spell' implementation in tts.rs)
}
2118
2119impl<'c, 's:'c, 'm:'c> fmt::Display for SpeechRulesWithContext<'c, 's,'m> {
2120    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2121        writeln!(f, "SpeechRulesWithContext \n{})", self.speech_rules)?;
2122        return writeln!(f, "   {} context entries, nav node id '({}, {})'", &self.context_stack, self.nav_node_id, self.nav_node_offset);
2123    }
2124}
2125
// Per-thread state. `SpeechRules::new` hands out clones of the `Rc`s below: the BRAILLE_* values
// to the braille rules and the SPEECH_* values to all of the other rules.
thread_local!{
    /// SPEECH_UNICODE_SHORT is shared among several rules, so `Rc` is used
    static SPEECH_UNICODE_SHORT: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
        
    /// SPEECH_UNICODE_FULL is shared among several rules, so `Rc` is used
    static SPEECH_UNICODE_FULL: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(6500) ) );
        
    /// BRAILLE_UNICODE_SHORT is handed out the same way as the speech tables, so `Rc` is used
    static BRAILLE_UNICODE_SHORT: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(500) ) );
        
    /// BRAILLE_UNICODE_FULL is handed out the same way as the speech tables, so `Rc` is used
    static BRAILLE_UNICODE_FULL: UnicodeTable =
        Rc::new( RefCell::new( HashMap::with_capacity(5000) ) );

    /// SPEECH_DEFINITION_FILES_AND_TIMES is shared among several rules, so `Rc` is used
    static SPEECH_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// BRAILLE_DEFINITION_FILES_AND_TIMES is handed out the same way as the speech version, so `Rc` is used
    static BRAILLE_DEFINITION_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// SPEECH_UNICODE_SHORT_FILES_AND_TIMES is shared among several rules, so `Rc` is used
    static SPEECH_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// SPEECH_UNICODE_FULL_FILES_AND_TIMES is shared among several rules, so `Rc` is used
    static SPEECH_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// BRAILLE_UNICODE_SHORT_FILES_AND_TIMES is handed out the same way as the speech version, so `Rc` is used
    static BRAILLE_UNICODE_SHORT_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// BRAILLE_UNICODE_FULL_FILES_AND_TIMES is handed out the same way as the speech version, so `Rc` is used
    static BRAILLE_UNICODE_FULL_FILES_AND_TIMES: FilesAndTimesShared =
        Rc::new( RefCell::new(FilesAndTimes::default()) );
        
    /// The current set of intent rules
    // maybe this should be a small cache of rules in case people switch rules/prefs?
    pub static INTENT_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Intent, true) );

    /// The current set of speech rules
    pub static SPEECH_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Speech, true) );

    /// The current set of overview rules
    pub static OVERVIEW_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::OverView, true) );

    /// The current set of navigation rules
    pub static NAVIGATION_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Navigation, true) );

    /// The current set of braille rules -- the only ones created with `translate_single_chars_only` set to false
    pub static BRAILLE_RULES: RefCell<SpeechRules> =
            RefCell::new( SpeechRules::new(RulesFor::Braille, false) );
}
2184
2185impl SpeechRules {
2186    pub fn new(name: RulesFor, translate_single_chars_only: bool) -> SpeechRules {
2187        let globals = if name == RulesFor::Braille {
2188            (
2189                (BRAILLE_UNICODE_SHORT.with(Rc::clone), BRAILLE_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2190                (BRAILLE_UNICODE_FULL. with(Rc::clone), BRAILLE_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2191                BRAILLE_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2192            )
2193        } else {
2194            (
2195                (SPEECH_UNICODE_SHORT.with(Rc::clone), SPEECH_UNICODE_SHORT_FILES_AND_TIMES.with(Rc::clone)),
2196                (SPEECH_UNICODE_FULL. with(Rc::clone), SPEECH_UNICODE_FULL_FILES_AND_TIMES.with(Rc::clone)),
2197                SPEECH_DEFINITION_FILES_AND_TIMES.with(Rc::clone),
2198            )
2199        };
2200
2201        return SpeechRules {
2202            error: Default::default(),
2203            name,
2204            rules: HashMap::with_capacity(if name == RulesFor::Intent || name == RulesFor::Speech {500} else {50}),                       // lazy load them
2205            rule_files: FilesAndTimes::default(),
2206            unicode_short: globals.0.0,       // lazy load them
2207            unicode_short_files: globals.0.1,
2208            unicode_full: globals.1.0,        // lazy load them
2209            unicode_full_files: globals.1.1,
2210            definitions_files: globals.2,
2211            translate_single_chars_only,
2212            pref_manager: PreferenceManager::get(),
2213        };
2214}
2215
2216    pub fn get_error(&self) -> Option<&str> {
2217        return if self.error.is_empty() {
2218             None
2219        } else {
2220            Some(&self.error)
2221        }
2222    }
2223
2224    pub fn read_files(&mut self) -> Result<()> {
2225        let check_rule_files = self.pref_manager.borrow().pref_to_string("CheckRuleFiles");
2226        if check_rule_files != "None" {  // "Prefs" or "All" are other values
2227            self.pref_manager.borrow_mut().set_preference_files()?;
2228        }
2229        let should_ignore_file_time = self.pref_manager.borrow().pref_to_string("CheckRuleFiles") != "All";     // ignore for "None", "Prefs"
2230        let rule_file = self.pref_manager.borrow().get_rule_file(&self.name).to_path_buf();     // need to create PathBuf to avoid a move/use problem
2231        if self.rules.is_empty() || !self.rule_files.is_file_up_to_date(&rule_file, should_ignore_file_time) {
2232            self.rules.clear();
2233            let files_read = self.read_patterns(&rule_file)?;
2234            self.rule_files.set_files_and_times(files_read);
2235        }
2236
2237        let pref_manager = self.pref_manager.borrow();
2238        let unicode_pref_files = if self.name == RulesFor::Braille {pref_manager.get_braille_unicode_file()} else {pref_manager.get_speech_unicode_file()};
2239
2240        if !self.unicode_short_files.borrow().is_file_up_to_date(unicode_pref_files.0, should_ignore_file_time) {
2241            self.unicode_short.borrow_mut().clear();
2242            self.unicode_short_files.borrow_mut().set_files_and_times(self.read_unicode(None, true)?);
2243        }
2244
2245        if self.definitions_files.borrow().ft.is_empty() || !self.definitions_files.borrow().is_file_up_to_date(
2246                            pref_manager.get_definitions_file(self.name != RulesFor::Braille),
2247                            should_ignore_file_time
2248        ) {
2249            self.definitions_files.borrow_mut().set_files_and_times(read_definitions_file(self.name != RulesFor::Braille)?);
2250        }
2251        return Ok( () );
2252    }
2253
2254    fn read_patterns(&mut self, path: &Path) -> Result<Vec<PathBuf>> {
2255        // info!("Reading rule file: {}", p.to_str().unwrap());
2256        let rule_file_contents = read_to_string_shim(path).with_context(|| format!("cannot read file '{}'", path.to_str().unwrap()))?;
2257        let rules_build_fn = |pattern: &Yaml| {
2258            self.build_speech_patterns(pattern, path)
2259                .with_context(||format!("in file {:?}", path.to_str().unwrap()))
2260        };
2261        return compile_rule(&rule_file_contents, rules_build_fn)
2262                .with_context(||format!("in file {:?}", path.to_str().unwrap()));
2263    }
2264
2265    fn build_speech_patterns(&mut self, patterns: &Yaml, file_name: &Path) -> Result<Vec<PathBuf>> {
2266        // Rule::SpeechPatternList
2267        let patterns_vec = patterns.as_vec();
2268        if patterns_vec.is_none() {
2269            bail!(yaml_type_err(patterns, "array"));
2270        }
2271        let patterns_vec = patterns.as_vec().unwrap();
2272        let mut files_read = vec![file_name.to_path_buf()];
2273        for entry in patterns_vec.iter() {
2274            if let Some(mut added_files) = SpeechPattern::build(entry, file_name, self)? {
2275                files_read.append(&mut added_files);
2276            }
2277        }
2278        return Ok(files_read)
2279    }
2280    
2281    fn read_unicode(&self, path: Option<PathBuf>, use_short: bool) -> Result<Vec<PathBuf>> {
2282        let path = match path {
2283            Some(p) => p,
2284            None => {
2285                // get the path to either the short or long unicode file
2286                let pref_manager = self.pref_manager.borrow();
2287                let unicode_files = if self.name == RulesFor::Braille {
2288                    pref_manager.get_braille_unicode_file()
2289                } else {
2290                    pref_manager.get_speech_unicode_file()
2291                };
2292                let unicode_files = if use_short {unicode_files.0} else {unicode_files.1};
2293                unicode_files.to_path_buf()
2294            }
2295        };
2296
2297        // FIX: should read first (lang), then supplement with second (region)
2298        // info!("Reading unicode file {}", path.to_str().unwrap());
2299        let unicode_file_contents = read_to_string_shim(&path)?;
2300        let unicode_build_fn = |unicode_def_list: &Yaml| {
2301            let unicode_defs = unicode_def_list.as_vec();
2302            if unicode_defs.is_none() {
2303                bail!("File '{}' does not begin with an array", yaml_to_type(unicode_def_list));
2304            };
2305            let mut files_read = vec![path.to_path_buf()];
2306            for unicode_def in unicode_defs.unwrap() {
2307                if let Some(mut added_files) = UnicodeDef::build(unicode_def, &path, self, use_short)
2308                                                                .with_context(|| {format!("In file {:?}", path.to_str())})? {
2309                    files_read.append(&mut added_files);
2310                }
2311            };
2312            return Ok(files_read)
2313        };
2314
2315        return compile_rule(&unicode_file_contents, unicode_build_fn)
2316                    .with_context(||format!("in file {:?}", path.to_str().unwrap()));
2317    }
2318
2319    pub fn print_sizes() -> String {
2320        // let _ = &SPEECH_RULES.with_borrow(|rules| {
2321        //     debug!("SPEECH RULES entries\n");
2322        //     let rules = &rules.rules;
2323        //     for (key, _) in rules.iter() {
2324        //         debug!("key: {}", key);
2325        //     }
2326        // });
2327        let mut answer = rule_size(&SPEECH_RULES, "SPEECH_RULES");
2328        answer += &rule_size(&INTENT_RULES, "INTENT_RULES");
2329        answer += &rule_size(&BRAILLE_RULES, "BRAILLE_RULES");
2330        answer += &rule_size(&NAVIGATION_RULES, "NAVIGATION_RULES");
2331        answer += &rule_size(&OVERVIEW_RULES, "OVERVIEW_RULES");
2332        SPEECH_RULES.with_borrow(|rule| {
2333            answer += &format!("Speech Unicode tables: short={}/{}, long={}/{}\n",
2334                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2335                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2336        });
2337        BRAILLE_RULES.with_borrow(|rule| {
2338            answer += &format!("Braille Unicode tables: short={}/{}, long={}/{}\n",
2339                                rule.unicode_short.borrow().len(), rule.unicode_short.borrow().capacity(),
2340                                rule.unicode_full.borrow().len(), rule.unicode_full.borrow().capacity());
2341        });
2342        return answer;
2343
2344        fn rule_size(rules: &'static std::thread::LocalKey<RefCell<SpeechRules>>, name: &str) -> String {
2345            rules.with_borrow(|rule| {
2346                let hash_map = &rule.rules;
2347                return format!("{}: {}/{}\n", name, hash_map.len(), hash_map.capacity());
2348            })
2349        }
2350    }
2351}
2352
2353
/// We track four different lifetimes:
///   'c -- the lifetime of the context and mathml
///   's -- the lifetime of the speech rules (which is static)
///   'r -- the lifetime of the reference (this seems to be key to keep the rust memory checker happy)
///   'm -- the lifetime of the document (`doc`) and of `nav_node_id`
2358impl<'c, 's:'c, 'r, 'm:'c> SpeechRulesWithContext<'c, 's,'m> {
2359    pub fn new(speech_rules: &'s SpeechRules, doc: Document<'m>, nav_node_id: &'m str, nav_node_offset: usize) -> SpeechRulesWithContext<'c, 's, 'm> {
2360        return SpeechRulesWithContext {
2361            speech_rules,
2362            context_stack: ContextStack::new(&speech_rules.pref_manager.borrow()),
2363            doc,
2364            nav_node_id,
2365            nav_node_offset,
2366            inside_spell: false,
2367            translate_count: 0,
2368        }
2369    }
2370
2371    pub fn get_rules(&mut self) -> &SpeechRules {
2372        return self.speech_rules;
2373    }
2374
2375    pub fn get_context(&mut self) -> &mut sxd_xpath::Context<'c> {
2376        return &mut self.context_stack.base;
2377    }
2378
2379    pub fn get_document(&mut self) -> Document<'m> {
2380        return self.doc;
2381    }
2382
    /// Sets the offset used together with the nav node id.
    /// In `find_match`, an offset > 0 selects (1-based) a single character of a matching leaf's text.
    pub fn set_nav_node_offset(&mut self, offset: usize) {
        // debug!("Setting nav node offset to {}", offset);
        self.nav_node_offset = offset;
    }
2387
2388    pub fn match_pattern<T:TreeOrString<'c, 'm, T>>(&'r mut self, mathml: Element<'c>) -> Result<T> {
2389        // debug!("Looking for a match for: \n{}", mml_to_string(mathml));
2390        let tag_name = mathml.name().local_part();
2391        let rules = &self.speech_rules.rules;
2392
2393        // start with priority rules that apply to any node (should be a very small number)
2394        if let Some(rule_vector) = rules.get("!*") &&
2395           let Some(result) = self.find_match(rule_vector, mathml)? {
2396                return Ok(result);      // found a match
2397            }
2398        
2399        if let Some(rule_vector) = rules.get(tag_name) &&
2400           let Some(result) = self.find_match(rule_vector, mathml)? {
2401                return Ok(result);      // found a match
2402            }
2403
2404        // no rules for specific element, fall back to rules for "*" which *should* be present in all rule files as fallback
2405        if let Some(rule_vector) = rules.get("*") &&
2406           let Some(result) = self.find_match(rule_vector, mathml)? {
2407                return Ok(result);      // found a match
2408            }
2409
2410        // no rules matched -- poorly written rule file -- let flow through to default error
2411        // report error message with file name
2412        let speech_manager = self.speech_rules.pref_manager.borrow();
2413        let file_name = speech_manager.get_rule_file(&self.speech_rules.name);
2414        // FIX: handle error appropriately 
2415        bail!("\nNo match found!\nMissing patterns in {} for MathML.\n{}", file_name.to_string_lossy(), mml_to_string(mathml));
2416    }
2417
    /// Tries the patterns of `rule_vector` in order and applies the first one that matches `mathml`.
    ///
    /// Returns `Ok(Some(result))` for the first matching pattern, `Ok(None)` if no pattern matches,
    /// and an error (with context naming the pattern and its file) if matching or replacing fails.
    ///
    /// A pattern's variable definitions are pushed onto the context stack before the match only if
    /// the match needs them; otherwise they are pushed after a successful match and before the replacement.
    fn find_match<T:TreeOrString<'c, 'm, T>>(&'r mut self, rule_vector: &[Box<SpeechPattern>], mathml: Element<'c>) -> Result<Option<T>> {
        for pattern in rule_vector {
            // debug!("Pattern name: {}", pattern.pattern_name);
            // always pushing and popping around the is_match would be a little cleaner, but push/pop is relatively expensive,
            //   so we optimize and only push first if the variables are needed to do the match
            if pattern.match_uses_var_defs {
                self.context_stack.push(pattern.var_defs.clone(), mathml)?;
            }
            if pattern.is_match(&self.context_stack.base, mathml)
                    .with_context(|| error_string(pattern, mathml) )? {
                // debug!("  find_match: FOUND!!!");
                if !pattern.match_uses_var_defs && pattern.var_defs.len() > 0 { // don't push them on twice
                    self.context_stack.push(pattern.var_defs.clone(), mathml)?;
                }
                // When navigating to a character inside a leaf (offset > 0 and this leaf is the nav node),
                // the result is just that one character (after `replace_single_char`) instead of the pattern's replacements.
                let result = if self.nav_node_offset > 0 &&
                            self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() && is_leaf(mathml) {
                    // the offset is 1-based; a missing character becomes the default char
                    let ch = crate::canonicalize::as_text(mathml).chars().nth(self.nav_node_offset-1).unwrap_or_default();
                    let ch = self.replace_single_char(ch, mathml)?;
                    // debug!("find_match: ch={} from '{}'; matched pattern name/tag: {}/{} with nav_node_offset={}",
                    //     ch, crate::canonicalize::as_text(mathml),
                    //     pattern.pattern_name, pattern.tag_name, self.nav_node_offset);
                    T::from_string(ch.to_string(), self.doc)
                } else {
                    pattern.replacements.replace(self, mathml)
                };
                // NOTE(review): this pops only when var_defs is non-empty, so it assumes `match_uses_var_defs`
                //   implies a non-empty `var_defs` (otherwise the push before the match is not undone) -- confirm
                if pattern.var_defs.len() > 0 {
                    self.context_stack.pop();
                }
                return match result {
                    Ok(s) => {
                        // for all except braille and navigation, nav_node_id will be an empty string and will not match
                        if self.nav_node_id.is_empty() {
                            Ok( Some(s) )
                        } else {
                            if self.nav_node_id == mathml.attribute_value("id").unwrap_or_default() {debug!("Matched pattern name/tag: {}/{}", pattern.pattern_name, pattern.tag_name)};
                            Ok ( Some(self.nav_node_adjust(s, mathml)) )
                        }
                    },
                    Err(e) => Err( e.context(
                        format!(
                            "attempting replacement pattern: \"{}\" for \"{}\".\n\
                            Replacement\n{}\n...due to matching the MathML\n{} with the pattern\n\
                            {}\n\
                            The patterns are in {}.\n",
                            pattern.pattern_name, pattern.tag_name,
                            pattern.replacements.pretty_print_replacements(),
                            mml_to_string(mathml), pattern.pattern,
                            pattern.file_name
                        )
                    ))
                }
            } else if pattern.match_uses_var_defs {
                // no match: undo the push that was done for the match
                self.context_stack.pop();
            }
        };
        return Ok(None);    // no matches

        /// Builds the context message used when evaluating a pattern's match expression fails.
        fn error_string(pattern: &SpeechPattern, mathml: Element) -> String {
            return format!(
                "error during pattern match using: \"{}\" for \"{}\".\n\
                Pattern is \n{}\nMathML for the match:\n\
                {}\
                The patterns are in {}.\n",
                pattern.pattern_name, pattern.tag_name,
                pattern.pattern,
                mml_to_string(mathml),
                pattern.file_name
            );
        }

    }
2489
2490    fn nav_node_adjust<T:TreeOrString<'c, 'm, T>>(&self, speech: T, mathml: Element<'c>) -> T {
2491      if let Some(id) = mathml.attribute_value("id") &&
2492         self.nav_node_id == id {
2493        let offset = mathml.attribute_value(crate::navigate::ID_OFFSET).unwrap_or("0");
2494        debug!("nav_node_adjust: id/name='{}/{}' offset?='{}'", id, name(mathml),
2495               self.nav_node_offset.to_string().as_str() == offset
2496        );
2497        if is_leaf(mathml) || self.nav_node_offset.to_string().as_str() == offset {
2498          if self.speech_rules.name == RulesFor::Braille {
2499            let highlight_style =  self.speech_rules.pref_manager.borrow().pref_to_string("BrailleNavHighlight");
2500            return T::highlight_braille(speech, highlight_style);
2501          } else {
2502            debug!("nav_node_adjust: id='{}' offset='{}/{}'", id, self.nav_node_offset, offset);
2503            return T::mark_nav_speech(speech)
2504          }
2505        }
2506      }
2507      return speech;
2508    }
2509    
2510    fn highlight_braille_string(braille: String, highlight_style: String) -> String {
2511        // add dots 7 & 8 to the Unicode braille (28xx)
2512        if &highlight_style == "Off" || braille.is_empty() {
2513            return braille;
2514        }
2515        
2516        // FIX: this seems needlessly complex. It is much simpler if the char can be changed in place...
2517        // find first char that can get the dots and add them
2518        let mut chars = braille.chars().collect::<Vec<char>>();
2519
2520        // the 'b' for baseline indicator is really part of the previous token, so it needs to be highlighted but isn't because it is not Unicode braille
2521        let baseline_indicator_hack = PreferenceManager::get().borrow().pref_to_string("BrailleCode") == "Nemeth";
2522        // debug!("highlight_braille_string: highlight_style={}\n braille={}", highlight_style, braille);
2523        let mut i_first_modified = 0;
2524        for (i, ch) in chars.iter_mut().enumerate() {
2525            let modified_ch = add_dots_to_braille_char(*ch, baseline_indicator_hack);
2526            if *ch != modified_ch {
2527                *ch = modified_ch; 
2528                i_first_modified = i;
2529                break;
2530            };
2531        };
2532
2533        let mut i_last_modified = i_first_modified;
2534        if &highlight_style != "FirstChar" {
2535            // find last char so that we know when to modify the char
2536            for i in (i_first_modified..chars.len()).rev(){
2537                let ch = chars[i];
2538                let modified_ch = add_dots_to_braille_char(ch, baseline_indicator_hack);
2539                chars[i] = modified_ch;
2540                if ch !=  modified_ch {
2541                    i_last_modified = i;
2542                    break;
2543                }
2544            }
2545        }
2546
2547        if &highlight_style == "All" {
2548            // finish going through the string
2549			#[allow(clippy::needless_range_loop)]  // I don't like enumerate/take/skip here
2550            for i in i_first_modified+1..i_last_modified {
2551                chars[i] = add_dots_to_braille_char(chars[i], baseline_indicator_hack);
2552            };
2553        }
2554
2555        let result = chars.into_iter().collect::<String>(); 
2556        // debug!("    result={}", result);
2557        return result;
2558
2559        fn add_dots_to_braille_char(ch: char, baseline_indicator_hack: bool) -> char {
2560            let as_u32 = ch as u32;
2561            if (0x2800..0x28FF).contains(&as_u32) {
2562                return unsafe {char::from_u32_unchecked(as_u32 | 0xC0)};
2563            } else if baseline_indicator_hack && ch == 'b' {
2564                return '𝑏'
2565            } else {
2566                return ch;
2567            }
2568        }
2569    }
2570
2571    fn mark_nav_speech(speech: String) -> String {
2572        // add unique markers (since speech is mostly ascii letters and digits, most any symbol will do)
2573        // it's a bug (but happened during intent generation), we might have identical id's, choose innermost one
2574        debug!("mark_nav_speech: adding [[ {} ]] ", &speech);
2575        if !speech.contains("[[") {
2576            return "[[".to_string() + &speech + "]]";
2577        } else {
2578            return speech
2579        }
2580    }
2581
2582    fn replace<T:TreeOrString<'c, 'm, T>>(&'r mut self, replacement: &Replacement, mathml: Element<'c>) -> Result<T> {
2583        return Ok(
2584            match replacement {
2585                Replacement::Text(t) => T::from_string(t.clone(), self.doc)?,
2586                Replacement::XPath(xpath) => xpath.replace(self, mathml)?,
2587                Replacement::TTS(tts) => {
2588                    T::from_string(
2589                        self.speech_rules.pref_manager.borrow().get_tts().replace(tts, &self.speech_rules.pref_manager.borrow(), self, mathml)?,
2590                        self.doc
2591                    )?
2592                },
2593                Replacement::Intent(intent) => {
2594                    intent.replace(self, mathml)?                     
2595                },
2596                Replacement::Test(test) => {
2597                    test.replace(self, mathml)?                     
2598                },
2599                Replacement::With(with) => {
2600                    with.replace(self, mathml)?                     
2601                },
2602                Replacement::SetVariables(vars) => {
2603                    vars.replace(self, mathml)?                     
2604                },
2605                Replacement::Insert(ic) => {
2606                    ic.replace(self, mathml)?                     
2607                },
2608                Replacement::Translate(id) => {
2609                    id.replace(self, mathml)?                     
2610                },
2611            }
2612        )
2613    }
2614
2615    /// Iterate over all the nodes, concatenating the result strings together with a ' ' between them
2616    /// If the node is an element, pattern match it
2617    /// For 'Text' and 'Attribute' nodes, convert them to strings
2618    fn replace_nodes<T:TreeOrString<'c, 'm, T>>(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<T> {
2619        return T::replace_nodes(self, nodes, mathml);
2620    }
2621
2622    /// Iterate over all the nodes finding matches for the elements
2623    /// For this case of returning MathML, everything else is an error
2624    fn replace_nodes_tree(&'r mut self, nodes: Vec<Node<'c>>, _mathml: Element<'c>) -> Result<Element<'m>> {
2625        let mut children = Vec::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2626        for node in nodes {
2627            let matched = match node {
2628                Node::Element(n) => self.match_pattern::<Element<'m>>(n)?,
2629                Node::Text(t) =>  {
2630                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2631                    leaf.set_text(t.text());
2632                    leaf
2633                },
2634                Node::Attribute(attr) => {
2635                    // debug!("  from attr with text '{}'", attr.value());
2636                    let leaf = create_mathml_element(&self.doc, "TEMP_NAME");
2637                    leaf.set_text(attr.value());
2638                    leaf
2639                },
2640                _ => {
2641                    bail!("replace_nodes: found unexpected node type!!!");
2642                },
2643            };
2644            children.push(matched);
2645        }
2646
2647        let result = create_mathml_element(&self.doc, "TEMP_NAME");    // FIX: what name should be used?
2648        result.append_children(children);
2649        // debug!("replace_nodes_tree\n{}\n====>>>>>\n", mml_to_string(result));
2650        return Ok( result );
2651    }
2652
2653    fn replace_nodes_string(&'r mut self, nodes: Vec<Node<'c>>, mathml: Element<'c>) -> Result<String> {
2654        // debug!("replace_nodes: working on {} nodes", nodes.len());
2655        let mut result = String::with_capacity(3*nodes.len());   // guess (2 chars/node + space)
2656        let mut first_time = true;
2657        for node in nodes {
2658            if first_time {
2659                first_time = false;
2660            } else {
2661                result.push(' ');
2662            };
2663            let matched = match node {
2664                Node::Element(n) => self.match_pattern::<String>(n)?,
2665                Node::Text(t) =>  self.replace_chars(t.text(), mathml)?,
2666                Node::Attribute(attr) => self.replace_chars(attr.value(), mathml)?,
2667                _ => bail!("replace_nodes: found unexpected node type!!!"),
2668            };
2669            result += &matched;
2670        }
2671        return Ok( result );
2672    }
2673
2674    /// Lookup unicode "pronunciation" of char.
2675    /// Note: TTS is not supported here (not needed and a little less efficient)
2676    pub fn replace_chars(&'r mut self, str: &str, mathml: Element<'c>) -> Result<String> {
2677        let chars = str.chars().collect::<Vec<char>>();
2678        let rules = self.speech_rules;
2679        // handled in match_pattern -- temporarily leaving as comments in case something is missed and needed here
2680        // if self.nav_node_offset > 0 && chars.len() > 1 {
2681        //     if self.nav_node_offset > chars.len() {
2682        //         debug!("replace_chars: nav_node_offset {} is larger than string length {}", self.nav_node_offset, chars.len());
2683        //         self.nav_node_offset = chars.len();
2684        //     }
2685        //     let ch = chars[self.nav_node_offset-1];
2686        //     debug!("replace_chars: adjusted string to '{}' based on nav_node_offset {}", ch, self.nav_node_offset);
2687        //     if rules.translate_single_chars_only {
2688        //         return self.replace_single_char(ch, mathml);
2689        //     } else {
2690        //         return Ok( ch.to_string() );
2691        //     }
2692        // }
2693        if is_quoted_string(str) {  // quoted string -- already translated (set in get_braille_chars)
2694            return Ok(unquote_string(str).to_string());
2695        }
2696        // in a string, avoid "a" -> "eigh", "." -> "point", etc
2697        if rules.translate_single_chars_only {
2698            if chars.len() == 1 {
2699                return self.replace_single_char(chars[0], mathml)
2700            } else {
2701                // more than one char -- fix up non-breaking space
2702                return Ok(str.replace('\u{00A0}', " ").replace(['\u{2061}', '\u{2062}', '\u{2063}', '\u{2064}'], ""))
2703            }
2704        };
2705
2706        let result = chars.iter()
2707            .map(|&ch| self.replace_single_char(ch, mathml))
2708            .collect::<Result<Vec<String>>>()?
2709            .join("");
2710        return Ok( result );
2711    }
2712
2713    fn replace_single_char(&'r mut self, ch: char, mathml: Element<'c>) -> Result<String> {
2714        let ch_as_u32 = ch as u32;
2715        let rules =  self.speech_rules;
2716        let mut unicode = rules.unicode_short.borrow();
2717        let mut replacements = unicode.get( &ch_as_u32 );
2718        // debug!("replace_single_char: looking for unicode {} for char '{}'/{:#06x}, found: {:?}", rules.name, ch, ch_as_u32, replacements);
2719        if replacements.is_none() {
2720            // see if it in the full unicode table (if it isn't loaded already)
2721            let pref_manager = rules.pref_manager.borrow();
2722            let unicode_pref_files = if rules.name == RulesFor::Braille {pref_manager.get_braille_unicode_file()} else {pref_manager.get_speech_unicode_file()};
2723            let should_ignore_file_time = pref_manager.pref_to_string("CheckRuleFiles") == "All";
2724            if rules.unicode_full.borrow().is_empty() || !rules.unicode_full_files.borrow().is_file_up_to_date(unicode_pref_files.1, should_ignore_file_time) {
2725                info!("*** Loading full unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2726                rules.unicode_full.borrow_mut().clear();
2727                rules.unicode_full_files.borrow_mut().set_files_and_times(rules.read_unicode(None, false)?);
2728                info!("# Unicode defs = {}/{}", rules.unicode_short.borrow().len(), rules.unicode_full.borrow().len());
2729            }
2730            unicode = rules.unicode_full.borrow();
2731            replacements = unicode.get( &ch_as_u32 );
2732            if replacements.is_none() {
2733              self.translate_count = 0;     // not in loop
2734              // debug!("*** Did not find unicode {} for char '{}'/{:#06x}", rules.name, ch, ch_as_u32);
2735              if rules.translate_single_chars_only || ch.is_ascii() {  // speech or if braille, avoid loop (ASCII remains ASCII if not found)
2736                return Ok(String::from(ch));   // no replacement, so just return the char and hope for the best
2737              } else { // braille -- must turn into braille dots
2738                // Emulate what NVDA does: generate (including single quotes) '\xhhhh' or '\yhhhhhh'
2739                let ch_as_int = ch as u32;
2740                let prefix_indicator = if ch_as_int < 1<<16 {'x'} else {'y'};
2741                return self.replace_chars( &format!("'\\{prefix_indicator}{:06x}'", ch_as_int), mathml);
2742              }
2743            }
2744        };
2745
2746        // map across all the parts of the replacement, collect them up into a Vec, and then concat them together
2747        let result = replacements.unwrap()
2748                    .iter()
2749                    .map(|replacement|
2750                         self.replace(replacement, mathml)
2751                                .with_context(|| format!("Unicode replacement error: {replacement}")) )
2752                    .collect::<Result<Vec<String>>>()?
2753                    .join(" ");
2754         self.translate_count = 0;     // found a replacement, so not in a loop
2755        return Ok(result);
2756    }
2757}
2758
2759/// Hack to allow replacement of `str` with braille chars.
2760pub fn braille_replace_chars(str: &str, mathml: Element) -> Result<String> {
2761    return BRAILLE_RULES.with(|rules| {
2762        let rules = rules.borrow();
2763        let new_package = Package::new();
2764        let mut rules_with_context = SpeechRulesWithContext::new(&rules, new_package.as_document(), "", 0);
2765        return rules_with_context.replace_chars(str, mathml);
2766    })
2767}
2768
2769
2770
#[cfg(test)]
mod tests {
    #[allow(unused_imports)]
    use crate::init_logger;

    use super::*;

    /// The rule most of the rule-reading tests start from.
    const DEFAULT_RULE: &str = r#"---
        {name: default, tag: math, match: ".", replace: [x: "./*"] }"#;

    /// Parse `yaml` (which must hold exactly one YAML document) and add the rule it describes to `rules`.
    fn build_rule(yaml: &str, rules: &mut SpeechRules) {
        let docs = YamlLoader::load_from_str(yaml).unwrap();
        assert_eq!(docs.len(), 1);
        SpeechPattern::build(&docs[0], Path::new("testing"), rules).unwrap();
    }

    /// Check that `MyXPath::add_debug_string_arg` turns `xpath` into `expected`.
    /// On failure the underlying error is part of the panic message.
    fn assert_debug_string_arg(xpath: &str, expected: &str) {
        let result = MyXPath::add_debug_string_arg(xpath)
            .unwrap_or_else(|e| panic!("add_debug_string_arg failed for '{xpath}': {e:?}"));
        assert_eq!(result, expected);
    }

    /// Reading a single rule records its name, tag, match pattern, and replacement.
    #[test]
    fn test_read_statement() {
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
        build_rule(DEFAULT_RULE, &mut rules);
        assert_eq!(rules.rules["math"].len(), 1, "\nshould only be one rule");

        let speech_pattern = &rules.rules["math"][0];
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
        assert_eq!(speech_pattern.replacements.replacements.len(), 1, "\nreplacement failure");
        assert_eq!(speech_pattern.replacements.replacements[0].to_string(), r#""./*""#, "\nreplacement failure");
    }

    /// A second rule with the same name and tag replaces the first one.
    #[test]
    fn test_read_statements_with_replace() {
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
        build_rule(DEFAULT_RULE, &mut rules);

        let replacement_rule = r#"---
        {name: default, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;
        build_rule(replacement_rule, &mut rules);
        assert_eq!(rules.rules["math"].len(), 1, "\nfirst rule not replaced");

        let speech_pattern = &rules.rules["math"][0];
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
        assert_eq!(speech_pattern.replacements.replacements.len(), 2, "\nreplacement failure");
    }

    /// A second rule with a different name (same tag) is added and leaves the first rule untouched.
    #[test]
    fn test_read_statements_with_add() {
        let mut rules = SpeechRules::new(RulesFor::Speech, true);
        build_rule(DEFAULT_RULE, &mut rules);

        let additional_rule = r#"---
        {name: another-rule, tag: math, match: ".", replace: [t: "test", x: "./*"] }"#;
        build_rule(additional_rule, &mut rules);
        assert_eq!(rules.rules["math"].len(), 2, "\nsecond rule not added");

        let speech_pattern = &rules.rules["math"][0];
        assert_eq!(speech_pattern.pattern_name, "default", "\npattern name failure");
        assert_eq!(speech_pattern.tag_name, "math", "\ntag name failure");
        assert_eq!(speech_pattern.pattern.rc.string, ".", "\npattern failure");
        assert_eq!(speech_pattern.replacements.replacements.len(), 1, "\nreplacement failure");
    }

    /// An xpath without DEBUG(...) is returned unchanged.
    #[test]
    fn test_debug_no_debug() {
        let str = r#"*[2]/*[3][text()='3']"#;
        assert_debug_string_arg(str, str);
    }

    /// An xpath without DEBUG(...) is returned unchanged even when it contains a quoted paren.
    #[test]
    fn test_debug_no_debug_with_quote() {
        let str = r#"*[2]/*[3][text()='(']"#;
        assert_debug_string_arg(str, str);
    }

    /// DEBUG(...) gets its own contents added as a second, string argument.
    #[test]
    fn test_debug_no_quoted_paren() {
        assert_debug_string_arg(
            r#"DEBUG(*[2]/*[3][text()='3'])"#,
            r#"DEBUG(*[2]/*[3][text()='3'], "*[2]/*[3][text()='3']")"#,
        );
    }

    /// A quoted paren inside DEBUG(...) is not mistaken for the closing paren.
    #[test]
    fn test_debug_quoted_paren() {
        assert_debug_string_arg(
            r#"DEBUG(*[2]/*[3][text()='('])"#,
            r#"DEBUG(*[2]/*[3][text()='('], "*[2]/*[3][text()='(']")"#,
        );
    }

    /// Quoted parens that come after the DEBUG(...) call are left alone.
    #[test]
    fn test_debug_quoted_paren_before_paren() {
        assert_debug_string_arg(
            r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics') and IsBracketed(., '(', ')')"#,
            r#"DEBUG(ClearSpeak_Matrix = 'Combinatorics', "ClearSpeak_Matrix = 'Combinatorics'") and IsBracketed(., '(', ')')"#,
        );
    }


// zipped files do NOT include "zz", hence we need to exclude this test
cfg_if::cfg_if! {if #[cfg(not(feature = "include-zip"))] {
    /// Rewriting a unicode file is noticed when 'CheckRuleFiles' is "All" and not noticed when it is "None".
    #[test]
    fn test_up_to_date() {
        use crate::interface::*;
        // initialize and move to a directory where making a time change doesn't really matter
        set_rules_dir(super::super::abs_rules_dir_path()).unwrap();
        set_preference("Language", "zz-aa").unwrap();
        // not much is supported in zz
        if let Err(e) = set_mathml("<math><mi>x</mi></math>") {
            error!("{}", crate::errors_to_string(&e));
            panic!("Should not be an error in setting MathML")
        }

        set_preference("CheckRuleFiles", "All").unwrap();
        assert!(!is_file_time_same(), "file's time did not get updated");
        set_preference("CheckRuleFiles", "None").unwrap();
        assert!(is_file_time_same(), "file's time was wrongly updated (preference 'CheckRuleFiles' should have prevented updating)");

        // change a file, cause read_files to be called, and return if MathCAT noticed the change and updated its time
        fn is_file_time_same() -> bool {
            // read and write a unicode file in a test dir
            // files are read in due to setting the MathML

            use std::time::Duration;
            return SPEECH_RULES.with(|rules| {
                let start_main_file = rules.borrow().unicode_short_files.borrow().ft[0].clone();

                // open the file, read all the contents, then write them back so the time changes
                // (the panic message is only built on failure and includes the I/O error)
                let contents = std::fs::read(&start_main_file.file)
                    .unwrap_or_else(|e| panic!("Failed to read file {} during test: {e}", start_main_file.file.to_string_lossy()));
                std::fs::write(&start_main_file.file, contents).unwrap();
                std::thread::sleep(Duration::from_millis(5));       // pause a little to make sure the time changes

                // speak should cause the file stored to have a new time
                if let Err(e) = get_spoken_text() {
                    error!("{}", crate::errors_to_string(&e));
                    panic!("Should not be an error in speech")
                }
                return rules.borrow().unicode_short_files.borrow().ft[0].time == start_main_file.time;
            });
        }
    }
}}

    // #[test]
    // fn test_nested_debug_quoted_paren() {
    //     let str = r#"DEBUG(*[2]/*[3][DEBUG(text()='(')])"#;
    //     let result = MyXPath::add_debug_string_arg(str);
    //     assert!(result.is_ok());
    //     assert_eq!(result.unwrap(), r#"DEBUG(*[2]/*[3][DEBUG(text()='(')], "DEBUG(*[2]/*[3][DEBUG(text()='(')], \"text()='(')]\")"#);
    // }

}