mathcat 0.7.5

MathCAT: Math Capable Assistive Technology ('Speech and braille from MathML')
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
//! #Speech Engine Information
//!
//! ## Pitch (default 140hz)
//! ### SAPI4: Relative pitch
//! * Number is relative to the default/current pitch.
//! * 50 is 1/2 of the default/current pitch, 200 is 2 times the default/current pitch.
//!
//!  Note: no range is specified by the spec
//! ### SAPI5: Relative pitch
//! From https://documentation.help/SAPI-5/sapi.xsd
//! * A value of +10 sets a voice to speak at four-thirds (or 4/3) of its default pitch.
//! * Each increment between –10 and +10 is logarithmically distributed such that
//!   incrementing/decrementing by 1 is multiplying/dividing the pitch by the 24th root of 2 (about 1.03).
//! * Values more extreme than –10 and 10 will be passed to an engine but SAPI 5compliant engines may not support
//!   such extremes and instead may clip the pitch to the maximum or minimum pitch it supports.
//! * Values of –24 and +24 must lower and raise pitch by 1 octave respectively.
//!   All incrementing/decrementing by 1 must multiply/divide the pitch by the 24th root of 2.
//! 
//! Note: an octave is a doubling of frequency, so pitch change of 100% should turn into +/- 24
//! ### SSML: Relative pitch
//! * pitch in hertz (default/current man's voice is about 100hz, woman's 180hz)
//!
//! Note: other legal values for SSML are not supported, and all numbers are interpreted as relative changes
//! ### Eloquence: Absolute pitch (relative pitch not supported by Eloquence)
//! * Range is 0 - 100.  Guess is that 0 ~= 42hz, 100 ~= 422hz based on supported \"sapi\" values
//! ## Rate (default 180 words/min)
//! ### SAPI4: Absolute rate
//! * Number is relative to the default/current rate
//! * 50 is 1/2 of the default/current rate, 200 is 2 times the default/current rate
//!
//! Note: no range is specified by the spec
//! ### SAPI5: Relative rate
//! * Number is in range -10 to 10
//! * -10 is 1/3 of the default/current speed; 10 3 times the default/current speech
//! * changes are logarithmic -- a change of +/-1 corresponds to multiplying/dividing by 10th root of 3 (10*log_3(change))
//! ### SSML: Relative rate %
//! * 100% is no change, 50% is half the current rate, 200% is doubling the rate
//!
//!  Note:  other legal values for SSML are not supported, and all numbers are interpreted as relative changes
//! ### Eloquence: Absolute rate (relative rate not supported by Eloquence)
//! * Range is 0 - 250, which manual seems to indicate corresponds to 70 - 1297 words/min.
//! * * Window-Eyes only seems to give values in range 1 - 150.
//! * On the low end, 1 ~= 72words/min
//! * On the high end, I can't tell, but 80 seems to be a bit over twice normal (~400 words/min?)
//!   250 ~= 1297 words/min based on supported "sapi" values
//!
//! Note: this means words/min = 4.18 * Eloquence rate + 66
//! So the relative pause rate is 180/computed value
//!
//!
//! ## Volume (default 100 \[full])
//! ### SAPI4: Relative volume
//! * Number is relative to the default/current rate
//! * Range is 0 - 065535
//! ### SAPI5: Relative volume
//! * Number is in range 0 to 100
//! ### SSML: Relative volume
//! * Number is in range 0 to 100
//!
//! Note:  other legal values for SSML are not supported, and all numbers are interpreted as relative changes
//! ### Eloquence: Absolute volume (relative volume not supported by Eloquence)
//! * Range is 0 - 100
//!
//! ## Pause
//! * All systems -- pauses are given in milliseconds
//!
//! Note: Pauses on output are scaled based on the ratio of the current rate to the default rate (180 wpm)
#![allow(clippy::needless_return)]

use crate::{errors::*, prefs::PreferenceManager, speech::ReplacementArray};
use sxd_document::dom::Element;
use yaml_rust::Yaml;

use std::fmt;
use crate::speech::{SpeechRulesWithContext, MyXPath, TreeOrString};
use std::string::ToString;
use std::str::FromStr;
use strum_macros::{Display, EnumString};
use regex::Regex;
use sxd_xpath::Value;

const MIN_PAUSE:f64 = 50.0;         // ms -- avoids clutter of putting out pauses that probably can't be heard
const PAUSE_SHORT:f64 = 200.0;  // ms
const PAUSE_MEDIUM:f64 = 400.0; // ms
const PAUSE_LONG:f64 = 800.0;   // ms
const PAUSE_XLONG:f64 = 1600.0;   // ms
const PAUSE_AUTO:f64 = 987654321.5;   // ms -- hopefully unique
pub const PAUSE_AUTO_STR: &str = "\u{F8FA}\u{F8FA}";
const RATE_FROM_CONTEXT:f64 = 987654321.5;   // hopefully unique

const MAX_TRANSLATE_RECURSION: usize = 5;   // probably never more than three -- prevents infinite loop/stack overflows bugs

/// TTSCommand are the supported TTS commands
/// When parsing the YAML rule files, they are converted to these enums
#[derive(Debug, Clone, PartialEq, Eq, Display, EnumString)]
#[strum(serialize_all = "snake_case")]  // allows lower case
pub enum TTSCommand {
    Pause,
    Rate,
    Volume,
    Pitch,
    Audio,
    Gender,
    Voice,
    Spell,
    Bookmark,
    Pronounce,
}

#[derive(Debug, Clone)]
pub struct Pronounce {
    text: String,       // plain text
    ipa: String,        // ipa 
    sapi5: String,
    eloquence: String,
}


impl fmt::Display for Pronounce {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut comma = "";     // comma separator so it looks right
        write!(f, "pronounce: [")?;
        if !self.text.is_empty() {
            write!(f, "text: '{}'", self.text)?;
            comma = ",";
        }
        write!(f, "pronounce: [")?;
        if !self.ipa.is_empty() {
            write!(f, "{}ipa: '{}'", comma, self.ipa)?;
            comma = ",";
        }
        write!(f, "pronounce: [")?;
        if !self.sapi5.is_empty() {
            write!(f, "{}sapi5: '{}'", comma, self.sapi5)?;
            comma = ",";
        }
        write!(f, "pronounce: [")?;
        if !self.eloquence.is_empty() {
            write!(f, "{}eloquence: '{}'", comma, self.eloquence)?;
        }
        return writeln!(f, "]");
    }
}

impl Pronounce {
    fn build(values: &Yaml) -> Result<Pronounce> {
        use crate::speech::{as_str_checked, yaml_to_type};
        use crate::pretty_print::yaml_to_string;

        let mut text = "";
        let mut ipa = "";
        let mut sapi5 = "";
        let mut eloquence = "";
        // values should be an array with potential values for Pronounce
        let values = values.as_vec().ok_or_else(||
                                        format!("'pronounce' value '{}' is not an array", yaml_to_type(values)))?;
        for key_value in values {
            let key_value_hash = key_value.as_hash().ok_or_else(|| 
                                        format!("pronounce value '{}' is not key/value pair", yaml_to_string(key_value, 0)))?;
            if key_value_hash.len() != 1 {
                bail!("pronounce value {:?} is not a single key/value pair", key_value_hash);
            }
        
            for (key, value) in key_value_hash {
                match as_str_checked(key)? {
                    "text" => text = as_str_checked(value)?,
                    "ipa" => ipa = as_str_checked(value)?,
                    "sapi5" => sapi5 = as_str_checked(value)?,
                    "eloquence" => eloquence = as_str_checked(value)?,
                    _ => bail!("unknown pronounce type: {} with value {}", yaml_to_string(key, 0), yaml_to_string(value, 0)),
                }
            }
        }
        if text.is_empty() {
            bail!("'text' key/value is required for 'pronounce' -- it is used is the speech engine is unknown.")
        }
        return Ok( Pronounce{
            text: text.to_string(),
            ipa: ipa.to_string(),
            sapi5: sapi5.to_string(),
            eloquence: eloquence.to_string()
        } );
    

    }
}
/// TTSCommands are either numbers (f64 because of YAML) or strings
#[derive(Debug, Clone)]
pub enum TTSCommandValue {
    Number(f64),
    String(String),
    XPath(MyXPath),
    Pronounce(Box<Pronounce>),
}

impl TTSCommandValue {
    fn get_num(&self) -> f64 {
        match self {
            TTSCommandValue::Number(n) => return *n,
            _                               => panic!("Internal error: TTSCommandValue is not a number"),
        }
    }

    fn get_string(&self) -> &String {
        match self {
            TTSCommandValue::String(s) => return s,
            _                                  => panic!("Internal error: TTSCommandValue is not a string"),
        }
    }

    fn get_pronounce(&self) -> &Pronounce {
        match self {
            TTSCommandValue::Pronounce(p) => return p,
            _                               => panic!("Internal error: TTSCommandValue is not a 'pronounce' command'"),
        }
        
    }
}

/// A TTS rule consists of the command, the value, and its replacement
#[derive(Debug, Clone)]
pub struct TTSCommandRule {
    command: TTSCommand,
    value: TTSCommandValue,
    replacements: ReplacementArray
}

impl fmt::Display for TTSCommandRule {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let value = match &self.value {
            TTSCommandValue::String(s) => s.to_string(),
            TTSCommandValue::Number(f) => f.to_string(),
            TTSCommandValue::XPath(p) => p.to_string(),
            TTSCommandValue::Pronounce(p) => p.to_string(),
        };
        if self.command == TTSCommand::Pause {
            return write!(f, "pause: {value}");
        } else {
            return write!(f, "{}: {}{}", self.command, value, self.replacements);
        };
    }
}


impl TTSCommandRule {
    pub fn new(command: TTSCommand, value: TTSCommandValue, replacements: ReplacementArray) -> TTSCommandRule {
        return TTSCommandRule{
            command,
            value,
            replacements
        }
    }
}

/// Supported TTS engines
/// These types should do something for all the TTSCommands
#[allow(clippy::upper_case_acronyms)]
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TTS {
    None,
    SSML,
    SAPI5,
//    Eloquence,
//    Mac,
}

impl TTS {
    /// Given the tts command ("pause", "rate", etc) and its value, build the TTS data structure for it.
    ///
    /// `tts_command`: one of "pause", "rate", etc
    ///
    /// `value`: keyword 'value' or dict with 'value' and 'replace' (optional) keys
    pub fn build(tts_command: &str, values: &Yaml) -> Result<Box<TTSCommandRule>> {
        use crate::pretty_print::yaml_to_string;
        let hashmap = values.as_hash();
        let tts_value;
        let replacements;
        if hashmap.is_some() {
            tts_value = &values["value"];
            if tts_value.is_badvalue() {
                bail!("{} TTS command is missing a 'value' sub-key. Found\n{}", tts_command, yaml_to_string(values, 1));
            };
            replacements = ReplacementArray::build(&values["replace"])?;
        } else {
            tts_value = values;
            replacements = ReplacementArray::build_empty();
        }
        let tts_str_value = yaml_to_string(tts_value, 0);
        let tts_str_value = tts_str_value.trim();
        let tts_enum = match TTSCommand::from_str(tts_command) {
            Ok(t) => t,
            Err(_) => bail!("Internal error in build_tts: unexpected rule ({:?}) encountered", tts_command),
        };
    
        let tts_command_value = match tts_enum {
            TTSCommand::Pause | TTSCommand::Rate | TTSCommand::Volume | TTSCommand::Pitch => {
                // these strings are almost always what the value will be, so we try them first
                let val = match tts_str_value {
                    "auto" => Ok( PAUSE_AUTO ),
                    "short" => Ok( PAUSE_SHORT ),
                    "medium" => Ok( PAUSE_MEDIUM ),
                    "long" => Ok( PAUSE_LONG ),
                    "xlong" => Ok( PAUSE_XLONG ),
                    "$MathRate" => Ok( RATE_FROM_CONTEXT ), // special case hack -- value determined in replace
                    _ => tts_str_value.parse::<f64>()
                };

                match val {
                    Ok(num) => TTSCommandValue::Number(num),
                    Err(_) => {
                        // let's try as an xpath (e.g., could be '$CapitalLetters_Pitch')
                        TTSCommandValue::XPath(
                            MyXPath::build(tts_value).chain_err(|| format!("while trying to evaluate value of '{tts_enum}:'"))?
                        )
                    }
                }
            },
            TTSCommand::Bookmark | TTSCommand::Spell => {
                TTSCommandValue::XPath(
                    MyXPath::build(values).chain_err(|| format!("while trying to evaluate value of '{tts_enum}:'"))?
                )
            },
            TTSCommand::Pronounce => {
                TTSCommandValue::Pronounce( Box::new( Pronounce::build(values)? ) )
            },
            _ => {
                TTSCommandValue::String(tts_str_value.to_string())
            },
        };
        return Ok( Box::new( TTSCommandRule::new(tts_enum, tts_command_value, replacements) ) );
    }
    
    /// The rule called to execute the TTSCommand `command`
    /// `prefs` are used for scaling the speech rate
    /// some rules have MathML nested inside, so we need to do replacements on them (hence `rules` and `mathml` are needed)
    ///
    /// A string is returned for the speech engine.
    ///
    /// `auto` pausing is handled at a later phase and a special char is used for it
    pub fn replace<'c, 's:'c, 'm:'c, 'r, T:TreeOrString<'c, 'm, T>>(&self, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<T> {
        return T::replace_tts(self, command, prefs, rules_with_context, mathml);
    }

    pub fn replace_string<'c, 's:'c, 'm, 'r>(&self, command: &TTSCommandRule, prefs: &PreferenceManager, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
        // The general idea is we handle the begin tag, the contents, and then the end tag
        // For the begin/end tag, we dispatch off to specialized code for each TTS engine

        // 'bookmark' is special in that we need to eval the xpath
        // rather than pass a bunch of extra info into the generic handling routines, we just deal with them here
        if command.command == TTSCommand::Bookmark {
            // if we aren't suppose to generate bookmarks, short circuit and just return
            if prefs.pref_to_string("Bookmark") != "true"{
                return Ok("".to_string());
            }
            return Ok( match self {
                TTS::None  => "".to_string(),
                TTS::SSML => compute_bookmark_element(&command.value, "mark name", rules_with_context, mathml)?,
                TTS::SAPI5 => compute_bookmark_element(&command.value, "bookmark mark", rules_with_context, mathml)?,
            } );
        }

        let mut command = command.clone();
        if command.command == TTSCommand::Spell {
            // spell is also special because we need to eval the xpath to get the string to spell (typically the text content of an mi)
            match command.value {
                TTSCommandValue::XPath(xpath) => {
                    let value = xpath.evaluate(rules_with_context.get_context(), mathml)
                        .chain_err(|| format!("in 'spell': can't evaluate xpath \"{}\"", &xpath.to_string()) )?;
                    let value_string = match value {
                        Value::String(s) => s,
                        Value::Nodeset(nodes) if nodes.size() == 1 => {
                            let node = nodes.iter().next().unwrap();
                            if let Some(text) = node.text() {
                                text.text().to_string()
                            } else if let Some(el) = node.element() {
                                if crate::xpath_functions::is_leaf(el) {
                                    crate::canonicalize::as_text(el).to_string()
                                } else {
                                    bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string",  &xpath.to_string());
                                }
                            } else {
                                bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string, it is {} nodes",
                                        &xpath.to_string(), nodes.size());
                            }
                        },
                        _ => bail!("in 'spell': value returned from xpath '{}' does not evaluate to a string",  &xpath.to_string()),
                    };
                    // Chemistry wants to spell elements like "Na". But we also have the issue of capitalization (SpeechOverrides_CapitalLetters)
                    //   so the "N" need to use that. The logic for that is already in unicode.yaml. We could replicate that here.
                    // Rather than duplicate the logic (we would need to handle 'a', and who knows what in other languages),
                    //   we split the token into each letter and call the replacement on each letter.
                    // That in turns calls spell again. We end up in an infinite loop. To prevent this we set a flag that says don't recurse.
                    // The only structure to put that in is SpeechRulesWithContext. A bit of a hack to put it there, but better than a static var.
                    // Also, to avoid repeating the code for "cap" over and over, "spell" with "translate" is used. So keep going until no "translate"
                    let xpath_str = xpath.to_string();
                    if rules_with_context.inside_spell && !xpath_str.contains("translate") {
                        command.value = TTSCommandValue::String(value_string);
                        rules_with_context.translate_count  = 0;
                    } else if rules_with_context.translate_count > MAX_TRANSLATE_RECURSION {
                        bail!("Rule error: potential infinite recursion found in translate: {}", xpath_str);
                    } else {
                        // let the call to replace call spell on the individual chars -- that lets an "cap" be outside "spell"
                        rules_with_context.translate_count += 1;
                        let str_with_spaces = value_string.chars()
                                .map(|ch| {
                                    rules_with_context.inside_spell = true;
                                    let spelled_char = rules_with_context.replace_chars(ch.to_string().as_str(), mathml);
                                    rules_with_context.inside_spell = false;
                                    spelled_char
                                })
                                .collect::<Result<Vec<String>>>()?
                                .join(" ");
                        return Ok(str_with_spaces);
                    }             
                },
                _ => bail!("Implementation error: found non-xpath value for spell"),
            }
        } else if command.command == TTSCommand::Rate && self != &TTS::None {
            if let TTSCommandValue::Number(number_value) = command.value {
                if number_value == RATE_FROM_CONTEXT {
                    // handle hack for $Rate -- need to look up in context
                    let rate_from_context = crate::navigate::context_get_variable(rules_with_context.get_context(), "MathRate", mathml)?.1;
                    assert!(rate_from_context.is_some());
                    command.value = TTSCommandValue::Number(rate_from_context.unwrap());
                }
            }
        }

        // evaluate any xpath value now to simplify later code
        if let TTSCommandValue::XPath(xpath) = command.value {
            let eval_str = xpath.replace::<String>(rules_with_context, mathml)?;
            // can it be a number?
            command.value = match eval_str.parse::<f64>() {
                Ok(num) => TTSCommandValue::Number(num),
                Err(_) => TTSCommandValue::String(eval_str),
            }
        };


        // small optimization to avoid generating tags that do nothing
        if ((command.command == TTSCommand::Pitch || command.command == TTSCommand::Volume || command.command == TTSCommand::Pause) && command.value.get_num() == 0.0) ||
           (command.command == TTSCommand::Rate && command.value.get_num() == 100.0) {
            return command.replacements.replace::<String>(rules_with_context, mathml);
        }

        let mut result = String::with_capacity(255);
        result += &match self {
            TTS::None  => self.get_string_none(&command, prefs, true),
            TTS::SSML  => self.get_string_ssml(&command, prefs, true),
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, true),
        };


        if !command.replacements.is_empty()  {
            if result.is_empty() {
                result += " ";
            }
            result += &command.replacements.replace::<String>(rules_with_context, mathml)?;    
        }

        let end_tag = match self {
            TTS::None  => self.get_string_none(&command, prefs, false),
            TTS::SSML  => self.get_string_ssml(&command, prefs, false),
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, false),
        };

        if end_tag.is_empty() {
            return Ok( result ); // avoids adding in " "
        } else {
            return Ok( result + &end_tag );
        }


        fn compute_bookmark_element<'c, 's:'c, 'm, 'r>(value: &TTSCommandValue, tag_and_attr: &str, rules_with_context: &'r mut SpeechRulesWithContext<'c, 's, 'm>, mathml: Element<'c>) -> Result<String> {
            match value {
                TTSCommandValue::XPath(xpath) => {
                    let id = xpath.replace::<String>(rules_with_context, mathml)?;
                    return Ok( format!("<{tag_and_attr}='{id}'/>") );
                },
                _ => bail!("Implementation error: found bookmark value that did not evaluate to a string"),
            }
        }
    
    }

    // auto pausing can't be known until neighboring strings are computed
    // we create a unique string in this case and compute the real value later 
    fn get_string_none(&self, command: &TTSCommandRule,  prefs: &PreferenceManager, is_start_tag: bool) -> String  {
        // they only thing to do is handle "pause" with some punctuation hacks along with 'spell'        
        if is_start_tag {
            if command.command == TTSCommand::Pause {
                let amount = command.value.get_num();
                // only ',' and ';' are used as '.' didn't seem to reliably generate pauses in tests
                return crate::speech::CONCAT_INDICATOR.to_string() + (
                    if amount == PAUSE_AUTO {
                        PAUSE_AUTO_STR
                    } else {
                        let amount  =  amount * TTS::get_pause_multiplier(prefs);
                        if amount <= MIN_PAUSE {
                            ""
                        } else if amount <= 250.0 {
                            ","
                        } else  {
                            ";"
                        }
                    }
                );
            } else if command.command == TTSCommand::Spell {
                // debug!("spell rule: {}", command.value.get_string());
                return command.value.get_string().to_string();
            } else if let TTSCommandValue::Pronounce(p) = &command.value {
                return crate::speech::CONCAT_INDICATOR.to_string() + &p.text;
            }
        };
        return "".to_string();
    }
    
    fn get_string_sapi5(&self, command: &TTSCommandRule, prefs: &PreferenceManager, is_start_tag: bool) -> String  {
        return match &command.command {
            TTSCommand::Pause => if is_start_tag {
                let amount = command.value.get_num();
                if amount == PAUSE_AUTO {
                    PAUSE_AUTO_STR.to_string()
                } else {
                    let amount = amount * TTS::get_pause_multiplier(prefs);
                    if amount > MIN_PAUSE {
                        format!("<silence msec=='{}ms'/>", (amount * 180.0/prefs.get_rate()).round())
                    } else {
                        "".to_string()
                    }
                }
            } else {
                "".to_string()
            },
            // pitch must be in [-10, 10], logarithmic based on octaves
            // note MathPlayer uses 'absmiddle' (requires keeping a stack) -- could be 'middle' is not well supported
            TTSCommand::Pitch => if is_start_tag {format!("<pitch middle=\"{}\">", (24.0*(1.0+command.value.get_num()/100.0).log2()).round())} else {String::from("</prosody>")},
            // rate must be in [-10, 10], but we get relative %s. 300% => 10 (see comments at top of file)
            TTSCommand::Rate =>  if is_start_tag {format!("<rate speed='{:.1}'>", 10.0*(0.01*command.value.get_num()).log(3.0))} else {String::from("</rate>")},
            TTSCommand::Volume =>if is_start_tag {format!("<volume level='{}'>", command.value.get_num())} else {String::from("</volume>")},
            TTSCommand::Audio => "".to_string(),    // SAPI5 doesn't support audio
            TTSCommand::Gender =>if is_start_tag {format!("<voice required=\"Gender={}\">", command.value.get_string())} else {String::from("</prosody>")},
            TTSCommand::Voice =>if is_start_tag {format!("<voice required=\"Name={}\">", command.value.get_string())} else {String::from("</prosody>")},
            TTSCommand::Spell =>if is_start_tag {format!("<spell>{}", command.value.get_string())} else {String::from("</spell>")},
            TTSCommand::Pronounce =>if is_start_tag {
                    format!("<pron sym='{}'>{}", &command.value.get_pronounce().sapi5, &command.value.get_pronounce().text)
                } else {
                    String::from("</pron>")
                },
            TTSCommand::Bookmark => panic!("Internal error: bookmarks should have been handled earlier"),
        };
    }

    fn get_string_ssml(&self, command: &TTSCommandRule, prefs: &PreferenceManager, is_start_tag: bool) -> String  {
        return match &command.command {
            TTSCommand::Pause => {
                if is_start_tag {
                    let amount = command.value.get_num();
                    if amount == PAUSE_AUTO {
                        PAUSE_AUTO_STR.to_string()
                    } else {
                        let amount = amount * TTS::get_pause_multiplier(prefs);
                        if amount > MIN_PAUSE {
                            format!("<break time='{}ms'/>", (amount * 180.0/prefs.get_rate()).round())
                        } else {
                            "".to_string()
                        }
                    }
                } else {
                    "".to_string()
                }
            },
            TTSCommand::Pitch => if is_start_tag {format!("<prosody pitch='{}%'>", command.value.get_num())} else {String::from("</prosody>")},
            TTSCommand::Rate =>  if is_start_tag {format!("<prosody rate='{}%'>", command.value.get_num())} else {String::from("</prosody>")},
            TTSCommand::Volume =>if is_start_tag {format!("<prosody volume='{}db'>", command.value.get_num())} else {String::from("</prosody>")},
            TTSCommand::Audio =>if is_start_tag {format!("<audio src='{}'>", command.value.get_string())} else {String::from("</audio>")}, // only 'beep' is supported for now
            TTSCommand::Gender =>if is_start_tag {format!("<voice required='gender=\"{}\"'>", command.value.get_string())} else {String::from("</voice>")},
            TTSCommand::Voice =>if is_start_tag {format!("<voice required='{}'>", command.value.get_string())} else {String::from("</voice>")},
            TTSCommand::Spell =>if is_start_tag {format!("<say-as interpret-as='characters'>{}", command.value.get_string())} else {String::from("</say-as>")},
            TTSCommand::Pronounce =>if is_start_tag {
                format!("<phoneme alphabet='ipa' ph='{}'>{}", &command.value.get_pronounce().ipa, &command.value.get_pronounce().text)
            } else {
                String::from("</phoneme>")
            },
        TTSCommand::Bookmark => panic!("Internal error: bookmarks should have been handled earlier"),
        }
    }

    fn get_pause_multiplier(prefs: &PreferenceManager) -> f64 {
        return prefs.pref_to_string("PauseFactor").parse::<f64>().unwrap_or(100.)/100.0;
    }

    /// Compute the length of the pause to use.
    ///
    /// The computation is based on the length of the speech strings (after removing tagging).
    /// There is a bias towards pausing more _after_ longer strings.
    pub fn compute_auto_pause(&self, prefs: &PreferenceManager, before: &str, after: &str) -> String {
        lazy_static! {
            static ref REMOVE_XML: Regex = Regex::new(r"<.+?>").unwrap();    // punctuation ending with a '.'
        }
        let before_len;
        let after_len;
        match self {
            TTS::SSML | TTS::SAPI5 => {
                before_len = REMOVE_XML.replace_all(before, "").len();
                after_len = REMOVE_XML.replace_all(after, "").len();
            },
            _ => {
                before_len = before.len();
                after_len = after.len();
            },
        }

        // pause values are not cut in stone
        // the calculation bias to 'previous' is based on MathPlayer which used '30 * #-of-descendants-on-left
        // I think I did this as a sort of "take a breath" after saying something long although one might want to do that
        //   before speaking something long.
        if after_len < 3 {
            // hack to prevent pausing before "of" in exprs like "the fourth power of secant, of x"
            // if it should pause anywhere, it should be after the "of"
            return "".to_string(); 
        }
        let pause = std::cmp::min(3000, ((2 * before_len + after_len)/48) * 128);
        // create a TTSCommandRule so we reuse code
        let command = TTSCommandRule::new(
            TTSCommand::Pause,
            TTSCommandValue::Number(pause as f64),
            ReplacementArray::build_empty(),
        );
        return match self {
            TTS::None  => self.get_string_none(&command, prefs, true),
            TTS::SSML  => self.get_string_ssml(&command, prefs, true),
            TTS::SAPI5 => self.get_string_sapi5(&command, prefs, true),
        };

    }

    /// Take the longest of the pauses
    ///
    /// Two other options are:
    /// 1. average the pauses
    /// 2. add the pauses together.
    ///
    /// Until evidence points otherwise, use 'longest'.
    pub fn merge_pauses(&self, str: &str) -> String {
        // we need specialized merges for each TTS engine because we need to know the format of the commands
        return match self {
            TTS::None  => self.merge_pauses_none(str),
            TTS::SSML  => self.merge_pauses_ssml(str),
            TTS::SAPI5 => self.merge_pauses_sapi5(str),
        };        
    }

    fn merge_pauses_none(&self, str: &str) -> String {
        // punctuation used for pauses is ",", ";" 
        lazy_static! {
            static ref SPACES: Regex = Regex::new(r"\s+([;,])").unwrap();   // two or more pauses
            static ref MULTIPLE_PAUSES: Regex = Regex::new(r"([,;][,;]+)").unwrap();   // two or more pauses
        }
        // we reduce all sequences of two or more pauses to a single medium pause
        let merges_string = SPACES.replace_all(str, "$1").to_string();
        let merges_string = MULTIPLE_PAUSES.replace_all(&merges_string, ";").to_string();
        return merges_string;
    }

    fn merge_pauses_xml<F>(str: &str, full_attr_re: &Regex, sub_attr_re: &Regex, replace_with: F) -> String 
            where F: Fn(usize) -> String {
        // we reduce all sequences of two or more pauses to the max pause amount
        // other options would be the sum or an average
        // maybe some amount a little longer than the max would be best???
        let mut merges_string = str.to_string();
        for cap in full_attr_re.captures_iter(str) {
            let mut amount = 0;
            for c in sub_attr_re.captures_iter(&cap[0]) {
                amount = std::cmp::max(amount, c[1].parse::<usize>().unwrap());
            };
            merges_string = merges_string.replace(&cap[0], &replace_with(amount));
        }
        return merges_string;
    }

    fn merge_pauses_sapi5(&self, str: &str) -> String {
        lazy_static! {
            static ref CONSECUTIVE_BREAKS: Regex = Regex::new(r"(<silence msec[^>]+?> *){2,}").unwrap();   // two or more pauses
            static ref PAUSE_AMOUNT: Regex = Regex::new(r"msec=.*?(\d+)").unwrap();   // amount after 'time'
        }
        let replacement = |amount: usize| format!("<silence msec=='{amount}ms'/>");
        return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement);
    }

    fn merge_pauses_ssml(&self, str: &str) -> String {
        lazy_static! {
            static ref CONSECUTIVE_BREAKS: Regex = Regex::new(r"(<break time=[^>]+?> *){2,}").unwrap();   // two or more pauses
            static ref PAUSE_AMOUNT: Regex = Regex::new(r"time=.*?(\d+)").unwrap();   // amount after 'time'
        }
        let replacement = |amount: usize| format!("<break time='{amount}ms'/>");
        return TTS::merge_pauses_xml(str, &CONSECUTIVE_BREAKS, &PAUSE_AMOUNT, replacement);
    }
}