ssml_parser/
parser.rs

//! Handles parsing SSML input and returning our `Ssml` structure. This module contains a simple
//! parse function that sets up a parser with the default options and hides it, as well as a
//! parser type that users can construct themselves to have more control over parsing.
4use crate::elements::*;
5use crate::*;
6use anyhow::{bail, Context, Result};
7use derive_builder::Builder;
8use lazy_static::lazy_static;
9use mediatype::MediaTypeBuf;
10use quick_xml::events::{BytesStart, BytesText, Event};
11use quick_xml::reader::Reader;
12use regex::Regex;
13use std::cmp::{Ord, Ordering};
14use std::collections::BTreeMap;
15use std::io;
16use std::num::NonZeroUsize;
17use std::str::from_utf8;
18use std::str::FromStr;
19
20/// Shows a region of the cleaned transcript which an SSML element applies to.
21#[derive(Clone, Debug, PartialEq)]
22pub struct Span {
23    /// This is the index of span's start (inclusive) in terms of unicode scalar values - not bytes
24    /// or graphemes
25    pub start: usize,
26    /// This is the of span's end (exclusive) in terms of unicode scalar values - not bytes
27    /// or graphemes
28    pub end: usize,
29    /// The element contained within this span
30    pub element: ParsedElement,
31}
32
33impl Span {
34    /// Returns true if a span is contained within another span. This only takes advantage of the
35    /// start and end indexes. Other constraints such as the fact the parser returns spans in order
36    /// they're seen need to be used in combination to see if this _really contains_ the other
37    /// span. So if you're going over the list in order you can rely on this but if you've
38    /// rearranged the tag list it may not hold true.
39    ///
40    /// This does handle tags which can't contain other tags. So `<break/><break/>` will appear
41    /// with the same start and end. However break has to be an empty tag. This function will
42    /// return false. Whereas `<s/><s/>` will return true as a sentence can contain other tags. In
43    /// future as a sentence cannot contain a sentence this may return false.
44    pub fn maybe_contains(&self, other: &Self) -> bool {
45        self.element.can_contain(&other.element)
46            && (self.start <= other.start && self.end >= other.end)
47    }
48}
49
50impl Eq for Span {}
51
52impl Ord for Span {
53    fn cmp(&self, other: &Self) -> Ordering {
54        // We want spans that start earlier to be orderered sooner, but if both spans start in same
55        // location then the one with the further ahead end is the later one
56        match self.start.cmp(&other.start) {
57            Ordering::Equal => other.end.cmp(&self.end),
58            ord => ord,
59        }
60    }
61}
62
63impl PartialOrd for Span {
64    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
65        Some(self.cmp(other))
66    }
67}
68
69/// SSML parser, contains options used during parsing to determine how to handle certain elements.
70#[derive(Clone, Debug, Builder)]
71pub struct SsmlParser {
72    /// If true expand substitution elements replacing them with the text to substitute in the
73    /// attribute.
74    #[builder(default = "false")]
75    expand_sub: bool,
76}
77
78/// We're attaching no meaning to repeated whitespace, but things like space at end
79/// of text and line-breaks are word delimiters and we want to keep at least one in
80/// if there are repeated. But don't want half our transcript to be formatting
81/// induced whitespace.
82fn push_text(e: BytesText, text_buffer: &mut String) -> Result<()> {
83    let ends_in_whitespace = text_buffer.ends_with(char::is_whitespace);
84    let text = e.unescape()?;
85    let trimmed = text.trim();
86    if trimmed.is_empty() {
87        if !(text_buffer.is_empty() || ends_in_whitespace) {
88            text_buffer.push(' ');
89        }
90    } else {
91        if !ends_in_whitespace && text.starts_with(char::is_whitespace) {
92            text_buffer.push(' ');
93        }
94        let mut first = true;
95        for line in trimmed.lines() {
96            if !first {
97                text_buffer.push(' ');
98            }
99            text_buffer.push_str(line.trim());
100            first = false;
101        }
102        if text.ends_with(char::is_whitespace) {
103            text_buffer.push(' ');
104        }
105    }
106    Ok(())
107}
108
109/// Parses SSML with a default `SsmlParser`
110pub fn parse_ssml(ssml: &str) -> Result<Ssml> {
111    SsmlParserBuilder::default().build().unwrap().parse(ssml)
112}
113
114impl SsmlParser {
115    /// Returns true if the text should be added to the text buffer. If text isn't synthesisable
116    /// then it won't be entered.
117    fn text_should_enter_buffer(&self, element: Option<&SsmlElement>) -> bool {
118        match element {
119            None => true,
120            Some(elem) => {
121                !(self.expand_sub && elem == &SsmlElement::Sub)
122                    && elem.contains_synthesisable_text()
123            }
124        }
125    }
126
127    /// Parse the given SSML string
128    pub fn parse(&self, ssml: &str) -> Result<Ssml> {
129        let mut reader = Reader::from_str(ssml);
130        reader.check_end_names(true);
131        let mut has_started = false;
132        let mut text_buffer = String::new();
133        let mut open_tags = vec![];
134        let mut tags = vec![];
135        let mut event_log = vec![];
136
137        loop {
138            match reader.read_event()? {
139                Event::Start(e) if e.local_name().as_ref() == b"speak" => {
140                    if !has_started {
141                        text_buffer.clear();
142                    } else {
143                        bail!("Speak element cannot be placed inside a Speak");
144                    }
145                    has_started = true;
146
147                    let element = parse_speak(e, &reader)?;
148                    event_log.push(ParserLogEvent::Open(element.clone()));
149
150                    let span = Span {
151                        start: text_buffer.chars().count(),
152                        end: text_buffer.chars().count(),
153                        element,
154                    };
155
156                    open_tags.push((SsmlElement::Speak, tags.len(), span));
157                }
158                Event::Start(e) => {
159                    // TODO implement ordering constraints:
160                    //
161                    // The meta, metadata and lexicon elements must occur before all other elements and text
162                    // contained within the root speak element. There are no other ordering constraints on the
163                    // elements in this specification.
164                    if has_started {
165                        if !(text_buffer.is_empty() || text_buffer.ends_with(char::is_whitespace))
166                            && matches!(e.local_name().as_ref(), b"s" | b"p")
167                        {
168                            // Need to add in a space as they're using tags instead
169                            text_buffer.push(' ');
170                        }
171                        let (ty, element) = parse_element(e, &mut reader)?;
172                        if ty == SsmlElement::Sub && self.expand_sub {
173                            if let ParsedElement::Sub(attrs) = &element {
174                                let text_start = text_buffer.len();
175                                text_buffer.push(' ');
176                                text_buffer.push_str(&attrs.alias);
177                                text_buffer.push(' ');
178                                let text_end = text_buffer.len();
179                                event_log.push(ParserLogEvent::Text((text_start, text_end)));
180                            } else {
181                                unreachable!("Sub element wasn't returned for sub type");
182                            }
183                        } else {
184                            event_log.push(ParserLogEvent::Open(element.clone()));
185                            match open_tags.last().map(|x| &x.0) {
186                                Some(open_type) if !open_type.can_contain(&ty) => {
187                                    bail!("{:?} cannot be placed inside {:?}", ty, open_type)
188                                }
189                                _ => {}
190                            }
191                        }
192                        let new_span = Span {
193                            start: text_buffer.chars().count(),
194                            end: text_buffer.chars().count(),
195                            element,
196                        };
197
198                        open_tags.push((ty, tags.len(), new_span));
199                    }
200                }
201                Event::Comment(_)
202                | Event::CData(_)
203                | Event::Decl(_)
204                | Event::PI(_)
205                | Event::DocType(_) => continue,
206                Event::Eof => break,
207                Event::Text(e) => {
208                    let elem = open_tags.last().map(|x| &x.0);
209                    if self.text_should_enter_buffer(elem) {
210                        let text_start = text_buffer.len();
211                        push_text(e, &mut text_buffer)?;
212                        let text_end = text_buffer.len();
213                        event_log.push(ParserLogEvent::Text((text_start, text_end)));
214                    }
215                }
216                Event::End(e) => {
217                    let name = e.name();
218                    let name = from_utf8(name.as_ref())?;
219                    if open_tags.is_empty() {
220                        bail!(
221                            "Invalid SSML close tag '{}' presented without open tag.",
222                            name
223                        );
224                    }
225                    let ssml_elem = SsmlElement::from_str(name).unwrap();
226                    if ssml_elem != open_tags[open_tags.len() - 1].0 {
227                        // We have a close tag without an open!
228                    } else {
229                        // Okay time to close and remove tag
230                        let (_, pos, mut span) = open_tags.remove(open_tags.len() - 1);
231                        if !(ssml_elem == SsmlElement::Sub && self.expand_sub) {
232                            event_log.push(ParserLogEvent::Close(span.element.clone()));
233                            span.end = text_buffer.chars().count();
234                            tags.insert(pos, span);
235                            if !(ssml_elem == SsmlElement::Speak && open_tags.is_empty()) {
236                            } else {
237                                break;
238                            }
239                        }
240                    }
241                }
242                Event::Empty(e) => {
243                    let (_, element) = parse_element(e, &mut reader)?;
244                    let span = Span {
245                        start: text_buffer.chars().count(),
246                        end: text_buffer.chars().count(),
247                        element,
248                    };
249                    event_log.push(ParserLogEvent::Empty(span.element.clone()));
250                    tags.push(span);
251                }
252            }
253        }
254        tags.sort();
255        Ok(Ssml {
256            text: text_buffer,
257            tags,
258            event_log,
259        })
260    }
261}
262
263/// Parse an SSML element, this returns an `SsmlElement` as a tag to represent the SSML and the
264/// `ParsedElement` with the attributes to make conditions no the ssml type easier to write.
265pub(crate) fn parse_element(
266    elem: BytesStart,
267    reader: &mut Reader<&[u8]>,
268) -> Result<(SsmlElement, ParsedElement)> {
269    let name = elem.name();
270    let name = from_utf8(name.as_ref())?;
271    let elem_type = SsmlElement::from_str(name).unwrap();
272
273    let res = match elem_type {
274        SsmlElement::Speak => parse_speak(elem, reader)?,
275        SsmlElement::Lexicon => parse_lexicon(elem, reader)?,
276        SsmlElement::Lookup => parse_lookup(elem, reader)?,
277        SsmlElement::Meta => parse_meta(elem, reader)?,
278        SsmlElement::Metadata => ParsedElement::Metadata,
279        SsmlElement::Paragraph => ParsedElement::Paragraph,
280        SsmlElement::Sentence => ParsedElement::Sentence,
281        SsmlElement::Token => parse_token(elem, reader)?,
282        SsmlElement::Word => parse_word(elem, reader)?,
283        SsmlElement::SayAs => parse_say_as(elem, reader)?,
284        SsmlElement::Phoneme => parse_phoneme(elem, reader)?,
285        SsmlElement::Sub => parse_sub(elem, reader)?,
286        SsmlElement::Lang => parse_language(elem, reader)?,
287        SsmlElement::Voice => parse_voice(elem, reader)?,
288        SsmlElement::Emphasis => parse_emphasis(elem, reader)?,
289        SsmlElement::Break => parse_break(elem, reader)?,
290        SsmlElement::Prosody => parse_prosody(elem, reader)?,
291        SsmlElement::Audio => parse_audio(elem, reader)?,
292        SsmlElement::Mark => parse_mark(elem, reader)?,
293        SsmlElement::Description => {
294            let text = reader
295                .read_text(elem.to_end().name())
296                .unwrap_or_default()
297                .to_string();
298            ParsedElement::Description(text)
299        }
300        SsmlElement::Custom(ref s) => {
301            let mut attributes = BTreeMap::new();
302            for attr in elem.attributes() {
303                let attr = attr?;
304                attributes.insert(
305                    String::from_utf8(attr.key.0.to_vec())?,
306                    String::from_utf8(attr.value.to_vec())?,
307                );
308            }
309            ParsedElement::Custom((s.to_string(), attributes))
310        }
311    };
312
313    Ok((elem_type, res))
314}
315
316// TODO: handle start mark and end mark
317fn parse_speak<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
318    let version = elem.try_get_attribute("version")?;
319
320    // Technically spec non-compliant however commercial TTS such as amazon, google and microsoft
321    // don't require the version and just assume 1.1
322    let version = if let Some(v) = version {
323        let version = v.decode_and_unescape_value(reader)?;
324        match version.as_ref() {
325            "1.0" | "1.1" => (),
326            v => bail!("Unsupported SSML spec version: {}", v),
327        }
328        version.to_string()
329    } else {
330        "1.1".to_string()
331    };
332
333    let lang = elem.try_get_attribute("xml:lang")?;
334    let lang = if let Some(lang) = lang {
335        Some(lang.decode_and_unescape_value(reader)?.to_string())
336    } else {
337        None
338    };
339    let base = elem.try_get_attribute("xml:base")?;
340    let base = if let Some(base) = base {
341        Some(base.decode_and_unescape_value(reader)?.to_string())
342    } else {
343        None
344    };
345    let on_lang_failure = elem.try_get_attribute("onlangfailure")?;
346    let on_lang_failure = if let Some(lang) = on_lang_failure {
347        let value = lang.decode_and_unescape_value(reader)?;
348        Some(OnLanguageFailure::from_str(&value)?)
349    } else {
350        None
351    };
352
353    let mut xml_root_attrs = BTreeMap::new();
354    for attr in elem.attributes() {
355        let attr = attr?;
356
357        match std::str::from_utf8(attr.key.0).unwrap() {
358            "xml:base" | "xml:lang" | "onlangfailure" | "version" => continue,
359            attr_name => {
360                xml_root_attrs.insert(
361                    String::from(attr_name),
362                    String::from_utf8(attr.value.into())?,
363                );
364            }
365        }
366    }
367
368    Ok(ParsedElement::Speak(SpeakAttributes {
369        lang,
370        base,
371        on_lang_failure,
372        version,
373        xml_root_attrs,
374    }))
375}
376
377fn parse_lexicon<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
378    let xml_id = elem
379        .try_get_attribute("xml:id")?
380        .context("xml:id attribute is required with a lexicon element")?
381        .decode_and_unescape_value(reader)?
382        .to_string();
383
384    let uri: http::Uri = elem
385        .try_get_attribute("uri")?
386        .context("uri attribute is required with a lexicon element")?
387        .decode_and_unescape_value(reader)?
388        .to_string()
389        .parse()?;
390
391    let fetch_timeout = match elem.try_get_attribute("fetchtimeout")? {
392        Some(fetchtimeout) => {
393            let fetchtimeout = fetchtimeout.decode_and_unescape_value(reader)?;
394            Some(TimeDesignation::from_str(&fetchtimeout)?)
395        }
396        None => None,
397    };
398
399    let ty = match elem.try_get_attribute("type")? {
400        Some(ty) => {
401            let ty = ty.decode_and_unescape_value(reader)?.to_string();
402            let ty = MediaTypeBuf::from_string(ty)
403                .context("invalid media type for type attribute of lexicon element")?;
404
405            Some(ty)
406        }
407        None => None,
408    };
409
410    Ok(ParsedElement::Lexicon(LexiconAttributes {
411        uri,
412        xml_id,
413        fetch_timeout,
414        ty,
415    }))
416}
417
418fn parse_lookup<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
419    let lookup_ref = elem
420        .try_get_attribute("ref")?
421        .context("ref attribute is required with a lookup element")?
422        .decode_and_unescape_value(reader)?
423        .to_string();
424
425    Ok(ParsedElement::Lookup(LookupAttributes { lookup_ref }))
426}
427
428fn parse_meta<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
429    let content = elem
430        .try_get_attribute("content")?
431        .context("content attribute is required with a meta element")?
432        .decode_and_unescape_value(reader)?
433        .to_string();
434
435    let name = elem.try_get_attribute("name")?;
436    let http_equiv = elem.try_get_attribute("http-equiv")?;
437
438    let (name, http_equiv) = match (name, http_equiv) {
439        (Some(name), None) => (
440            Some(name.decode_and_unescape_value(reader)?.to_string()),
441            None,
442        ),
443        (None, Some(http_equiv)) => (
444            None,
445            Some(http_equiv.decode_and_unescape_value(reader)?.to_string()),
446        ),
447        _ => {
448            bail!("either name or http-equiv attr must be set in meta element (but not both)")
449        }
450    };
451
452    Ok(ParsedElement::Meta(MetaAttributes {
453        name,
454        http_equiv,
455        content,
456    }))
457}
458
459fn parse_token<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
460    let role = match elem.try_get_attribute("role")? {
461        Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
462        None => None,
463    };
464
465    Ok(ParsedElement::Token(TokenAttributes { role }))
466}
467
468fn parse_word<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
469    let role = match elem.try_get_attribute("role")? {
470        Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
471        None => None,
472    };
473
474    Ok(ParsedElement::Word(TokenAttributes { role }))
475}
476
477fn parse_say_as<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
478    // TODO: maybe rewrite the error handling in other parse functions to look like this.
479    let interpret_as = elem
480        .try_get_attribute("interpret-as")?
481        .context("interpret-as attribute is required with a say-as element")?
482        .decode_and_unescape_value(reader)?
483        .to_string();
484
485    let format = match elem.try_get_attribute("format")? {
486        Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
487        None => None,
488    };
489
490    let detail = match elem.try_get_attribute("detail")? {
491        Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
492        None => None,
493    };
494
495    Ok(ParsedElement::SayAs(SayAsAttributes {
496        interpret_as,
497        format,
498        detail,
499    }))
500}
501
502fn parse_phoneme<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
503    let phoneme = elem.try_get_attribute("ph")?;
504    let phoneme = if let Some(phoneme) = phoneme {
505        let value = phoneme.decode_and_unescape_value(reader)?;
506        value.to_string()
507    } else {
508        bail!("ph attribute is required with a phoneme element");
509    };
510
511    let alphabet = elem.try_get_attribute("alphabet")?;
512    let alphabet = if let Some(alpha) = alphabet {
513        let val = alpha.decode_and_unescape_value(reader)?;
514        Some(PhonemeAlphabet::from_str(&val).unwrap())
515    } else {
516        None
517    };
518
519    Ok(ParsedElement::Phoneme(PhonemeAttributes {
520        ph: phoneme,
521        alphabet,
522    }))
523}
524
525fn parse_break<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
526    let strength = elem.try_get_attribute("strength")?;
527    let strength = if let Some(strength) = strength {
528        let value = strength.decode_and_unescape_value(reader)?;
529        let value = Strength::from_str(&value)?;
530        Some(value)
531    } else {
532        None
533    };
534    let time = match elem.try_get_attribute("time")? {
535        Some(time) => {
536            let value = time.decode_and_unescape_value(reader)?;
537            Some(TimeDesignation::from_str(&value)?)
538        }
539        None => None,
540    };
541
542    Ok(ParsedElement::Break(BreakAttributes { strength, time }))
543}
544
545fn parse_sub<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
546    let alias = elem
547        .try_get_attribute("alias")?
548        .context("alias attribute required for sub element")?
549        .decode_and_unescape_value(reader)?
550        .to_string();
551
552    Ok(ParsedElement::Sub(SubAttributes { alias }))
553}
554
555fn parse_language<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
556    let lang = elem
557        .try_get_attribute("xml:lang")?
558        .context("xml:lang attribute is required with a lang element")?
559        .decode_and_unescape_value(reader)?
560        .to_string();
561
562    let on_lang_failure = match elem.try_get_attribute("onlangfailure")? {
563        Some(s) => {
564            let value = s.decode_and_unescape_value(reader)?;
565            Some(OnLanguageFailure::from_str(&value)?)
566        }
567        None => None,
568    };
569
570    Ok(ParsedElement::Lang(LangAttributes {
571        lang,
572        on_lang_failure,
573    }))
574}
575
576fn parse_emphasis<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
577    let level = elem.try_get_attribute("level")?;
578    let level = if let Some(level) = level {
579        let value = level.decode_and_unescape_value(reader)?;
580        let value = EmphasisLevel::from_str(&value)?;
581        Some(value)
582    } else {
583        None
584    };
585
586    Ok(ParsedElement::Emphasis(EmphasisAttributes { level }))
587}
588
589fn parse_prosody<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
590    let pitch = elem.try_get_attribute("pitch")?;
591    let pitch = if let Some(pitch) = pitch {
592        let value = pitch.decode_and_unescape_value(reader)?;
593        let value = match PitchRange::from_str(&value) {
594            Ok(result) => result,
595            Err(e) => bail!("Error: {}", e),
596        };
597
598        Some(value)
599    } else {
600        None
601    };
602    let contour = elem.try_get_attribute("contour")?;
603    let contour = if let Some(contour) = contour {
604        let value = contour.decode_and_unescape_value(reader)?;
605        let value = match PitchContour::from_str(&value) {
606            Ok(result) => result,
607            Err(e) => bail!("Error: {}", e),
608        };
609        Some(value)
610    } else {
611        None
612    };
613    let range = elem.try_get_attribute("range")?;
614    let range = if let Some(range) = range {
615        let value = range.decode_and_unescape_value(reader)?;
616        let value = match PitchRange::from_str(&value) {
617            Ok(result) => result,
618            Err(e) => bail!("Error: {}", e),
619        };
620
621        Some(value)
622    } else {
623        None
624    };
625    let rate = elem.try_get_attribute("rate")?;
626    let rate = if let Some(rate) = rate {
627        let value = rate.decode_and_unescape_value(reader)?;
628        let value = match RateRange::from_str(&value) {
629            Ok(result) => result,
630            Err(e) => bail!("Error: {}", e),
631        };
632
633        Some(value)
634    } else {
635        None
636    };
637    let duration = match elem.try_get_attribute("duration")? {
638        Some(val) => Some(val.decode_and_unescape_value(reader)?.parse()?),
639        None => None,
640    };
641
642    let volume = elem.try_get_attribute("volume")?;
643    let volume = if let Some(volume) = volume {
644        let value = volume.decode_and_unescape_value(reader)?;
645        let value = match VolumeRange::from_str(&value) {
646            Ok(result) => result,
647            Err(e) => bail!("Error: {}", e),
648        };
649
650        Some(value)
651    } else {
652        None
653    };
654
655    Ok(ParsedElement::Prosody(ProsodyAttributes {
656        pitch,
657        contour,
658        range,
659        rate,
660        duration,
661        volume,
662    }))
663}
664
665fn parse_mark<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
666    let name = elem
667        .try_get_attribute("name")?
668        .context("name attribute is required with mark element")?
669        .decode_and_unescape_value(reader)?
670        .to_string();
671
672    Ok(ParsedElement::Mark(MarkAttributes { name }))
673}
674
675fn parse_voice<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
676    let gender = elem.try_get_attribute("gender")?;
677    let gender = match gender {
678        Some(v) => {
679            let value = v.decode_and_unescape_value(reader)?;
680            if value.is_empty() {
681                None
682            } else {
683                Some(Gender::from_str(&value)?)
684            }
685        }
686        None => None,
687    };
688    let age = elem.try_get_attribute("age")?;
689    let age = match age {
690        Some(v) => {
691            let value = v.decode_and_unescape_value(reader)?;
692            if value.is_empty() {
693                None
694            } else {
695                Some(value.parse::<u8>()?)
696            }
697        }
698        None => None,
699    };
700    let variant = elem.try_get_attribute("variant")?;
701    let variant = match variant {
702        Some(v) => {
703            let value = v.decode_and_unescape_value(reader)?;
704            if value.is_empty() {
705                None
706            } else {
707                Some(value.parse::<NonZeroUsize>()?)
708            }
709        }
710        None => None,
711    };
712    let name = elem.try_get_attribute("name")?;
713    let name = match name {
714        Some(v) => {
715            let value = v.decode_and_unescape_value(reader)?;
716            value
717                .split(' ')
718                .map(|x| x.to_string())
719                .collect::<Vec<String>>()
720        }
721        None => vec![],
722    };
723    let languages = elem.try_get_attribute("languages")?;
724    let languages = match languages {
725        Some(v) => {
726            let value = v.decode_and_unescape_value(reader)?;
727            let mut res = vec![];
728            for language in value.split(' ') {
729                res.push(LanguageAccentPair::from_str(language)?);
730            }
731            res
732        }
733        None => vec![],
734    };
735    Ok(ParsedElement::Voice(VoiceAttributes {
736        gender,
737        age,
738        variant,
739        name,
740        languages,
741    }))
742}
743
744fn parse_audio<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
745    let src = match elem.try_get_attribute("src")? {
746        Some(s) => {
747            let src: http::Uri = s.decode_and_unescape_value(reader)?.to_string().parse()?;
748            Some(src)
749        }
750        None => None,
751    };
752
753    let fetch_timeout = match elem.try_get_attribute("fetchtimeout")? {
754        Some(fetchtimeout) => {
755            let fetchtimeout = fetchtimeout.decode_and_unescape_value(reader)?;
756            Some(TimeDesignation::from_str(&fetchtimeout)?)
757        }
758        None => None,
759    };
760
761    let fetch_hint = match elem.try_get_attribute("fetchhint")? {
762        Some(fetch) => {
763            let fetch = fetch.decode_and_unescape_value(reader)?;
764            FetchHint::from_str(&fetch)?
765        }
766        None => FetchHint::default(),
767    };
768
769    let max_age = if let Some(v) = elem.try_get_attribute("maxage")? {
770        Some(v.decode_and_unescape_value(reader)?.parse::<usize>()?)
771    } else {
772        None
773    };
774
775    let max_stale = if let Some(v) = elem.try_get_attribute("maxstale")? {
776        Some(v.decode_and_unescape_value(reader)?.parse::<usize>()?)
777    } else {
778        None
779    };
780
781    let clip_begin = match elem.try_get_attribute("clipBegin")? {
782        Some(clip) => {
783            let clip = clip.decode_and_unescape_value(reader)?;
784            TimeDesignation::from_str(&clip)?
785        }
786        None => TimeDesignation::Seconds(0.0),
787    };
788
789    let clip_end = match elem.try_get_attribute("clipEnd")? {
790        Some(clip) => {
791            let clip = clip.decode_and_unescape_value(reader)?;
792            Some(TimeDesignation::from_str(&clip)?)
793        }
794        None => None,
795    };
796
797    let repeat_count = if let Some(v) = elem.try_get_attribute("repeatCount")? {
798        v.decode_and_unescape_value(reader)?
799            .parse::<NonZeroUsize>()?
800    } else {
801        unsafe { NonZeroUsize::new_unchecked(1) }
802    };
803
804    let repeat_dur = match elem.try_get_attribute("repeatDur")? {
805        Some(repeat) => {
806            let repeat = repeat.decode_and_unescape_value(reader)?;
807            Some(TimeDesignation::from_str(&repeat)?)
808        }
809        None => None,
810    };
811
812    let sound_level = match elem.try_get_attribute("soundLevel")? {
813        Some(sound) => {
814            let sound = sound.decode_and_unescape_value(reader)?;
815            parse_decibel(&sound)?
816        }
817        None => 0.0,
818    };
819
820    let speed = match elem.try_get_attribute("speed")? {
821        Some(speed) => {
822            let speed = speed.decode_and_unescape_value(reader)?;
823            parse_unsigned_percentage(&speed)? / 100.0
824        }
825        None => 1.0,
826    };
827
828    Ok(ParsedElement::Audio(AudioAttributes {
829        src,
830        fetch_timeout,
831        fetch_hint,
832        max_age,
833        max_stale,
834        clip_begin,
835        clip_end,
836        repeat_count,
837        repeat_dur,
838        sound_level,
839        speed,
840    }))
841}
842
843pub(crate) fn parse_decibel(val: &str) -> anyhow::Result<f32> {
844    lazy_static! {
845        static ref DB_RE: Regex = Regex::new(r"^([+-]?(?:\d*\.)?\d+)dB$").unwrap();
846    }
847    let caps = DB_RE
848        .captures(val)
849        .context("value must be a valid decibel value")?;
850
851    let num_val = caps[1].parse::<f32>()?;
852    Ok(num_val)
853}
854
855/// returns percentages as written
856pub(crate) fn parse_unsigned_percentage(val: &str) -> anyhow::Result<f32> {
857    lazy_static! {
858        static ref PERCENT_RE: Regex = Regex::new(r"^+?((?:\d*\.)?\d+)%$").unwrap();
859    }
860    let caps = PERCENT_RE
861        .captures(val)
862        .context("value must be a valid percentage value")?;
863
864    let num_val = caps[1].parse::<f32>()?;
865    Ok(num_val)
866}
867
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a span over `start..end` wrapping a default `Speak` element, so the ordering
    /// test can vary the indices alone.
    fn speak_span(start: usize, end: usize) -> Span {
        Span {
            start,
            end,
            element: ParsedElement::Speak(Default::default()),
        }
    }

    /// Checks how spans compare when they differ only in their start and end indices.
    #[test]
    fn span_ordering() {
        let whole = speak_span(0, 10);
        let head = speak_span(0, 5);
        let inner = speak_span(4, 5);
        let after = speak_span(11, 15);

        // With equal starts, the span that ends later compares as smaller.
        assert!(whole < head);
        assert!(head < inner);
        assert!(whole < inner);
        assert!(whole < after);
        assert!(whole == whole);
    }

    /// Span indices count unicode scalar values rather than bytes, so swapping an ASCII
    /// apostrophe for a multi-byte curly one must not move the end of the first span.
    #[test]
    fn char_position_not_byte() {
        let curly = parse_ssml(r#"<speak version="1.1">Let’s review a complex structure. Please note how threshold of control is calculated in this example.</speak>"#).unwrap();
        let straight = parse_ssml(r#"<speak version="1.1">Let's review a complex structure. Please note how threshold of control is calculated in this example.</speak>"#).unwrap();

        let curly_end = curly.tags().next().unwrap().end;
        let straight_end = straight.tags().next().unwrap().end;

        assert_eq!(straight_end, curly_end);
        assert_eq!(straight_end, straight.get_text().chars().count());
    }

    /// Exercises `maybe_contains` on parsed documents; the tags are indexed in the order the
    /// parser recorded them.
    #[test]
    fn span_contains() {
        // Two empty breaks: the first tag holds both, but a break cannot hold its sibling.
        let breaks = parse_ssml(r#"<speak version="1.1"><break/><break/></speak>"#).unwrap();
        let break_tags = &breaks.tags;
        assert!(break_tags[0].maybe_contains(&break_tags[1]));
        assert!(break_tags[0].maybe_contains(&break_tags[2]));
        assert!(!break_tags[1].maybe_contains(&break_tags[2]));

        // Nested sentence and word followed by a trailing break.
        let nested =
            parse_ssml(r#"<speak version="1.1">Hello <s><w>hello</w></s> world <break/></speak>"#)
                .unwrap();
        let nested_tags = &nested.tags;
        assert!(nested_tags[0].maybe_contains(&nested_tags[1]));
        assert!(nested_tags[0].maybe_contains(&nested_tags[2]));
        assert!(nested_tags[0].maybe_contains(&nested_tags[3]));
        assert!(nested_tags[1].maybe_contains(&nested_tags[2]));
        assert!(!nested_tags[1].maybe_contains(&nested_tags[3]));
        assert!(!nested_tags[2].maybe_contains(&nested_tags[3]));

        // Two empty paragraphs after some text: the first must not claim the second.
        let paragraphs =
            parse_ssml(r#"<speak version="1.1">Hello <p></p><p></p></speak>"#).unwrap();
        assert!(!paragraphs.tags[1].maybe_contains(&paragraphs.tags[2]));

        // A break inside a namespaced custom element is contained by that element.
        let custom = parse_ssml(r#"<speak version="1.1"><mstts:express-as style="string" styledegree="value" role="string">hello<break/> world</mstts:express-as></speak>"#).unwrap();
        assert!(custom.tags[1].maybe_contains(&custom.tags[2]));
    }

    /// Documents that nest an element inside another of the same kind must fail to parse.
    #[test]
    fn reject_invalid_combos() {
        let nested_speak = parse_ssml("<speak><speak>hello</speak></speak>");
        assert!(nested_speak.is_err());

        let nested_paragraph = parse_ssml("<speak><p>hello<p>world</p></p></speak>");
        assert!(nested_paragraph.is_err());
    }

    /// The contents of `<desc>` must not show up in the extracted text.
    #[test]
    fn skip_description_text() {
        let document = r#"<?xml version="1.0"?>
<speak xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">
                 
  <!-- Normal use of <desc> -->
  Heads of State often make mistakes when speaking in a foreign language.
  One of the most well-known examples is that of John F. Kennedy:
  <audio src="ichbineinberliner.wav">If you could hear it, this would be
  a recording of John F. Kennedy speaking in Berlin.
    <desc>Kennedy's famous German language gaffe</desc>
  </audio>
</speak>"#;
        let expected = "Heads of State often make mistakes when speaking in a foreign language. One of the most well-known examples is that of John F. Kennedy: If you could hear it, this would be a recording of John F. Kennedy speaking in Berlin.";

        let parsed = parse_ssml(document).unwrap();

        assert_eq!(parsed.get_text().trim(), expected);
    }

    /// `<lang>` elements keep their `xml:lang` and optional `onlangfailure` attributes, and a
    /// `<lang>` without `xml:lang` is rejected.
    #[test]
    fn handle_language_elements() {
        let two_langs = r#"<speak version="1.1"><lang xml:lang="ja"></lang><lang xml:lang="en" onlangfailure="ignoretext"></lang></speak>"#;
        let japanese = ParsedElement::Lang(LangAttributes {
            lang: "ja".to_string(),
            on_lang_failure: None,
        });
        let english = ParsedElement::Lang(LangAttributes {
            lang: "en".to_string(),
            on_lang_failure: Some(OnLanguageFailure::IgnoreText),
        });

        let parsed = parse_ssml(two_langs).unwrap();

        assert_eq!(parsed.tags.len(), 3);
        assert_eq!(parsed.tags[1].element, japanese);
        assert_eq!(parsed.tags[2].element, english);

        // A plain `lang` attribute is not a substitute for `xml:lang`.
        let missing_xml_lang = r#"<speak version="1.1"><lang lang="ja"></lang></speak>"#;

        assert!(parse_ssml(missing_xml_lang).is_err());
    }

    /// Description text never enters the buffer; `<sub>` text does unless `expand_sub` is set.
    #[test]
    fn filter_out_elems() {
        let sub = SsmlElement::Sub;
        let description = SsmlElement::Description;
        let mut parser = SsmlParserBuilder::default().build().unwrap();

        assert!(parser.text_should_enter_buffer(Some(&sub)));
        assert!(!parser.text_should_enter_buffer(Some(&description)));

        parser.expand_sub = true;

        assert!(!parser.text_should_enter_buffer(Some(&sub)));
        assert!(!parser.text_should_enter_buffer(Some(&description)));
    }

    /// With `expand_sub` enabled the alias replaces the `<sub>` contents; by default the
    /// original text is kept.
    #[test]
    fn expand_sub() {
        let sub =
            r#"<speak version="1.1"><sub alias="World wide web consortium">W3C</sub></speak>"#;

        let expanding = SsmlParserBuilder::default()
            .expand_sub(true)
            .build()
            .unwrap();
        let expanded = expanding.parse(sub).unwrap();
        assert_eq!(expanded.get_text().trim(), "World wide web consortium");
        assert_eq!(expanded.event_log.len(), 3);
        assert!(matches!(expanded.event_log[1], ParserLogEvent::Text(_)));

        let verbatim = SsmlParserBuilder::default()
            .build()
            .unwrap()
            .parse(sub)
            .unwrap();
        assert_eq!(verbatim.get_text().trim(), "W3C");
        assert_eq!(verbatim.event_log.len(), 5);
    }

    /// Decibel values need a number followed by the exact, case-sensitive suffix `dB`.
    #[test]
    fn decibels() {
        for malformed in &["56", "hello", "64.5DB", "64.5dBs"] {
            assert!(
                parse_decibel(malformed).is_err(),
                "{:?} should be rejected",
                malformed
            );
        }

        assert_eq!(parse_decibel("-10dB").unwrap() as i32, -10);
        assert_eq!(parse_decibel("15dB").unwrap() as i32, 15);
        assert_eq!(parse_decibel(".5dB").unwrap(), 0.5);
    }

    /// Percentages need a number followed by exactly one `%`, and come back as written.
    #[test]
    fn unsigned_percentages() {
        for malformed in &["56", "64pc", "74%%"] {
            assert!(
                parse_unsigned_percentage(malformed).is_err(),
                "{:?} should be rejected",
                malformed
            );
        }

        assert_eq!(parse_unsigned_percentage("10%").unwrap() as i32, 10);
        assert_eq!(parse_unsigned_percentage("110%").unwrap() as i32, 110);
        assert_eq!(parse_unsigned_percentage(".5%").unwrap(), 0.5);
    }
}