1use crate::elements::*;
5use crate::*;
6use anyhow::{bail, Context, Result};
7use derive_builder::Builder;
8use lazy_static::lazy_static;
9use mediatype::MediaTypeBuf;
10use quick_xml::events::{BytesStart, BytesText, Event};
11use quick_xml::reader::Reader;
12use regex::Regex;
13use std::cmp::{Ord, Ordering};
14use std::collections::BTreeMap;
15use std::io;
16use std::num::NonZeroUsize;
17use std::str::from_utf8;
18use std::str::FromStr;
19
20#[derive(Clone, Debug, PartialEq)]
22pub struct Span {
23 pub start: usize,
26 pub end: usize,
29 pub element: ParsedElement,
31}
32
33impl Span {
34 pub fn maybe_contains(&self, other: &Self) -> bool {
45 self.element.can_contain(&other.element)
46 && (self.start <= other.start && self.end >= other.end)
47 }
48}
49
// Marker impl: `Eq` is a supertrait of `Ord`, which `Span` implements so spans can be sorted.
impl Eq for Span {}
51
52impl Ord for Span {
53 fn cmp(&self, other: &Self) -> Ordering {
54 match self.start.cmp(&other.start) {
57 Ordering::Equal => other.end.cmp(&self.end),
58 ord => ord,
59 }
60 }
61}
62
63impl PartialOrd for Span {
64 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
65 Some(self.cmp(other))
66 }
67}
68
69#[derive(Clone, Debug, Builder)]
71pub struct SsmlParser {
72 #[builder(default = "false")]
75 expand_sub: bool,
76}
77
78fn push_text(e: BytesText, text_buffer: &mut String) -> Result<()> {
83 let ends_in_whitespace = text_buffer.ends_with(char::is_whitespace);
84 let text = e.unescape()?;
85 let trimmed = text.trim();
86 if trimmed.is_empty() {
87 if !(text_buffer.is_empty() || ends_in_whitespace) {
88 text_buffer.push(' ');
89 }
90 } else {
91 if !ends_in_whitespace && text.starts_with(char::is_whitespace) {
92 text_buffer.push(' ');
93 }
94 let mut first = true;
95 for line in trimmed.lines() {
96 if !first {
97 text_buffer.push(' ');
98 }
99 text_buffer.push_str(line.trim());
100 first = false;
101 }
102 if text.ends_with(char::is_whitespace) {
103 text_buffer.push(' ');
104 }
105 }
106 Ok(())
107}
108
109pub fn parse_ssml(ssml: &str) -> Result<Ssml> {
111 SsmlParserBuilder::default().build().unwrap().parse(ssml)
112}
113
114impl SsmlParser {
115 fn text_should_enter_buffer(&self, element: Option<&SsmlElement>) -> bool {
118 match element {
119 None => true,
120 Some(elem) => {
121 !(self.expand_sub && elem == &SsmlElement::Sub)
122 && elem.contains_synthesisable_text()
123 }
124 }
125 }
126
127 pub fn parse(&self, ssml: &str) -> Result<Ssml> {
129 let mut reader = Reader::from_str(ssml);
130 reader.check_end_names(true);
131 let mut has_started = false;
132 let mut text_buffer = String::new();
133 let mut open_tags = vec![];
134 let mut tags = vec![];
135 let mut event_log = vec![];
136
137 loop {
138 match reader.read_event()? {
139 Event::Start(e) if e.local_name().as_ref() == b"speak" => {
140 if !has_started {
141 text_buffer.clear();
142 } else {
143 bail!("Speak element cannot be placed inside a Speak");
144 }
145 has_started = true;
146
147 let element = parse_speak(e, &reader)?;
148 event_log.push(ParserLogEvent::Open(element.clone()));
149
150 let span = Span {
151 start: text_buffer.chars().count(),
152 end: text_buffer.chars().count(),
153 element,
154 };
155
156 open_tags.push((SsmlElement::Speak, tags.len(), span));
157 }
158 Event::Start(e) => {
159 if has_started {
165 if !(text_buffer.is_empty() || text_buffer.ends_with(char::is_whitespace))
166 && matches!(e.local_name().as_ref(), b"s" | b"p")
167 {
168 text_buffer.push(' ');
170 }
171 let (ty, element) = parse_element(e, &mut reader)?;
172 if ty == SsmlElement::Sub && self.expand_sub {
173 if let ParsedElement::Sub(attrs) = &element {
174 let text_start = text_buffer.len();
175 text_buffer.push(' ');
176 text_buffer.push_str(&attrs.alias);
177 text_buffer.push(' ');
178 let text_end = text_buffer.len();
179 event_log.push(ParserLogEvent::Text((text_start, text_end)));
180 } else {
181 unreachable!("Sub element wasn't returned for sub type");
182 }
183 } else {
184 event_log.push(ParserLogEvent::Open(element.clone()));
185 match open_tags.last().map(|x| &x.0) {
186 Some(open_type) if !open_type.can_contain(&ty) => {
187 bail!("{:?} cannot be placed inside {:?}", ty, open_type)
188 }
189 _ => {}
190 }
191 }
192 let new_span = Span {
193 start: text_buffer.chars().count(),
194 end: text_buffer.chars().count(),
195 element,
196 };
197
198 open_tags.push((ty, tags.len(), new_span));
199 }
200 }
201 Event::Comment(_)
202 | Event::CData(_)
203 | Event::Decl(_)
204 | Event::PI(_)
205 | Event::DocType(_) => continue,
206 Event::Eof => break,
207 Event::Text(e) => {
208 let elem = open_tags.last().map(|x| &x.0);
209 if self.text_should_enter_buffer(elem) {
210 let text_start = text_buffer.len();
211 push_text(e, &mut text_buffer)?;
212 let text_end = text_buffer.len();
213 event_log.push(ParserLogEvent::Text((text_start, text_end)));
214 }
215 }
216 Event::End(e) => {
217 let name = e.name();
218 let name = from_utf8(name.as_ref())?;
219 if open_tags.is_empty() {
220 bail!(
221 "Invalid SSML close tag '{}' presented without open tag.",
222 name
223 );
224 }
225 let ssml_elem = SsmlElement::from_str(name).unwrap();
226 if ssml_elem != open_tags[open_tags.len() - 1].0 {
227 } else {
229 let (_, pos, mut span) = open_tags.remove(open_tags.len() - 1);
231 if !(ssml_elem == SsmlElement::Sub && self.expand_sub) {
232 event_log.push(ParserLogEvent::Close(span.element.clone()));
233 span.end = text_buffer.chars().count();
234 tags.insert(pos, span);
235 if !(ssml_elem == SsmlElement::Speak && open_tags.is_empty()) {
236 } else {
237 break;
238 }
239 }
240 }
241 }
242 Event::Empty(e) => {
243 let (_, element) = parse_element(e, &mut reader)?;
244 let span = Span {
245 start: text_buffer.chars().count(),
246 end: text_buffer.chars().count(),
247 element,
248 };
249 event_log.push(ParserLogEvent::Empty(span.element.clone()));
250 tags.push(span);
251 }
252 }
253 }
254 tags.sort();
255 Ok(Ssml {
256 text: text_buffer,
257 tags,
258 event_log,
259 })
260 }
261}
262
263pub(crate) fn parse_element(
266 elem: BytesStart,
267 reader: &mut Reader<&[u8]>,
268) -> Result<(SsmlElement, ParsedElement)> {
269 let name = elem.name();
270 let name = from_utf8(name.as_ref())?;
271 let elem_type = SsmlElement::from_str(name).unwrap();
272
273 let res = match elem_type {
274 SsmlElement::Speak => parse_speak(elem, reader)?,
275 SsmlElement::Lexicon => parse_lexicon(elem, reader)?,
276 SsmlElement::Lookup => parse_lookup(elem, reader)?,
277 SsmlElement::Meta => parse_meta(elem, reader)?,
278 SsmlElement::Metadata => ParsedElement::Metadata,
279 SsmlElement::Paragraph => ParsedElement::Paragraph,
280 SsmlElement::Sentence => ParsedElement::Sentence,
281 SsmlElement::Token => parse_token(elem, reader)?,
282 SsmlElement::Word => parse_word(elem, reader)?,
283 SsmlElement::SayAs => parse_say_as(elem, reader)?,
284 SsmlElement::Phoneme => parse_phoneme(elem, reader)?,
285 SsmlElement::Sub => parse_sub(elem, reader)?,
286 SsmlElement::Lang => parse_language(elem, reader)?,
287 SsmlElement::Voice => parse_voice(elem, reader)?,
288 SsmlElement::Emphasis => parse_emphasis(elem, reader)?,
289 SsmlElement::Break => parse_break(elem, reader)?,
290 SsmlElement::Prosody => parse_prosody(elem, reader)?,
291 SsmlElement::Audio => parse_audio(elem, reader)?,
292 SsmlElement::Mark => parse_mark(elem, reader)?,
293 SsmlElement::Description => {
294 let text = reader
295 .read_text(elem.to_end().name())
296 .unwrap_or_default()
297 .to_string();
298 ParsedElement::Description(text)
299 }
300 SsmlElement::Custom(ref s) => {
301 let mut attributes = BTreeMap::new();
302 for attr in elem.attributes() {
303 let attr = attr?;
304 attributes.insert(
305 String::from_utf8(attr.key.0.to_vec())?,
306 String::from_utf8(attr.value.to_vec())?,
307 );
308 }
309 ParsedElement::Custom((s.to_string(), attributes))
310 }
311 };
312
313 Ok((elem_type, res))
314}
315
316fn parse_speak<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
318 let version = elem.try_get_attribute("version")?;
319
320 let version = if let Some(v) = version {
323 let version = v.decode_and_unescape_value(reader)?;
324 match version.as_ref() {
325 "1.0" | "1.1" => (),
326 v => bail!("Unsupported SSML spec version: {}", v),
327 }
328 version.to_string()
329 } else {
330 "1.1".to_string()
331 };
332
333 let lang = elem.try_get_attribute("xml:lang")?;
334 let lang = if let Some(lang) = lang {
335 Some(lang.decode_and_unescape_value(reader)?.to_string())
336 } else {
337 None
338 };
339 let base = elem.try_get_attribute("xml:base")?;
340 let base = if let Some(base) = base {
341 Some(base.decode_and_unescape_value(reader)?.to_string())
342 } else {
343 None
344 };
345 let on_lang_failure = elem.try_get_attribute("onlangfailure")?;
346 let on_lang_failure = if let Some(lang) = on_lang_failure {
347 let value = lang.decode_and_unescape_value(reader)?;
348 Some(OnLanguageFailure::from_str(&value)?)
349 } else {
350 None
351 };
352
353 let mut xml_root_attrs = BTreeMap::new();
354 for attr in elem.attributes() {
355 let attr = attr?;
356
357 match std::str::from_utf8(attr.key.0).unwrap() {
358 "xml:base" | "xml:lang" | "onlangfailure" | "version" => continue,
359 attr_name => {
360 xml_root_attrs.insert(
361 String::from(attr_name),
362 String::from_utf8(attr.value.into())?,
363 );
364 }
365 }
366 }
367
368 Ok(ParsedElement::Speak(SpeakAttributes {
369 lang,
370 base,
371 on_lang_failure,
372 version,
373 xml_root_attrs,
374 }))
375}
376
377fn parse_lexicon<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
378 let xml_id = elem
379 .try_get_attribute("xml:id")?
380 .context("xml:id attribute is required with a lexicon element")?
381 .decode_and_unescape_value(reader)?
382 .to_string();
383
384 let uri: http::Uri = elem
385 .try_get_attribute("uri")?
386 .context("uri attribute is required with a lexicon element")?
387 .decode_and_unescape_value(reader)?
388 .to_string()
389 .parse()?;
390
391 let fetch_timeout = match elem.try_get_attribute("fetchtimeout")? {
392 Some(fetchtimeout) => {
393 let fetchtimeout = fetchtimeout.decode_and_unescape_value(reader)?;
394 Some(TimeDesignation::from_str(&fetchtimeout)?)
395 }
396 None => None,
397 };
398
399 let ty = match elem.try_get_attribute("type")? {
400 Some(ty) => {
401 let ty = ty.decode_and_unescape_value(reader)?.to_string();
402 let ty = MediaTypeBuf::from_string(ty)
403 .context("invalid media type for type attribute of lexicon element")?;
404
405 Some(ty)
406 }
407 None => None,
408 };
409
410 Ok(ParsedElement::Lexicon(LexiconAttributes {
411 uri,
412 xml_id,
413 fetch_timeout,
414 ty,
415 }))
416}
417
418fn parse_lookup<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
419 let lookup_ref = elem
420 .try_get_attribute("ref")?
421 .context("ref attribute is required with a lookup element")?
422 .decode_and_unescape_value(reader)?
423 .to_string();
424
425 Ok(ParsedElement::Lookup(LookupAttributes { lookup_ref }))
426}
427
428fn parse_meta<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
429 let content = elem
430 .try_get_attribute("content")?
431 .context("content attribute is required with a meta element")?
432 .decode_and_unescape_value(reader)?
433 .to_string();
434
435 let name = elem.try_get_attribute("name")?;
436 let http_equiv = elem.try_get_attribute("http-equiv")?;
437
438 let (name, http_equiv) = match (name, http_equiv) {
439 (Some(name), None) => (
440 Some(name.decode_and_unescape_value(reader)?.to_string()),
441 None,
442 ),
443 (None, Some(http_equiv)) => (
444 None,
445 Some(http_equiv.decode_and_unescape_value(reader)?.to_string()),
446 ),
447 _ => {
448 bail!("either name or http-equiv attr must be set in meta element (but not both)")
449 }
450 };
451
452 Ok(ParsedElement::Meta(MetaAttributes {
453 name,
454 http_equiv,
455 content,
456 }))
457}
458
459fn parse_token<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
460 let role = match elem.try_get_attribute("role")? {
461 Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
462 None => None,
463 };
464
465 Ok(ParsedElement::Token(TokenAttributes { role }))
466}
467
468fn parse_word<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
469 let role = match elem.try_get_attribute("role")? {
470 Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
471 None => None,
472 };
473
474 Ok(ParsedElement::Word(TokenAttributes { role }))
475}
476
477fn parse_say_as<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
478 let interpret_as = elem
480 .try_get_attribute("interpret-as")?
481 .context("interpret-as attribute is required with a say-as element")?
482 .decode_and_unescape_value(reader)?
483 .to_string();
484
485 let format = match elem.try_get_attribute("format")? {
486 Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
487 None => None,
488 };
489
490 let detail = match elem.try_get_attribute("detail")? {
491 Some(attr) => Some(attr.decode_and_unescape_value(reader)?.to_string()),
492 None => None,
493 };
494
495 Ok(ParsedElement::SayAs(SayAsAttributes {
496 interpret_as,
497 format,
498 detail,
499 }))
500}
501
502fn parse_phoneme<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
503 let phoneme = elem.try_get_attribute("ph")?;
504 let phoneme = if let Some(phoneme) = phoneme {
505 let value = phoneme.decode_and_unescape_value(reader)?;
506 value.to_string()
507 } else {
508 bail!("ph attribute is required with a phoneme element");
509 };
510
511 let alphabet = elem.try_get_attribute("alphabet")?;
512 let alphabet = if let Some(alpha) = alphabet {
513 let val = alpha.decode_and_unescape_value(reader)?;
514 Some(PhonemeAlphabet::from_str(&val).unwrap())
515 } else {
516 None
517 };
518
519 Ok(ParsedElement::Phoneme(PhonemeAttributes {
520 ph: phoneme,
521 alphabet,
522 }))
523}
524
525fn parse_break<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
526 let strength = elem.try_get_attribute("strength")?;
527 let strength = if let Some(strength) = strength {
528 let value = strength.decode_and_unescape_value(reader)?;
529 let value = Strength::from_str(&value)?;
530 Some(value)
531 } else {
532 None
533 };
534 let time = match elem.try_get_attribute("time")? {
535 Some(time) => {
536 let value = time.decode_and_unescape_value(reader)?;
537 Some(TimeDesignation::from_str(&value)?)
538 }
539 None => None,
540 };
541
542 Ok(ParsedElement::Break(BreakAttributes { strength, time }))
543}
544
545fn parse_sub<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
546 let alias = elem
547 .try_get_attribute("alias")?
548 .context("alias attribute required for sub element")?
549 .decode_and_unescape_value(reader)?
550 .to_string();
551
552 Ok(ParsedElement::Sub(SubAttributes { alias }))
553}
554
555fn parse_language<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
556 let lang = elem
557 .try_get_attribute("xml:lang")?
558 .context("xml:lang attribute is required with a lang element")?
559 .decode_and_unescape_value(reader)?
560 .to_string();
561
562 let on_lang_failure = match elem.try_get_attribute("onlangfailure")? {
563 Some(s) => {
564 let value = s.decode_and_unescape_value(reader)?;
565 Some(OnLanguageFailure::from_str(&value)?)
566 }
567 None => None,
568 };
569
570 Ok(ParsedElement::Lang(LangAttributes {
571 lang,
572 on_lang_failure,
573 }))
574}
575
576fn parse_emphasis<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
577 let level = elem.try_get_attribute("level")?;
578 let level = if let Some(level) = level {
579 let value = level.decode_and_unescape_value(reader)?;
580 let value = EmphasisLevel::from_str(&value)?;
581 Some(value)
582 } else {
583 None
584 };
585
586 Ok(ParsedElement::Emphasis(EmphasisAttributes { level }))
587}
588
589fn parse_prosody<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
590 let pitch = elem.try_get_attribute("pitch")?;
591 let pitch = if let Some(pitch) = pitch {
592 let value = pitch.decode_and_unescape_value(reader)?;
593 let value = match PitchRange::from_str(&value) {
594 Ok(result) => result,
595 Err(e) => bail!("Error: {}", e),
596 };
597
598 Some(value)
599 } else {
600 None
601 };
602 let contour = elem.try_get_attribute("contour")?;
603 let contour = if let Some(contour) = contour {
604 let value = contour.decode_and_unescape_value(reader)?;
605 let value = match PitchContour::from_str(&value) {
606 Ok(result) => result,
607 Err(e) => bail!("Error: {}", e),
608 };
609 Some(value)
610 } else {
611 None
612 };
613 let range = elem.try_get_attribute("range")?;
614 let range = if let Some(range) = range {
615 let value = range.decode_and_unescape_value(reader)?;
616 let value = match PitchRange::from_str(&value) {
617 Ok(result) => result,
618 Err(e) => bail!("Error: {}", e),
619 };
620
621 Some(value)
622 } else {
623 None
624 };
625 let rate = elem.try_get_attribute("rate")?;
626 let rate = if let Some(rate) = rate {
627 let value = rate.decode_and_unescape_value(reader)?;
628 let value = match RateRange::from_str(&value) {
629 Ok(result) => result,
630 Err(e) => bail!("Error: {}", e),
631 };
632
633 Some(value)
634 } else {
635 None
636 };
637 let duration = match elem.try_get_attribute("duration")? {
638 Some(val) => Some(val.decode_and_unescape_value(reader)?.parse()?),
639 None => None,
640 };
641
642 let volume = elem.try_get_attribute("volume")?;
643 let volume = if let Some(volume) = volume {
644 let value = volume.decode_and_unescape_value(reader)?;
645 let value = match VolumeRange::from_str(&value) {
646 Ok(result) => result,
647 Err(e) => bail!("Error: {}", e),
648 };
649
650 Some(value)
651 } else {
652 None
653 };
654
655 Ok(ParsedElement::Prosody(ProsodyAttributes {
656 pitch,
657 contour,
658 range,
659 rate,
660 duration,
661 volume,
662 }))
663}
664
665fn parse_mark<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
666 let name = elem
667 .try_get_attribute("name")?
668 .context("name attribute is required with mark element")?
669 .decode_and_unescape_value(reader)?
670 .to_string();
671
672 Ok(ParsedElement::Mark(MarkAttributes { name }))
673}
674
675fn parse_voice<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
676 let gender = elem.try_get_attribute("gender")?;
677 let gender = match gender {
678 Some(v) => {
679 let value = v.decode_and_unescape_value(reader)?;
680 if value.is_empty() {
681 None
682 } else {
683 Some(Gender::from_str(&value)?)
684 }
685 }
686 None => None,
687 };
688 let age = elem.try_get_attribute("age")?;
689 let age = match age {
690 Some(v) => {
691 let value = v.decode_and_unescape_value(reader)?;
692 if value.is_empty() {
693 None
694 } else {
695 Some(value.parse::<u8>()?)
696 }
697 }
698 None => None,
699 };
700 let variant = elem.try_get_attribute("variant")?;
701 let variant = match variant {
702 Some(v) => {
703 let value = v.decode_and_unescape_value(reader)?;
704 if value.is_empty() {
705 None
706 } else {
707 Some(value.parse::<NonZeroUsize>()?)
708 }
709 }
710 None => None,
711 };
712 let name = elem.try_get_attribute("name")?;
713 let name = match name {
714 Some(v) => {
715 let value = v.decode_and_unescape_value(reader)?;
716 value
717 .split(' ')
718 .map(|x| x.to_string())
719 .collect::<Vec<String>>()
720 }
721 None => vec![],
722 };
723 let languages = elem.try_get_attribute("languages")?;
724 let languages = match languages {
725 Some(v) => {
726 let value = v.decode_and_unescape_value(reader)?;
727 let mut res = vec![];
728 for language in value.split(' ') {
729 res.push(LanguageAccentPair::from_str(language)?);
730 }
731 res
732 }
733 None => vec![],
734 };
735 Ok(ParsedElement::Voice(VoiceAttributes {
736 gender,
737 age,
738 variant,
739 name,
740 languages,
741 }))
742}
743
744fn parse_audio<R: io::BufRead>(elem: BytesStart, reader: &Reader<R>) -> Result<ParsedElement> {
745 let src = match elem.try_get_attribute("src")? {
746 Some(s) => {
747 let src: http::Uri = s.decode_and_unescape_value(reader)?.to_string().parse()?;
748 Some(src)
749 }
750 None => None,
751 };
752
753 let fetch_timeout = match elem.try_get_attribute("fetchtimeout")? {
754 Some(fetchtimeout) => {
755 let fetchtimeout = fetchtimeout.decode_and_unescape_value(reader)?;
756 Some(TimeDesignation::from_str(&fetchtimeout)?)
757 }
758 None => None,
759 };
760
761 let fetch_hint = match elem.try_get_attribute("fetchhint")? {
762 Some(fetch) => {
763 let fetch = fetch.decode_and_unescape_value(reader)?;
764 FetchHint::from_str(&fetch)?
765 }
766 None => FetchHint::default(),
767 };
768
769 let max_age = if let Some(v) = elem.try_get_attribute("maxage")? {
770 Some(v.decode_and_unescape_value(reader)?.parse::<usize>()?)
771 } else {
772 None
773 };
774
775 let max_stale = if let Some(v) = elem.try_get_attribute("maxstale")? {
776 Some(v.decode_and_unescape_value(reader)?.parse::<usize>()?)
777 } else {
778 None
779 };
780
781 let clip_begin = match elem.try_get_attribute("clipBegin")? {
782 Some(clip) => {
783 let clip = clip.decode_and_unescape_value(reader)?;
784 TimeDesignation::from_str(&clip)?
785 }
786 None => TimeDesignation::Seconds(0.0),
787 };
788
789 let clip_end = match elem.try_get_attribute("clipEnd")? {
790 Some(clip) => {
791 let clip = clip.decode_and_unescape_value(reader)?;
792 Some(TimeDesignation::from_str(&clip)?)
793 }
794 None => None,
795 };
796
797 let repeat_count = if let Some(v) = elem.try_get_attribute("repeatCount")? {
798 v.decode_and_unescape_value(reader)?
799 .parse::<NonZeroUsize>()?
800 } else {
801 unsafe { NonZeroUsize::new_unchecked(1) }
802 };
803
804 let repeat_dur = match elem.try_get_attribute("repeatDur")? {
805 Some(repeat) => {
806 let repeat = repeat.decode_and_unescape_value(reader)?;
807 Some(TimeDesignation::from_str(&repeat)?)
808 }
809 None => None,
810 };
811
812 let sound_level = match elem.try_get_attribute("soundLevel")? {
813 Some(sound) => {
814 let sound = sound.decode_and_unescape_value(reader)?;
815 parse_decibel(&sound)?
816 }
817 None => 0.0,
818 };
819
820 let speed = match elem.try_get_attribute("speed")? {
821 Some(speed) => {
822 let speed = speed.decode_and_unescape_value(reader)?;
823 parse_unsigned_percentage(&speed)? / 100.0
824 }
825 None => 1.0,
826 };
827
828 Ok(ParsedElement::Audio(AudioAttributes {
829 src,
830 fetch_timeout,
831 fetch_hint,
832 max_age,
833 max_stale,
834 clip_begin,
835 clip_end,
836 repeat_count,
837 repeat_dur,
838 sound_level,
839 speed,
840 }))
841}
842
843pub(crate) fn parse_decibel(val: &str) -> anyhow::Result<f32> {
844 lazy_static! {
845 static ref DB_RE: Regex = Regex::new(r"^([+-]?(?:\d*\.)?\d+)dB$").unwrap();
846 }
847 let caps = DB_RE
848 .captures(val)
849 .context("value must be a valid decibel value")?;
850
851 let num_val = caps[1].parse::<f32>()?;
852 Ok(num_val)
853}
854
855pub(crate) fn parse_unsigned_percentage(val: &str) -> anyhow::Result<f32> {
857 lazy_static! {
858 static ref PERCENT_RE: Regex = Regex::new(r"^+?((?:\d*\.)?\d+)%$").unwrap();
859 }
860 let caps = PERCENT_RE
861 .captures(val)
862 .context("value must be a valid percentage value")?;
863
864 let num_val = caps[1].parse::<f32>()?;
865 Ok(num_val)
866}
867
#[cfg(test)]
mod tests {
    use super::*;

    /// Spans sort by ascending start; with equal starts the longer span sorts first.
    #[test]
    fn span_ordering() {
        let a = Span {
            start: 0,
            end: 10,
            element: ParsedElement::Speak(Default::default()),
        };

        let b = Span {
            start: 0,
            end: 5,
            element: ParsedElement::Speak(Default::default()),
        };

        let c = Span {
            start: 4,
            end: 5,
            element: ParsedElement::Speak(Default::default()),
        };

        let d = Span {
            start: 11,
            end: 15,
            element: ParsedElement::Speak(Default::default()),
        };

        // `a` and `b` share a start, so the longer span `a` comes first.
        assert!(a < b);
        assert!(b < c);
        assert!(a < c);
        assert!(a < d);
        assert!(a == a);
    }

    /// Span positions count characters, not bytes: a multi-byte apostrophe must not shift
    /// the end of the root span.
    #[test]
    fn char_position_not_byte() {
        let unicode = parse_ssml(r#"<speak version="1.1">Let’s review a complex structure. Please note how threshold of control is calculated in this example.</speak>"#).unwrap();
        let ascii = parse_ssml(r#"<speak version="1.1">Let's review a complex structure. Please note how threshold of control is calculated in this example.</speak>"#).unwrap();

        let master_span_unicode = unicode.tags().next().unwrap();
        let master_span_ascii = ascii.tags().next().unwrap();

        assert_eq!(master_span_ascii.end, master_span_unicode.end);
        assert_eq!(master_span_ascii.end, ascii.get_text().chars().count());
    }

    /// `maybe_contains` needs both an allowed parent/child pairing and an enclosing range.
    #[test]
    fn span_contains() {
        let empty = parse_ssml(r#"<speak version="1.1"><break/><break/></speak>"#).unwrap();

        // tags[0] is the root speak element; tags[1] and tags[2] are the two breaks.
        assert!(empty.tags[0].maybe_contains(&empty.tags[1]));
        assert!(empty.tags[0].maybe_contains(&empty.tags[2]));
        assert!(!empty.tags[1].maybe_contains(&empty.tags[2]));

        let hello =
            parse_ssml(r#"<speak version="1.1">Hello <s><w>hello</w></s> world <break/></speak>"#)
                .unwrap();
        assert!(hello.tags[0].maybe_contains(&hello.tags[1]));
        assert!(hello.tags[0].maybe_contains(&hello.tags[2]));
        assert!(hello.tags[0].maybe_contains(&hello.tags[3]));
        assert!(hello.tags[1].maybe_contains(&hello.tags[2]));
        assert!(!hello.tags[1].maybe_contains(&hello.tags[3]));
        assert!(!hello.tags[2].maybe_contains(&hello.tags[3]));

        // Two empty sibling paragraphs occupy the same position but cannot contain each other.
        let empty = parse_ssml(r#"<speak version="1.1">Hello <p></p><p></p></speak>"#).unwrap();
        assert!(!empty.tags[1].maybe_contains(&empty.tags[2]));

        // A custom (namespaced) element may contain a break.
        let break_inside_custom = parse_ssml(r#"<speak version="1.1"><mstts:express-as style="string" styledegree="value" role="string">hello<break/> world</mstts:express-as></speak>"#).unwrap();
        assert!(break_inside_custom.tags[1].maybe_contains(&break_inside_custom.tags[2]));
    }

    /// Nesting that the element rules forbid must be rejected.
    #[test]
    fn reject_invalid_combos() {
        assert!(parse_ssml("<speak><speak>hello</speak></speak>").is_err());
        assert!(parse_ssml("<speak><p>hello<p>world</p></p></speak>").is_err());
    }

    /// The content of a `<desc>` element must not appear in the extracted text.
    #[test]
    fn skip_description_text() {
        let text = r#"<?xml version="1.0"?>
<speak xmlns="http://www.w3.org/2001/10/synthesis"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
 xml:lang="en-US">

 <!-- Normal use of <desc> -->
 Heads of State often make mistakes when speaking in a foreign language.
 One of the most well-known examples is that of John F. Kennedy:
 <audio src="ichbineinberliner.wav">If you could hear it, this would be
 a recording of John F. Kennedy speaking in Berlin.
 <desc>Kennedy's famous German language gaffe</desc>
 </audio>
</speak>"#;

        let res = parse_ssml(text).unwrap();

        assert_eq!(res.get_text().trim(),
            "Heads of State often make mistakes when speaking in a foreign language. One of the most well-known examples is that of John F. Kennedy: If you could hear it, this would be a recording of John F. Kennedy speaking in Berlin.");
    }

    /// `<lang>` requires `xml:lang` and accepts an optional `onlangfailure`.
    #[test]
    fn handle_language_elements() {
        let lang = r#"<speak version="1.1"><lang xml:lang="ja"></lang><lang xml:lang="en" onlangfailure="ignoretext"></lang></speak>"#;

        let res = parse_ssml(lang).unwrap();

        assert_eq!(res.tags.len(), 3);
        assert_eq!(
            res.tags[1].element,
            ParsedElement::Lang(LangAttributes {
                lang: "ja".to_string(),
                on_lang_failure: None
            })
        );
        assert_eq!(
            res.tags[2].element,
            ParsedElement::Lang(LangAttributes {
                lang: "en".to_string(),
                on_lang_failure: Some(OnLanguageFailure::IgnoreText)
            })
        );

        // An un-namespaced `lang` attribute does not satisfy the `xml:lang` requirement.
        let lang = r#"<speak version="1.1"><lang lang="ja"></lang></speak>"#;

        assert!(parse_ssml(lang).is_err());
    }

    /// `<sub>` text is kept unless `expand_sub` is set; `<desc>` text is never kept.
    #[test]
    fn filter_out_elems() {
        let mut parser = SsmlParserBuilder::default().build().unwrap();

        assert!(parser.text_should_enter_buffer(Some(&SsmlElement::Sub)));
        assert!(!parser.text_should_enter_buffer(Some(&SsmlElement::Description)));

        parser.expand_sub = true;

        assert!(!parser.text_should_enter_buffer(Some(&SsmlElement::Sub)));
        assert!(!parser.text_should_enter_buffer(Some(&SsmlElement::Description)));
    }

    /// With `expand_sub` the alias replaces the element's content and the element itself
    /// leaves no open/close events in the log.
    #[test]
    fn expand_sub() {
        let parser = SsmlParserBuilder::default()
            .expand_sub(true)
            .build()
            .unwrap();
        let sub =
            r#"<speak version="1.1"><sub alias="World wide web consortium">W3C</sub></speak>"#;

        let res = parser.parse(sub).unwrap();
        assert_eq!(res.get_text().trim(), "World wide web consortium");
        // Open(speak), Text(alias), Close(speak).
        assert_eq!(res.event_log.len(), 3);
        assert!(matches!(res.event_log[1], ParserLogEvent::Text(_)));

        let parser = SsmlParserBuilder::default().build().unwrap();

        let res = parser.parse(sub).unwrap();
        assert_eq!(res.get_text().trim(), "W3C");

        // Open(speak), Open(sub), Text, Close(sub), Close(speak).
        assert_eq!(res.event_log.len(), 5);
    }

    /// Decibel values need a number followed by the exact suffix `dB`.
    #[test]
    fn decibels() {
        assert!(parse_decibel("56").is_err());
        assert!(parse_decibel("hello").is_err());
        assert!(parse_decibel("64.5DB").is_err());
        assert!(parse_decibel("64.5dBs").is_err());

        assert_eq!(parse_decibel("-10dB").unwrap() as i32, -10);
        assert_eq!(parse_decibel("15dB").unwrap() as i32, 15);
        assert_eq!(parse_decibel(".5dB").unwrap(), 0.5);
    }

    /// Percentages need a number followed by exactly one `%`.
    #[test]
    fn unsigned_percentages() {
        assert!(parse_unsigned_percentage("56").is_err());
        assert!(parse_unsigned_percentage("64pc").is_err());
        assert!(parse_unsigned_percentage("74%%").is_err());

        assert_eq!(parse_unsigned_percentage("10%").unwrap() as i32, 10);
        assert_eq!(parse_unsigned_percentage("110%").unwrap() as i32, 110);
        assert_eq!(parse_unsigned_percentage(".5%").unwrap(), 0.5);
    }
}