ssml_parser/elements.rs
1//! Documentation comments are taken in part from the SSML specification which
2//! can be found [here](https://www.w3.org/TR/speech-synthesis11). All copied
3//! sections will be marked with:
4//!
5//! "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
6//! All Rights Reserved._
7//!
8//! If any sections aren't marked please submit a PR. For types this copyright
9//! notice will be placed on the top level type and not each field for conciseness
10//! but keep in mind the fields will also be taken from the same section of the
11//! standard.
12use anyhow::{bail, Context};
13use lazy_static::lazy_static;
14use quick_xml::escape::escape;
15use regex::Regex;
16use std::collections::BTreeMap;
17use std::convert::Infallible;
18use std::fmt::{self, Display};
19use std::num::NonZeroUsize;
20use std::str::FromStr;
21use std::time::Duration;
22
/// Type of the SSML element.
///
/// Identifies which tag an element is without carrying any of its attributes. Tags that are not
/// part of the SSML specification are kept as [`SsmlElement::Custom`] together with their name.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum SsmlElement {
    /// The `<speak></speak>` element.
    Speak,
    /// The `<lexicon/>` element.
    Lexicon,
    /// The `<lookup></lookup>` element.
    Lookup,
    /// The `<meta/>` element.
    Meta,
    /// The `<metadata></metadata>` element.
    Metadata,
    /// The `<p></p>` element.
    Paragraph,
    /// The `<s></s>` element.
    Sentence,
    /// The `<token></token>` element.
    Token,
    /// The `<word></word>` element.
    Word,
    /// The `<say-as></say-as>` element.
    SayAs,
    /// The `<phoneme></phoneme>` element.
    Phoneme,
    /// The `<sub></sub>` element.
    Sub,
    /// The `<lang></lang>` element.
    Lang,
    /// The `<voice></voice>` element.
    Voice,
    /// The `<emphasis></emphasis>` element.
    Emphasis,
    /// The `<break/>` element.
    Break,
    /// The `<prosody></prosody>` element.
    Prosody,
    /// The `<audio></audio>` element.
    Audio,
    /// The `<mark/>` element.
    Mark,
    /// The `<desc></desc>` element.
    Description,
    /// Custom elements not defined in the spec, the element name is stored in the given string.
    Custom(String),
}

impl SsmlElement {
    /// Returns whether a tag can contain other tags - will always be true for custom tags as we
    /// want to check just in case.
    #[inline(always)]
    pub fn can_contain_tags(&self) -> bool {
        // Elements that are not listed in the `matches!` below never hold child elements:
        // * Lexicon
        // * Meta
        // * Metadata (can contain content but is ignored by synthesis processor)
        // * say-as can only contain text to render
        // * phoneme is text only
        // * sub subtitles only (no elements)
        // * description is only for inside audio tag and not to be rendered
        // * mark element is empty element used as a bookmark
        // * break is not treated as a container either
        //
        // `lookup` IS a container: it may hold the same elements as a paragraph plus `p` itself.
        matches!(
            self,
            Self::Speak
                | Self::Lookup
                | Self::Paragraph
                | Self::Sentence
                | Self::Voice
                | Self::Emphasis
                | Self::Token
                | Self::Word
                | Self::Lang
                | Self::Prosody
                | Self::Audio
                | Self::Custom(_)
        )
    }

    /// Check whether the provided element can contain another specified tag. For custom elements
    /// if an element can contain tags it will be assumed it can contain the custom one as these
    /// are outside of the SSML specification.
    ///
    /// `self` is the would-be parent and `other` the would-be child. `<speak>` is never accepted
    /// as a child, and an element for which [`Self::can_contain_tags`] is false accepts nothing.
    pub fn can_contain(&self, other: &Self) -> bool {
        match (self, other) {
            // Order matters: the two guarded arms must run before the per-element rules.
            (a, Self::Custom(_)) if a.can_contain_tags() => true,
            (a, _) if !a.can_contain_tags() => false,
            (_, Self::Speak) => false,
            (Self::Speak, _) => true,
            (Self::Lookup, a) => a.allowed_in_lookup(),
            (Self::Paragraph, a) => a.allowed_in_paragraph(),
            (Self::Sentence, a) => a.allowed_in_sentence(),
            (Self::Voice, a) => a.allowed_in_speak(), // Everything allowed inside
            (Self::Emphasis, a) => a.allowed_in_sentence(), // Emphasis and sentence lists match
            (Self::Token | Self::Word, a) => a.allowed_in_token(),
            (Self::Lang, a) => a.allowed_in_speak(),
            (Self::Prosody, a) => a.allowed_in_speak(),
            (Self::Audio, a) => a.allowed_in_speak(),
            (Self::Custom(_), _) => true,
            _ => false, // Should be unreachable
        }
    }

    /// Returns true if an SSML element is allowed within `<lookup>...</lookup>`: everything a
    /// paragraph accepts, plus the paragraph element itself.
    #[inline(always)]
    fn allowed_in_lookup(&self) -> bool {
        matches!(self, Self::Paragraph) || self.allowed_in_paragraph()
    }

    /// Returns true if an SSML element is allowed within a paragraph `<p>...</p>`
    #[inline(always)]
    fn allowed_in_paragraph(&self) -> bool {
        matches!(self, Self::Sentence) || self.allowed_in_sentence()
    }

    /// Returns true if an SSML element is allowed within a sentence `<s>...</s>`
    #[inline(always)]
    fn allowed_in_sentence(&self) -> bool {
        matches!(
            self,
            Self::Custom(_)
                | Self::Audio
                | Self::Break
                | Self::Emphasis
                | Self::Lang
                | Self::Lookup
                | Self::Mark
                | Self::Phoneme
                | Self::Prosody
                | Self::SayAs
                | Self::Sub
                | Self::Token
                | Self::Voice
                | Self::Word
        )
    }

    /// Returns true if an SSML element is allowed within `<speak></speak>`, i.e. anything except
    /// another `<speak>`.
    #[inline(always)]
    fn allowed_in_speak(&self) -> bool {
        self != &Self::Speak
    }

    /// Returns true if an SSML element is allowed within `<token></token>` (and its alias `<w>`).
    #[inline(always)]
    fn allowed_in_token(&self) -> bool {
        matches!(
            self,
            Self::Audio
                | Self::Break
                | Self::Emphasis
                | Self::Mark
                | Self::Phoneme
                | Self::Prosody
                | Self::SayAs
                | Self::Sub
                | Self::Custom(_)
        )
    }

    /// Returns true if the text inside should be processed by the speech synthesiser.
    ///
    /// Only `desc`, `metadata`, `mark`, `break`, `lexicon` and `meta` are excluded, so this
    /// returns `true` for custom elements.
    ///
    /// NOTE(review): the previous wording of this comment claimed `false` for custom elements
    /// while the implementation returns `true` - confirm which one is intended.
    #[inline(always)]
    pub(crate) fn contains_synthesisable_text(&self) -> bool {
        !matches!(
            self,
            Self::Description
                | Self::Metadata
                | Self::Mark
                | Self::Break
                | Self::Lexicon
                | Self::Meta
        )
    }
}
187
188impl Display for SsmlElement {
189 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
190 use SsmlElement::*;
191 write!(
192 f,
193 "{}",
194 match self {
195 Custom(name) => name,
196 Speak => "speak",
197 Lexicon => "lexicon",
198 Lookup => "lookup",
199 Meta => "meta",
200 Metadata => "metadata",
201 Paragraph => "p",
202 Sentence => "s",
203 Token => "token",
204 Word => "w",
205 SayAs => "say-as",
206 Phoneme => "phoneme",
207 Sub => "sub",
208 Lang => "lang",
209 Voice => "voice",
210 Emphasis => "emphasis",
211 Break => "break",
212 Prosody => "prosody",
213 Audio => "audio",
214 Mark => "mark",
215 Description => "desc",
216 }
217 )
218 }
219}
220
/// Enum representing the parsed element, each element with attributes allowed also contains an
/// object for its attributes.
#[derive(Debug, Clone, PartialEq)]
pub enum ParsedElement {
    /// The `<speak></speak>` element and given attributes.
    Speak(SpeakAttributes),
    /// The `<lexicon/>` element and given attributes.
    // TODO: spec mentions `lexicon` can only be immediate children of `speak`. enforce this check
    Lexicon(LexiconAttributes),
    /// The `<lookup></lookup>` element and given attributes.
    Lookup(LookupAttributes),
    /// The `<meta/>` element and given attributes.
    Meta(MetaAttributes),
    /// The `<metadata></metadata>` element.
    Metadata,
    /// The `<p></p>` element.
    Paragraph,
    /// The `<s></s>` element.
    Sentence,
    /// The `<token></token>` element and given attributes.
    Token(TokenAttributes),
    /// The `<word></word>` element and given attributes.
    // `w` element is just an alias for `token`
    Word(TokenAttributes),
    /// The `<say-as></say-as>` element and given attributes.
    SayAs(SayAsAttributes),
    /// The `<phoneme></phoneme>` element and given attributes.
    Phoneme(PhonemeAttributes),
    /// The `<sub></sub>` element and given attributes.
    Sub(SubAttributes),
    /// The `<lang></lang>` element and given attributes.
    Lang(LangAttributes),
    /// The `<voice></voice>` element and given attributes.
    Voice(VoiceAttributes),
    /// The `<emphasis></emphasis>` element and given attributes.
    Emphasis(EmphasisAttributes),
    /// The `<break/>` element and given attributes.
    Break(BreakAttributes),
    /// The `<prosody></prosody>` element and given attributes.
    Prosody(ProsodyAttributes),
    /// The `<audio></audio>` element and given attributes.
    Audio(AudioAttributes),
    /// The `<mark/>` element and given attributes.
    Mark(MarkAttributes),
    /// The `<desc></desc>` element. The `String` is not written out as an attribute; presumably
    /// it holds the description text - TODO confirm against the parser.
    Description(String),
    /// Custom elements not defined in the spec, the element name is stored in the given string and
    /// any attributes in the map.
    Custom((String, BTreeMap<String, String>)),
}
271
272impl ParsedElement {
273 /// From an element get the XML attribute string - this is used for writing the SSML back out
274 pub fn attribute_string(&self) -> String {
275 use ParsedElement::*;
276
277 match self {
278 Lexicon(attr) => format!("{}", attr),
279 Lookup(attr) => format!("{}", attr),
280 Meta(attr) => format!("{}", attr),
281 Metadata => String::new(),
282 Paragraph => String::new(),
283 Sentence => String::new(),
284 Token(attr) => format!("{}", attr),
285 Word(attr) => format!("{}", attr),
286 SayAs(attr) => format!("{}", attr),
287 Speak(attr) => format!("{}", attr),
288 Phoneme(attr) => format!("{}", attr),
289 Sub(attr) => format!("{}", attr),
290 Lang(attr) => format!("{}", attr),
291 Voice(attr) => format!("{}", attr),
292 Emphasis(attr) => format!("{}", attr),
293 Break(attr) => format!("{}", attr),
294 Prosody(attr) => format!("{}", attr),
295 Audio(attr) => format!("{}", attr),
296 Mark(attr) => format!("{}", attr),
297 Description(_) => String::new(),
298 Custom((_, attr_map)) => {
299 let mut attr_str = String::new();
300 for (name, val) in attr_map.iter() {
301 attr_str.push_str(&format!(" {}=\"{}\"", name, val));
302 }
303 attr_str
304 }
305 }
306 }
307
308 /// Returns true if an SSML element can contain tags
309 pub fn can_contain_tags(&self) -> bool {
310 SsmlElement::from(self).can_contain_tags()
311 }
312
313 /// Returns true if an SSML element can contain another element
314 pub fn can_contain(&self, other: &Self) -> bool {
315 SsmlElement::from(self).can_contain(&SsmlElement::from(other))
316 }
317}
318
319impl From<&ParsedElement> for SsmlElement {
320 fn from(elem: &ParsedElement) -> Self {
321 match elem {
322 ParsedElement::Speak(_) => Self::Speak,
323 ParsedElement::Lexicon(_) => Self::Lexicon,
324 ParsedElement::Lookup(_) => Self::Lookup,
325 ParsedElement::Meta(_) => Self::Meta,
326 ParsedElement::Metadata => Self::Metadata,
327 ParsedElement::Paragraph => Self::Paragraph,
328 ParsedElement::Sentence => Self::Sentence,
329 ParsedElement::Token(_) => Self::Token,
330 ParsedElement::Word(_) => Self::Word,
331 ParsedElement::SayAs(_) => Self::SayAs,
332 ParsedElement::Phoneme(_) => Self::Phoneme,
333 ParsedElement::Sub(_) => Self::Sub,
334 ParsedElement::Lang(_) => Self::Lang,
335 ParsedElement::Voice(_) => Self::Voice,
336 ParsedElement::Emphasis(_) => Self::Emphasis,
337 ParsedElement::Break(_) => Self::Break,
338 ParsedElement::Prosody(_) => Self::Prosody,
339 ParsedElement::Audio(_) => Self::Audio,
340 ParsedElement::Mark(_) => Self::Mark,
341 ParsedElement::Description(_) => Self::Description,
342 ParsedElement::Custom((s, _)) => Self::Custom(s.to_string()),
343 }
344 }
345}
346
/// The Speech Synthesis Markup Language is an XML application. The root element is speak.
///
/// N.B. According to the standard version is a required attribute, however we haven't found any
/// TTS providers that enforce that rule so implement a laxer parsing for compatibility with the
/// wider ecosystem.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct SpeakAttributes {
    /// Lang is an attribute specifying the language of the root document. In the specification
    /// this is a REQUIRED attribute, however in reality most TTS APIs require a different way to
    /// specify the language outside of SSML and treat this as optional. Because of that this
    /// implementation has chosen to be more permissive than the spec.
    pub lang: Option<String>,
    /// Base is an OPTIONAL attribute specifying the Base URI of the root document.
    pub base: Option<String>,
    /// On Language Failure is an OPTIONAL attribute specifying the desired behavior upon language speaking failure.
    pub on_lang_failure: Option<OnLanguageFailure>,
    /// The version attribute is a REQUIRED attribute that indicates the version of the specification to be used for the document and MUST have the value "1.1".
    ///
    /// This type stores the value as a plain `String` and does not validate it; the derived
    /// `Default` leaves it as an empty string.
    pub version: String,
    /// Any remaining attributes on the root element (namespace declarations etc.), keyed by
    /// attribute name.
    pub xml_root_attrs: BTreeMap<String, String>,
}
371
#[cfg(test)]
impl fake::Dummy<fake::Faker> for SpeakAttributes {
    /// Generates random speak attributes for tests. Every field is random apart from `version`,
    /// which is always `"1.1"`.
    fn dummy_with_rng<R>(f: &fake::Faker, rng: &mut R) -> Self
    where
        R: rand::Rng + ?Sized,
    {
        use fake::Fake;

        // Values are drawn in field order so a seeded rng yields the same sequence as before.
        let lang: Option<String> = f.fake_with_rng(rng);
        let base: Option<String> = f.fake_with_rng(rng);
        let on_lang_failure: Option<OnLanguageFailure> = f.fake_with_rng(rng);
        let xml_root_attrs: BTreeMap<String, String> = f.fake_with_rng(rng);

        Self {
            lang,
            base,
            on_lang_failure,
            version: String::from("1.1"),
            xml_root_attrs,
        }
    }
}
385
386impl Display for SpeakAttributes {
387 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
388 write!(f, " version=\"{}\"", escape(&self.version))?;
389 if let Some(lang) = &self.lang {
390 write!(f, " xml:lang=\"{}\"", escape(lang))?;
391 }
392 if let Some(base) = &self.base {
393 write!(f, " xml:base=\"{}\"", escape(base))?;
394 }
395 if let Some(fail) = &self.on_lang_failure {
396 write!(f, " onlangfailure=\"{}\"", fail)?;
397 }
398 for (attr_name, attr_value) in self.xml_root_attrs.iter() {
399 write!(f, " {}=\"{}\"", attr_name, attr_value)?;
400 }
401 Ok(())
402 }
403}
404
/// The lang element is used to specify the natural language of the content. This element MAY be used when there is a change in the natural language.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct LangAttributes {
    /// Lang is a REQUIRED attribute specifying the natural language of the content enclosed by
    /// the `lang` element. It is written out as `xml:lang`.
    pub lang: String,
    /// On Language Failure is an OPTIONAL attribute specifying the desired behavior upon language speaking failure.
    pub on_lang_failure: Option<OnLanguageFailure>,
}
414
415impl Display for LangAttributes {
416 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
417 write!(f, " xml:lang=\"{}\"", escape(&self.lang))?;
418 if let Some(fail) = self.on_lang_failure {
419 write!(f, " onlangfailure=\"{}\"", fail)?;
420 }
421
422 Ok(())
423 }
424}
425
/// The onlangfailure attribute is an optional attribute that contains one value
/// from the following enumerated list describing the desired behavior of the
/// synthesis processor upon language speaking failure. A conforming synthesis
/// processor must report a language speaking failure in addition to taking the
/// action(s) below.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum OnLanguageFailure {
    /// If a voice exists that can speak the language, the synthesis processor
    /// will switch to that voice and speak the content. Otherwise, the
    /// processor chooses another behavior (either ignoretext or ignorelang).
    ChangeVoice,
    /// The synthesis processor will not attempt to render the text that is in
    /// the failed language.
    IgnoreText,
    /// The synthesis processor will ignore the change in language and speak as
    /// if the content were in the previous language.
    IgnoreLang,
    /// The synthesis processor chooses the behavior (either changevoice, ignoretext,
    /// or ignorelang).
    ProcessorChoice,
}
451
452impl Display for OnLanguageFailure {
453 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
454 use OnLanguageFailure::*;
455 write!(
456 f,
457 "{}",
458 match self {
459 ChangeVoice => "changevoice",
460 IgnoreText => "ignoretext",
461 IgnoreLang => "ignorelang",
462 ProcessorChoice => "processorchoice",
463 }
464 )
465 }
466}
467
468impl FromStr for OnLanguageFailure {
469 type Err = anyhow::Error;
470
471 fn from_str(s: &str) -> Result<Self, Self::Err> {
472 let s = match s {
473 "changevoice" => Self::ChangeVoice,
474 "ignoretext" => Self::IgnoreText,
475 "ignorelang" => Self::IgnoreLang,
476 "processorchoice" => Self::ProcessorChoice,
477 e => bail!("Unrecognised language failure value {}", e),
478 };
479 Ok(s)
480 }
481}
482
483impl FromStr for SsmlElement {
484 type Err = Infallible;
485
486 fn from_str(s: &str) -> Result<Self, Self::Err> {
487 let s = match s {
488 "speak" => Self::Speak,
489 "lexicon" => Self::Lexicon,
490 "lookup" => Self::Lookup,
491 "meta" => Self::Meta,
492 "metadata" => Self::Metadata,
493 "p" => Self::Paragraph,
494 "s" => Self::Sentence,
495 "token" => Self::Token,
496 "w" => Self::Word,
497 "say-as" => Self::SayAs,
498 "phoneme" => Self::Phoneme,
499 "sub" => Self::Sub,
500 "lang" => Self::Lang,
501 "voice" => Self::Voice,
502 "emphasis" => Self::Emphasis,
503 "break" => Self::Break,
504 "prosody" => Self::Prosody,
505 "audio" => Self::Audio,
506 "mark" => Self::Mark,
507 "desc" => Self::Description,
508 e => Self::Custom(e.to_string()),
509 };
510 Ok(s)
511 }
512}
513
/// An SSML document MAY reference one or more lexicon documents. A lexicon
/// document is located by a URI with an OPTIONAL media type and is assigned a
/// name that is unique in the SSML document. Any number of lexicon elements MAY
/// occur as immediate children of the speak element.
///
/// The lexicon element MUST have a uri attribute specifying a URI that identifies
/// the location of the lexicon document.
///
/// The lexicon element MUST have an xml:id attribute that assigns a name to the
/// lexicon document. The name MUST be unique to the current SSML document. The
/// scope of this name is the current SSML document.
///
/// The lexicon element MAY have a type attribute that specifies the media type of
/// the lexicon document. The default value of the type attribute is
/// application/pls+xml, the media type associated with Pronunciation Lexicon
/// Specification [PLS] documents as defined in [RFC4267].
///
/// The lexicon element MAY have a fetchtimeout attribute that specifies the timeout
/// for fetches. The value is a Time Designation. The default value is processor-specific.
///
/// The lexicon element MAY have a maxage attribute that indicates that the document is
/// willing to use content whose age is no greater than the specified time
/// (cf. 'max-age' in HTTP 1.1 [RFC2616]). The value is an xsd:nonNegativeInteger
/// [SCHEMA2 §3.3.20]. The document is not willing to use stale content, unless maxstale
/// is also provided.
///
/// The lexicon element MAY have a maxstale attribute that indicates that the document is
/// willing to use content that has exceeded its expiration time (cf. 'max-stale' in HTTP 1.1
/// [RFC2616]). The value is an xsd:nonNegativeInteger [SCHEMA2 §3.3.20]. If maxstale is
/// assigned a value, then the document is willing to accept content that has exceeded its
/// expiration time by no more than the specified amount of time.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Debug, Clone, PartialEq)]
pub struct LexiconAttributes {
    /// The lexicon element MUST have a uri attribute specifying a URI that identifies the location of the lexicon document.
    pub uri: http::Uri,
    /// The lexicon element MUST have an xml:id attribute that assigns a name to the lexicon document. The name MUST be unique to the current SSML document.
    /// The scope of this name is the current SSML document.
    pub xml_id: String,
    /// The lexicon element MAY have a type attribute that specifies the media type of the lexicon
    /// document. The default value of the type attribute is application/pls+xml, the media type
    /// associated with Pronunciation Lexicon Specification documents.
    ///
    /// The spec default is not filled in here: the `type` attribute is only written back out
    /// when this is `Some`.
    pub ty: Option<mediatype::MediaTypeBuf>,
    /// The lexicon element MAY have a fetchtimeout attribute that specifies the timeout for fetches.
    pub fetch_timeout: Option<TimeDesignation>,
    // TODO we don't support maxage or maxstale (there are no fields for them on this struct)
}
563
#[cfg(test)]
impl fake::Dummy<fake::Faker> for LexiconAttributes {
    /// Generates random lexicon attributes for tests. The media type is either absent or
    /// `application/pls+xml`, each with probability one half; the other fields are random.
    fn dummy_with_rng<R>(f: &fake::Faker, rng: &mut R) -> Self
    where
        R: rand::Rng + ?Sized,
    {
        use fake::Fake;

        // The coin flip must come first so the rng is consumed in the same order as before.
        let ty = rng.gen_bool(0.5).then(|| {
            mediatype::MediaTypeBuf::new(
                mediatype::names::APPLICATION,
                mediatype::Name::new("pls+xml").unwrap(),
            )
        });
        let uri: http::Uri = f.fake_with_rng(rng);
        let xml_id: String = f.fake_with_rng(rng);
        let fetch_timeout: Option<TimeDesignation> = f.fake_with_rng(rng);

        Self {
            uri,
            xml_id,
            ty,
            fetch_timeout,
        }
    }
}
585
586impl Display for LexiconAttributes {
587 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
588 write!(f, " uri=\"{}\"", escape(&self.uri.to_string()))?;
589 write!(f, " xml:id=\"{}\"", escape(&self.xml_id))?;
590 if let Some(ty) = &self.ty {
591 write!(f, " type=\"{}\"", ty)?;
592 }
593 if let Some(timeout) = &self.fetch_timeout {
594 write!(f, " fetchtimeout=\"{}\"", timeout)?;
595 }
596 Ok(())
597 }
598}
599
/// For times SSML only uses seconds or milliseconds in the form "%fs" or "%fms", this handles
/// parsing these times.
///
/// Note that the derived `PartialOrd` compares the variant before the number, so it does not
/// order values by the length of time they represent (every `Seconds` value sorts before every
/// `Milliseconds` value).
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum TimeDesignation {
    /// Time specified in seconds
    Seconds(f32),
    /// Time specified in milliseconds
    Milliseconds(f32),
}
610
611impl TimeDesignation {
612 /// Turns the time designation to a std Duration type.
613 pub fn duration(&self) -> Duration {
614 match self {
615 Self::Seconds(s) => Duration::from_secs_f32(*s),
616 Self::Milliseconds(ms) => Duration::from_secs_f32(ms / 1000.0),
617 }
618 }
619}
620
621impl Display for TimeDesignation {
622 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
623 write!(
624 f,
625 "{}",
626 match self {
627 Self::Seconds(val) => format!("{}s", val),
628 Self::Milliseconds(val) => format!("{}ms", val),
629 }
630 )
631 }
632}
633
634impl FromStr for TimeDesignation {
635 type Err = anyhow::Error;
636
637 fn from_str(time: &str) -> Result<Self, Self::Err> {
638 lazy_static! {
639 static ref TIME_RE: Regex = Regex::new(r"^\+?((?:\d*\.)?\d+)\s*(s|ms)$").unwrap();
640 }
641 let caps = TIME_RE
642 .captures(time)
643 .context("attribute must be a valid TimeDesignation")?;
644
645 let num_val = caps[1].parse::<f32>().unwrap();
646
647 match &caps[2] {
648 "s" => Ok(TimeDesignation::Seconds(num_val)),
649 "ms" => Ok(TimeDesignation::Milliseconds(num_val)),
650 _ => unreachable!(),
651 }
652 }
653}
654
/// The lookup element MUST have a ref attribute. The ref attribute specifies a
/// name that references a lexicon document as assigned by the xml:id attribute
/// of the lexicon element.
///
/// The referenced lexicon document may contain information (e.g., pronunciation)
/// for tokens that can appear in a text to be rendered. For PLS lexicon documents,
/// the information contained within the PLS document MUST be used by the synthesis
/// processor when rendering tokens that appear within the context of a lookup
/// element. For non-PLS lexicon documents, the information contained within the
/// lexicon document SHOULD be used by the synthesis processor when rendering tokens
/// that appear within the content of a lookup element, although the processor MAY
/// choose not to use the information if it is deemed incompatible with the content
/// of the SSML document. For example, a vendor-specific lexicon may be used only for
/// particular values of the interpret-as attribute of the say-as element, or for a
/// particular set of voices. Vendors SHOULD document the expected behavior of the
/// synthesis processor when SSML content refers to a non-PLS lexicon.
///
/// A lookup element MAY contain other lookup elements. When a lookup element contains
/// other lookup elements, the child lookup elements have higher precedence. Precedence
/// means that a token is first looked up in the lexicon with highest precedence. Only
/// if the token is not found in that lexicon is it then looked up in the lexicon with
/// the next lower precedence, and so on until the token is successfully found or until
/// all lexicons have been used for lookup. It is assumed that the synthesis processor
/// already has one or more built-in system lexicons which will be treated as having
/// a lower precedence than those specified using the lexicon and lookup elements.
/// Note that if a token is not within the scope of at least one lookup element, then
/// the token can only be looked up in the built-in system lexicons.
///
/// The lookup element can only contain text to be rendered and the following elements:
/// audio, break, emphasis, lang, lookup, mark, p, phoneme, prosody, say-as, sub, s,
/// token, voice, w.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct LookupAttributes {
    /// Specifies a name that references a lexicon document as assigned by the xml:id attribute of the lexicon element.
    /// It is written out as the `ref` attribute.
    pub lookup_ref: String,
}
695
696impl Display for LookupAttributes {
697 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
698 write!(f, " ref=\"{}\"", escape(&self.lookup_ref))
699 }
700}
701
/// The metadata and meta elements are containers in which information about the
/// document can be placed. The metadata element provides more general and powerful
/// treatment of metadata information than meta by using a metadata schema.
///
/// A meta declaration associates a string to a declared meta property or declares
/// "http-equiv" content. Either a name or http-equiv attribute is REQUIRED. It is
/// an error to provide both name and http-equiv attributes. A content attribute is
/// REQUIRED. The seeAlso property is the only defined meta property name. It is
/// used to specify a resource that might provide additional metadata information
/// about the content. This property is modeled on the seeAlso property of Resource
/// Description Framework (RDF) Schema Specification 1.0 [RDF-SCHEMA §5.4.1]. The
/// http-equiv attribute has a special significance when documents are retrieved
/// via HTTP. Although the preferred method of providing HTTP header information is
/// by using HTTP header fields, the "http-equiv" content MAY be used in situations
/// where the SSML document author is unable to configure HTTP header fields
/// associated with their document on the origin server, for example, cache control
/// information. Note that HTTP servers and caches are not required to introspect
/// the contents of meta in SSML documents and thereby override the header values
/// they would send otherwise.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
///
/// Both `name` and `http_equiv` are plain `Option`s, so the "exactly one of the two" rule above
/// is not enforced by this type itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MetaAttributes {
    /// Currently, the only defined name is `seeAlso`. In future other meta names may be added.
    pub name: Option<String>,
    /// Used for when documents are retrieved via HTTP. Written out as the `http-equiv` attribute.
    pub http_equiv: Option<String>,
    /// The content referred to by the meta.
    pub content: String,
}
733
#[cfg(test)]
impl fake::Dummy<fake::Faker> for MetaAttributes {
    /// Generates random meta attributes for tests. Exactly one of `name` and `http_equiv` is
    /// set, chosen with equal probability.
    fn dummy_with_rng<R>(f: &fake::Faker, rng: &mut R) -> Self
    where
        R: rand::Rng + ?Sized,
    {
        use fake::Fake;

        // The coin flip comes first, then the chosen attribute, then the content.
        let (name, http_equiv) = match rng.gen_bool(0.5) {
            true => (None, Some(f.fake_with_rng(rng))),
            false => (Some(f.fake_with_rng(rng)), None),
        };
        let content: String = f.fake_with_rng(rng);

        Self {
            name,
            http_equiv,
            content,
        }
    }
}
750
751impl Display for MetaAttributes {
752 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
753 write!(f, " content=\"{}\"", escape(&self.content))?;
754 if let Some(http_equiv) = &self.http_equiv {
755 write!(f, " http-equiv=\"{}\"", escape(http_equiv))?;
756 }
757 if let Some(name) = &self.name {
758 write!(f, " name=\"{}\"", escape(name))?;
759 }
760
761 Ok(())
762 }
763}
/// The token element allows the author to indicate its content is a token and to
/// eliminate token (word) segmentation ambiguities of the synthesis processor.
///
/// The token element is necessary in order to render languages
/// - that do not use white space as a token boundary identifier, such as Chinese,
/// Thai, and Japanese
/// - that use white space for syllable segmentation, such as Vietnamese
/// - that use white space for other purposes, such as Urdu
///
/// Use of this element can result in improved cues for prosodic control (e.g.,
/// pause) and may assist the synthesis processor in selection of the correct
/// pronunciation for homographs. Other elements such as break, mark, and prosody
/// are permitted within token to allow annotation at a sub-token level (e.g.,
/// syllable, mora, or whatever units are reasonable for the current language).
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct TokenAttributes {
    /// `role` is an OPTIONAL defined attribute on the token element. The role
    /// attribute takes as its value one or more white space separated QNames
    /// (as defined in Section 4 of Namespaces in XML (1.0 [XMLNS 1.0] or 1.1
    /// [XMLNS 1.1], depending on the version of XML being used)). A QName in
    /// the attribute content is expanded into an expanded-name using the
    /// namespace declarations in scope for the containing token element. Thus,
    /// each QName provides a reference to a specific item in the designated
    /// namespace. In the second example below, the QName within the role
    /// attribute expands to the "VV0" item in the
    /// "http://www.example.com/claws7tags" namespace. This mechanism allows
    /// for referencing defined taxonomies of word classes, with the expectation
    /// that they are documented at the specified namespace URI.
    ///
    /// The role attribute is intended to be of use in synchronizing with other
    /// specifications, for example to describe additional information to help
    /// the selection of the most appropriate pronunciation for the contained
    /// text inside an external lexicon (see lexicon documents).
    ///
    /// (The examples referred to above are part of the specification and are not reproduced
    /// here.) The value is held as a single string; this type does not split it into
    /// individual QNames.
    pub role: Option<String>,
}
803
804impl Display for TokenAttributes {
805 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
806 if let Some(role) = &self.role {
807 write!(f, " role=\"{}\"", escape(role))?;
808 }
809
810 Ok(())
811 }
812}
813
/// The say-as element allows the author to indicate information on the type of text
/// construct contained within the element and to help specify the level of detail
/// for rendering the contained text.
/// The say-as element has three attributes: interpret-as, format, and detail.
/// The interpret-as attribute is always required; the other two attributes are optional.
/// The legal values for the format attribute depend on the value of the interpret-as attribute.
/// The say-as element can only contain text to be rendered.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
///
/// All three attributes are held as free-form strings; this type does not restrict them to a
/// fixed set of values.
#[derive(Debug, Clone, Eq, PartialEq)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct SayAsAttributes {
    /// The interpret-as attribute indicates the content type of the contained text construct.
    /// Specifying the content type helps the synthesis processor to distinguish and interpret
    /// text constructs that may be rendered in different ways depending on what type of
    /// information is intended.
    pub interpret_as: String,
    /// The optional format attribute can give further hints on the precise formatting of the
    /// contained text for content types that may have ambiguous formats.
    pub format: Option<String>,
    /// The detail attribute is an optional attribute that indicates the level of detail to be
    /// read aloud or rendered. Every value of the detail attribute must render all of the
    /// informational content in the contained text; however, specific values for the detail
    /// attribute can be used to render content that is not usually informational in running
    /// text but may be important to render for specific purposes. For example, a synthesis
    /// processor will usually render punctuations through appropriate changes in prosody.
    /// Setting a higher level of detail may be used to speak punctuations explicitly,
    /// e.g. for reading out coded part numbers or pieces of software code.
    pub detail: Option<String>,
}
845
846impl Display for SayAsAttributes {
847 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
848 write!(f, " interpret-as=\"{}\"", escape(&self.interpret_as))?;
849 if let Some(format) = &self.format {
850 write!(f, " format=\"{}\"", escape(format))?;
851 }
852 if let Some(detail) = &self.detail {
853 write!(f, " detail=\"{}\"", escape(detail))?
854 }
855
856 Ok(())
857 }
858}
859
/// Pronunciation alphabet used for phonemic/phonetic strings. In this context
/// a pronunciation alphabet is a collection of symbols that represent the
/// sounds of one or more human languages.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum PhonemeAlphabet {
    /// The International Phonetic Association's alphabet.
    Ipa,
    /// Any other alphabet, identified by name (only IPA is required to be supported).
    Other(String),
}
873
874impl Display for PhonemeAlphabet {
875 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
876 write!(
877 f,
878 "{}",
879 match self {
880 Self::Ipa => "ipa".into(),
881 Self::Other(alphabet) => escape(alphabet),
882 }
883 )
884 }
885}
886
887impl FromStr for PhonemeAlphabet {
888 type Err = Infallible;
889
890 fn from_str(s: &str) -> Result<Self, Self::Err> {
891 match s {
892 "ipa" => Ok(Self::Ipa),
893 e => Ok(Self::Other(e.to_string())),
894 }
895 }
896}
897
898/// The phoneme element provides a phonemic/phonetic pronunciation for the
899/// contained text. The phoneme element may be empty. However, it is recommended
900/// that the element contain human-readable text that can be used for non-spoken
901/// rendering of the document. For example, the content may be displayed visually
902/// for users with hearing impairments.
903///
904/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
905/// All Rights Reserved._
906#[derive(Debug, Clone, Eq, PartialEq)]
907#[cfg_attr(test, derive(fake::Dummy))]
908pub struct PhonemeAttributes {
909 /// The ph attribute is a required attribute that specifies the phoneme/phone
910 /// string.
911 pub ph: String,
912 /// The alphabet attribute is an optional attribute that specifies the
913 /// phonemic/phonetic pronunciation alphabet. A pronunciation alphabet
914 /// in this context refers to a collection of symbols to represent the
915 /// sounds of one or more human languages. The only valid values for this
916 /// attribute are "ipa", values defined in the
917 /// [Pronunciation Alphabet Registry](https://www.w3.org/TR/speech-synthesis11/#S3.1.10.1)
918 /// and vendor-defined strings of the form "x-organization" or
919 /// "x-organization-alphabet". For example, the Japan Electronics and
920 /// Information Technology Industries Association (JEITA) might wish to
921 /// encourage the use of an alphabet such as "x-JEITA" or "x-JEITA-IT-4002"
922 /// for their phoneme alphabet (JEIDAALPHABET).
923 pub alphabet: Option<PhonemeAlphabet>,
924}
925
926impl Display for PhonemeAttributes {
927 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
928 write!(f, " ph=\"{}\"", escape(&self.ph))?;
929 if let Some(alphabet) = &self.alphabet {
930 write!(f, " alphabet=\"{}\"", alphabet)?;
931 }
932
933 Ok(())
934 }
935}
936
/// Strength of a prosodic break, as carried by the optional `strength`
/// attribute. Allowed values are "none", "x-weak", "weak", "medium" (the
/// default), "strong" and "x-strong". "none" states that no prosodic break
/// boundary should be output, which can be used to suppress a break the
/// processor would otherwise produce. The remaining values denote
/// monotonically non-decreasing (conceptually increasing) break strength
/// between tokens; stronger boundaries are typically accompanied by pauses.
/// "x-weak" and "x-strong" are mnemonics for "extra weak" and "extra strong".
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum Strength {
    /// "none": do not insert a break here.
    No,
    /// "x-weak": extra weak break.
    ExtraWeak,
    /// "weak": weak break.
    Weak,
    /// "medium": medium break (default).
    Medium,
    /// "strong": strong break.
    Strong,
    /// "x-strong": extra strong break.
    ExtraStrong,
}
966
967impl Display for Strength {
968 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
969 write!(
970 f,
971 "{}",
972 match self {
973 Self::No => "none",
974 Self::ExtraWeak => "x-weak",
975 Self::Weak => "weak",
976 Self::Medium => "medium",
977 Self::Strong => "strong",
978 Self::ExtraStrong => "x-strong",
979 }
980 )
981 }
982}
983
984impl FromStr for Strength {
985 type Err = anyhow::Error;
986
987 fn from_str(s: &str) -> Result<Self, Self::Err> {
988 match s.to_lowercase().as_ref() {
989 "none" => Ok(Self::No),
990 "x-weak" => Ok(Self::ExtraWeak),
991 "weak" => Ok(Self::Weak),
992 "medium" => Ok(Self::Medium),
993 "strong" => Ok(Self::Strong),
994 "x-strong" => Ok(Self::ExtraStrong),
995 e => bail!("Unrecognised strength value {}", e),
996 }
997 }
998}
999
/// Named pitch levels, written in SSML as "x-low", "low", "medium", "high",
/// "x-high" or "default".
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum PitchStrength {
    /// "x-low": extra low.
    XLow,
    /// "low".
    Low,
    /// "medium".
    Medium,
    /// "high".
    High,
    /// "x-high": extra high.
    XHigh,
    /// "default".
    Default,
}
1018
1019impl FromStr for PitchStrength {
1020 type Err = anyhow::Error;
1021
1022 fn from_str(s: &str) -> Result<Self, Self::Err> {
1023 match s {
1024 "x-low" => Ok(Self::XLow),
1025 "low" => Ok(Self::Low),
1026 "medium" => Ok(Self::Medium),
1027 "high" => Ok(Self::High),
1028 "x-high" => Ok(Self::XHigh),
1029 "default" => Ok(Self::Default),
1030 e => bail!("Unrecognised value {}", e),
1031 }
1032 }
1033}
1034
1035impl Display for PitchStrength {
1036 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1037 let pitch_strength = match self {
1038 PitchStrength::XLow => "x-low",
1039 PitchStrength::Low => "low",
1040 PitchStrength::Medium => "medium",
1041 PitchStrength::High => "high",
1042 PitchStrength::XHigh => "x-high",
1043 PitchStrength::Default => "default",
1044 };
1045 write!(fmt, "{}", pitch_strength)
1046 }
1047}
1048
/// Named volume levels, written in SSML as "silent", "x-soft", "soft",
/// "medium", "loud", "x-loud" or "default".
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum VolumeStrength {
    /// "silent".
    Silent,
    /// "x-soft".
    XSoft,
    /// "soft".
    Soft,
    /// "medium".
    Medium,
    /// "loud".
    Loud,
    /// "x-loud".
    XLoud,
    /// "default".
    Default,
}
1069
1070impl FromStr for VolumeStrength {
1071 type Err = anyhow::Error;
1072
1073 fn from_str(s: &str) -> Result<Self, Self::Err> {
1074 match s {
1075 "silent" => Ok(Self::Silent),
1076 "x-soft" => Ok(Self::XSoft),
1077 "soft" => Ok(Self::Soft),
1078 "medium" => Ok(Self::Medium),
1079 "loud" => Ok(Self::Loud),
1080 "x-loud" => Ok(Self::XLoud),
1081 "default" => Ok(Self::Default),
1082 e => bail!("Unrecognised value {}", e),
1083 }
1084 }
1085}
1086
1087impl fmt::Display for VolumeStrength {
1088 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1089 let volume_strength = match self {
1090 VolumeStrength::Silent => "silent",
1091 VolumeStrength::XSoft => "x-soft",
1092 VolumeStrength::Soft => "soft",
1093 VolumeStrength::Medium => "medium",
1094 VolumeStrength::Loud => "loud",
1095 VolumeStrength::XLoud => "x-loud",
1096 VolumeStrength::Default => "default",
1097 };
1098 write!(fmt, "{}", volume_strength)
1099 }
1100}
1101
/// Named speaking-rate levels, written in SSML as "x-slow", "slow", "medium",
/// "fast", "x-fast" or "default".
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum RateStrength {
    /// "x-slow".
    XSlow,
    /// "slow".
    Slow,
    /// "medium".
    Medium,
    /// "fast".
    Fast,
    /// "x-fast".
    XFast,
    /// "default".
    Default,
}
1120
1121impl FromStr for RateStrength {
1122 type Err = anyhow::Error;
1123
1124 fn from_str(s: &str) -> Result<Self, Self::Err> {
1125 match s {
1126 "x-slow" => Ok(Self::XSlow),
1127 "slow" => Ok(Self::Slow),
1128 "medium" => Ok(Self::Medium),
1129 "fast" => Ok(Self::Fast),
1130 "x-fast" => Ok(Self::XFast),
1131 "default" => Ok(Self::Default),
1132 e => bail!("Unrecognised value {}", e),
1133 }
1134 }
1135}
1136
1137impl fmt::Display for RateStrength {
1138 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1139 let rate_strength = match self {
1140 RateStrength::XSlow => "x-slow",
1141 RateStrength::Slow => "slow",
1142 RateStrength::Medium => "medium",
1143 RateStrength::Fast => "fast",
1144 RateStrength::XFast => "x-fast",
1145 RateStrength::Default => "default",
1146 };
1147 write!(fmt, "{}", rate_strength)
1148 }
1149}
1150
/// Direction of a relative value: an increase (`+`) or a decrease (`-`).
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum Sign {
    /// Positive relative change, written as `+`.
    Plus,
    /// Negative relative change, written as `-`.
    Minus,
}
1160
1161impl fmt::Display for Sign {
1162 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1163 match self {
1164 Self::Plus => write!(fmt, "+"),
1165 Self::Minus => write!(fmt, "-"),
1166 }
1167 }
1168}
1169
1170/// Although the exact meaning of "pitch range" will vary across synthesis processors,
1171/// increasing/decreasing this value will typically increase/decrease the dynamic range of the output pitch.
1172///
1173/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1174/// All Rights Reserved._
1175#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
1176#[cfg_attr(test, derive(fake::Dummy))]
1177pub enum PitchRange {
1178 /// Specifies the range in terms of a strength enum
1179 Strength(PitchStrength), // low, medium high etc
1180 /// Specify it in terms of absolute frequencies
1181 Frequency(f32),
1182 /// Specifies the range in terms of relative changes between an existing pitch.
1183 RelativeChange((f32, Sign, Unit)),
1184}
1185
1186impl FromStr for PitchRange {
1187 type Err = anyhow::Error;
1188
1189 fn from_str(s: &str) -> Result<Self, Self::Err> {
1190 match s {
1191 "x-low" => Ok(Self::Strength(PitchStrength::XLow)),
1192 "low" => Ok(Self::Strength(PitchStrength::Low)),
1193 "medium" => Ok(Self::Strength(PitchStrength::Medium)),
1194 "high" => Ok(Self::Strength(PitchStrength::High)),
1195 "x-high" => Ok(Self::Strength(PitchStrength::XHigh)),
1196 "default" => Ok(Self::Strength(PitchStrength::Default)),
1197 value if value.ends_with("Hz") || value.ends_with('%') || value.ends_with("st") => {
1198 if value.ends_with("Hz") {
1199 if value.starts_with('+') || value.starts_with('-') {
1200 if value.starts_with('-') {
1201 Ok(Self::RelativeChange((
1202 value.strip_suffix("Hz").unwrap().parse::<f32>()? * -1.0,
1203 Sign::Minus,
1204 Unit::Hz,
1205 )))
1206 } else {
1207 Ok(Self::RelativeChange((
1208 value.strip_suffix("Hz").unwrap().parse::<f32>()?,
1209 Sign::Plus,
1210 Unit::Hz,
1211 )))
1212 }
1213 } else {
1214 Ok(Self::Frequency(
1215 value.strip_suffix("Hz").unwrap().parse::<f32>()?,
1216 ))
1217 }
1218 } else if value.ends_with('%') {
1219 if value.starts_with('+') || value.starts_with('-') {
1220 if value.starts_with('-') {
1221 Ok(Self::RelativeChange((
1222 value.strip_suffix('%').unwrap().parse::<f32>()? * -1.0,
1223 Sign::Minus,
1224 Unit::Percentage,
1225 )))
1226 } else {
1227 Ok(Self::RelativeChange((
1228 value.strip_suffix('%').unwrap().parse::<f32>()?,
1229 Sign::Plus,
1230 Unit::Percentage,
1231 )))
1232 }
1233 } else {
1234 bail!("Unrecognised value {}", value);
1235 }
1236 } else if value.ends_with("st") {
1237 if value.starts_with('+') || value.starts_with('-') {
1238 if value.starts_with('-') {
1239 Ok(Self::RelativeChange((
1240 value.strip_suffix("st").unwrap().parse::<f32>()? * -1.0,
1241 Sign::Minus,
1242 Unit::St,
1243 )))
1244 } else {
1245 Ok(Self::RelativeChange((
1246 value.strip_suffix("st").unwrap().parse::<f32>()?,
1247 Sign::Plus,
1248 Unit::St,
1249 )))
1250 }
1251 } else {
1252 bail!("Unrecognised value {}", value);
1253 }
1254 } else {
1255 bail!("Unrecognised value {}", value);
1256 }
1257 }
1258 e => bail!("Unrecognised value {}", e),
1259 }
1260 }
1261}
1262
1263impl fmt::Display for PitchRange {
1264 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1265 match self {
1266 Self::Strength(strength) => write!(fmt, "{}", strength),
1267 Self::Frequency(frequency) => write!(fmt, "{}Hz", frequency),
1268 Self::RelativeChange((relchange, sign, unit)) => {
1269 write!(fmt, "{}{}{}", sign, relchange, unit)
1270 }
1271 }
1272 }
1273}
1274
1275/// The volume for the contained text. Legal values are: a number preceded by "+" or "-" and
1276/// immediately followed by "dB"; or "silent", "x-soft", "soft", "medium", "loud", "x-loud", or
1277/// "default". The default is +0.0dB. Specifying a value of "silent" amounts to specifying minus infinity
1278/// decibels (dB).
1279///
1280/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1281/// All Rights Reserved._
1282#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
1283#[cfg_attr(test, derive(fake::Dummy))]
1284pub enum VolumeRange {
1285 /// Specifies the volume via an enumeration
1286 Strength(VolumeStrength), // "silent", "x-soft", "soft", "medium", "loud", "x-loud", default
1287 /// Volume specified via Decibels
1288 Decibel(f32),
1289}
1290
1291impl FromStr for VolumeRange {
1292 type Err = anyhow::Error;
1293
1294 fn from_str(s: &str) -> Result<Self, Self::Err> {
1295 match s {
1296 "silent" => Ok(Self::Strength(VolumeStrength::Silent)),
1297 "x-soft" => Ok(Self::Strength(VolumeStrength::XSoft)),
1298 "soft" => Ok(Self::Strength(VolumeStrength::Soft)),
1299 "medium" => Ok(Self::Strength(VolumeStrength::Medium)),
1300 "loud" => Ok(Self::Strength(VolumeStrength::Loud)),
1301 "x-loud" => Ok(Self::Strength(VolumeStrength::XLoud)),
1302 "default" => Ok(Self::Strength(VolumeStrength::Default)),
1303 value if value.ends_with("dB") => Ok(Self::Decibel(
1304 value.strip_suffix("dB").unwrap().parse::<f32>()?,
1305 )),
1306 e => bail!("Unrecognised value {}", e),
1307 }
1308 }
1309}
1310
1311impl fmt::Display for VolumeRange {
1312 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1313 match self {
1314 Self::Strength(strength) => write!(fmt, "{}", strength),
1315 Self::Decibel(percent) => write!(fmt, "{}dB", percent),
1316 }
1317 }
1318}
1319
1320/// A change in the speaking rate for the contained text. Legal values are: a non-negative percentage or "x-slow",
1321/// "slow", "medium", "fast", "x-fast", or "default". Labels "x-slow" through "x-fast" represent a sequence of
1322/// monotonically non-decreasing speaking rates. When the value is a non-negative percentage it acts as a multiplier
1323/// of the default rate. For example, a value of 100% means no change in speaking rate, a value of 200% means a
1324/// speaking rate twice the default rate, and a value of 50% means a speaking rate of half the default rate.
1325/// The default rate for a voice depends on the language and dialect and on the personality of the voice.
1326/// The default rate for a voice SHOULD be such that it is experienced as a normal speaking rate for the voice when
1327/// reading aloud text. Since voices are processor-specific, the default rate will be as well.
1328///
1329/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1330/// All Rights Reserved._
1331#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
1332#[cfg_attr(test, derive(fake::Dummy))]
1333pub enum RateRange {
1334 /// Rate rate specified via an enum.
1335 Strength(RateStrength), // "x-slow", "slow", "medium", "fast", "x-fast", or "default"
1336 /// Rate range specified via a positive percentage.
1337 Percentage(PositiveNumber),
1338}
1339
1340impl FromStr for RateRange {
1341 type Err = anyhow::Error;
1342
1343 fn from_str(s: &str) -> Result<Self, Self::Err> {
1344 match s {
1345 "x-slow" => Ok(Self::Strength(RateStrength::XSlow)),
1346 "slow" => Ok(Self::Strength(RateStrength::Slow)),
1347 "medium" => Ok(Self::Strength(RateStrength::Medium)),
1348 "fast" => Ok(Self::Strength(RateStrength::Fast)),
1349 "x-fast" => Ok(Self::Strength(RateStrength::XFast)),
1350 "default" => Ok(Self::Strength(RateStrength::Default)),
1351 value if value.ends_with('%') => {
1352 if value.starts_with('+') || value.starts_with('-') {
1353 if value.starts_with('+') {
1354 Ok(Self::Percentage(
1355 value.strip_suffix('%').unwrap().parse::<PositiveNumber>()?,
1356 ))
1357 } else {
1358 bail!(
1359 "Unrecognised value {}",
1360 "Negative percentage not allowed for rate"
1361 );
1362 }
1363 } else {
1364 Ok(Self::Percentage(
1365 value.strip_suffix('%').unwrap().parse::<PositiveNumber>()?,
1366 ))
1367 }
1368 }
1369 e => bail!("Unrecognised value {}", e),
1370 }
1371 }
1372}
1373
1374impl fmt::Display for RateRange {
1375 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1376 match self {
1377 Self::Strength(strength) => write!(fmt, "{}", strength),
1378 Self::Percentage(percent) => write!(fmt, "{}%", percent),
1379 }
1380 }
1381}
1382
1383/// The pitch contour is defined as a set of white space-separated targets at specified time positions in the speech output.
1384/// The algorithm for interpolating between the targets is processor-specific. In each pair of the form (time position,target),
1385/// the first value is a percentage of the period of the contained text (a number followed by "%") and the second value is
1386/// the value of the pitch attribute (a number followed by "Hz", a relative change, or a label value). Time position values
1387/// outside 0% to 100% are ignored. If a pitch value is not defined for 0% or 100% then the nearest pitch target is copied.
1388/// All relative values for the pitch are relative to the pitch value just before the contained text.
1389///
1390/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1391/// All Rights Reserved._
1392#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
1393#[cfg_attr(test, derive(fake::Dummy))]
1394pub enum ContourElement {
1395 /// Pitch contouring element.
1396 Element((f32, PitchRange)),
1397}
1398
1399impl FromStr for ContourElement {
1400 type Err = anyhow::Error;
1401
1402 fn from_str(s: &str) -> Result<Self, Self::Err> {
1403 match s {
1404 value if value.starts_with('(') && value.ends_with(')') => {
1405 let value = value.strip_suffix(')').unwrap().to_string();
1406 let value = value.strip_prefix('(').unwrap().to_string();
1407 let elements = value.split(',').collect::<Vec<_>>();
1408
1409 let pitch = match PitchRange::from_str(elements[1]) {
1410 Ok(result) => result,
1411 Err(e) => bail!("Error: {}", e),
1412 };
1413
1414 if elements[0].ends_with('%') {
1415 let percentage = elements[0].strip_suffix('%').unwrap().parse::<f32>()?;
1416 Ok(Self::Element((percentage, pitch)))
1417 } else {
1418 bail!(
1419 "Unrecognised value {}",
1420 "Invalid percentage in pitch contour"
1421 );
1422 }
1423 }
1424 e => bail!("Unrecognised value {}", e),
1425 }
1426 }
1427}
1428
1429impl fmt::Display for ContourElement {
1430 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1431 match self {
1432 Self::Element((pct, pitch_range)) => {
1433 write!(fmt, "({}%,{})", pct, pitch_range)
1434 }
1435 }
1436 }
1437}
1438
1439/// The pitch contour is defined as a set of white space-separated targets at specified time positions in the speech output.
1440/// The algorithm for interpolating between the targets is processor-specific. In each pair of the form (time position,target),
1441/// the first value is a percentage of the period of the contained text (a number followed by "%") and the second value is
1442/// the value of the pitch attribute (a number followed by "Hz", a relative change, or a label value). Time position values
1443/// outside 0% to 100% are ignored. If a pitch value is not defined for 0% or 100% then the nearest pitch target is copied.
1444/// All relative values for the pitch are relative to the pitch value just before the contained text.
1445///
1446/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1447/// All Rights Reserved._
1448#[derive(Clone, Debug, PartialEq, PartialOrd)]
1449#[cfg_attr(test, derive(fake::Dummy))]
1450pub enum PitchContour {
1451 /// List of pitch contours
1452 Elements(Vec<ContourElement>),
1453}
1454
1455impl FromStr for PitchContour {
1456 type Err = anyhow::Error;
1457
1458 fn from_str(s: &str) -> Result<Self, Self::Err> {
1459 let mut pitch_contour_elements = Vec::new();
1460 match s {
1461 value if value.starts_with('(') => {
1462 let elements = value.split(' ').collect::<Vec<_>>();
1463
1464 for element in elements {
1465 let pitchcontourelement = ContourElement::from_str(element)?;
1466 pitch_contour_elements.push(pitchcontourelement);
1467 }
1468
1469 Ok(Self::Elements(pitch_contour_elements))
1470 }
1471 e if !e.trim().is_empty() => bail!("Unrecognised value {}", e),
1472 _ => Ok(Self::Elements(pitch_contour_elements)), // No op on pitch contouring
1473 }
1474 }
1475}
1476
1477impl fmt::Display for PitchContour {
1478 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1479 let mut all_elements_str = "".to_string();
1480 let mut start = true;
1481 match self {
1482 Self::Elements(elements) => {
1483 for element in elements {
1484 let element_str = element.to_string();
1485
1486 if !start {
1487 all_elements_str.push(' ');
1488 }
1489 all_elements_str.push_str(&element_str);
1490
1491 if start {
1492 start = false;
1493 }
1494 }
1495 write!(fmt, "{}", all_elements_str)
1496 }
1497 }
1498 }
1499}
1500
/// A positive number as it appears in SSML tags. Floating-point and integral
/// values are kept apart so that numeric errors are minimised when the value
/// is serialised again.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
pub enum PositiveNumber {
    /// A value stored as a float.
    FloatNumber(f32),
    /// A value stored as an integer.
    RoundNumber(isize),
}
1510
#[cfg(test)]
impl fake::Dummy<fake::Faker> for PositiveNumber {
    /// Test-only generator: with probability 0.5 yields a float drawn from
    /// `0.1..100.0`, otherwise an integer drawn from `1..100`.
    fn dummy_with_rng<R: rand::Rng + ?Sized>(_: &fake::Faker, rng: &mut R) -> PositiveNumber {
        let pick_float = rng.gen_bool(0.5);
        if pick_float {
            return Self::FloatNumber(rng.gen_range(0.1..100.0));
        }
        Self::RoundNumber(rng.gen_range(1..100))
    }
}
1521
1522impl FromStr for PositiveNumber {
1523 type Err = anyhow::Error;
1524
1525 fn from_str(s: &str) -> Result<Self, Self::Err> {
1526 match s {
1527 value
1528 if value.starts_with('+')
1529 || value.starts_with('-')
1530 || value.parse::<f32>().is_ok() =>
1531 {
1532 if value.starts_with('+') {
1533 if value.contains('.') {
1534 Ok(Self::FloatNumber(
1535 value.strip_prefix('+').unwrap().parse::<f32>()?,
1536 ))
1537 } else {
1538 Ok(Self::RoundNumber(
1539 value.strip_prefix('+').unwrap().parse::<isize>()?,
1540 ))
1541 }
1542 } else if value.starts_with('-') {
1543 bail!("Unrecognised value {}", "Negative number not allowed");
1544 } else if value.contains('.') {
1545 Ok(Self::FloatNumber(value.parse::<f32>()?))
1546 } else {
1547 Ok(Self::RoundNumber(value.parse::<isize>()?))
1548 }
1549 }
1550 e => bail!("Unrecognised value {}", e),
1551 }
1552 }
1553}
1554
1555impl fmt::Display for PositiveNumber {
1556 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1557 match self {
1558 Self::FloatNumber(floatnum) => write!(fmt, "{}", floatnum),
1559 Self::RoundNumber(roundnum) => write!(fmt, "{}", roundnum),
1560 }
1561 }
1562}
1563
/// Unit in which a relative change is expressed: a percentage, or for
/// pitches also semitones or Hertz.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum Unit {
    /// Hertz, written as `Hz`.
    Hz,
    /// Semitone, written as `st`.
    St,
    /// Percentage, written as `%`.
    Percentage,
}
1576
1577impl fmt::Display for Unit {
1578 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
1579 match self {
1580 Self::Hz => write!(fmt, "Hz"),
1581 Self::St => write!(fmt, "st"),
1582 Self::Percentage => write!(fmt, "%"),
1583 }
1584 }
1585}
1586
/// Emphasis levels, written in SSML as "strong", "moderate", "none" or
/// "reduced".
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum EmphasisLevel {
    /// "strong".
    Strong,
    /// "moderate" (default).
    Moderate,
    /// "none".
    None,
    /// "reduced".
    Reduced,
}
1601
1602impl Display for EmphasisLevel {
1603 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1604 write!(
1605 f,
1606 "{}",
1607 match self {
1608 Self::Strong => "strong",
1609 Self::Moderate => "moderate",
1610 Self::None => "none",
1611 Self::Reduced => "reduced",
1612 }
1613 )
1614 }
1615}
1616
1617impl FromStr for EmphasisLevel {
1618 type Err = anyhow::Error;
1619
1620 fn from_str(s: &str) -> Result<Self, Self::Err> {
1621 match s {
1622 "strong" => Ok(Self::Strong),
1623 "moderate" => Ok(Self::Moderate),
1624 "none" => Ok(Self::None),
1625 "reduced" => Ok(Self::Reduced),
1626 e => bail!("Unrecognised value {}", e),
1627 }
1628 }
1629}
1630
1631/// The break element is an empty element that controls the pausing or other
1632/// prosodic boundaries between tokens. The use of the break element between
1633/// any pair of tokens is optional. If the element is not present between
1634/// tokens, the synthesis processor is expected to automatically determine a
1635/// break based on the linguistic context. In practice, the break element is
1636/// most often used to override the typical automatic behavior of a synthesis
1637/// processor.
1638///
1639/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1640/// All Rights Reserved._
1641#[derive(Clone, Debug, PartialEq, PartialOrd)]
1642#[cfg_attr(test, derive(fake::Dummy))]
1643pub struct BreakAttributes {
1644 /// The strength attribute is an optional attribute having one of the following
1645 /// values: "none", "x-weak", "weak", "medium" (default value), "strong", or
1646 /// "x-strong". This attribute is used to indicate the strength of the prosodic
1647 /// break in the speech output. The value "none" indicates that no prosodic
1648 /// break boundary should be outputted, which can be used to prevent a prosodic
1649 /// break which the processor would otherwise produce. The other values
1650 /// indicate monotonically non-decreasing (conceptually increasing) break
1651 /// strength between tokens. The stronger boundaries are typically accompanied
1652 /// by pauses. "x-weak" and "x-strong" are mnemonics for "extra weak" and
1653 /// "extra strong", respectively.
1654 pub strength: Option<Strength>,
1655 /// The time attribute is an optional attribute indicating the duration of a
1656 /// pause to be inserted in the output in seconds or milliseconds. It
1657 /// follows the time value format from the Cascading Style Sheets Level 2
1658 /// Recommendation [CSS2], e.g. "250ms",
1659 pub time: Option<TimeDesignation>,
1660}
1661
1662impl Display for BreakAttributes {
1663 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1664 if let Some(strength) = self.strength {
1665 write!(f, " strength=\"{}\"", strength)?;
1666 }
1667 if let Some(time) = &self.time {
1668 write!(f, " time=\"{}\"", time)?;
1669 }
1670 Ok(())
1671 }
1672}
1673
1674/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
1675/// All Rights Reserved._
1676#[derive(Clone, Debug, PartialEq, PartialOrd)]
1677#[cfg_attr(test, derive(fake::Dummy))]
1678pub struct ProsodyAttributes {
1679 /// pitch: the baseline pitch for the contained text. Although the exact meaning of "baseline pitch"
1680 /// will vary across synthesis processors, increasing/decreasing this value will typically increase/decrease
1681 /// the approximate pitch of the output. Legal values are: a number followed by "Hz", a relative change
1682 /// or "x-low", "low", "medium", "high", "x-high", or "default". Labels "x-low" through "x-high" represent
1683 /// a sequence of monotonically non-decreasing pitch levels.
1684 pub pitch: Option<PitchRange>,
1685 /// The pitch contour is defined as a set of white space-separated targets at specified
1686 /// time positions in the speech output. The algorithm for interpolating between the targets
1687 /// is processor-specific. In each pair of the form (time position,target), the first value
1688 /// is a percentage of the period of the contained text (a number followed by "%") and
1689 /// the second value is the value of the pitch attribute (a number followed by "Hz", a relative
1690 /// change, or a label value). Time position values outside 0% to 100% are ignored.
1691 /// If a pitch value is not defined for 0% or 100% then the nearest pitch target is copied.
1692 /// All relative values for the pitch are relative to the pitch value just before the contained text.
1693 pub contour: Option<PitchContour>,
1694 /// the pitch range (variability) for the contained text. Although the exact meaning of
1695 /// "pitch range" will vary across synthesis processors, increasing/decreasing this value
1696 /// will typically increase/decrease the dynamic range of the output pitch. Legal values
1697 /// are: a number followed by "Hz", a relative change or "x-low", "low", "medium", "high",
1698 /// "x-high", or "default". Labels "x-low" through "x-high" represent a sequence of
1699 /// monotonically non-decreasing pitch ranges.
1700 pub range: Option<PitchRange>,
1701 /// a change in the speaking rate for the contained text. Legal values are: a non-negative
1702 /// percentage or "x-slow", "slow", "medium", "fast", "x-fast", or "default". Labels "x-slow"
1703 /// through "x-fast" represent a sequence of monotonically non-decreasing speaking rates.
1704 /// When the value is a non-negative percentage it acts as a multiplier of the default rate.
1705 /// For example, a value of 100% means no change in speaking rate, a value of 200% means a
1706 /// speaking rate twice the default rate, and a value of 50% means a speaking rate of half
1707 /// the default rate. The default rate for a voice depends on the language and dialect and on
1708 /// the personality of the voice. The default rate for a voice should be such that it is
1709 /// experienced as a normal speaking rate for the voice when reading aloud text. Since voices
1710 /// are processor-specific, the default rate will be as well.
1711 pub rate: Option<RateRange>,
1712 /// duration: a value in seconds or milliseconds for the desired time to take to read the
1713 /// contained text. Follows the time value format from the Cascading Style Sheet Level 2
1714 /// Recommendation [CSS2], e.g. "250ms", "3s".
1715 pub duration: Option<TimeDesignation>,
1716 /// the volume for the contained text. Legal values are: a number preceded by "+" or "-"
1717 /// and immediately followed by "dB"; or "silent", "x-soft", "soft", "medium", "loud", "x-loud",
1718 /// or "default". The default is +0.0dB. Specifying a value of "silent" amounts to specifying
1719 /// minus infinity decibels (dB). Labels "silent" through "x-loud" represent a sequence of
1720 /// monotonically non-decreasing volume levels. When the value is a signed number (dB),
1721 /// it specifies the ratio of the squares of the new signal amplitude (a1) and the current
1722 /// amplitude (a0), and is defined in terms of dB:
1723 pub volume: Option<VolumeRange>,
1724}
1725
1726impl Display for ProsodyAttributes {
1727 fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
1728 if let Some(pitch) = &self.pitch {
1729 write!(f, " pitch=\"{}\"", pitch)?;
1730 }
1731 if let Some(contour) = &self.contour {
1732 write!(f, " contour=\"{}\"", contour)?;
1733 }
1734 if let Some(range) = &self.range {
1735 write!(f, " range=\"{}\"", range)?;
1736 }
1737 if let Some(rate) = &self.rate {
1738 write!(f, " rate=\"{}\"", rate)?;
1739 }
1740 if let Some(duration) = &self.duration {
1741 write!(f, " duration=\"{}\"", duration)?;
1742 }
1743 if let Some(volume) = &self.volume {
1744 write!(f, " volume=\"{}\"", volume)?;
1745 }
1746 Ok(())
1747 }
1748}
1749
/// Attributes of the `<mark/>` element.
///
/// A mark element is an empty element that places a marker into the text/tag
/// sequence. It has one REQUIRED attribute, name, which is of type xsd:token
/// [SCHEMA2 §3.3.2]. The mark element can be used to reference a specific
/// location in the text/tag sequence, and can additionally be used to insert a
/// marker into an output stream for asynchronous notification. When processing
/// a mark element, a synthesis processor MUST do one or both of the following:
/// - inform the hosting environment with the value of the name attribute and
///   with information allowing the platform to retrieve the corresponding position
///   in the rendered output.
/// - when audio output of the SSML document reaches the mark, issue an event that
///   includes the REQUIRED name attribute of the element. The hosting environment
///   defines the destination of the event.
///
/// The mark element does not affect the speech output process.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct MarkAttributes {
    /// Name of the marker used to refer to it when jumping in the audio.
    ///
    /// Stored unescaped; the `Display` implementation escapes it when writing the
    /// `name="…"` attribute.
    pub name: String,
}
1773
1774impl Display for MarkAttributes {
1775 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1776 write!(f, " name=\"{}\"", escape(&self.name))
1777 }
1778}
1779
/// Attributes of the `<emphasis></emphasis>` element.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct EmphasisAttributes {
    /// the optional level attribute indicates the strength of emphasis to be applied. Defined
    /// values are "strong", "moderate", "none" and "reduced". The default level is "moderate".
    /// The meaning of "strong" and "moderate" emphasis is interpreted according to the language
    /// being spoken (languages indicate emphasis using a possible combination of pitch change,
    /// timing changes, loudness and other acoustic differences). The "reduced" level is effectively
    /// the opposite of emphasizing a word. For example, when the phrase "going to" is reduced it
    /// may be spoken as "gonna". The "none" level is used to prevent the synthesis processor from
    /// emphasizing words that it might typically emphasize. The values "none", "moderate", and "strong"
    /// are monotonically non-decreasing in strength.
    ///
    /// When this is `None` the `Display` implementation writes no `level` attribute at all.
    pub level: Option<EmphasisLevel>,
}
1796
1797impl Display for EmphasisAttributes {
1798 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1799 if let Some(level) = self.level {
1800 write!(f, " level=\"{}\"", level)?;
1801 }
1802
1803 Ok(())
1804 }
1805}
1806
/// Attributes of the `<sub></sub>` element.
///
/// The sub element is employed to indicate that the text in the alias attribute
/// value replaces the contained text for pronunciation. This allows a document to
/// contain both a spoken and written form. The REQUIRED alias attribute specifies
/// the string to be spoken instead of the enclosed string. The processor SHOULD
/// apply text normalization to the alias value.
///
/// The sub element can only contain text (no elements).
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct SubAttributes {
    /// The string to be spoken instead of the string enclosed in the tag.
    ///
    /// Stored unescaped; the `Display` implementation escapes it when writing the
    /// `alias="…"` attribute.
    pub alias: String,
}
1823
1824impl Display for SubAttributes {
1825 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1826 write!(f, " alias=\"{}\"", escape(&self.alias))
1827 }
1828}
1829
/// Attribute indicating the preferred gender of the voice to speak the contained text.
///
/// Each variant is written by `Display` and recognised by `FromStr` as its lowercase
/// name: `male`, `female` or `neutral`.
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum Gender {
    /// Male voice (`male`).
    Male,
    /// Female voice (`female`).
    Female,
    /// Gender neutral voice (`neutral`).
    Neutral,
}
1844
1845impl Display for Gender {
1846 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1847 write!(
1848 f,
1849 "{}",
1850 match self {
1851 Self::Male => "male",
1852 Self::Female => "female",
1853 Self::Neutral => "neutral",
1854 }
1855 )
1856 }
1857}
1858
1859impl FromStr for Gender {
1860 type Err = anyhow::Error;
1861
1862 fn from_str(s: &str) -> Result<Self, Self::Err> {
1863 match s {
1864 "male" => Ok(Self::Male),
1865 "female" => Ok(Self::Female),
1866 "neutral" => Ok(Self::Neutral),
1867 e => bail!("Unrecognised gender value {}", e),
1868 }
1869 }
1870}
1871
/// A language accent pair, this will be a language (required) and an optional accent in which to
/// speak the language.
///
/// The textual form, as produced by `Display` and accepted by `FromStr`, is either `language`
/// or `language:accent`.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct LanguageAccentPair {
    /// Language the voice is desired to speak.
    pub lang: String,
    /// Optional accent to apply to the language. When `None`, only the language is written
    /// (no `:` separator).
    pub accent: Option<String>,
}
1882
1883impl Display for LanguageAccentPair {
1884 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1885 write!(f, "{}", escape(&self.lang))?;
1886 if let Some(accent) = &self.accent {
1887 write!(f, ":{}", escape(accent))?;
1888 }
1889 Ok(())
1890 }
1891}
1892
1893impl FromStr for LanguageAccentPair {
1894 type Err = anyhow::Error;
1895
1896 fn from_str(s: &str) -> Result<Self, Self::Err> {
1897 if s.is_empty() {
1898 bail!("Empty language string");
1899 } else if s == "und" || s == "zxx" {
1900 bail!("Disallowed language code");
1901 } else {
1902 let lang_accent = s.split(':').collect::<Vec<_>>();
1903 if lang_accent.len() > 2 {
1904 bail!(
1905 "Invalid format 'language:accent' or 'language' expected for '{}'",
1906 s
1907 );
1908 }
1909 if lang_accent.len() == 1 {
1910 Ok(LanguageAccentPair {
1911 lang: lang_accent[0].to_string(),
1912 accent: None,
1913 })
1914 } else if lang_accent.len() == 2 {
1915 Ok(LanguageAccentPair {
1916 lang: lang_accent[0].to_string(),
1917 accent: Some(lang_accent[1].to_string()),
1918 })
1919 } else {
1920 bail!("Unexpected language accent pair: '{}'", s);
1921 }
1922 }
1923 }
1924}
1925
1926/// The voice element is a production element that requests a change in speaking voice. There are
1927/// two kinds of attributes for the voice element: those that indicate desired features of a
1928/// voice and those that control behavior. The voice feature attributes are:
1929///
1930/// * **gender**: _optional_ attribute indicating the preferred gender of the voice to speak the
1931/// contained text. Enumerated values are: "male", "female", "neutral", or the empty string "".
1932/// * **age**: _optional_ attribute indicating the preferred age in years (since birth) of the
1933/// voice to speak the contained text. Acceptable values are of type xsd:nonNegativeInteger
1934/// [SCHEMA2 §3.3.20] or the empty string "".
1935/// * **variant**: _optional_ attribute indicating a preferred variant of the other voice
1936/// characteristics to speak the contained text. (e.g. the second male child voice). Valid values of
1937/// variant are of type xsd:positiveInteger [SCHEMA2 §3.3.25] or the empty string "".
1938/// * **name**: _optional_ attribute indicating a processor-specific voice name to speak the contained
1939/// text. The value may be a space-separated list of names ordered from top preference down or the
1940/// empty string "". As a result a name must not contain any white space.
1941/// * **languages**: _optional_ attribute indicating the list of languages the voice is desired to speak.
1942/// The value must be either the empty string "" or a space-separated list of languages, with optional
1943/// accent indication per language. Each language/accent pair is of the form "language" or
1944/// "language:accent", where both language and accent must be an Extended Language Range
1945/// [BCP47, Matching of Language Tags §2.2], except that the values "und" and "zxx" are disallowed.
1946/// A voice satisfies the languages feature if, for each language/accent pair in the list,
1947/// 1. the voice is documented (see Voice descriptions) as reading/speaking a language that
1948/// matches the Extended Language Range given by language according to the Extended Filtering
1949/// matching algorithm [BCP47, Matching of Language Tags §3.3.2], and
1950/// 2. if an accent is given, the voice is documented (see Voice descriptions) as
1951/// reading/speaking the language above with an accent that matches the Extended Language Range
1952/// given by accent according to the Extended Filtering matching algorithm [BCP47, Matching of
1953/// Language Tags §3.3.2], except that the script and extension subtags of the accent must be
1954/// ignored by the synthesis processor. It is recommended that authors and voice providers do
1955/// not use the script or extension subtags for accents because they are not relevant for
1956/// speaking.
1957///
1958/// For example, a languages value of "en:pt fr:ja" can legally be matched by any voice that can
1959/// both read English (speaking it with a Portuguese accent) and read French (speaking it with a
1960/// Japanese accent). Thus, a voice that only supports "en-US" with a "pt-BR" accent and "fr-CA"
1961/// with a "ja" accent would match. As another example, if we have <voice languages="fr:pt"> and
1962/// there is no voice that supports French with a Portuguese accent, then a voice selection
1963/// failure will occur. Note that if no accent indication is given for a language, then any voice
1964/// that speaks the language is acceptable, regardless of accent. Also, note that author control
1965/// over language support during voice selection is independent of any value of xml:lang in the
1966/// text.
1967///
1968/// For the feature attributes above, an empty string value indicates that any voice will satisfy
1969/// the feature. The top-level default value for all feature attributes is "", the empty string.
1970///
1971/// The behavior control attributes of voice are:
1972///
1973/// * **required**: _optional_ attribute that specifies a set of features by their respective
1974/// attribute names. This set of features is used by the voice selection algorithm described below.
1975/// Valid values of required are a space-separated list composed of values from the list of feature
1976/// names: "name", "languages", "gender", "age", "variant" or the empty string "". The default
1977/// value for this attribute is "languages".
1978/// * **ordering**: _optional_ attribute that specifies the priority ordering of features. Valid
1979/// values of ordering are a space-separated list composed of values from the list of feature
1980/// names: "name", "languages", "gender", "age", "variant" or the empty string "", where features
1981/// named earlier in the list have higher priority . The default value for this attribute is
1982/// "languages". Features not listed in the ordering list have equal priority to each other but
1983/// lower than that of the last feature in the list. Note that if the ordering attribute is set to
1984/// the empty string then all features have the same priority.
1985/// * **onvoicefailure**: _optional_ attribute containing one value from the following enumerated
1986/// list describing the desired behavior of the synthesis processor upon voice selection failure.
1987/// The default value for this attribute is "priorityselect".
1988/// * *priorityselect* - the synthesis processor uses the values of all voice feature attributes
1989/// to select a voice by feature priority, where the starting candidate set is the set of all
1990/// available voices.
1991/// * *keepexisting* - the voice does not change.
1992/// * *processorchoice* - the synthesis processor chooses the behavior (either priorityselect or
1993/// keepexisting).
1994///
1995/// The following voice selection algorithm must be used:
1996///
1997/// 1. All available voices are identified for which the values of all voice feature attributes
1998/// listed in the required attribute value are matched. When the value of the required attribute is
1999/// the empty string "", any and all voices are considered successful matches. If one or more voices
2000/// are identified, the selection is considered successful; otherwise there is voice selection
2001/// failure.
2002/// 2. If a successful selection identifies only one voice, the synthesis processor must use that
2003/// voice.
2004/// 3. If a successful selection identifies more than one voice, the remaining features (those not
2005/// listed in the required attribute value) are used to choose a voice by feature priority, where
2006/// the starting candidate set is the set of all voices identified.
2007/// 4. If there is voice selection failure, a conforming synthesis processor must report the voice
2008/// selection failure in addition to taking the action(s) expressed by the value of the
2009/// onvoicefailure attribute.
2010/// 5. To choose a voice by feature priority, each feature is taken in turn starting with the
2011/// highest priority feature, as controlled by the ordering attribute.
2012/// * If at least one voice matches the value of the current voice feature attribute then all
2013/// voices not matching that value are removed from the candidate set. If a single voice remains
2014/// in the candidate set the synthesis processor must use it. If more than one voice remains in
2015/// the candidate set then the next priority feature is examined for the candidate set.
2016/// * If no voices match the value of the current voice feature attribute then the next priority
2017/// feature is examined for the candidate set.
2018/// 6. After examining all feature attributes on the ordering list, if multiple voices remain in
2019/// the candidate set, the synthesis processor must use any one of them.
2020///
2021/// Although each attribute individually is optional, it is an error if no attributes are specified
2022/// when the voice element is used.
2023///
2024/// # Voice descriptions
2025/// For every voice made available to a synthesis processor, the vendor of the voice must document the
2026/// following:
2027///
2028/// * a list of language tags [BCP47, Tags for Identifying Languages] representing the languages the
2029/// voice can read.
2030/// * for each language, a language tag [BCP47, Tags for Identifying Languages] representing the
2031/// accent the voice uses when reading the language.
2032///
2033/// Although indication of language (using xml:lang) and selection of voice (using voice) are
2034/// independent, there is no requirement that a synthesis processor support every possible
2035/// combination of values of the two. However, a synthesis processor must document expected
2036/// rendering behavior for every possible combination. See the onlangfailure attribute for
2037/// information on what happens when the processor encounters text content that the voice cannot
2038/// speak.
2039///
2040/// voice attributes are inherited down the tree including to within elements that change the
2041/// language. The defaults described for each attribute only apply at the top (document) level and
2042/// are overridden by explicit author use of the voice element. In addition, changes in voice are
2043/// scoped and apply only to the content of the element in which the change occurred. When
2044/// processing reaches the end of a voice element content, i.e. the closing </voice> tag, the voice
2045/// in effect before the beginning tag is restored.
2046///
2047/// Similarly, if a voice is changed by the processor as a result of a language speaking failure,
2048/// the prior voice is restored when that voice is again able to speak the content. Note that there
2049/// is always an active voice, since the synthesis processor is required to select a default voice
2050/// before beginning execution of the document.
2051///
2052/// Relative changes in prosodic parameters should be carried across voice changes. However,
2053/// different voices have different natural defaults for pitch, speaking rate, etc. because they
2054/// represent different personalities, so absolute values of the prosodic parameters may vary across
2055/// changes in the voice.
2056///
2057/// The quality of the output audio or voice may suffer if a change in voice is requested within a
2058/// sentence.
2059///
2060/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
2061/// All Rights Reserved._
#[derive(Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct VoiceAttributes {
    /// OPTIONAL attribute indicating the preferred gender of the voice to speak the contained text.
    /// Enumerated values are: "male", "female", "neutral", or the empty string "".
    ///
    /// `None` means the attribute is not written on output.
    pub gender: Option<Gender>,
    /// OPTIONAL attribute indicating the preferred age in years (since birth) of the voice to speak the contained text.
    ///
    /// Held as a `u8`, so ages above 255 cannot be represented.
    pub age: Option<u8>,
    /// OPTIONAL attribute indicating a preferred variant of the other voice characteristics to speak the contained text.
    /// (e.g. the second male child voice).
    ///
    /// The `NonZeroUsize` type rules out a variant of zero.
    pub variant: Option<NonZeroUsize>,
    /// OPTIONAL attribute indicating a processor-specific voice name to speak the contained text.
    /// The value MAY be a space-separated list of names ordered from top preference down or the empty string "".
    /// As a result a name MUST NOT contain any white space.
    ///
    /// One entry per name. On output the entries are joined with single spaces, and an empty
    /// vector means the attribute is omitted.
    pub name: Vec<String>,
    /// OPTIONAL attribute indicating the list of languages the voice is desired to speak.
    /// The value MUST be either the empty string "" or a space-separated list of languages,
    /// with OPTIONAL accent indication per language. Each language/accent pair is of the form "language" or "language:accent",
    /// where both language and accent MUST be an Extended Language Range, except that the values "und" and "zxx" are disallowed.
    ///
    /// One entry per language/accent pair. On output the entries are joined with single
    /// spaces, and an empty vector means the attribute is omitted.
    pub languages: Vec<LanguageAccentPair>,
}
2083
2084impl Display for VoiceAttributes {
2085 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2086 if let Some(gender) = self.gender {
2087 write!(f, " gender=\"{}\"", gender)?;
2088 }
2089 if let Some(age) = self.age {
2090 write!(f, " age=\"{}\"", age)?;
2091 }
2092 if let Some(variant) = self.variant {
2093 write!(f, " variant=\"{}\"", variant)?;
2094 }
2095 if !self.name.is_empty() {
2096 write!(f, " name=\"{}\"", escape(&self.name.join(" ")))?;
2097 }
2098 if !self.languages.is_empty() {
2099 let languages_str = self
2100 .languages
2101 .iter()
2102 .map(|l| format!("{}", l))
2103 .collect::<Vec<String>>()
2104 .join(" ");
2105
2106 write!(f, " languages=\"{}\"", languages_str)?;
2107 }
2108
2109 Ok(())
2110 }
2111}
2112
/// This tells the synthesis processor whether or not it can attempt to optimize rendering by pre-fetching audio.
/// The value is either `safe` to say that audio is only fetched when it is needed, never before; or `prefetch` to permit,
/// but not require the processor to pre-fetch the audio.
///
/// The `Default` implementation in this file returns [`FetchHint::Prefetch`].
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[cfg_attr(test, derive(fake::Dummy))]
pub enum FetchHint {
    /// The processor can perform an optimisation where it fetches the audio before it is needed.
    /// Written and parsed as `prefetch`.
    Prefetch,
    /// The audio should only be fetched when needed. Written and parsed as `safe`.
    Safe,
}
2127
2128impl Display for FetchHint {
2129 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2130 write!(
2131 f,
2132 "{}",
2133 match self {
2134 Self::Prefetch => "prefetch",
2135 Self::Safe => "safe",
2136 }
2137 )
2138 }
2139}
2140
2141impl FromStr for FetchHint {
2142 type Err = anyhow::Error;
2143
2144 fn from_str(s: &str) -> Result<Self, Self::Err> {
2145 let s = match s {
2146 "prefetch" => Self::Prefetch,
2147 "safe" => Self::Safe,
2148 e => bail!("Unrecognised fetchhint {}", e),
2149 };
2150 Ok(s)
2151 }
2152}
2153
2154impl Default for FetchHint {
2155 fn default() -> Self {
2156 Self::Prefetch
2157 }
2158}
2159
/// Attributes of the `<audio></audio>` element.
///
/// The audio element supports the insertion of recorded audio files and the insertion of other
/// audio formats in conjunction with synthesized speech output. The audio element may be empty.
/// If the audio element is not empty then the contents should be the marked-up text to be spoken
/// if the audio document is not available. The alternate content may include text, speech markup,
/// desc elements, or other audio elements. The alternate content may also be used when rendering
/// the document to non-audible output and for accessibility (see the desc element).
///
/// "Speech Synthesis Markup Language (SSML) Version 1.1" _Copyright © 2010 W3C® (MIT, ERCIM, Keio),
/// All Rights Reserved._
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(test, derive(fake::Dummy))]
pub struct AudioAttributes {
    /// The URI of a document with an appropriate media type. If absent, the audio element behaves
    /// as if src were present with a legal URI but the document could not be fetched.
    pub src: Option<http::Uri>,
    /// The timeout for fetches. Written as the `fetchtimeout` attribute when set.
    pub fetch_timeout: Option<TimeDesignation>,
    /// This tells the synthesis processor whether or not it can attempt to optimize rendering by
    /// pre-fetching audio. The value is either safe to say that audio is only fetched when it is
    /// needed, never before; or prefetch to permit, but not require the processor to pre-fetch the
    /// audio. Always written, as the `fetchhint` attribute.
    pub fetch_hint: FetchHint,
    /// Indicates that the document is willing to use content whose age is no greater than the
    /// specified time (cf. 'max-age' in HTTP 1.1). The document is not willing to use
    /// stale content, unless maxstale is also provided.
    pub max_age: Option<usize>,
    /// Indicates that the document is willing to use content that has exceeded its expiration time
    /// (cf. 'max-stale' in HTTP 1.1). If maxstale is assigned a value, then the document is willing
    /// to accept content that has exceeded its expiration time by no more than the specified amount
    /// of time.
    pub max_stale: Option<usize>,
    // Trimming attributes
    /// offset from start of media to begin rendering. This offset is measured in normal media
    /// playback time from the beginning of the media. Always written, as `clipBegin`.
    pub clip_begin: TimeDesignation,
    /// offset from start of media to end rendering. This offset is measured in normal media
    /// playback time from the beginning of the media. Written as `clipEnd` when set.
    pub clip_end: Option<TimeDesignation>,
    /// number of iterations of media to render. A fractional value describes a portion of the
    /// rendered media.
    ///
    /// NOTE(review): the field is a `NonZeroUsize`, so the fractional values mentioned above
    /// cannot be represented here — confirm whether whole repeats only is intended.
    pub repeat_count: NonZeroUsize,
    /// total duration for repeatedly rendering media. This duration is measured in normal media
    /// playback time from the beginning of the media. Written as `repeatDur` when set.
    pub repeat_dur: Option<TimeDesignation>,
    /// Sound level in decibels. Written as `soundLevel` with a `dB` suffix.
    pub sound_level: f32,
    /// Speed in a percentage where 1.0 corresponds to 100%. Written as `speed`, multiplied by
    /// 100 and followed by `%`.
    pub speed: f32,
}
2206
2207impl Display for AudioAttributes {
2208 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2209 write!(f, " fetchhint=\"{}\"", self.fetch_hint)?;
2210 write!(f, " clipBegin=\"{}\"", self.clip_begin)?;
2211 write!(f, " repeatCount=\"{}\"", self.repeat_count)?;
2212 write!(f, " soundLevel=\"{}dB\"", self.sound_level)?;
2213 write!(f, " speed=\"{}%\"", self.speed * 100.0)?;
2214 if let Some(src) = &self.src {
2215 write!(f, " src=\"{}\"", escape(&src.to_string()))?;
2216 }
2217 if let Some(timeout) = &self.fetch_timeout {
2218 write!(f, " fetchtimeout=\"{}\"", timeout)?;
2219 }
2220 if let Some(max_age) = &self.max_age {
2221 write!(f, " maxage=\"{}\"", max_age)?;
2222 }
2223 if let Some(max_stale) = &self.max_stale {
2224 write!(f, " maxstale=\"{}\"", max_stale)?;
2225 }
2226 if let Some(clip_end) = &self.clip_end {
2227 write!(f, " clipEnd=\"{}\"", clip_end)?;
2228 }
2229 if let Some(repeat_dur) = &self.repeat_dur {
2230 write!(f, " repeatDur=\"{}\"", repeat_dur)?;
2231 }
2232
2233 Ok(())
2234 }
2235}
2236
2237#[cfg(test)]
2238mod tests {
2239 use super::*;
2240 use crate::parser::*;
2241 use assert_approx_eq::assert_approx_eq;
2242 use fake::{Fake, Faker};
2243 use quick_xml::events::Event;
2244 use quick_xml::reader::Reader;
2245
#[test]
fn duration_conversion() {
    // Two seconds and two thousand milliseconds must convert to the same duration value.
    let from_seconds = TimeDesignation::Seconds(2.0).duration();
    let from_millis = TimeDesignation::Milliseconds(2000.0).duration();
    assert_eq!(from_seconds, from_millis);
}
2252
2253 // If we take one of our elements and write it out again in theory we should reparse it as the
2254 // same element!
2255
#[test]
fn speak_conversions() {
    // Round-trip check: write `Faker`-generated attributes into a `<speak>` start tag and
    // expect the parser to hand back the same attributes. Repeated 30 times.
    let tag = SsmlElement::Speak;
    for _ in 0..30 {
        let speak: SpeakAttributes = Faker.fake();

        let xml = format!("<{} {}></{}>", tag, speak, tag);
        println!("{}", xml);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, tag);
                assert_eq!(parsed_element, ParsedElement::Speak(speak));
            }
            _ => panic!("Didn't get expected event"),
        }
    }
}
2283
#[test]
fn lang_conversions() {
    // Round-trip check for `<lang>`: serialise `Faker`-generated attributes, parse the start
    // tag again and compare. Repeated 30 times.
    let tag = SsmlElement::Lang;
    for _ in 0..30 {
        let lang: LangAttributes = Faker.fake();

        let xml = format!("<{} {}></{}>", tag, lang, tag);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, tag);
                assert_eq!(parsed_element, ParsedElement::Lang(lang));
            }
            _ => panic!("Didn't get expected event"),
        }
    }
}
2304
#[test]
fn lookup_conversions() {
    // Round-trip check for `<lookup>`: serialise `Faker`-generated attributes, parse the start
    // tag again and compare. Repeated 30 times.
    let tag = SsmlElement::Lookup;
    for _ in 0..30 {
        let look: LookupAttributes = Faker.fake();

        let xml = format!("<{} {}></{}>", tag, look, tag);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, tag);
                assert_eq!(parsed_element, ParsedElement::Lookup(look));
            }
            _ => panic!("Didn't get expected event"),
        }
    }
}
2330
#[test]
fn meta_conversions() {
    // Round-trip check for `<meta>`: serialise `Faker`-generated attributes, parse the start
    // tag again and compare. Repeated 30 times.
    let tag = SsmlElement::Meta;
    for _ in 0..30 {
        let meta: MetaAttributes = Faker.fake();

        let xml = format!("<{} {}></{}>", tag, meta, tag);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, tag);
                assert_eq!(parsed_element, ParsedElement::Meta(meta));
            }
            _ => panic!("Didn't get expected event"),
        }
    }
}
2351
#[test]
fn token_conversions() {
    // `<token>` and `<word>` share `TokenAttributes`, so every generated value is round-tripped
    // through both element names. Repeated 30 times.
    let token_tag = SsmlElement::Token;
    let word_tag = SsmlElement::Word;
    for _ in 0..30 {
        let token: TokenAttributes = Faker.fake();

        // First pass: as a `<token>` element.
        let xml = format!("<{} {}></{}>", token_tag, token, token_tag);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, token_tag);
                assert_eq!(parsed_element, ParsedElement::Token(token.clone()));
            }
            _ => panic!("Didn't get expected token event"),
        }

        // Second pass: the same attributes as a `<word>` element.
        let xml = format!("<{} {}></{}>", word_tag, token, word_tag);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, word_tag);
                assert_eq!(parsed_element, ParsedElement::Word(token));
            }
            _ => panic!("Didn't get expected word event"),
        }
    }
}
2391
#[test]
fn say_as_conversions() {
    // Round-trip check for `<say-as>`: serialise `Faker`-generated attributes, parse the start
    // tag again and compare. Repeated 30 times.
    let tag = SsmlElement::SayAs;
    for _ in 0..30 {
        let say_as: SayAsAttributes = Faker.fake();

        let xml = format!("<{} {}></{}>", tag, say_as, tag);

        let mut reader = Reader::from_reader(xml.as_ref());
        let event = reader.read_event().unwrap();
        println!("{:?}", event);
        match event {
            Event::Start(bs) => {
                let (ssml_element, parsed_element) = parse_element(bs, &mut reader).unwrap();

                assert_eq!(ssml_element, tag);
                assert_eq!(parsed_element, ParsedElement::SayAs(say_as));
            }
            _ => panic!("Didn't get expected event"),
        }
    }
}
2417
/// Round-trips randomly generated `PhonemeAttributes`: the attributes are
/// rendered into a `SsmlElement::Phoneme` tag pair via `Display`, the first
/// XML event is read back, and `parse_element` must yield the same element
/// type and equal attributes.
#[test]
fn phoneme_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: PhonemeAttributes = Faker.fake();

        let tag = SsmlElement::Phoneme;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Phoneme(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2443
/// Round-trips randomly generated `BreakAttributes`: the attributes are
/// rendered into a `SsmlElement::Break` opening/closing tag pair via
/// `Display`, the first XML event is read back, and `parse_element` must
/// yield the same element type and equal attributes.
#[test]
fn break_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: BreakAttributes = Faker.fake();

        let tag = SsmlElement::Break;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Break(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2464
/// Round-trips randomly generated `ProsodyAttributes`: the attributes are
/// rendered into a `SsmlElement::Prosody` tag pair via `Display`, the first
/// XML event is read back, and `parse_element` must yield the same element
/// type and equal attributes.
#[test]
fn prosody_conversions() {
    // Prosody has a lot more area to cover, so it gets more rounds than the
    // other conversion tests.
    const ROUNDS: usize = 50;

    for _ in 0..ROUNDS {
        let expected: ProsodyAttributes = Faker.fake();

        let tag = SsmlElement::Prosody;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        // Print the generated markup so a failing round can be inspected.
        println!("{}", markup);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Prosody(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2493
/// Round-trips randomly generated `MarkAttributes`: the attributes are
/// rendered into a `SsmlElement::Mark` opening/closing tag pair via
/// `Display`, the first XML event is read back, and `parse_element` must
/// yield the same element type and equal attributes.
#[test]
fn mark_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: MarkAttributes = Faker.fake();

        let tag = SsmlElement::Mark;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Mark(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2514
/// Round-trips randomly generated `EmphasisAttributes`: the attributes are
/// rendered into a `SsmlElement::Emphasis` tag pair via `Display`, the first
/// XML event is read back, and `parse_element` must yield the same element
/// type and equal attributes.
#[test]
fn emphasis_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: EmphasisAttributes = Faker.fake();

        let tag = SsmlElement::Emphasis;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Emphasis(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2540
/// Round-trips randomly generated `SubAttributes`: the attributes are
/// rendered into a `SsmlElement::Sub` tag pair via `Display`, the first XML
/// event is read back, and `parse_element` must yield the same element type
/// and equal attributes.
#[test]
fn sub_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: SubAttributes = Faker.fake();

        let tag = SsmlElement::Sub;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Sub(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2561
/// Round-trips randomly generated `LexiconAttributes`: the attributes are
/// rendered into a `SsmlElement::Lexicon` opening/closing tag pair via
/// `Display`, the first XML event is read back, and `parse_element` must
/// yield the same element type and equal attributes.
#[test]
fn lexicon_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: LexiconAttributes = Faker.fake();

        let tag = SsmlElement::Lexicon;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Lexicon(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2587
/// Round-trips randomly generated `VoiceAttributes`: the attributes are
/// rendered into a `SsmlElement::Voice` tag pair via `Display`, the first XML
/// event is read back, and `parse_element` must yield the same element type
/// and equal attributes.
#[test]
fn voice_conversions() {
    const ROUNDS: usize = 30;

    for _ in 0..ROUNDS {
        let expected: VoiceAttributes = Faker.fake();

        let tag = SsmlElement::Voice;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);
        match first {
            Event::Start(start) => {
                let (kind, parsed) = parse_element(start, &mut xml_reader).unwrap();

                assert_eq!(kind, tag);
                assert_eq!(parsed, ParsedElement::Voice(expected));
            }
            // Anything other than an opening tag means the markup was not read as intended.
            _ => panic!("Didn't get expected event"),
        }
    }
}
2608
/// Round-trips randomly generated `AudioAttributes` through a
/// `SsmlElement::Audio` tag pair and compares the parsed result field by
/// field.
///
/// `sound_level` and `speed` are checked with `assert_approx_eq!` rather than
/// exact equality; every other field must match exactly.
#[test]
fn audio_conversions() {
    const ROUNDS: usize = 50;

    for _ in 0..ROUNDS {
        let expected: AudioAttributes = Faker.fake();

        let tag = SsmlElement::Audio;
        let markup = format!("<{} {}></{}>", tag, expected, tag);

        let mut xml_reader = Reader::from_reader(markup.as_bytes());
        let first = xml_reader.read_event().unwrap();
        println!("{:?}", first);

        let start = match first {
            Event::Start(start) => start,
            _ => panic!("Didn't get expected event"),
        };

        let (kind, parsed_element) = parse_element(start, &mut xml_reader).unwrap();
        assert_eq!(kind, tag);

        match parsed_element {
            ParsedElement::Audio(actual) => {
                assert_eq!(actual.src, expected.src);
                assert_eq!(actual.fetch_timeout, expected.fetch_timeout);
                assert_eq!(actual.fetch_hint, expected.fetch_hint);
                assert_eq!(actual.max_age, expected.max_age);
                assert_eq!(actual.max_stale, expected.max_stale);
                assert_eq!(actual.clip_begin, expected.clip_begin);
                assert_eq!(actual.clip_end, expected.clip_end);
                assert_eq!(actual.repeat_count, expected.repeat_count);
                assert_eq!(actual.repeat_dur, expected.repeat_dur);
                // Approximate comparison for the last two fields.
                assert_approx_eq!(actual.sound_level, expected.sound_level);
                assert_approx_eq!(actual.speed, expected.speed);
            }
            other => panic!(
                "SSML Element type doesn't match actual parsed value: {:?}",
                other
            ),
        }
    }
}
2646}