ssml_parser/
lib.rs

1#![doc = include_str!("../README.md")]
2use crate::{elements::SsmlElement, parser::Span};
3use elements::ParsedElement;
4use std::fmt;
5use std::ops::FnMut;
6
7// Public re-export
8pub use crate::parser::parse_ssml;
9
10pub mod elements;
11pub mod parser;
12
/// Holds a parsed SSML document: the text with every tag removed, plus the tag information
/// needed to inspect the markup or write the document back out.
#[derive(Clone, Debug)]
pub struct Ssml {
    /// Text with all tags removed
    text: String,
    /// Vector of tags stored in a depth first search ordering
    pub(crate) tags: Vec<Span>,
    /// Flat, ordered log of XML events (open/close/empty tags and text ranges) describing the
    /// document structure. Text events store ranges that index into `text`.
    pub(crate) event_log: ParserLog,
}
23
/// Result of applying a transformation to an [`Ssml`] document: the regenerated SSML string
/// together with the text that should be handed to a speech synthesiser.
///
/// All text events that survive the transformation are treated as synthesisable, including
/// text inside custom tags.
///
/// The type is comparable and defaultable so that results can be checked with `assert_eq!`
/// and built up incrementally from an empty value.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct TransformedSsml {
    /// Generated SSML String after transformation
    pub ssml_string: String,
    /// Synthesisable text after the transformation
    pub synthesisable_text: String,
}
33
/// List of XML events representing the document in the order it was parsed.
///
/// The writer methods on `Ssml` walk this list front to back to regenerate the document.
type ParserLog = Vec<ParserLogEvent>;
36
/// A single entry in the parser log, representing one piece of the XML document structure.
///
/// Tag events carry the parsed element itself; text events only record where the text lives
/// inside the tag-less text held by `Ssml`, so no text is duplicated in the log.
#[derive(Clone, Debug)]
pub(crate) enum ParserLogEvent {
    /// Text within tags, stored as a `(start, end)` pair. The pair is used as
    /// `text[start..end]` when writing the document out, i.e. as a half-open range of byte
    /// offsets into the tag-less text.
    Text((usize, usize)),
    /// An XML open tag
    Open(ParsedElement),
    /// An XML close tag
    Close(ParsedElement),
    /// An empty XML i.e. `<break/>`
    Empty(ParsedElement),
}
49
/// An owned version of the parser event. It exists so that the event stream can be transformed
/// (including asynchronously) without borrowing from the parsed document: text events hold
/// their own copy of the relevant substring of the tag-less text.
///
/// Formatting an event with `Display` writes it back out as XML, escaping text content.
#[derive(Clone, Debug)]
pub enum ParserEvent {
    /// Some text within a pair of XML tags (unescaped)
    Text(String),
    /// An XML open tag
    Open(ParsedElement),
    /// An XML close tag
    Close(ParsedElement),
    /// An empty XML i.e. `<break/>`
    Empty(ParsedElement),
}
64
/// Defines the transformation applied to each SSML event when asynchronous operations are
/// involved. Only available with the `async` feature enabled.
#[cfg(feature = "async")]
#[async_trait::async_trait]
pub trait AsyncSsmlTransformer {
    /// Can be thought of as an asynchronous `filter_map`. Given a `ParserEvent` it either
    /// returns a `ParserEvent` to be inserted into the stream, or `None` to remove the event
    /// from the event stream. `self` is mutable to allow for internal tracking of values
    /// between calls.
    async fn apply(&mut self, event: ParserEvent) -> Option<ParserEvent>;
}
75
76impl fmt::Display for ParserEvent {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        match self {
79            Self::Text(text) => write!(f, "{}", quick_xml::escape::escape(&text)),
80            Self::Open(element) => {
81                let name: SsmlElement = element.into();
82                write!(f, "<{}{}>", name, element.attribute_string())
83            }
84            Self::Close(element) => {
85                let name: SsmlElement = element.into();
86                write!(f, "</{}>", name)
87            }
88            Self::Empty(element) => {
89                let name: SsmlElement = element.into();
90                write!(f, "<{}{}/>", name, element.attribute_string())
91            }
92        }
93    }
94}
95
96impl Ssml {
97    /// Gets a version of the text with all the SSML tags stripped
98    pub fn get_text(&self) -> &str {
99        &self.text
100    }
101
102    /// From a given span with start/end characters return the text within that span.
103    ///
104    /// # Panics
105    ///
106    /// Will panic if span exceeds the bounds of the text.
107    pub fn get_text_from_span(&self, span: &Span) -> &str {
108        assert!(span.end <= self.text.len() && span.end >= span.start);
109        &self.text[span.start..span.end]
110    }
111
112    /// Get an iterator over the SSML tags - traversed depth first.
113    pub fn tags(&self) -> impl Iterator<Item = &Span> {
114        self.tags.iter()
115    }
116
117    /// Write out the SSML text again - mainly used for testing correctness of implementation.
118    pub fn write_ssml(&self) -> String {
119        let mut ssml_str = String::new();
120
121        use ParserLogEvent::*;
122        for event in self.event_log.iter() {
123            ssml_str.push_str(&match event {
124                Text(span) => {
125                    let (start, end) = *span;
126                    quick_xml::escape::escape(&self.text[start..end]).to_string()
127                }
128                Open(element) => {
129                    let name: SsmlElement = element.into();
130                    format!("<{}{}>", name, element.attribute_string())
131                }
132                Close(element) => {
133                    let name: SsmlElement = element.into();
134                    format!("</{}>", name)
135                }
136                Empty(element) => {
137                    let name: SsmlElement = element.into();
138                    format!("<{}{}/>", name, element.attribute_string())
139                }
140            });
141        }
142
143        ssml_str
144    }
145
146    /// For each parser event to write out apply a transformation to it or return None if it should
147    /// be filtered out. It is up to the implementor to make sure that if an open tag is removed
148    /// the corresponding close tag is removed as well.
149    ///
150    /// TODO this doesn't track if there are tags where inner text shouldn't be synthesised so
151    /// certain transformations will lead to synthesisable_text being incorrect.
152    pub fn write_ssml_with_transform<F>(&self, mut f: F) -> TransformedSsml
153    where
154        F: FnMut(ParserEvent) -> Option<ParserEvent>,
155    {
156        let mut ssml_string = String::new();
157        let mut synthesisable_text = String::new();
158
159        use ParserLogEvent::*;
160        for event in self.event_log.iter().cloned() {
161            let new_event = match event {
162                Text((start, end)) => f(ParserEvent::Text(self.text[start..end].to_string())),
163                Open(element) => f(ParserEvent::Open(element)),
164                Close(element) => f(ParserEvent::Close(element)),
165                Empty(element) => f(ParserEvent::Empty(element)),
166            };
167            if let Some(new_event) = new_event {
168                let string = new_event.to_string();
169                ssml_string.push_str(&string);
170                if let ParserEvent::Text(t) = new_event {
171                    synthesisable_text.push_str(&t);
172                }
173            }
174        }
175        TransformedSsml {
176            ssml_string,
177            synthesisable_text,
178        }
179    }
180
181    /// Turns the SSML document into a stream of events with open/close tags, text and empty
182    /// elements. This will not filter out text that shouldn't be synthesised so it's on the user
183    /// to keep track of this.
184    pub fn event_iter(&self) -> impl Iterator<Item = ParserEvent> + '_ {
185        self.event_log.iter().cloned().map(|x| match x {
186            ParserLogEvent::Text((start, end)) => {
187                ParserEvent::Text(self.text[start..end].to_string())
188            }
189            ParserLogEvent::Open(elem) => ParserEvent::Open(elem),
190            ParserLogEvent::Close(elem) => ParserEvent::Close(elem),
191            ParserLogEvent::Empty(elem) => ParserEvent::Empty(elem),
192        })
193    }
194
195    /// For each parser event to write out apply a transformation to it or return None if it should
196    /// be filtered out. It is up to the implementor to make sure that if an open tag is removed
197    /// the corresponding close tag is removed as well.
198    ///
199    /// TODO this doesn't track if there are tags where inner text shouldn't be synthesised so
200    /// certain transformations will lead to synthesisable_text being incorrect.
201    #[cfg(feature = "async")]
202    pub async fn async_write_ssml_with_transform(
203        self,
204        mut f: impl AsyncSsmlTransformer,
205    ) -> TransformedSsml {
206        let mut ssml_string = String::new();
207        let mut synthesisable_text = String::new();
208
209        use ParserLogEvent::*;
210        for event in self.event_log.iter().cloned() {
211            let new_event = match event {
212                Text(span) => {
213                    let (start, end) = span;
214                    f.apply(ParserEvent::Text(self.text[start..end].to_string()))
215                        .await
216                }
217                Open(element) => f.apply(ParserEvent::Open(element)).await,
218                Close(element) => f.apply(ParserEvent::Close(element)).await,
219                Empty(element) => f.apply(ParserEvent::Empty(element)).await,
220            };
221            if let Some(new_event) = new_event {
222                let string = new_event.to_string();
223                ssml_string.push_str(&string);
224                if let ParserEvent::Text(t) = new_event {
225                    synthesisable_text.push_str(&t);
226                }
227            }
228        }
229        TransformedSsml {
230            ssml_string,
231            synthesisable_text,
232        }
233    }
234}
235
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_ssml;
    use quick_xml::events::Event;
    use quick_xml::reader::Reader;
    use quick_xml::writer::Writer;
    use std::io::Cursor;

    /// Round-trips `xml` through quick_xml with text trimming enabled, so two documents can be
    /// compared without insignificant whitespace getting in the way.
    fn normalise(xml: &str) -> String {
        let mut reader = Reader::from_str(xml);
        reader.trim_text(true);
        let mut writer = Writer::new(Cursor::new(vec![]));

        loop {
            match reader.read_event().unwrap() {
                Event::Eof => break,
                e => writer.write_event(e).unwrap(),
            }
        }

        String::from_utf8(writer.into_inner().into_inner()).unwrap()
    }

    /// Parsing a document and writing it back out must give the same XML, modulo whitespace.
    #[test]
    fn basic_ssml_writing() {
        let ssml = r#"
        <speak version="1.0" xml:lang="string" foo="&amp;" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts">
            <mstts:backgroundaudio fadein="string" fadeout="string" src="string" volume="string"/>
            <voice name="string">
                <bookmark mark="string"/>
                <break strength="medium" time="5s"/>
                <emphasis level="reduced"/>
                <lang xml:lang="string"/>
                <lexicon uri="string" xml:id="some_id"/>
                <math xmlns="http://www.w3.org/1998/Math/MathML"/>
                <mstts:express-as role="string" style="string" styledegree="value"/>
                <mstts:silence type="string" value="string"/>
                <mstts:viseme type="string &amo;"/>
                <p>Some speech! &amp; With correct escaping on text, hopefully. </p>
                <phoneme ph="string" alphabet="string"/>
                <prosody pitch="2.2Hz" contour="(0%,+20Hz) (10%,+30Hz) (40%,+10Hz)" range="-2Hz" rate="20%" volume="2dB"/>
                <s/>
                <say-as interpret-as="string" format="string" detail="string"/>
                <sub alias="correct escaping of attributes &amp;"> Keep me here </sub>
            </voice>
        </speak>        
        "#;

        let rewritten = parse_ssml(ssml).unwrap().write_ssml();

        // Normalise both sides the same way so only structural differences remain.
        let ssml = normalise(ssml);
        let rewritten_trimmed = normalise(&rewritten);

        println!("Original:");
        println!("{}", ssml);
        println!("Rewritten:");
        println!("{}", rewritten_trimmed);

        assert_eq!(ssml, rewritten_trimmed);
    }

    /// Custom (`mstts:*`) tags are stripped and text is rewritten by the transform; the result
    /// must still parse as SSML.
    #[test]
    fn ssml_transformation() {
        let ssml = r#"
        <speak>
            <mstts:backgroundaudio fadein="string" fadeout="string" src="string" volume="string"/>
            <voice name="string">
                <break strength="medium" time="5s"/>
                <emphasis level="reduced"/>
                <lang xml:lang="string"/>
                <lexicon uri="string" xml:id="some_id"/>
                <mstts:express-as role="string" style="string" styledegree="value"/>
                <p>Some speech! &amp; With correct escaping on text, hopefully. </p>
                <phoneme ph="string" alphabet="string"/>
                <prosody pitch="2.2Hz" contour="(0%,+20Hz) (10%,+30Hz) (40%,+10Hz)" range="-2Hz" rate="20%" volume="2dB"/>
            </voice>
        </speak>        
        "#;

        let ssml = parse_ssml(ssml).unwrap();
        // Now here we want to strip away the mstts tags and replace some text to be said. And then
        // we'll reparse and make sure things seem sane

        let transform = |elem| match &elem {
            ParserEvent::Open(element)
            | ParserEvent::Close(element)
            | ParserEvent::Empty(element) => {
                if matches!(element, ParsedElement::Custom(_)) {
                    None
                } else {
                    Some(elem)
                }
            }
            ParserEvent::Text(txt) => {
                let txt = txt.replace("hopefully", "definitely");
                Some(ParserEvent::Text(txt))
            }
        };

        let transformed = ssml.write_ssml_with_transform(transform);
        assert_eq!(
            transformed.synthesisable_text.trim(),
            "Some speech! & With correct escaping on text, definitely."
        );
        assert!(!transformed.ssml_string.contains("mstts:backgroundaudio"));
        assert!(!transformed.ssml_string.contains("mstts:express-as"));
        assert!(transformed.ssml_string.contains("prosody"));

        // and hopefully our ssml is still valid:
        parse_ssml(&transformed.ssml_string).unwrap();
    }
}
356}