sapi_lite/tts/speech/
builder.rs

1use std::fmt;
2use std::time::Duration;
3
4use xml::writer::XmlEvent;
5use xml::{EmitterConfig, EventWriter};
6
7use crate::tts::{Voice, VoiceSelector};
8
9use super::{Pitch, Rate, SayAs, Speech, Volume};
10
11/// Helper type that can construct a [`Speech`] from a sequence of rendering instructions.
12///
13/// It's important to understand that the instructions do not override the configuration of the
14/// synthesizer, but adjust them instead. For example, if you call `set_volume(80)` on the
15/// synthesizer, and your speech starts with the instruction `start_volume(50)`, the volume at that
16/// point will be set to 40 (i.e. 50% of 80%).
17///
18/// NOTE: Although any complex speech is encoded as XML, the builder performs no validation. This is
19/// because SAPI itself is very lax when processing speech. For example, SAPI will be perfectly
20/// happy to render the following XML:
21/// ```xml
22/// <emph><volume level="50">Hello</emph>world</volume>
23/// ```
24pub struct SpeechBuilder {
25    state: SpeechBuilderState,
26}
27
28enum SpeechBuilderState {
29    Text(String),
30    Xml(EventWriter<Vec<u8>>),
31}
32
33impl SpeechBuilder {
34    /// Constructs a new, empty instance.
35    pub fn new() -> Self {
36        Self {
37            state: SpeechBuilderState::Text(String::new()),
38        }
39    }
40
41    /// Emphasizes all subsequent speech until the corresponding
42    /// [`end_emphasis`](SpeechBuilder::end_emphasis) call.
43    pub fn start_emphasis(&mut self) -> &mut Self {
44        self.append_xml(XmlEvent::start_element("emph").into())
45    }
46
47    /// Changes the pitch of all subsequent speech until the corresponding
48    /// [`end_pitch`](SpeechBuilder::end_pitch) call.
49    pub fn start_pitch<P: Into<Pitch>>(&mut self, pitch: P) -> &mut Self {
50        self.append_xml(
51            XmlEvent::start_element("pitch")
52                .attr("absmiddle", &pitch.into().to_string())
53                .into(),
54        )
55    }
56
57    /// Changes the rate of all subsequent speech until the corresponding
58    /// [`end_rate`](SpeechBuilder::end_rate) call.
59    pub fn start_rate<R: Into<Rate>>(&mut self, rate: R) -> &mut Self {
60        self.append_xml(
61            XmlEvent::start_element("rate")
62                .attr("absspeed", &rate.into().to_string())
63                .into(),
64        )
65    }
66
67    /// Switches to the specified voice until the corresponding
68    /// [`end_voice`](SpeechBuilder::end_voice) call.
69    pub fn start_voice(&mut self, voice: &Voice) -> &mut Self {
70        let mut selector = VoiceSelector::new();
71        if let Some(name) = voice.name() {
72            selector = selector.name_eq(name.to_string_lossy());
73        }
74        self.select_and_start_voice(Some(selector), None)
75    }
76
77    /// Switches to a voice that matches the specified criteria until the corresponding
78    /// [`end_voice`](SpeechBuilder::end_voice) call. For the explanation of `required` and
79    /// `optional` criteria, see [`installed_voices`](crate::tts::installed_voices).
80    pub fn select_and_start_voice(
81        &mut self,
82        required: Option<VoiceSelector>,
83        optional: Option<VoiceSelector>,
84    ) -> &mut Self {
85        let mut event = XmlEvent::start_element("voice");
86
87        let required_expr = required.map(VoiceSelector::into_sapi_expr);
88        if let Some(required_expr) = required_expr.as_ref() {
89            if !required_expr.is_empty() {
90                event = event.attr("required", &required_expr);
91            }
92        }
93
94        let optional_expr = optional.map(VoiceSelector::into_sapi_expr);
95        if let Some(optional_expr) = optional_expr.as_ref() {
96            if !optional_expr.is_empty() {
97                event = event.attr("optional", optional_expr);
98            }
99        }
100
101        self.append_xml(event.into())
102    }
103
104    /// Changes the volume of all subsequent speech until the corresponding
105    /// [`end_rate`](SpeechBuilder::end_rate) call.
106    pub fn start_volume<V: Into<Volume>>(&mut self, volume: V) -> &mut Self {
107        self.append_xml(
108            XmlEvent::start_element("volume")
109                .attr("level", &volume.into().to_string())
110                .into(),
111        )
112    }
113
114    /// Appends text to pronounce.
115    pub fn say<S: AsRef<str>>(&mut self, text: S) -> &mut Self {
116        // TODO: What about punctuation, whitespace, etc?
117        match &mut self.state {
118            SpeechBuilderState::Text(contents) => {
119                contents.push_str(text.as_ref());
120            }
121            SpeechBuilderState::Xml(writer) => {
122                writer.write(text.as_ref()).unwrap();
123            }
124        };
125        self
126    }
127
128    /// Appends text to pronounce, along witha hint on how to pronounce it.
129    pub fn say_as<S: AsRef<str>>(&mut self, text: S, ctx: SayAs) -> &mut Self {
130        self.append_xml(
131            XmlEvent::start_element("context")
132                .attr("id", ctx.sapi_id())
133                .into(),
134        )
135        .say(text)
136        .end_element("context")
137    }
138
139    /// Appends a specific pronunciation to render. The pronunciation specification depends on the
140    /// language of the current voice. For example, "m ah dh ax r" in American English is pronounced
141    /// as "mother".
142    pub fn pronounce<S: AsRef<str>>(&mut self, pronunciation: S) -> &mut Self {
143        self.append_xml(
144            XmlEvent::start_element("pron")
145                .attr("sym", pronunciation.as_ref())
146                .into(),
147        )
148        .end_element("pron")
149    }
150
151    /// Appends a silence with a specified duration. Does not support sub-millisecond precision.
152    pub fn silence(&mut self, duration: Duration) -> &mut Self {
153        let millis = duration.as_millis();
154        if millis == 0 {
155            return self;
156        }
157
158        self.append_xml(
159            XmlEvent::start_element("silence")
160                .attr("msec", &millis.to_string())
161                .into(),
162        )
163        .end_element("silence")
164    }
165
166    /// Ends the effect of the corresponding [`start_emphasis`](SpeechBuilder::start_emphasis) call.
167    pub fn end_emphasis(&mut self) -> &mut Self {
168        self.end_element("emph")
169    }
170
171    /// Ends the effect of the corresponding [`start_pitch`](SpeechBuilder::start_pitch) call.
172    pub fn end_pitch(&mut self) -> &mut Self {
173        self.end_element("pitch")
174    }
175
176    /// Ends the effect of the corresponding [`start_rate`](SpeechBuilder::start_rate) call.
177    pub fn end_rate(&mut self) -> &mut Self {
178        self.end_element("rate")
179    }
180
181    /// Ends the effect of the corresponding [`start_voice`](SpeechBuilder::start_voice) or
182    /// [`select_and_start_voice`](SpeechBuilder::select_and_start_voice) call.
183    pub fn end_voice(&mut self) -> &mut Self {
184        self.end_element("voice")
185    }
186
187    /// Ends the effect of the corresponding [`start_volume`](SpeechBuilder::start_volume) call.
188    pub fn end_volume(&mut self) -> &mut Self {
189        self.end_element("volume")
190    }
191
192    /// Builds the [`Speech`] from instructions received so far. Clears the contents of the builder.
193    pub fn build<'s>(&mut self) -> Speech<'s> {
194        match std::mem::replace(&mut self.state, SpeechBuilderState::Text(String::new())) {
195            SpeechBuilderState::Text(contents) => Speech::Text(contents.into()),
196            SpeechBuilderState::Xml(writer) => {
197                Speech::Xml(String::from_utf8(writer.into_inner()).unwrap().into())
198            }
199        }
200    }
201
202    fn end_element(&mut self, name: &str) -> &mut Self {
203        self.append_xml(XmlEvent::end_element().name(name).into())
204    }
205
206    fn append_xml(&mut self, event: XmlEvent) -> &mut Self {
207        match &mut self.state {
208            SpeechBuilderState::Text(contents) => {
209                let mut writer = EventWriter::new_with_config(
210                    Vec::new(),
211                    EmitterConfig::new()
212                        .keep_element_names_stack(false)
213                        .write_document_declaration(false),
214                );
215                writer.write(contents.as_ref()).unwrap();
216                writer.write(event).unwrap();
217                self.state = SpeechBuilderState::Xml(writer);
218            }
219            SpeechBuilderState::Xml(writer) => {
220                writer.write(event).unwrap();
221            }
222        }
223        self
224    }
225}
226
227impl fmt::Write for SpeechBuilder {
228    fn write_str(&mut self, s: &str) -> fmt::Result {
229        self.say(s);
230        Ok(())
231    }
232}
233
234impl<'s> From<SpeechBuilder> for Speech<'s> {
235    fn from(mut builder: SpeechBuilder) -> Self {
236        builder.build()
237    }
238}
239
240impl<'s> From<&mut SpeechBuilder> for Speech<'s> {
241    fn from(builder: &mut SpeechBuilder) -> Self {
242        builder.build()
243    }
244}