// azure_speech/recognizer/config.rs

1use crate::config::Device;
2use crate::recognizer::Language;
3use serde::{Deserialize, Serialize};
4
5/// The configuration for the recognizer.
6///
7/// The configuration is used to set the parameters of the speech recognition.
/// The configuration for the recognizer.
///
/// The configuration is used to set the parameters of the speech recognition.
#[derive(Clone, Debug)]
pub struct Config {
    // Information about the audio source, sent to the service (see `set_device`).
    pub(crate) device: Device,

    // Candidate language(s) for recognition; a single entry unless
    // language detection was requested via `set_detect_languages`.
    pub(crate) languages: Vec<Language>,
    // Whether event payloads are simple or detailed.
    pub(crate) output_format: OutputFormat,

    // todo: probably this will be removed and moved directly in the connection.
    pub(crate) mode: RecognitionMode, // todo: what is this?

    // How the spoken language is detected (continuous or at audio start);
    // None when detection is disabled.
    pub(crate) language_detect_mode: Option<LanguageDetectMode>,

    // Phrase hints that bias recognition towards expected vocabulary; None when unused.
    pub(crate) phrases: Option<Vec<String>>,

    // Custom model pairs; exact meaning of the two strings is not clear
    // from this file — TODO confirm (endpoint id + model?).
    pub(crate) custom_models: Option<Vec<(String, String)>>,

    pub(crate) connection_id: Option<String>, // todo: what is this for?

    // Whether the service may store audio/content logs (see `enable_audio_logging`).
    pub(crate) store_audio: bool, // todo: is this needed?

    // How profanity is rendered in results (masked / removed / raw).
    pub(crate) profanity: Profanity,
    // todo: check diarization https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-stt-diarization?tabs=macos&pivots=programming-language-javascript
    // probably will be moved from here and added to a separate module.
    //pub(crate) recognize_speaker: bool,

    // todo add more detailed configuration from default:  src/common.speech/ConnectionFactoryBase.ts
}
35
36impl Default for Config {
37    fn default() -> Self {
38        Config {
39            languages: vec![Language::default()],
40            output_format: OutputFormat::Simple,
41            mode: RecognitionMode::Conversation,
42            language_detect_mode: None,
43            phrases: None,
44            custom_models: None,
45            connection_id: None,
46            store_audio: false,
47            device: Device::default(),
48            profanity: Profanity::Masked,
49        }
50    }
51}
52
53impl Config {
54    /// Enable audio logging in service.
55    ///
56    /// Audio and content logs are stored either in Microsoft-owned storage, or in your own storage account linked
57    /// to your Cognitive Services subscription (Bring Your Own Storage (BYOS) enabled Speech resource).
58    /// The logs will be removed after 30 days.
59    pub fn enable_audio_logging(mut self) -> Self {
60        self.store_audio = true;
61        self
62    }
63
64    /// Set Device information.
65    ///
66    /// The device information is used to provide information about the source.
67    /// Some default values are already set.
68    pub fn set_device(mut self, device: Device) -> Self {
69        self.device = device;
70        self
71    }
72
73    /// Mask the profanity.
74    pub fn set_profanity(mut self, profanity: Profanity) -> Self {
75        self.profanity = profanity;
76        self
77    }
78
79    /// Set the default language for the recognition.
80    ///
81    /// If needed multiple language detection, use the set_detect_languages method.
82    pub fn set_language(mut self, language: Language) -> Self {
83        self.languages = vec![language];
84        self
85    }
86
87    /// Instruct to detect the languages from the audio.
88    ///
89    /// The language detection is used to detect the language of the audio.
90    /// This could not match the language of the audio, but it is used to provide better recognition.
91    pub fn set_detect_languages(
92        mut self,
93        languages: Vec<Language>,
94        language_detect_mode: LanguageDetectMode,
95    ) -> Self {
96        self.languages = languages;
97        self.language_detect_mode = Some(language_detect_mode);
98        self
99    }
100
101    /// Helping phrases to detect better the context.
102    ///
103    /// Untested.
104    pub fn set_phrases(mut self, phrases: Vec<String>) -> Self {
105        self.phrases = Some(phrases);
106        self
107    }
108
109    /// Use custom Models.
110    ///
111    /// Untested.
112    pub fn set_custom_models(mut self, custom_models: Vec<(String, String)>) -> Self {
113        self.custom_models = Some(custom_models);
114        self
115    }
116
117    /// Set the recognition mode.
118    ///
119    /// *Only the Conversation mode was tested.*
120    #[allow(dead_code)]
121    pub fn set_recognition_mode(mut self, mode: RecognitionMode) -> Self {
122        self.mode = mode;
123        self
124    }
125
126    /// Set the output format of event responses.
127    ///
128    /// You will find the json in each event with Message.json() method.
129    pub fn set_output_format(mut self, format: OutputFormat) -> Self {
130        self.output_format = format;
131        self
132    }
133
134    //
135    // pub fn enable_recognize_speaker(mut self) -> Self {
136    //     self.recognize_speaker = true;
137    //     self
138    // }
139}
140
#[derive(Debug, Clone, Default)]
/// The profanity level.
///
/// Controls how profane words appear in recognition results
/// (wire values: "masked" / "removed" / "raw").
pub enum Profanity {
    /// Profanity is masked in the output (the default).
    #[allow(missing_docs)]
    #[default]
    Masked,
    /// Profanity is removed from the output.
    #[allow(missing_docs)]
    Removed,
    /// Profanity is left as-is in the output.
    #[allow(missing_docs)]
    Raw,
}
152
153impl Profanity {
154    pub(crate) fn as_str(&self) -> &'static str {
155        match self {
156            Profanity::Masked => "masked",
157            Profanity::Removed => "removed",
158            Profanity::Raw => "raw",
159        }
160    }
161}
162
#[derive(Debug, Clone)]
/// The configuration for the silence detection.
///
/// Untested.
pub struct Silence {
    /// Timeout in milliseconds for initial silence — exact service semantics
    /// not verifiable from this file; TODO confirm.
    #[allow(missing_docs)]
    pub initial_timeout_ms: Option<i32>,
    /// Timeout in milliseconds for trailing silence — TODO confirm semantics.
    #[allow(missing_docs)]
    pub end_timeout_ms: Option<i32>,
    /// Timeout in milliseconds used for segmentation — TODO confirm semantics.
    #[allow(missing_docs)]
    pub segmentation_timeout_ms: Option<i32>,
}
175
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
/// The recognition mode.
///
/// Serialized as the lowercase mode name (see the serde renames / `as_str`).
pub enum RecognitionMode {
    /// Use this mode for normal conversation. This is the default.
    #[serde(rename = "conversation")]
    #[default]
    Conversation,
    /// Untested.
    #[serde(rename = "interactive")]
    Interactive,
    /// Untested.
    #[serde(rename = "dictation")]
    Dictation,
}
190
191impl RecognitionMode {
192    pub(crate) fn as_str(self) -> &'static str {
193        match self {
194            RecognitionMode::Conversation => "conversation",
195            RecognitionMode::Interactive => "interactive",
196            RecognitionMode::Dictation => "dictation",
197        }
198    }
199}
200
#[derive(Debug, Clone, Eq, PartialEq, Default)]
/// The output format of the messages.
pub enum OutputFormat {
    /// Minimal result payload (the default).
    #[allow(missing_docs)]
    #[default]
    Simple,
    /// Richer result payload.
    #[allow(missing_docs)]
    Detailed,
}
210
211impl OutputFormat {
212    pub(crate) fn as_str(&self) -> &'static str {
213        match self {
214            OutputFormat::Simple => "simple",
215            OutputFormat::Detailed => "detailed",
216        }
217    }
218}
219
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
/// How the spoken language is detected during recognition.
pub enum LanguageDetectMode {
    /// Detect the language continuously throughout the audio. This is the default.
    #[serde(rename = "DetectContinuous")]
    #[default]
    Continuous,
    /// Detect the language once, at the start of the audio.
    #[serde(rename = "DetectAtAudioStart")]
    AtStart,
}
231
#[derive(Debug, Clone, Default, Serialize)]
/// Details of the audio source device.
///
/// This is used to provide information about the source to the service.
pub struct AudioDevice {
    /// Name of the Audio Device
    pub(crate) name: String,
    /// Model of the Audio Device
    pub(crate) model: String,
    /// Manufacturer of the Audio Device
    pub(crate) manufacturer: String,
    /// Type of the Audio Device (serialized as "type", a Rust keyword)
    #[serde(rename = "type")]
    pub(crate) source: SourceType,
    /// Connectivity of the Audio Device
    pub(crate) connectivity: ConnectionType,
}
249
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)]
/// How the audio device is connected to the host.
pub enum ConnectionType {
    /// Connected via Bluetooth.
    Bluetooth,
    /// Connected via a wired interface.
    Wired,
    /// Connected over WiFi.
    WiFi,
    /// Connected over a cellular network.
    Cellular,
    /// Built into the device.
    InBuilt,
    /// Connectivity is unknown (the default).
    #[default]
    Unknown,
}
260
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)]
/// The kind of device or source the audio comes from.
pub enum SourceType {
    /// A phone.
    Phone,
    /// A smart speaker.
    Speaker,
    /// A car audio system.
    Car,
    /// A headset.
    Headset,
    /// A thermostat.
    Thermostat,
    /// One or more microphones.
    Microphones,
    /// A desk phone.
    Deskphone,
    /// A remote control.
    RemoteControl,
    /// The source is unknown (the default).
    #[default]
    Unknown,
    /// An audio file.
    File,
    /// An audio stream.
    Stream,
}
276
277impl AudioDevice {
278    /// Create a new Details instance
279    pub fn new(source: SourceType) -> Self {
280        AudioDevice {
281            source,
282            ..Default::default()
283        }
284    }
285
286    pub fn with_name(mut self, name: impl Into<String>) -> Self {
287        self.name = name.into();
288        self
289    }
290
291    pub fn with_model(mut self, model: impl Into<String>) -> Self {
292        self.model = model.into();
293        self
294    }
295
296    pub fn with_manufacturer(mut self, manufacturer: impl Into<String>) -> Self {
297        self.manufacturer = manufacturer.into();
298        self
299    }
300
301    pub fn with_connectivity(mut self, connectivity: ConnectionType) -> Self {
302        self.connectivity = connectivity;
303        self
304    }
305
306    pub fn with_source(mut self, source: SourceType) -> Self {
307        self.source = source;
308        self
309    }
310
311    #[allow(missing_docs)]
312    pub fn unknown() -> Self {
313        AudioDevice::new(SourceType::Unknown)
314    }
315
316    #[allow(missing_docs)]
317    pub fn stream() -> Self {
318        AudioDevice::new(SourceType::Stream)
319    }
320    #[allow(missing_docs)]
321    pub fn microphone(
322        name: impl Into<String>,
323        manufacture: impl Into<String>,
324        connectivity: ConnectionType,
325    ) -> Self {
326        AudioDevice::new(SourceType::Microphones)
327            .with_connectivity(connectivity)
328            .with_manufacturer(manufacture)
329            .with_name(name)
330    }
331    #[allow(missing_docs)]
332    pub fn file() -> Self {
333        AudioDevice::new(SourceType::File)
334    }
335}