cal_core/device/
utils.rs

1use crate::device::shared::{AiOptions, RecordOptions, TranscribeOptions};
2use crate::{format_number, AsrVendor, FlowState, NumberFormat, Region, VoiceServer};
3use cal_jambonz::dial::TranscribeDial;
4use cal_jambonz::listen::Listen;
5use cal_jambonz::recognizer::Recognizer;
6use cal_jambonz::vendors::amazon::{AWSRecognizer, AwsAsrLanguage};
7use cal_jambonz::vendors::deepgram::DeepgramRecognizer;
8use cal_jambonz::vendors::google::{
9    GoogleInteractionType, GoogleRecognizer, GoogleRecognizerLanguage, GoogleSpeechModel,
10};
11use cal_jambonz::vendors::ibm::IbmRecognizer;
12use cal_jambonz::vendors::microsoft::MSRecognizer;
13use cal_jambonz::vendors::nvidia::NvidiaRecognizer;
14use cal_jambonz::vendors::soniox::SonioxRecognizer;
15use cal_jambonz::verbs;
16use rand::prelude::SliceRandom;
17use rand::rng;
18use std::collections::HashMap;
19use std::sync::Arc;
20
21pub fn get_transcribe(transcribe_opts: TranscribeOptions) -> Option<TranscribeDial> {
22    if transcribe_opts.enabled {
23        match transcribe_opts.language {
24            AsrVendor::Deepgram => {
25                let transcribe = TranscribeDial {
26                    transcription_hook: "transcribe".to_string(),
27                    recognizer: Recognizer::Deepgram(DeepgramRecognizer {
28                        language: None,
29                        separate_recognition_per_channel: Some(true),
30                        hints: None,
31                        vad: None,
32                        interim: None,
33                        alt_languages: None,
34                        asr_dtmf_termination_digit: None,
35                        asr_timeout: None,
36                        deepgram_options: None,
37                    }),
38                };
39                Some(transcribe)
40            }
41            AsrVendor::Google => {
42                let transcribe = TranscribeDial {
43                    transcription_hook: "transcribe".to_string(),
44                    recognizer: Recognizer::Google(GoogleRecognizer {
45                        language: Some(GoogleRecognizerLanguage::EnglishUnitedKingdom),
46                        interaction_type: Some(GoogleInteractionType::PhoneCall),
47                        model: Some(GoogleSpeechModel::PhoneCall),
48                        separate_recognition_per_channel: Some(true),
49                        enhanced_model: Some(true),
50                        vad: None,
51                        interim: None,
52                        alt_languages: None,
53                        asr_dtmf_termination_digit: None,
54                        asr_timeout: None,
55                        diarization: None,
56                        diarization_min_speakers: None,
57                        diarization_max_speakers: None,
58                        hints: None,
59                        hints_boost: None,
60                        naics_code: None,
61                        punctuation: None,
62                        single_utterance: None,
63                    }),
64                };
65                Some(transcribe)
66            }
67            AsrVendor::Aws => {
68                let transcribe = TranscribeDial {
69                    transcription_hook: "transcribe".to_string(),
70                    recognizer: Recognizer::Aws(AWSRecognizer {
71                        language: Some(AwsAsrLanguage::EnglishBritish),
72                        separate_recognition_per_channel: Some(true),
73                        identify_channels: Some(true),
74                        vad: None,
75                        interim: None,
76                        alt_languages: None,
77                        asr_dtmf_termination_digit: None,
78                        asr_timeout: None,
79                        filter_method: None,
80                        vocabulary_name: None,
81                        vocabulary_filter_name: None,
82                    }),
83                };
84                Some(transcribe)
85            }
86            AsrVendor::Ibm => {
87                let transcribe = TranscribeDial {
88                    transcription_hook: "transcribe".to_string(),
89                    recognizer: Recognizer::Ibm(IbmRecognizer {
90                        vad: None,
91                        interim: None,
92                        language: None,
93                        alt_languages: None,
94                        asr_dtmf_termination_digit: None,
95                        asr_timeout: None,
96                        separate_recognition_per_channel: Some(true),
97                        ibm_options: None,
98                    }),
99                };
100                Some(transcribe)
101            }
102            AsrVendor::Microsoft => {
103                let transcribe = TranscribeDial {
104                    transcription_hook: "transcribe".to_string(),
105                    recognizer: Recognizer::Microsoft(MSRecognizer {
106                        vad: None,
107                        interim: None,
108                        language: None,
109                        alt_languages: None,
110                        asr_dtmf_termination_digit: None,
111                        asr_timeout: None,
112                        separate_recognition_per_channel: None,
113                        azure_service_endpoint: None,
114                        azure_options: None,
115                        hints: None,
116                        initial_speech_timeout_ms: None,
117                        profanity_filter: None,
118                        profanity_option: None,
119                        output_format: None,
120                        request_snr: None,
121                    }),
122                };
123                Some(transcribe)
124            }
125            AsrVendor::Nuance => {
126                let transcribe = TranscribeDial {
127                    transcription_hook: "transcribe".to_string(),
128                    recognizer: Recognizer::Microsoft(MSRecognizer {
129                        vad: None,
130                        interim: None,
131                        language: None,
132                        alt_languages: None,
133                        asr_dtmf_termination_digit: None,
134                        asr_timeout: None,
135                        separate_recognition_per_channel: None,
136                        azure_service_endpoint: None,
137                        azure_options: None,
138                        hints: None,
139                        initial_speech_timeout_ms: None,
140                        profanity_filter: None,
141                        profanity_option: None,
142                        output_format: None,
143                        request_snr: None,
144                    }),
145                };
146                Some(transcribe)
147            }
148            AsrVendor::Nvidia => {
149                let transcribe = TranscribeDial {
150                    transcription_hook: "transcribe".to_string(),
151                    recognizer: Recognizer::Nvidia(NvidiaRecognizer {
152                        vad: None,
153                        interim: None,
154                        language: None,
155                        alt_languages: None,
156                        asr_dtmf_termination_digit: None,
157                        asr_timeout: None,
158                        separate_recognition_per_channel: None,
159                        hints: None,
160                        hints_boost: None,
161                        nvidia_options: None,
162                    }),
163                };
164                Some(transcribe)
165            }
166            AsrVendor::Soniox => {
167                let transcribe = TranscribeDial {
168                    transcription_hook: "transcribe".to_string(),
169                    recognizer: Recognizer::Soniox(SonioxRecognizer {
170                        vad: None,
171                        interim: None,
172                        language: None,
173                        alt_languages: None,
174                        asr_dtmf_termination_digit: None,
175                        asr_timeout: None,
176                        separate_recognition_per_channel: None,
177                        hints: None,
178                        soniox_options: None,
179                    }),
180                };
181                Some(transcribe)
182            }
183        }
184    } else {
185        None
186    }
187}
188
189pub fn get_caller_id(format: &NumberFormat, state: &FlowState) -> String {
190    let caller_id = match state.data.get("from") {
191        Some(dt) => dt.value.clone(),
192        None => state.initial_request.to.clone(),
193    };
194    format_number(
195        caller_id.as_str(),
196        state.account.country_code(),
197        &format.clone(),
198    )
199}
200
201pub fn get_called_id(format: &NumberFormat, state: &FlowState) -> String {
202    let called_id = match state.data.get("to") {
203        Some(dt) => dt.value.clone(),
204        None => state.initial_request.to.clone(),
205    };
206    format_number(
207        called_id.as_str(),
208        state.account.country_code(),
209        &format.clone(),
210    )
211}
212
213pub fn get_proxy(
214    proxies: Vec<String>,
215    state: &FlowState,
216    regions: Vec<Arc<Region>>,
217) -> Option<VoiceServer> {
218    let mut proxies = proxies.clone();
219    proxies.shuffle(&mut rng());
220
221    // Prefer current region servers
222    for proxy in &proxies {
223        if let Some(vs) = state.region.voice_servers.iter().find(|vs| vs.id == *proxy) {
224            return Some(vs.clone());
225        }
226    }
227
228    // Try any other matching server
229    for region in &regions {
230        for proxy in &proxies {
231            if let Some(vs) = region.voice_servers.iter().find(|vs| vs.id == *proxy) {
232                return Some(vs.clone());
233            }
234        }
235    }
236
237    None
238}
239
240pub fn get_listen(
241    record_options: &RecordOptions,
242    ai_options: AiOptions,
243    state: &FlowState,
244) -> Option<Listen> {
245    match record_options.enabled {
246        true => {
247            let mut meta: HashMap<String, String> = HashMap::new();
248            meta.insert("type".to_string(), "session:record".to_string());
249            meta.insert("account_id".to_string(), state.account.id.clone());
250            meta.insert(
251                "retention".to_string(),
252                record_options.retention.to_string(),
253            );
254            meta.insert("ai_summary".to_string(), ai_options.summary.to_string());
255            meta.insert(
256                "ai_transcribe".to_string(),
257                ai_options.transcribe.to_string(),
258            );
259
260            let transcribe: Option<verbs::transcribe::Transcribe> = match ai_options.recognizer {
261                Some(recognizer) => Some(verbs::transcribe::Transcribe {
262                    transcription_hook: format!("{}/transcribe", state.base_url),
263                    recognizer,
264                }),
265                None => None,
266            };
267
268            let listen = Listen::new(
269                format!("{}/record", state.base_url),
270                format!("{}/record-complete", state.base_url),
271            )
272            .mix_type(Some(record_options.mix_type.clone()))
273            .transcribe(transcribe)
274            .metadata(Some(meta))
275            .clone();
276
277            Some(listen)
278        }
279        false => None,
280    }
281}
282
283
284