cal_core/device/utils.rs

use crate::device::device::{AiOptions, RecordOptions, TranscribeOptions};
use crate::{format_number, AsrVendor, FlowState, NumberFormat, VoiceServer};
use cal_jambonz::dial::TranscribeDial;
use cal_jambonz::listen::Listen;
use cal_jambonz::recognizer::Recognizer;
use cal_jambonz::vendors::amazon::{AWSRecognizer, AwsAsrLanguage};
use cal_jambonz::vendors::deepgram::DeepgramRecognizer;
use cal_jambonz::vendors::google::{
    GoogleInteractionType, GoogleRecognizer, GoogleRecognizerLanguage, GoogleSpeechModel,
};
use cal_jambonz::vendors::ibm::IbmRecognizer;
use cal_jambonz::vendors::microsoft::MSRecognizer;
use cal_jambonz::vendors::nvidia::NvidiaRecognizer;
use cal_jambonz::vendors::soniox::SonioxRecognizer;
use cal_jambonz::verbs;
use rand::prelude::SliceRandom;
use rand::rng;
use std::collections::HashMap;

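/// Builds the `TranscribeDial` configuration for a dial leg, selecting
/// recognizer defaults for the ASR vendor carried in `transcribe_opts.language`.
/// Returns `None` when transcription is disabled.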
pub fn get_transcribe(transcribe_opts: TranscribeOptions) -> Option<TranscribeDial> {
    if transcribe_opts.enabled {
        match transcribe_opts.language {
            AsrVendor::Deepgram => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Deepgram(DeepgramRecognizer {
                        language: None,
                        separate_recognition_per_channel: Some(true),
                        hints: None,
                        vad: None,
                        interim: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        deepgram_options: None,
                    }),
                };
                Some(transcribe)
            }
            AsrVendor::Google => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Google(GoogleRecognizer {
                        language: Some(GoogleRecognizerLanguage::EnglishUnitedKingdom),
                        interaction_type: Some(GoogleInteractionType::PhoneCall),
                        model: Some(GoogleSpeechModel::PhoneCall),
                        separate_recognition_per_channel: Some(true),
                        enhanced_model: Some(true),
                        vad: None,
                        interim: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        diarization: None,
                        diarization_min_speakers: None,
                        diarization_max_speakers: None,
                        hints: None,
                        hints_boost: None,
                        naics_code: None,
                        punctuation: None,
                        single_utterance: None,
                    }),
                };
                Some(transcribe)
            }
            AsrVendor::Aws => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Aws(AWSRecognizer {
                        language: Some(AwsAsrLanguage::EnglishBritish),
                        separate_recognition_per_channel: Some(true),
                        identify_channels: Some(true),
                        vad: None,
                        interim: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        filter_method: None,
                        vocabulary_name: None,
                        vocabulary_filter_name: None,
                    }),
                };
                Some(transcribe)
            }
            AsrVendor::Ibm => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Ibm(IbmRecognizer {
                        vad: None,
                        interim: None,
                        language: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        separate_recognition_per_channel: Some(true),
                        ibm_options: None,
                    }),
                };
                Some(transcribe)
            }
            AsrVendor::Microsoft => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Microsoft(MSRecognizer {
                        vad: None,
                        interim: None,
                        language: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        separate_recognition_per_channel: None,
                        azure_service_endpoint: None,
                        azure_options: None,
                        hints: None,
                        initial_speech_timeout_ms: None,
                        profanity_filter: None,
                        profanity_option: None,
                        output_format: None,
                        request_snr: None,
                    }),
                };
                Some(transcribe)
            }
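            // No dedicated Nuance recognizer is wired up here; the Nuance arm
            // reuses the Microsoft recognizer defaults.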
            AsrVendor::Nuance => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Microsoft(MSRecognizer {
                        vad: None,
                        interim: None,
                        language: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        separate_recognition_per_channel: None,
                        azure_service_endpoint: None,
                        azure_options: None,
                        hints: None,
                        initial_speech_timeout_ms: None,
                        profanity_filter: None,
                        profanity_option: None,
                        output_format: None,
                        request_snr: None,
                    }),
                };
                Some(transcribe)
            }
            AsrVendor::Nvidia => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Nvidia(NvidiaRecognizer {
                        vad: None,
                        interim: None,
                        language: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        separate_recognition_per_channel: None,
                        hints: None,
                        hints_boost: None,
                        nvidia_options: None,
                    }),
                };
                Some(transcribe)
            }
            AsrVendor::Soniox => {
                let transcribe = TranscribeDial {
                    transcription_hook: "transcribe".to_string(),
                    recognizer: Recognizer::Soniox(SonioxRecognizer {
                        vad: None,
                        interim: None,
                        language: None,
                        alt_languages: None,
                        asr_dtmf_termination_digit: None,
                        asr_timeout: None,
                        separate_recognition_per_channel: None,
                        hints: None,
                        soniox_options: None,
                    }),
                };
                Some(transcribe)
            }
        }
    } else {
        None
    }
}

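/// Resolves the caller ID from the collected "from" flow data, falling back
/// to the initial request, and formats it for the account's country code.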
pub fn get_caller_id(format: &NumberFormat, state: &FlowState) -> String {
    let caller_id = match state.data.get("from") {
        Some(dt) => dt.value.clone(),
        None => state.initial_request.from.clone(),
    };
    format_number(
        caller_id.as_str(),
        state.account.country_code(),
        format.clone(),
    )
}

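/// Resolves the called number from the collected "to" flow data, falling back
/// to the initial request's destination, and formats it for the account's
/// country code.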
pub fn get_called_id(format: &NumberFormat, state: &FlowState) -> String {
    let called_id = match state.data.get("to") {
        Some(dt) => dt.value.clone(),
        None => state.initial_request.to.clone(),
    };
    format_number(
        called_id.as_str(),
        state.account.country_code(),
        format.clone(),
    )
}

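/// Selects a voice server for the given proxy ids: proxies are shuffled to
/// spread load, servers in the current region are preferred, and other
/// regions are searched only when no local match is found.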
pub fn get_proxy(proxies: Vec<String>, state: &FlowState) -> Option<VoiceServer> {
    let mut voice_server: Option<&VoiceServer> = None;

    // Shuffle proxies to distribute calls across servers.
    let mut proxies = proxies;
    proxies.shuffle(&mut rng());

    // Prefer servers in the current region.
    for proxy in &proxies {
        voice_server = state.region.voice_servers.iter().find(|vs| vs.id == *proxy);
        if voice_server.is_some() {
            break;
        }
    }

    // Otherwise take the first matching server from any other region.
    if voice_server.is_none() {
        'regions: for region in &state.regions {
            for proxy in &proxies {
                voice_server = region.voice_servers.iter().find(|vs| vs.id == *proxy);
                if voice_server.is_some() {
                    break 'regions;
                }
            }
        }
    }
    voice_server.cloned()
}

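/// Builds the `listen` verb for session recording when recording is enabled,
/// attaching account, retention, and AI metadata plus an optional transcribe
/// configuration driven by `ai_options`.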
pub fn get_listen(
    record_options: &RecordOptions,
    ai_options: AiOptions,
    state: &FlowState,
) -> Option<Listen> {
    if !record_options.enabled {
        return None;
    }

    let mut meta: HashMap<String, String> = HashMap::new();
    meta.insert("type".to_string(), "session:record".to_string());
    meta.insert("account_id".to_string(), state.account.id.clone());
    meta.insert(
        "retention".to_string(),
        record_options.retention.to_string(),
    );
    meta.insert("ai_summary".to_string(), ai_options.summary.to_string());
    meta.insert(
        "ai_transcribe".to_string(),
        ai_options.transcribe.to_string(),
    );

    let transcribe = ai_options
        .recognizer
        .map(|recognizer| verbs::transcribe::Transcribe {
            transcription_hook: format!("{}/transcribe", state.base_url),
            recognizer,
        });

    let listen = Listen::new(
        format!("{}/record", state.base_url),
        format!("{}/record-complete", state.base_url),
    )
    .mix_type(Some(record_options.mix_type.clone()))
    .transcribe(transcribe)
    .metadata(Some(meta))
    .clone();

    Some(listen)
}