1use crate::device::shared::{AiOptions, RecordOptions, TranscribeOptions};
2use crate::{format_number, AsrVendor, FlowState, NumberFormat, Region, VoiceServer};
3use cal_jambonz::dial::TranscribeDial;
4use cal_jambonz::listen::Listen;
5use cal_jambonz::recognizer::Recognizer;
6use cal_jambonz::vendors::amazon::{AWSRecognizer, AwsAsrLanguage};
7use cal_jambonz::vendors::deepgram::DeepgramRecognizer;
8use cal_jambonz::vendors::google::{
9 GoogleInteractionType, GoogleRecognizer, GoogleRecognizerLanguage, GoogleSpeechModel,
10};
11use cal_jambonz::vendors::ibm::IbmRecognizer;
12use cal_jambonz::vendors::microsoft::MSRecognizer;
13use cal_jambonz::vendors::nvidia::NvidiaRecognizer;
14use cal_jambonz::vendors::soniox::SonioxRecognizer;
15use cal_jambonz::verbs;
16use rand::prelude::SliceRandom;
17use rand::rng;
18use std::collections::HashMap;
19use std::sync::Arc;
20
21pub fn get_transcribe(transcribe_opts: TranscribeOptions) -> Option<TranscribeDial> {
22 if transcribe_opts.enabled {
23 match transcribe_opts.language {
24 AsrVendor::Deepgram => {
25 let transcribe = TranscribeDial {
26 transcription_hook: "transcribe".to_string(),
27 recognizer: Recognizer::Deepgram(DeepgramRecognizer {
28 language: None,
29 separate_recognition_per_channel: Some(true),
30 hints: None,
31 vad: None,
32 interim: None,
33 alt_languages: None,
34 asr_dtmf_termination_digit: None,
35 asr_timeout: None,
36 deepgram_options: None,
37 }),
38 };
39 Some(transcribe)
40 }
41 AsrVendor::Google => {
42 let transcribe = TranscribeDial {
43 transcription_hook: "transcribe".to_string(),
44 recognizer: Recognizer::Google(GoogleRecognizer {
45 language: Some(GoogleRecognizerLanguage::EnglishUnitedKingdom),
46 interaction_type: Some(GoogleInteractionType::PhoneCall),
47 model: Some(GoogleSpeechModel::PhoneCall),
48 separate_recognition_per_channel: Some(true),
49 enhanced_model: Some(true),
50 vad: None,
51 interim: None,
52 alt_languages: None,
53 asr_dtmf_termination_digit: None,
54 asr_timeout: None,
55 diarization: None,
56 diarization_min_speakers: None,
57 diarization_max_speakers: None,
58 hints: None,
59 hints_boost: None,
60 naics_code: None,
61 punctuation: None,
62 single_utterance: None,
63 }),
64 };
65 Some(transcribe)
66 }
67 AsrVendor::Aws => {
68 let transcribe = TranscribeDial {
69 transcription_hook: "transcribe".to_string(),
70 recognizer: Recognizer::Aws(AWSRecognizer {
71 language: Some(AwsAsrLanguage::EnglishBritish),
72 separate_recognition_per_channel: Some(true),
73 identify_channels: Some(true),
74 vad: None,
75 interim: None,
76 alt_languages: None,
77 asr_dtmf_termination_digit: None,
78 asr_timeout: None,
79 filter_method: None,
80 vocabulary_name: None,
81 vocabulary_filter_name: None,
82 }),
83 };
84 Some(transcribe)
85 }
86 AsrVendor::Ibm => {
87 let transcribe = TranscribeDial {
88 transcription_hook: "transcribe".to_string(),
89 recognizer: Recognizer::Ibm(IbmRecognizer {
90 vad: None,
91 interim: None,
92 language: None,
93 alt_languages: None,
94 asr_dtmf_termination_digit: None,
95 asr_timeout: None,
96 separate_recognition_per_channel: Some(true),
97 ibm_options: None,
98 }),
99 };
100 Some(transcribe)
101 }
102 AsrVendor::Microsoft => {
103 let transcribe = TranscribeDial {
104 transcription_hook: "transcribe".to_string(),
105 recognizer: Recognizer::Microsoft(MSRecognizer {
106 vad: None,
107 interim: None,
108 language: None,
109 alt_languages: None,
110 asr_dtmf_termination_digit: None,
111 asr_timeout: None,
112 separate_recognition_per_channel: None,
113 azure_service_endpoint: None,
114 azure_options: None,
115 hints: None,
116 initial_speech_timeout_ms: None,
117 profanity_filter: None,
118 profanity_option: None,
119 output_format: None,
120 request_snr: None,
121 }),
122 };
123 Some(transcribe)
124 }
125 AsrVendor::Nuance => {
126 let transcribe = TranscribeDial {
127 transcription_hook: "transcribe".to_string(),
128 recognizer: Recognizer::Microsoft(MSRecognizer {
129 vad: None,
130 interim: None,
131 language: None,
132 alt_languages: None,
133 asr_dtmf_termination_digit: None,
134 asr_timeout: None,
135 separate_recognition_per_channel: None,
136 azure_service_endpoint: None,
137 azure_options: None,
138 hints: None,
139 initial_speech_timeout_ms: None,
140 profanity_filter: None,
141 profanity_option: None,
142 output_format: None,
143 request_snr: None,
144 }),
145 };
146 Some(transcribe)
147 }
148 AsrVendor::Nvidia => {
149 let transcribe = TranscribeDial {
150 transcription_hook: "transcribe".to_string(),
151 recognizer: Recognizer::Nvidia(NvidiaRecognizer {
152 vad: None,
153 interim: None,
154 language: None,
155 alt_languages: None,
156 asr_dtmf_termination_digit: None,
157 asr_timeout: None,
158 separate_recognition_per_channel: None,
159 hints: None,
160 hints_boost: None,
161 nvidia_options: None,
162 }),
163 };
164 Some(transcribe)
165 }
166 AsrVendor::Soniox => {
167 let transcribe = TranscribeDial {
168 transcription_hook: "transcribe".to_string(),
169 recognizer: Recognizer::Soniox(SonioxRecognizer {
170 vad: None,
171 interim: None,
172 language: None,
173 alt_languages: None,
174 asr_dtmf_termination_digit: None,
175 asr_timeout: None,
176 separate_recognition_per_channel: None,
177 hints: None,
178 soniox_options: None,
179 }),
180 };
181 Some(transcribe)
182 }
183 }
184 } else {
185 None
186 }
187}
188
189pub fn get_caller_id(format: &NumberFormat, state: &FlowState) -> String {
190 let caller_id = match state.data.get("from") {
191 Some(dt) => dt.value.clone(),
192 None => state.initial_request.to.clone(),
193 };
194 format_number(
195 caller_id.as_str(),
196 state.account.country_code(),
197 &format.clone(),
198 )
199}
200
201pub fn get_called_id(format: &NumberFormat, state: &FlowState) -> String {
202 let called_id = match state.data.get("to") {
203 Some(dt) => dt.value.clone(),
204 None => state.initial_request.to.clone(),
205 };
206 format_number(
207 called_id.as_str(),
208 state.account.country_code(),
209 &format.clone(),
210 )
211}
212
213pub fn get_proxy(
214 proxies: Vec<String>,
215 state: &FlowState,
216 regions: Vec<Arc<Region>>,
217) -> Option<VoiceServer> {
218 let mut proxies = proxies.clone();
219 proxies.shuffle(&mut rng());
220
221 for proxy in &proxies {
223 if let Some(vs) = state.region.voice_servers.iter().find(|vs| vs.id == *proxy) {
224 return Some(vs.clone());
225 }
226 }
227
228 for region in ®ions {
230 for proxy in &proxies {
231 if let Some(vs) = region.voice_servers.iter().find(|vs| vs.id == *proxy) {
232 return Some(vs.clone());
233 }
234 }
235 }
236
237 None
238}
239
240pub fn get_listen(
241 record_options: &RecordOptions,
242 ai_options: AiOptions,
243 state: &FlowState,
244) -> Option<Listen> {
245 match record_options.enabled {
246 true => {
247 let mut meta: HashMap<String, String> = HashMap::new();
248 meta.insert("type".to_string(), "session:record".to_string());
249 meta.insert("account_id".to_string(), state.account.id.clone());
250 meta.insert(
251 "retention".to_string(),
252 record_options.retention.to_string(),
253 );
254 meta.insert("ai_summary".to_string(), ai_options.summary.to_string());
255 meta.insert(
256 "ai_transcribe".to_string(),
257 ai_options.transcribe.to_string(),
258 );
259
260 let transcribe: Option<verbs::transcribe::Transcribe> = match ai_options.recognizer {
261 Some(recognizer) => Some(verbs::transcribe::Transcribe {
262 transcription_hook: format!("{}/transcribe", state.base_url),
263 recognizer,
264 }),
265 None => None,
266 };
267
268 let listen = Listen::new(
269 format!("{}/record", state.base_url),
270 format!("{}/record-complete", state.base_url),
271 )
272 .mix_type(Some(record_options.mix_type.clone()))
273 .transcribe(transcribe)
274 .metadata(Some(meta))
275 .clone();
276
277 Some(listen)
278 }
279 false => None,
280 }
281}
282
283
284