1use crate::device::device::{AiOptions, RecordOptions, TranscribeOptions};
2use crate::{format_number, AsrVendor, FlowState, NumberFormat, VoiceServer};
3use cal_jambonz::dial::TranscribeDial;
4use cal_jambonz::listen::Listen;
5use cal_jambonz::recognizer::Recognizer;
6use cal_jambonz::vendors::amazon::{AWSRecognizer, AwsAsrLanguage};
7use cal_jambonz::vendors::deepgram::DeepgramRecognizer;
8use cal_jambonz::vendors::google::{
9 GoogleInteractionType, GoogleRecognizer, GoogleRecognizerLanguage, GoogleSpeechModel,
10};
11use cal_jambonz::vendors::ibm::IbmRecognizer;
12use cal_jambonz::vendors::microsoft::MSRecognizer;
13use cal_jambonz::vendors::nvidia::NvidiaRecognizer;
14use cal_jambonz::vendors::soniox::SonioxRecognizer;
15use cal_jambonz::verbs;
16use rand::prelude::SliceRandom;
17use rand::rng;
18use std::collections::HashMap;
19
20pub fn get_transcribe(transcribe_opts: TranscribeOptions) -> Option<TranscribeDial> {
21 if transcribe_opts.enabled {
22 match transcribe_opts.language {
23 AsrVendor::Deepgram => {
24 let transcribe = TranscribeDial {
25 transcription_hook: "transcribe".to_string(),
26 recognizer: Recognizer::Deepgram(DeepgramRecognizer {
27 language: None,
28 separate_recognition_per_channel: Some(true),
29 hints: None,
30 vad: None,
31 interim: None,
32 alt_languages: None,
33 asr_dtmf_termination_digit: None,
34 asr_timeout: None,
35 deepgram_options: None,
36 }),
37 };
38 Some(transcribe)
39 }
40 AsrVendor::Google => {
41 let transcribe = TranscribeDial {
42 transcription_hook: "transcribe".to_string(),
43 recognizer: Recognizer::Google(GoogleRecognizer {
44 language: Some(GoogleRecognizerLanguage::EnglishUnitedKingdom),
45 interaction_type: Some(GoogleInteractionType::PhoneCall),
46 model: Some(GoogleSpeechModel::PhoneCall),
47 separate_recognition_per_channel: Some(true),
48 enhanced_model: Some(true),
49 vad: None,
50 interim: None,
51 alt_languages: None,
52 asr_dtmf_termination_digit: None,
53 asr_timeout: None,
54 diarization: None,
55 diarization_min_speakers: None,
56 diarization_max_speakers: None,
57 hints: None,
58 hints_boost: None,
59 naics_code: None,
60 punctuation: None,
61 single_utterance: None,
62 }),
63 };
64 Some(transcribe)
65 }
66 AsrVendor::Aws => {
67 let transcribe = TranscribeDial {
68 transcription_hook: "transcribe".to_string(),
69 recognizer: Recognizer::Aws(AWSRecognizer {
70 language: Some(AwsAsrLanguage::EnglishBritish),
71 separate_recognition_per_channel: Some(true),
72 identify_channels: Some(true),
73 vad: None,
74 interim: None,
75 alt_languages: None,
76 asr_dtmf_termination_digit: None,
77 asr_timeout: None,
78 filter_method: None,
79 vocabulary_name: None,
80 vocabulary_filter_name: None,
81 }),
82 };
83 Some(transcribe)
84 }
85 AsrVendor::Ibm => {
86 let transcribe = TranscribeDial {
87 transcription_hook: "transcribe".to_string(),
88 recognizer: Recognizer::Ibm(IbmRecognizer {
89 vad: None,
90 interim: None,
91 language: None,
92 alt_languages: None,
93 asr_dtmf_termination_digit: None,
94 asr_timeout: None,
95 separate_recognition_per_channel: Some(true),
96 ibm_options: None,
97 }),
98 };
99 Some(transcribe)
100 }
101 AsrVendor::Microsoft => {
102 let transcribe = TranscribeDial {
103 transcription_hook: "transcribe".to_string(),
104 recognizer: Recognizer::Microsoft(MSRecognizer {
105 vad: None,
106 interim: None,
107 language: None,
108 alt_languages: None,
109 asr_dtmf_termination_digit: None,
110 asr_timeout: None,
111 separate_recognition_per_channel: None,
112 azure_service_endpoint: None,
113 azure_options: None,
114 hints: None,
115 initial_speech_timeout_ms: None,
116 profanity_filter: None,
117 profanity_option: None,
118 output_format: None,
119 request_snr: None,
120 }),
121 };
122 Some(transcribe)
123 }
124 AsrVendor::Nuance => {
125 let transcribe = TranscribeDial {
126 transcription_hook: "transcribe".to_string(),
127 recognizer: Recognizer::Microsoft(MSRecognizer {
128 vad: None,
129 interim: None,
130 language: None,
131 alt_languages: None,
132 asr_dtmf_termination_digit: None,
133 asr_timeout: None,
134 separate_recognition_per_channel: None,
135 azure_service_endpoint: None,
136 azure_options: None,
137 hints: None,
138 initial_speech_timeout_ms: None,
139 profanity_filter: None,
140 profanity_option: None,
141 output_format: None,
142 request_snr: None,
143 }),
144 };
145 Some(transcribe)
146 }
147 AsrVendor::Nvidia => {
148 let transcribe = TranscribeDial {
149 transcription_hook: "transcribe".to_string(),
150 recognizer: Recognizer::Nvidia(NvidiaRecognizer {
151 vad: None,
152 interim: None,
153 language: None,
154 alt_languages: None,
155 asr_dtmf_termination_digit: None,
156 asr_timeout: None,
157 separate_recognition_per_channel: None,
158 hints: None,
159 hints_boost: None,
160 nvidia_options: None,
161 }),
162 };
163 Some(transcribe)
164 }
165 AsrVendor::Soniox => {
166 let transcribe = TranscribeDial {
167 transcription_hook: "transcribe".to_string(),
168 recognizer: Recognizer::Soniox(SonioxRecognizer {
169 vad: None,
170 interim: None,
171 language: None,
172 alt_languages: None,
173 asr_dtmf_termination_digit: None,
174 asr_timeout: None,
175 separate_recognition_per_channel: None,
176 hints: None,
177 soniox_options: None,
178 }),
179 };
180 Some(transcribe)
181 }
182 }
183 } else {
184 None
185 }
186}
187
188pub fn get_caller_id(format: &NumberFormat, state: &FlowState) -> String {
189 let caller_id = match state.data.get("from") {
190 Some(dt) => dt.value.clone(),
191 None => state.initial_request.to.clone(),
192 };
193 format_number(
194 caller_id.as_str(),
195 state.account.country_code(),
196 format.clone(),
197 )
198}
199
200pub fn get_called_id(format: &NumberFormat, state: &FlowState) -> String {
201 let called_id = match state.data.get("to") {
202 Some(dt) => dt.value.clone(),
203 None => state.initial_request.to.clone(),
204 };
205 format_number(
206 called_id.as_str(),
207 state.account.country_code(),
208 format.clone(),
209 )
210}
211
212pub fn get_proxy(proxies: Vec<String>, state: &FlowState) -> Option<VoiceServer> {
213 let mut voice_server: Option<&VoiceServer> = None;
214
215 let mut proxies = proxies.clone();
217 proxies.shuffle(&mut rng());
218
219 for proxy in proxies.clone() {
221 voice_server = state.region.voice_servers.iter().find(|vs| vs.id == proxy);
222 if voice_server.is_some() {
223 break;
224 }
225 }
226 if voice_server.is_none() {
228 for region in &state.regions {
229 for proxy in proxies.clone() {
230 voice_server = region.voice_servers.iter().find(|vs| vs.id == proxy);
231 if voice_server.is_some() {
232 break;
233 }
234 }
235 }
236 }
237 voice_server.cloned()
238}
239
240pub fn get_listen(
241 record_options: &RecordOptions,
242 ai_options: AiOptions,
243 state: &FlowState,
244) -> Option<Listen> {
245 match record_options.enabled {
246 true => {
247 let mut meta: HashMap<String, String> = HashMap::new();
248 meta.insert("type".to_string(), "session:record".to_string());
249 meta.insert("account_id".to_string(), state.account.id.clone());
250 meta.insert(
251 "retention".to_string(),
252 record_options.retention.to_string(),
253 );
254 meta.insert("ai_summary".to_string(), ai_options.summary.to_string());
255 meta.insert(
256 "ai_transcribe".to_string(),
257 ai_options.transcribe.to_string(),
258 );
259
260 let transcribe: Option<verbs::transcribe::Transcribe> = match ai_options.recognizer {
261 Some(recognizer) => Some(verbs::transcribe::Transcribe {
262 transcription_hook: format!("{}/transcribe", state.base_url),
263 recognizer,
264 }),
265 None => None,
266 };
267
268 let listen = Listen::new(
269 format!("{}/record", state.base_url),
270 format!("{}/record-complete", state.base_url),
271 )
272 .mix_type(Some(record_options.mix_type.clone()))
273 .transcribe(transcribe)
274 .metadata(Some(meta))
275 .clone();
276
277 Some(listen)
278 }
279 false => None,
280 }
281}
282
283
284