1use std::collections::BTreeMap;
2use std::time::{Duration, SystemTime, UNIX_EPOCH};
3
4use chrono::Utc;
5use rand::RngCore;
6use serde::Deserialize;
7use serde_json::json;
8use sha2::{Digest, Sha256};
9use uuid::Uuid;
10
11use crate::constants::{
12 MP3_BITRATE_BPS, OUTPUT_FORMAT, SEC_MS_GEC_VERSION, TICKS_PER_SECOND, TRUSTED_CLIENT_TOKEN,
13};
14use crate::error::{Error, Result};
15use crate::options::{SpeakOptions, normalize_voice};
16use crate::types::{Boundary, BoundaryEvent, SynthesisEvent};
17
/// Seconds between 1601-01-01T00:00:00Z (the Windows tick epoch) and the Unix
/// epoch: 134,774 days of 86,400 seconds each (= 11,644,473,600).
const WINDOWS_EPOCH_OFFSET_SECONDS: u64 = 134_774 * 86_400;
19
20pub fn generate_connection_id() -> String {
21 Uuid::new_v4().simple().to_string()
22}
23
24pub fn generate_muid() -> String {
25 let mut bytes = [0u8; 16];
26 rand::thread_rng().fill_bytes(&mut bytes);
27 bytes.iter().map(|byte| format!("{byte:02X}")).collect()
28}
29
30pub fn generate_sec_ms_gec(now: SystemTime) -> String {
31 let unix_seconds = now
32 .duration_since(UNIX_EPOCH)
33 .unwrap_or(Duration::ZERO)
34 .as_secs();
35 let rounded = (unix_seconds + WINDOWS_EPOCH_OFFSET_SECONDS) / 300 * 300;
36 let windows_ticks = rounded * 10_000_000;
37 let mut hasher = Sha256::new();
38 hasher.update(format!("{windows_ticks}{TRUSTED_CLIENT_TOKEN}").as_bytes());
39 format!("{:X}", hasher.finalize())
40}
41
42pub fn javascript_timestamp() -> String {
43 Utc::now()
44 .format("%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)")
45 .to_string()
46}
47
48pub fn speech_config_message(boundary: Boundary) -> String {
49 let (word, sentence) = boundary.metadata_flags();
50 let payload = json!({
51 "context": {
52 "synthesis": {
53 "audio": {
54 "metadataoptions": {
55 "sentenceBoundaryEnabled": sentence,
56 "wordBoundaryEnabled": word
57 },
58 "outputFormat": OUTPUT_FORMAT
59 }
60 }
61 }
62 });
63 format!(
64 "X-Timestamp:{}\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n{}\r\n",
65 javascript_timestamp(),
66 payload
67 )
68}
69
70pub fn ssml_message(options: &SpeakOptions, chunk: &str) -> Result<String> {
71 let voice = normalize_voice(&options.voice)?;
72 Ok(format!(
73 "X-RequestId:{}\r\nContent-Type:application/ssml+xml\r\nX-Timestamp:{}Z\r\nPath:ssml\r\n\r\n{}",
74 generate_connection_id(),
75 javascript_timestamp(),
76 build_ssml(
77 &voice,
78 &options.rate,
79 &options.volume,
80 &options.pitch,
81 chunk
82 )
83 ))
84}
85
86pub fn build_ssml(voice: &str, rate: &str, volume: &str, pitch: &str, text: &str) -> String {
87 format!(
88 "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>{}</prosody></voice></speak>",
89 escape_ssml_text(text)
90 )
91}
92
/// Makes `text` safe to embed in an SSML/XML document.
///
/// * C0 control characters other than tab (`\t`), line feed (`\n`) and
///   carriage return (`\r`) are replaced with a single space.
/// * The five XML special characters `&`, `<`, `>`, `"` and `'` are replaced
///   with `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;` respectively.
///
/// The conversion is a single pass, so an `&` that is already part of an
/// entity in the input is escaped again (`&amp;` becomes `&amp;amp;`).
pub fn escape_ssml_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            // Control characters except \t (9), \n (10) and \r (13).
            '\u{0}'..='\u{8}' | '\u{B}'..='\u{C}' | '\u{E}'..='\u{1F}' => escaped.push(' '),
            _ => escaped.push(ch),
        }
    }
    escaped
}
109
110pub fn split_text(text: &str, max_bytes: usize) -> Result<Vec<String>> {
111 if max_bytes == 0 {
112 return Err(Error::InvalidChunkSize);
113 }
114
115 let escaped = escape_ssml_text(text);
116 let mut bytes = escaped.as_bytes();
117 let mut chunks = Vec::new();
118
119 while bytes.len() > max_bytes {
120 let mut split_at = bytes[..max_bytes]
121 .iter()
122 .rposition(|byte| *byte == b'\n' || *byte == b' ')
123 .unwrap_or(max_bytes);
124
125 while std::str::from_utf8(&bytes[..split_at]).is_err() && split_at > 0 {
126 split_at -= 1;
127 }
128
129 split_at = adjust_entity_boundary(bytes, split_at);
130 if split_at == 0 {
131 return Err(Error::InvalidSplitPoint);
132 }
133
134 let chunk = std::str::from_utf8(&bytes[..split_at])
135 .map_err(|_| Error::InvalidSplitPoint)?
136 .trim();
137 if !chunk.is_empty() {
138 chunks.push(chunk.to_owned());
139 }
140 bytes = &bytes[split_at..];
141 }
142
143 let tail = std::str::from_utf8(bytes)
144 .map_err(|_| Error::InvalidSplitPoint)?
145 .trim();
146 if !tail.is_empty() {
147 chunks.push(tail.to_owned());
148 }
149
150 Ok(chunks)
151}
152
/// Moves `split_at` backwards so that it does not fall inside an XML entity.
///
/// Looks at the last `&` before `split_at`: if a `;` follows it (still before
/// `split_at`) the entity is complete and the position is kept; otherwise the
/// position is moved onto that `&` and the check repeats from there. Returns
/// the adjusted position, which may be `0`.
fn adjust_entity_boundary(bytes: &[u8], mut split_at: usize) -> usize {
    while let Some(ampersand) = bytes[..split_at].iter().rposition(|&byte| byte == b'&') {
        let entity_is_closed = bytes[ampersand..split_at].contains(&b';');
        if entity_is_closed {
            break;
        }
        split_at = ampersand;
    }
    split_at
}
166
167pub fn parse_headers(
168 data: &[u8],
169 header_length: usize,
170) -> Result<(BTreeMap<String, String>, &[u8])> {
171 if header_length > data.len() {
172 return Err(Error::UnexpectedResponse(
173 "header length exceeds frame length",
174 ));
175 }
176
177 let header_bytes = &data[..header_length];
178 let payload = data
179 .get(header_length..)
180 .ok_or(Error::UnexpectedResponse("frame missing payload"))?;
181 let payload = payload
182 .strip_prefix(b"\r\n\r\n")
183 .or_else(|| payload.strip_prefix(b"\r\n"))
184 .unwrap_or(payload);
185 let header_str = std::str::from_utf8(header_bytes)
186 .map_err(|_| Error::UnexpectedResponse("headers are not valid utf-8"))?;
187
188 let mut headers = BTreeMap::new();
189 for line in header_str.split("\r\n").filter(|line| !line.is_empty()) {
190 let Some((key, value)) = line.split_once(':') else {
191 continue;
192 };
193 headers.insert(key.to_owned(), value.to_owned());
194 }
195 Ok((headers, payload))
196}
197
198pub fn parse_binary_headers(
199 data: &[u8],
200 header_length: usize,
201) -> Result<(BTreeMap<String, String>, &[u8])> {
202 let header_start = 2usize;
203 let header_end = header_start
204 .checked_add(header_length)
205 .ok_or(Error::UnexpectedResponse("binary header length overflow"))?;
206 if header_end > data.len() {
207 return Err(Error::UnexpectedResponse(
208 "binary header length exceeds frame length",
209 ));
210 }
211
212 let header_bytes = &data[header_start..header_end];
213 let payload = data
214 .get(header_end..)
215 .ok_or(Error::UnexpectedResponse("binary frame missing payload"))?;
216 let payload = payload.strip_prefix(b"\r\n").unwrap_or(payload);
217 let header_str = std::str::from_utf8(header_bytes)
218 .map_err(|_| Error::UnexpectedResponse("headers are not valid utf-8"))?;
219
220 let mut headers = BTreeMap::new();
221 for line in header_str.split("\r\n").filter(|line| !line.is_empty()) {
222 let Some((key, value)) = line.split_once(':') else {
223 continue;
224 };
225 headers.insert(key.to_owned(), value.to_owned());
226 }
227 Ok((headers, payload))
228}
229
230#[derive(Debug, Deserialize)]
231struct MetadataEnvelope {
232 #[serde(rename = "Metadata")]
233 metadata: Vec<MetadataItem>,
234}
235
236#[derive(Debug, Deserialize)]
237struct MetadataItem {
238 #[serde(rename = "Type")]
239 kind: String,
240 #[serde(rename = "Data")]
241 data: Option<MetadataData>,
242}
243
244#[derive(Debug, Deserialize)]
245struct MetadataData {
246 #[serde(rename = "Offset")]
247 offset: u64,
248 #[serde(rename = "Duration")]
249 duration: u64,
250 #[serde(rename = "text")]
251 text: MetadataText,
252}
253
254#[derive(Debug, Deserialize)]
255struct MetadataText {
256 #[serde(rename = "Text")]
257 text: String,
258}
259
260pub fn parse_metadata(payload: &[u8], offset_compensation: u64) -> Result<Vec<SynthesisEvent>> {
261 let envelope: MetadataEnvelope = serde_json::from_slice(payload)?;
262 let mut events = Vec::new();
263
264 for item in envelope.metadata {
265 match item.kind.as_str() {
266 "WordBoundary" | "SentenceBoundary" => {
267 let data = item
268 .data
269 .ok_or(Error::UnexpectedResponse("boundary metadata missing data"))?;
270 let kind = if item.kind == "WordBoundary" {
271 Boundary::Word
272 } else {
273 Boundary::Sentence
274 };
275 events.push(SynthesisEvent::Boundary(BoundaryEvent {
276 kind,
277 offset_ticks: data.offset + offset_compensation,
278 duration_ticks: data.duration,
279 text: unescape_xml(&data.text.text),
280 }));
281 }
282 "SessionEnd" => {}
283 other => return Err(Error::UnknownMetadata(other.to_owned())),
284 }
285 }
286
287 Ok(events)
288}
289
290pub fn offset_from_audio_bytes(bytes: usize) -> u64 {
291 (bytes as u64 * 8 * TICKS_PER_SECOND) / MP3_BITRATE_BPS
292}
293
/// Replaces the five predefined XML entities (`&lt;`, `&gt;`, `&quot;`,
/// `&apos;`, `&amp;`) in `text` with the characters they stand for.
///
/// `&amp;` is handled last so that doubly-escaped input is only unescaped one
/// level: `&amp;lt;` becomes `&lt;`, not `<`.
fn unescape_xml(text: &str) -> String {
    text.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
301
302pub fn voice_headers() -> [(&'static str, String); 8] {
303 [
304 ("Authority", "speech.platform.bing.com".to_owned()),
305 (
306 "Sec-CH-UA",
307 format!(
308 "\" Not;A Brand\";v=\"99\", \"Microsoft Edge\";v=\"{0}\", \"Chromium\";v=\"{0}\"",
309 crate::constants::CHROMIUM_MAJOR_VERSION
310 ),
311 ),
312 ("Sec-CH-UA-Mobile", "?0".to_owned()),
313 ("Accept", "*/*".to_owned()),
314 ("Sec-Fetch-Site", "none".to_owned()),
315 ("Sec-Fetch-Mode", "cors".to_owned()),
316 ("Sec-Fetch-Dest", "empty".to_owned()),
317 ("User-Agent", crate::constants::user_agent()),
318 ]
319}
320
321pub fn websocket_headers(muid: &str) -> [(&'static str, String); 8] {
322 [
323 ("Pragma", "no-cache".to_owned()),
324 ("Cache-Control", "no-cache".to_owned()),
325 (
326 "Origin",
327 "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold".to_owned(),
328 ),
329 ("Sec-WebSocket-Version", "13".to_owned()),
330 ("User-Agent", crate::constants::user_agent()),
331 ("Accept-Encoding", "gzip, deflate, br, zstd".to_owned()),
332 ("Accept-Language", "en-US,en;q=0.9".to_owned()),
333 ("Cookie", format!("muid={muid};")),
334 ]
335}
336
337pub fn sec_ms_gec_version() -> &'static str {
338 SEC_MS_GEC_VERSION
339}