async_translate/microsoft/
mod.rs1use crate::{error::TranslationError, options::TranslateOptions, translator::Translator};
8use reqwest::Client;
9use serde::{Deserialize, Serialize};
10use std::sync::Arc;
11use std::time::{Duration, Instant};
12use tokio::sync::{Mutex, Semaphore};
13use tokio::time::sleep;
14use unic_langid::LanguageIdentifier;
15
/// Settings consumed by [`MicrosoftTranslator`].
#[derive(Clone, Debug)]
pub struct MicrosoftConfig {
    /// Base URL of the translation service. When `None`, the translator falls
    /// back to `https://api-edge.cognitive.microsofttranslator.com`.
    pub endpoint: Option<String>,
    /// Subscription key. When `None`, the translator obtains a short-lived
    /// token from the Edge auth endpoint instead.
    pub api_key: Option<String>,
    /// Number of permits in the semaphore that gates in-flight requests.
    pub concurrent_limit: usize,
}
26
27impl Default for MicrosoftConfig {
28 fn default() -> Self {
29 Self {
30 endpoint: None, api_key: None, concurrent_limit: 10,
33 }
34 }
35}
36
37impl MicrosoftConfig {
38 pub fn builder() -> MicrosoftConfigBuilder {
39 MicrosoftConfigBuilder::default()
40 }
41}
42
/// Step-by-step builder for [`MicrosoftConfig`]; every option starts unset.
#[derive(Default, Debug)]
pub struct MicrosoftConfigBuilder {
    /// Custom service base URL, if one was supplied.
    endpoint: Option<String>,
    /// Subscription key, if one was supplied.
    api_key: Option<String>,
    /// Requested concurrency limit; `None` means "use the default".
    concurrent_limit: Option<usize>,
}
49
50impl MicrosoftConfigBuilder {
51 pub fn endpoint(mut self, endpoint: impl Into<String>) -> Self {
52 self.endpoint = Some(endpoint.into());
53 self
54 }
55
56 pub fn api_key(mut self, api_key: Option<impl Into<String>>) -> Self {
57 self.api_key = api_key.map(|s| s.into());
58 self
59 }
60
61 pub fn concurrent_limit(mut self, concurrent_limit: usize) -> Self {
62 self.concurrent_limit = Some(concurrent_limit);
63 self
64 }
65
66 pub fn build(self) -> MicrosoftConfig {
67 MicrosoftConfig {
68 endpoint: self.endpoint,
69 api_key: self.api_key,
70 concurrent_limit: self.concurrent_limit.unwrap_or(10),
71 }
72 }
73}
74
75#[derive(Debug, Deserialize)]
77struct MicrosoftErrorResponse {
78 error: MicrosoftErrorDetails,
79}
80
81#[derive(Debug, Deserialize)]
82struct MicrosoftErrorDetails {
83 code: u32,
84 message: String,
85}
86
87#[derive(Debug, Deserialize)]
89pub struct DetectedLanguage {
90 pub language: String,
91 pub score: f64,
92}
93
94#[derive(Debug, Deserialize)]
96pub struct MicrosoftTranslation {
97 #[serde(rename = "detectedLanguage")]
98 pub detected_language: Option<DetectedLanguage>,
99 pub translations: Vec<TranslationResult>,
100}
101
102#[derive(Debug, Deserialize)]
104pub struct TranslationResult {
105 pub text: String,
106 pub to: String,
107}
108
109#[derive(Serialize)]
111struct BatchTranslationRequest {
112 text: String,
113}
114
115pub struct MicrosoftTranslator {
121 client: Client,
122 config: MicrosoftConfig,
123 semaphore: Arc<Semaphore>,
124 cached_token: Arc<Mutex<Option<String>>>,
125 token_expiry: Arc<Mutex<Option<Instant>>>,
126}
127
128impl MicrosoftTranslator {
129 pub fn new(config: MicrosoftConfig) -> Self {
131 let concurrent_limit = config.concurrent_limit;
132 Self {
133 client: Client::new(),
134 config,
135 semaphore: Arc::new(Semaphore::new(concurrent_limit)),
136 cached_token: Arc::new(Mutex::new(None)),
137 token_expiry: Arc::new(Mutex::new(None)),
138 }
139 }
140
141 async fn get_auth_token(&self) -> Result<String, TranslationError> {
143 if let Some(api_key) = &self.config.api_key {
145 return Ok(api_key.clone());
146 }
147
148 let mut token_guard = self.cached_token.lock().await;
149 let mut expiry_guard = self.token_expiry.lock().await;
150
151 if let (Some(token), Some(expiry)) = (token_guard.as_ref(), expiry_guard.as_ref()) {
153 if expiry.saturating_duration_since(Instant::now()) > Duration::from_secs(60) {
154 return Ok(token.clone());
155 }
156 }
157
158 let mut auth_attempts = 3;
160 while auth_attempts > 0 {
161 auth_attempts -= 1;
162 match self.client
163 .get("https://edge.microsoft.com/translate/auth")
164 .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
165 .send()
166 .await
167 {
168 Ok(response) => {
169 if response.status().is_success() {
170 let token = response.text().await.map_err(|e| {
171 TranslationError::AuthenticationError(format!("Failed to read auth response: {}", e))
172 })?;
173 *token_guard = Some(token.clone());
175 *expiry_guard = Some(Instant::now() + Duration::from_secs(540)); return Ok(token);
177 } else {
178 if auth_attempts <= 0 {
179 return Err(TranslationError::AuthenticationError(
180 format!("Failed to authenticate with Microsoft Translator: HTTP {}", response.status())
181 ));
182 }
183 }
184 }
185 Err(e) => {
186 if auth_attempts <= 0 {
187 return Err(TranslationError::NetworkError(e));
188 }
189 }
190 }
191 sleep(Duration::from_secs(1)).await;
192 }
193 Err(TranslationError::AuthenticationError(
194 "Failed to get Microsoft Translator authorization after retries".to_string(),
195 ))
196 }
197
198 async fn clear_cached_token(&self) {
200 *self.cached_token.lock().await = None;
201 *self.token_expiry.lock().await = None;
202 }
203
204 pub async fn translate_batch(
217 &self,
218 texts: &[&str],
219 target_lang: &LanguageIdentifier,
220 source_lang: Option<&LanguageIdentifier>,
221 options: &TranslateOptions,
222 ) -> Result<Vec<MicrosoftTranslation>, TranslationError> {
223 let mut errors = Vec::new();
224 for attempt in 0..=options.max_retries {
225 if attempt > 0 {
226 let delay = Duration::from_millis(100 * 2u64.pow(attempt - 1));
227 sleep(delay).await;
228 }
229
230 match self
231 .try_translate_batch(texts, target_lang, source_lang, options)
232 .await
233 {
234 Ok(result) => return Ok(result),
235 Err(e) => {
236 if e.is_retryable() {
238 errors.push(e);
239 } else {
240 return Err(e);
241 }
242 }
243 }
244 }
245 Err(TranslationError::MaxRetriesExceeded {
246 attempts: options.max_retries + 1,
247 errors,
248 })
249 }
250
251 async fn try_translate_batch(
253 &self,
254 texts: &[&str],
255 target_lang: &LanguageIdentifier,
256 source_lang: Option<&LanguageIdentifier>,
257 options: &TranslateOptions,
258 ) -> Result<Vec<MicrosoftTranslation>, TranslationError> {
259 let _permit =
261 self.semaphore.acquire().await.map_err(|e| {
262 TranslationError::Other(format!("Failed to acquire semaphore: {}", e))
263 })?;
264
265 let token = self.get_auth_token().await?;
267
268 let endpoint = self
270 .config
271 .endpoint
272 .as_deref()
273 .unwrap_or("https://api-edge.cognitive.microsofttranslator.com");
274
275 let client = if let Some(timeout) = options.timeout {
277 Client::builder()
278 .timeout(timeout)
279 .build()
280 .map_err(|e| TranslationError::NetworkError(e))?
281 } else {
282 self.client.clone()
283 };
284
285 let requests: Vec<BatchTranslationRequest> = texts
287 .iter()
288 .map(|text| BatchTranslationRequest {
289 text: text.to_string(),
290 })
291 .collect();
292
293 let target_lang_str = target_lang.to_string();
295 let source_lang_str = source_lang.map(|s| s.to_string());
296 let mut params = vec![
297 ("api-version", "3.0"),
298 ("to", target_lang_str.as_str()),
299 ("includeSentenceLength", "true"),
300 ];
301
302 if let Some(ref source_str) = source_lang_str {
303 params.push(("from", source_str.as_str()));
304 }
305
306 let auth_header = if self.config.api_key.is_some() {
308 format!("Ocp-Apim-Subscription-Key {}", token)
309 } else {
310 format!("Bearer {}", token)
311 };
312
313 let response = client
315 .post(&format!("{}/translate", endpoint))
316 .header("Authorization", auth_header)
317 .header("Content-Type", "application/json")
318 .query(¶ms)
319 .json(&requests)
320 .send()
321 .await?;
322
323 if !response.status().is_success() {
325 let status = response.status();
326 let error_text = response
327 .text()
328 .await
329 .unwrap_or_else(|_| "Unknown error".to_string());
330
331 if status == reqwest::StatusCode::UNAUTHORIZED {
333 self.clear_cached_token().await;
334 }
335
336 if let Ok(error_response) = serde_json::from_str::<MicrosoftErrorResponse>(&error_text)
337 {
338 return Err(TranslationError::HttpError {
339 status,
340 body: format!(
341 "Error {}: {}",
342 error_response.error.code, error_response.error.message
343 ),
344 });
345 }
346
347 return Err(TranslationError::HttpError {
348 status,
349 body: error_text,
350 });
351 }
352
353 let response_body: Vec<MicrosoftTranslation> = response.json().await?;
355 Ok(response_body)
356 }
357
358 pub async fn translate_text(
360 &self,
361 text: &str,
362 target_lang: &LanguageIdentifier,
363 source_lang: Option<&LanguageIdentifier>,
364 options: &TranslateOptions,
365 ) -> Result<String, TranslationError> {
366 let results = self
367 .translate_batch(&[text], target_lang, source_lang, options)
368 .await?;
369
370 if results.is_empty() || results[0].translations.is_empty() {
371 return Err(TranslationError::ServiceError(
372 "No translation results returned".to_string(),
373 ));
374 }
375
376 Ok(results[0].translations[0].text.clone())
377 }
378
379 pub async fn translate_batch_to_strings(
381 &self,
382 texts: &[&str],
383 target_lang: &LanguageIdentifier,
384 source_lang: Option<&LanguageIdentifier>,
385 options: &TranslateOptions,
386 ) -> Result<Vec<String>, TranslationError> {
387 let results = self
388 .translate_batch(texts, target_lang, source_lang, options)
389 .await?;
390 let translated_texts = results
391 .into_iter()
392 .filter_map(|res| res.translations.into_iter().next())
393 .map(|trans_result| trans_result.text)
394 .collect();
395 Ok(translated_texts)
396 }
397}
398
399#[async_trait::async_trait]
400impl Translator for MicrosoftTranslator {
401 async fn translate_with_options(
402 &self,
403 text: &str,
404 target_lang: &LanguageIdentifier,
405 source_lang: Option<&LanguageIdentifier>,
406 options: &TranslateOptions,
407 ) -> Result<String, TranslationError> {
408 self.translate_text(text, target_lang, source_lang, options)
409 .await
410 }
411}
412
413#[cfg(test)]
414mod tests;