async_translate/microsoft/
mod.rs

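//! Microsoft translator implementation.
//!
//! The translator supports two authentication modes:
//! 1. Automatic authentication: uses a temporary token, so no API key has to be configured.
//! 2. API key authentication: uses an API key supplied by the user.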
6
7use crate::{error::TranslationError, options::TranslateOptions, translator::Translator};
8use reqwest::Client;
9use serde::{Deserialize, Serialize};
10use std::sync::Arc;
11use std::time::{Duration, Instant};
12use tokio::sync::{Mutex, Semaphore};
13use tokio::time::sleep;
14use unic_langid::LanguageIdentifier;
15
/// Configuration for the Microsoft translator.
#[derive(Clone, Debug)]
pub struct MicrosoftConfig {
    /// Base URL of the translation service; `None` selects the built-in default endpoint.
    pub endpoint: Option<String>,
    /// Optional API key; when `None`, automatic (temporary-token) authentication is used.
    pub api_key: Option<String>,
    /// Upper bound on the number of requests allowed to run concurrently.
    pub concurrent_limit: usize,
}
26
27impl Default for MicrosoftConfig {
28    fn default() -> Self {
29        Self {
30            endpoint: None, // 使用默认端点
31            api_key: None,  // 使用自动认证
32            concurrent_limit: 10,
33        }
34    }
35}
36
37impl MicrosoftConfig {
38    pub fn builder() -> MicrosoftConfigBuilder {
39        MicrosoftConfigBuilder::default()
40    }
41}
42
/// Builder for `MicrosoftConfig`; every option starts out unset.
#[derive(Default, Debug)]
pub struct MicrosoftConfigBuilder {
    /// Custom service endpoint, if one was provided.
    endpoint: Option<String>,
    /// API key, if one was provided.
    api_key: Option<String>,
    /// Concurrency limit, if one was provided.
    concurrent_limit: Option<usize>,
}
49
50impl MicrosoftConfigBuilder {
51    pub fn endpoint(mut self, endpoint: impl Into<String>) -> Self {
52        self.endpoint = Some(endpoint.into());
53        self
54    }
55
56    pub fn api_key(mut self, api_key: Option<impl Into<String>>) -> Self {
57        self.api_key = api_key.map(|s| s.into());
58        self
59    }
60
61    pub fn concurrent_limit(mut self, concurrent_limit: usize) -> Self {
62        self.concurrent_limit = Some(concurrent_limit);
63        self
64    }
65
66    pub fn build(self) -> MicrosoftConfig {
67        MicrosoftConfig {
68            endpoint: self.endpoint,
69            api_key: self.api_key,
70            concurrent_limit: self.concurrent_limit.unwrap_or(10),
71        }
72    }
73}
74
75/// 微软翻译器错误响应
76#[derive(Debug, Deserialize)]
77struct MicrosoftErrorResponse {
78    error: MicrosoftErrorDetails,
79}
80
81#[derive(Debug, Deserialize)]
82struct MicrosoftErrorDetails {
83    code: u32,
84    message: String,
85}
86
87/// 微软翻译检测到的语言信息
88#[derive(Debug, Deserialize)]
89pub struct DetectedLanguage {
90    pub language: String,
91    pub score: f64,
92}
93
94/// 微软翻译结果
95#[derive(Debug, Deserialize)]
96pub struct MicrosoftTranslation {
97    #[serde(rename = "detectedLanguage")]
98    pub detected_language: Option<DetectedLanguage>,
99    pub translations: Vec<TranslationResult>,
100}
101
102/// 翻译结果
103#[derive(Debug, Deserialize)]
104pub struct TranslationResult {
105    pub text: String,
106    pub to: String,
107}
108
109/// 用于批量文本翻译的请求
110#[derive(Serialize)]
111struct BatchTranslationRequest {
112    text: String,
113}
114
115/// 微软翻译器实现
116///
117/// 支持两种认证方式:
118/// 1. 自动认证:通过临时token,无需配置API密钥
119/// 2. API Key认证:使用用户提供的API密钥
120pub struct MicrosoftTranslator {
121    client: Client,
122    config: MicrosoftConfig,
123    semaphore: Arc<Semaphore>,
124    cached_token: Arc<Mutex<Option<String>>>,
125    token_expiry: Arc<Mutex<Option<Instant>>>,
126}
127
128impl MicrosoftTranslator {
129    /// 创建新的微软翻译器实例
130    pub fn new(config: MicrosoftConfig) -> Self {
131        let concurrent_limit = config.concurrent_limit;
132        Self {
133            client: Client::new(),
134            config,
135            semaphore: Arc::new(Semaphore::new(concurrent_limit)),
136            cached_token: Arc::new(Mutex::new(None)),
137            token_expiry: Arc::new(Mutex::new(None)),
138        }
139    }
140
141    /// 获取认证token,带缓存和过期处理
142    async fn get_auth_token(&self) -> Result<String, TranslationError> {
143        // 如果配置了API Key,直接使用
144        if let Some(api_key) = &self.config.api_key {
145            return Ok(api_key.clone());
146        }
147
148        let mut token_guard = self.cached_token.lock().await;
149        let mut expiry_guard = self.token_expiry.lock().await;
150
151        // 检查缓存的token是否仍然有效(有效期通常为10分钟,我们提前1分钟刷新)
152        if let (Some(token), Some(expiry)) = (token_guard.as_ref(), expiry_guard.as_ref()) {
153            if expiry.saturating_duration_since(Instant::now()) > Duration::from_secs(60) {
154                return Ok(token.clone());
155            }
156        }
157
158        // 获取新的token
159        let mut auth_attempts = 3;
160        while auth_attempts > 0 {
161            auth_attempts -= 1;
162            match self.client
163                .get("https://edge.microsoft.com/translate/auth")
164                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
165                .send()
166                .await
167            {
168                Ok(response) => {
169                    if response.status().is_success() {
170                        let token = response.text().await.map_err(|e| {
171                            TranslationError::AuthenticationError(format!("Failed to read auth response: {}", e))
172                        })?;
173                        // 缓存新的token和过期时间
174                        *token_guard = Some(token.clone());
175                        *expiry_guard = Some(Instant::now() + Duration::from_secs(540)); // 9分钟后过期
176                        return Ok(token);
177                    } else {
178                        if auth_attempts <= 0 {
179                            return Err(TranslationError::AuthenticationError(
180                                format!("Failed to authenticate with Microsoft Translator: HTTP {}", response.status())
181                            ));
182                        }
183                    }
184                }
185                Err(e) => {
186                    if auth_attempts <= 0 {
187                        return Err(TranslationError::NetworkError(e));
188                    }
189                }
190            }
191            sleep(Duration::from_secs(1)).await;
192        }
193        Err(TranslationError::AuthenticationError(
194            "Failed to get Microsoft Translator authorization after retries".to_string(),
195        ))
196    }
197
198    /// 强制清除缓存的token
199    async fn clear_cached_token(&self) {
200        *self.cached_token.lock().await = None;
201        *self.token_expiry.lock().await = None;
202    }
203
204    /// 批量翻译文本
205    ///
206    /// # 参数
207    ///
208    /// * `texts` - 需要翻译的文本数组
209    /// * `target_lang` - 目标语言标识符
210    /// * `source_lang` - 源语言标识符 (None表示自动检测)
211    /// * `options` - 翻译配置选项
212    ///
213    /// # 返回值
214    ///
215    /// 返回翻译结果数组
216    pub async fn translate_batch(
217        &self,
218        texts: &[&str],
219        target_lang: &LanguageIdentifier,
220        source_lang: Option<&LanguageIdentifier>,
221        options: &TranslateOptions,
222    ) -> Result<Vec<MicrosoftTranslation>, TranslationError> {
223        let mut errors = Vec::new();
224        for attempt in 0..=options.max_retries {
225            if attempt > 0 {
226                let delay = Duration::from_millis(100 * 2u64.pow(attempt - 1));
227                sleep(delay).await;
228            }
229
230            match self
231                .try_translate_batch(texts, target_lang, source_lang, options)
232                .await
233            {
234                Ok(result) => return Ok(result),
235                Err(e) => {
236                    // 只在可重试的错误上继续
237                    if e.is_retryable() {
238                        errors.push(e);
239                    } else {
240                        return Err(e);
241                    }
242                }
243            }
244        }
245        Err(TranslationError::MaxRetriesExceeded {
246            attempts: options.max_retries + 1,
247            errors,
248        })
249    }
250
251    /// 尝试批量翻译文本(无重试)
252    async fn try_translate_batch(
253        &self,
254        texts: &[&str],
255        target_lang: &LanguageIdentifier,
256        source_lang: Option<&LanguageIdentifier>,
257        options: &TranslateOptions,
258    ) -> Result<Vec<MicrosoftTranslation>, TranslationError> {
259        // 获取并发许可
260        let _permit =
261            self.semaphore.acquire().await.map_err(|e| {
262                TranslationError::Other(format!("Failed to acquire semaphore: {}", e))
263            })?;
264
265        // 获取认证token
266        let token = self.get_auth_token().await?;
267
268        // 确定使用哪个端点
269        let endpoint = self
270            .config
271            .endpoint
272            .as_deref()
273            .unwrap_or("https://api-edge.cognitive.microsofttranslator.com");
274
275        // 根据超时设置创建客户端
276        let client = if let Some(timeout) = options.timeout {
277            Client::builder()
278                .timeout(timeout)
279                .build()
280                .map_err(|e| TranslationError::NetworkError(e))?
281        } else {
282            self.client.clone()
283        };
284
285        // 构造请求
286        let requests: Vec<BatchTranslationRequest> = texts
287            .iter()
288            .map(|text| BatchTranslationRequest {
289                text: text.to_string(),
290            })
291            .collect();
292
293        // 构造查询参数
294        let target_lang_str = target_lang.to_string();
295        let source_lang_str = source_lang.map(|s| s.to_string());
296        let mut params = vec![
297            ("api-version", "3.0"),
298            ("to", target_lang_str.as_str()),
299            ("includeSentenceLength", "true"),
300        ];
301
302        if let Some(ref source_str) = source_lang_str {
303            params.push(("from", source_str.as_str()));
304        }
305
306        // 确定认证头
307        let auth_header = if self.config.api_key.is_some() {
308            format!("Ocp-Apim-Subscription-Key {}", token)
309        } else {
310            format!("Bearer {}", token)
311        };
312
313        // 发送请求
314        let response = client
315            .post(&format!("{}/translate", endpoint))
316            .header("Authorization", auth_header)
317            .header("Content-Type", "application/json")
318            .query(&params)
319            .json(&requests)
320            .send()
321            .await?;
322
323        // 检查HTTP状态码
324        if !response.status().is_success() {
325            let status = response.status();
326            let error_text = response
327                .text()
328                .await
329                .unwrap_or_else(|_| "Unknown error".to_string());
330
331            // 如果是401未授权错误,则清除缓存的token
332            if status == reqwest::StatusCode::UNAUTHORIZED {
333                self.clear_cached_token().await;
334            }
335
336            if let Ok(error_response) = serde_json::from_str::<MicrosoftErrorResponse>(&error_text)
337            {
338                return Err(TranslationError::HttpError {
339                    status,
340                    body: format!(
341                        "Error {}: {}",
342                        error_response.error.code, error_response.error.message
343                    ),
344                });
345            }
346
347            return Err(TranslationError::HttpError {
348                status,
349                body: error_text,
350            });
351        }
352
353        // 解析响应
354        let response_body: Vec<MicrosoftTranslation> = response.json().await?;
355        Ok(response_body)
356    }
357
358    /// 翻译单个文本(公共方法)
359    pub async fn translate_text(
360        &self,
361        text: &str,
362        target_lang: &LanguageIdentifier,
363        source_lang: Option<&LanguageIdentifier>,
364        options: &TranslateOptions,
365    ) -> Result<String, TranslationError> {
366        let results = self
367            .translate_batch(&[text], target_lang, source_lang, options)
368            .await?;
369
370        if results.is_empty() || results[0].translations.is_empty() {
371            return Err(TranslationError::ServiceError(
372                "No translation results returned".to_string(),
373            ));
374        }
375
376        Ok(results[0].translations[0].text.clone())
377    }
378
379    /// 批量翻译文本并返回字符串数组
380    pub async fn translate_batch_to_strings(
381        &self,
382        texts: &[&str],
383        target_lang: &LanguageIdentifier,
384        source_lang: Option<&LanguageIdentifier>,
385        options: &TranslateOptions,
386    ) -> Result<Vec<String>, TranslationError> {
387        let results = self
388            .translate_batch(texts, target_lang, source_lang, options)
389            .await?;
390        let translated_texts = results
391            .into_iter()
392            .filter_map(|res| res.translations.into_iter().next())
393            .map(|trans_result| trans_result.text)
394            .collect();
395        Ok(translated_texts)
396    }
397}
398
399#[async_trait::async_trait]
400impl Translator for MicrosoftTranslator {
401    async fn translate_with_options(
402        &self,
403        text: &str,
404        target_lang: &LanguageIdentifier,
405        source_lang: Option<&LanguageIdentifier>,
406        options: &TranslateOptions,
407    ) -> Result<String, TranslationError> {
408        self.translate_text(text, target_lang, source_lang, options)
409            .await
410    }
411}
412
413#[cfg(test)]
414mod tests;