translation_lib/
translator.rs

1//! 翻译器核心模块
2//!
3//! 提供简化的翻译功能,支持文本和HTML翻译。
4
5use crate::config::TranslationConfig;
6use crate::error::{TranslationError, TranslationResult};
7use crate::types::{TranslationRequest, TranslationResponse};
8use reqwest::Client;
9use std::sync::Arc;
10use std::time::Duration;
11use tokio::sync::Semaphore;
12use tokio::time::sleep;
13
14#[cfg(feature = "cache")]
15use lru::LruCache;
16
17#[cfg(feature = "cache")]
18use crate::types::CacheEntry;
19
20#[cfg(feature = "html-support")]
21use markup5ever_rcdom::{Handle, NodeData, RcDom};
22
23#[cfg(feature = "html-support")]
24use html5ever::parse_document;
25
26#[cfg(feature = "html-support")]
27use html5ever::tendril::TendrilSink;
28
/// Small thread-safe LRU cache of translated strings with a time-to-live.
///
/// Clones share the same underlying map through the `Arc`, so a cloned
/// handle observes entries inserted through any other clone.
#[cfg(feature = "cache")]
#[derive(Clone)]
struct SimpleCache {
    /// Shared LRU map from cache key to the cached translation entry.
    cache: Arc<std::sync::Mutex<LruCache<String, CacheEntry>>>,
    /// Maximum age handed to `CacheEntry::is_expired` on every lookup.
    ttl: Duration,
}

#[cfg(feature = "cache")]
impl SimpleCache {
    /// Creates a cache holding at most `capacity` entries that expire after `ttl`.
    ///
    /// A `capacity` of 0 is clamped to 1: converting 0 into the non-zero
    /// capacity expected by `LruCache::new` would otherwise panic.
    fn new(capacity: usize, ttl: Duration) -> Self {
        let capacity = capacity
            .max(1)
            .try_into()
            .expect("capacity.max(1) is never zero");
        Self {
            cache: Arc::new(std::sync::Mutex::new(LruCache::new(capacity))),
            ttl,
        }
    }

    /// Locks the map, recovering the guard if the mutex was poisoned.
    ///
    /// The cache only holds derived data, so continuing with whatever state a
    /// panicking thread left behind is preferable to propagating the panic
    /// into every later translation.
    fn lock(&self) -> std::sync::MutexGuard<'_, LruCache<String, CacheEntry>> {
        self.cache
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Returns the cached translation for `key`, or `None` on a miss.
    ///
    /// An entry that `is_expired` reports as stale is evicted and treated as
    /// a miss; a live entry has `access()` called on it before being returned.
    fn get(&self, key: &str) -> Option<String> {
        let mut cache = self.lock();
        let entry = cache.get_mut(key)?;

        if entry.is_expired(self.ttl) {
            cache.pop(key);
            return None;
        }

        entry.access();
        Some(entry.translated_text.clone())
    }

    /// Stores `value` under `key` as a fresh `CacheEntry`, replacing any
    /// previous entry for the same key.
    fn insert(&self, key: String, value: String) {
        self.lock().put(key, CacheEntry::new(value));
    }
}
65
/// Stand-in used when the `cache` feature is disabled.
///
/// It exposes the same methods as the real cache but never stores anything,
/// so every lookup is a miss.
#[cfg(not(feature = "cache"))]
#[derive(Clone)]
struct SimpleCache;

#[cfg(not(feature = "cache"))]
impl SimpleCache {
    /// Builds the stub; both arguments are accepted and ignored.
    fn new(_capacity: usize, _ttl: Duration) -> Self {
        SimpleCache
    }

    /// Always reports a miss.
    fn get(&self, _key: &str) -> Option<String> {
        Option::None
    }

    /// Drops the key/value pair without storing it.
    fn insert(&self, _key: String, _value: String) {}
}
84
85/// 核心翻译器
86///
87/// 提供简化的翻译功能,支持文本和HTML翻译。
88#[derive(Clone)]
89pub struct Translator {
90    client: Client,
91    config: TranslationConfig,
92    cache: SimpleCache,
93    semaphore: Arc<Semaphore>,
94}
95
96impl Translator {
97    /// 创建新的翻译器实例
98    pub fn new(config: TranslationConfig) -> TranslationResult<Self> {
99        config.validate()?;
100
101        let client = Client::builder()
102            .timeout(config.timeout)
103            .build()
104            .map_err(|e| {
105                TranslationError::Config(format!("Failed to create HTTP client: {}", e))
106            })?;
107
108        let cache = SimpleCache::new(1000, Duration::from_secs(3600)); // 1小时TTL
109        let semaphore = Arc::new(Semaphore::new(config.max_concurrent_requests));
110
111        Ok(Self {
112            client,
113            config,
114            cache,
115            semaphore,
116        })
117    }
118
119    /// 翻译文本
120    pub async fn translate_text(&self, text: &str, target_lang: &str) -> TranslationResult<String> {
121        // 检查是否需要翻译
122        if !crate::should_translate(text, target_lang) {
123            return Ok(text.to_string());
124        }
125
126        // 检查缓存
127        let cache_key = format!("{}:{}", text, target_lang);
128        if let Some(cached) = self.cache.get(&cache_key) {
129            return Ok(cached);
130        }
131
132        // 执行翻译
133        let result = self.translate_with_retry(text, target_lang).await?;
134
135        // 存入缓存
136        if self.config.enable_cache {
137            self.cache.insert(cache_key, result.clone());
138        }
139
140        Ok(result)
141    }
142
143    /// HTML翻译
144    #[cfg(feature = "html-support")]
145    pub async fn translate_html(&self, html: &str, target_lang: &str) -> TranslationResult<String> {
146        use std::io::Cursor;
147
148        // 解析HTML
149        let dom = parse_document(markup5ever_rcdom::RcDom::default(), Default::default())
150            .from_utf8()
151            .read_from(&mut Cursor::new(html.as_bytes()))
152            .map_err(|e| TranslationError::Parse(format!("Failed to parse HTML: {}", e)))?;
153
154        // 翻译DOM
155        let _translated_dom = self.translate_dom(dom, target_lang).await?;
156
157        // 简化的HTML输出 - 直接返回原HTML(实际实现中需要更复杂的DOM序列化)
158        Ok(html.to_string())
159    }
160
161    #[cfg(not(feature = "html-support"))]
162    pub async fn translate_html(
163        &self,
164        _html: &str,
165        _target_lang: &str,
166    ) -> TranslationResult<String> {
167        Err(TranslationError::Config(
168            "HTML support not enabled. Enable 'html-support' feature".to_string(),
169        ))
170    }
171
172    /// DOM翻译
173    #[cfg(feature = "html-support")]
174    pub async fn translate_dom(&self, dom: RcDom, target_lang: &str) -> TranslationResult<RcDom> {
175        self.translate_node_recursive(&dom.document, target_lang)
176            .await?;
177        Ok(dom)
178    }
179
180    #[cfg(not(feature = "html-support"))]
181    pub async fn translate_dom(&self, _dom: RcDom, _target_lang: &str) -> TranslationResult<RcDom> {
182        Err(TranslationError::Config(
183            "HTML support not enabled. Enable 'html-support' feature".to_string(),
184        ))
185    }
186
187    /// 批量翻译文本
188    pub async fn translate_texts(
189        &self,
190        texts: Vec<&str>,
191        target_lang: &str,
192    ) -> TranslationResult<Vec<String>> {
193        let mut results = Vec::new();
194
195        for text in texts {
196            let result = self.translate_text(text, target_lang).await?;
197            results.push(result);
198        }
199
200        Ok(results)
201    }
202}
203
204impl Translator {
205    /// 内部翻译实现,带重试机制
206    async fn translate_with_retry(
207        &self,
208        text: &str,
209        target_lang: &str,
210    ) -> TranslationResult<String> {
211        let mut retries = 0;
212        let max_retries = 3;
213
214        loop {
215            // 获取信号量许可
216            let _permit = self.semaphore.acquire().await.map_err(|_| {
217                TranslationError::Network("Semaphore acquisition failed".to_string())
218            })?;
219
220            match self.call_translation_api(text, target_lang).await {
221                Ok(result) => return Ok(result),
222                Err(e) if retries >= max_retries => return Err(e),
223                Err(e) if e.is_retryable() => {
224                    retries += 1;
225                    if let Some(delay_ms) = e.retry_delay_ms() {
226                        sleep(Duration::from_millis(delay_ms)).await;
227                    }
228                }
229                Err(e) => return Err(e),
230            }
231        }
232    }
233
234    /// 调用翻译API
235    async fn call_translation_api(
236        &self,
237        text: &str,
238        target_lang: &str,
239    ) -> TranslationResult<String> {
240        let request = TranslationRequest {
241            text: text.to_string(),
242            source_lang: "auto".to_string(),
243            target_lang: target_lang.to_string(),
244        };
245
246        let response = self
247            .client
248            .post(&self.config.api_url)
249            .header("Content-Type", "application/json")
250            .header("Accept", "application/json")
251            .header("User-Agent", "Mozilla/5.0 (compatible; TranslationLib/1.0)")
252            .json(&request)
253            .send()
254            .await?;
255
256        if !response.status().is_success() {
257            return Err(TranslationError::Api(format!(
258                "API request failed with status: {}",
259                response.status()
260            )));
261        }
262
263        // 获取响应文本
264        let response_text = response.text().await?;
265
266        // 解析JSON响应
267        if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(&response_text) {
268            // 检查API特定格式 {"code": 200, "data": "翻译结果"}
269            if let Some(code) = json_value.get("code").and_then(|v| v.as_i64()) {
270                if code == 200 {
271                    if let Some(translated) = json_value.get("data").and_then(|v| v.as_str()) {
272                        return Ok(translated.to_string());
273                    }
274                } else {
275                    return Err(TranslationError::Api(format!(
276                        "API returned error code: {}",
277                        code
278                    )));
279                }
280            }
281
282            // 尝试其他常见格式
283            if let Some(translated) = json_value
284                .get("translated_text")
285                .or_else(|| json_value.get("result"))
286                .or_else(|| json_value.get("translation"))
287                .and_then(|v| v.as_str())
288            {
289                return Ok(translated.to_string());
290            }
291
292            // 尝试解析为标准响应格式
293            if let Ok(translation_response) =
294                serde_json::from_str::<TranslationResponse>(&response_text)
295            {
296                return Ok(translation_response.translated_text);
297            }
298        }
299
300        // 如果JSON解析失败,假设整个响应就是翻译结果
301        Ok(response_text)
302    }
303
304    /// 递归翻译DOM节点  
305    #[cfg(feature = "html-support")]
306    fn translate_node_recursive<'a>(
307        &'a self,
308        node: &'a Handle,
309        target_lang: &'a str,
310    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = TranslationResult<()>> + 'a>> {
311        Box::pin(async move {
312            match &node.data {
313                NodeData::Text { contents } => {
314                    let text = contents.borrow().to_string();
315                    if crate::should_translate(&text, target_lang) {
316                        let translated = self.translate_text(&text, target_lang).await?;
317                        *contents.borrow_mut() = translated.into();
318                    }
319                }
320                NodeData::Element { name, attrs, .. } => {
321                    // 翻译特定属性
322                    let translatable_attrs = ["title", "alt", "placeholder"];
323                    let mut attrs_mut = attrs.borrow_mut();
324
325                    for attr in attrs_mut.iter_mut() {
326                        if translatable_attrs.contains(&attr.name.local.as_ref()) {
327                            let attr_text = attr.value.to_string();
328                            if crate::should_translate(&attr_text, target_lang) {
329                                let translated =
330                                    self.translate_text(&attr_text, target_lang).await?;
331                                attr.value = translated.into();
332                            }
333                        }
334                    }
335
336                    // 跳过某些标签内容
337                    let skip_tags = ["script", "style", "code", "pre"];
338                    if !skip_tags.contains(&name.local.as_ref()) {
339                        // 递归处理子节点
340                        for child in node.children.borrow().iter() {
341                            self.translate_node_recursive(child, target_lang).await?;
342                        }
343                    }
344                }
345                _ => {
346                    // 对于其他节点类型,递归处理子节点
347                    for child in node.children.borrow().iter() {
348                        self.translate_node_recursive(child, target_lang).await?;
349                    }
350                }
351            }
352
353            Ok(())
354        })
355    }
356}