1use crate::types::{TranslationConfig, DeepLXRequest, DeepLXResponse, DpTransRequest, RetryConfig, TextSegment};
6use crate::error::{Result, TranslationError};
7use reqwest::Client;
8use std::sync::Arc;
9use std::time::Duration;
10use tokio::sync::Semaphore;
11use tokio::time::sleep;
12
13#[derive(Clone)]
18pub struct RateLimiter {
19 semaphore: Arc<Semaphore>,
21 delay: Duration,
23}
24
25impl RateLimiter {
26 pub fn new(requests_per_second: f64) -> Self {
40 let permits = (requests_per_second * 2.0).ceil() as usize;
42 let delay = Duration::from_millis((500.0 / requests_per_second) as u64); Self {
45 semaphore: Arc::new(Semaphore::new(permits)),
46 delay,
47 }
48 }
49
50 pub async fn acquire(&self) -> Result<()> {
59 let _permit = self.semaphore.acquire().await
60 .map_err(|e| TranslationError::RateLimitError(format!("Rate limiter error: {}", e)))?;
61 if self.delay > Duration::from_millis(100) {
63 sleep(self.delay).await;
64 }
65 Ok(())
66 }
67}
68
69pub async fn retry_with_backoff<F, Fut, T>(
84 mut operation: F,
85 config: &RetryConfig,
86 rate_limiter: &RateLimiter,
87) -> Result<T>
88where
89 F: FnMut() -> Fut,
90 Fut: std::future::Future<Output = Result<T>>,
91{
92 let mut delay = config.initial_delay_ms;
93
94 for attempt in 0..=config.max_retries {
95 rate_limiter.acquire().await?;
96
97 match operation().await {
98 Ok(result) => return Ok(result),
99 Err(e) if attempt == config.max_retries => return Err(e),
100 Err(e) => {
101 eprintln!("Attempt {} failed: {}. Retrying in {}ms...", attempt + 1, e, delay);
102 sleep(Duration::from_millis(delay)).await;
103 delay = std::cmp::min(
104 (delay as f64 * config.backoff_multiplier) as u64,
105 config.max_delay_ms,
106 );
107 }
108 }
109 }
110
111 unreachable!()
112}
113
114#[derive(Clone)]
136pub struct TranslationService {
137 client: Client,
139 rate_limiter: RateLimiter,
141 config: TranslationConfig,
143}
144
145impl TranslationService {
146 pub fn new(config: TranslationConfig) -> Self {
170 let client = Client::builder()
171 .timeout(std::time::Duration::from_secs(30))
172 .pool_idle_timeout(std::time::Duration::from_secs(30))
173 .pool_max_idle_per_host(5)
174 .tcp_keepalive(std::time::Duration::from_secs(60))
175 .http1_title_case_headers()
176 .http2_keep_alive_interval(None)
177 .user_agent("Mozilla/5.0 (compatible; MarkdownDownloader/1.0)")
178 .build()
179 .unwrap_or_else(|e| {
180 eprintln!("Failed to create optimized client: {}, using default", e);
181 Client::new()
182 });
183
184 Self {
185 client,
186 rate_limiter: RateLimiter::new(config.max_requests_per_second),
187 config,
188 }
189 }
190
191 pub async fn translate(&self, text: &str) -> Result<String> {
240 if !self.config.enabled {
241 return Ok(text.to_string());
242 }
243
244 println!("文本总长度: {} 字符", text.len());
245
246 if text.len() <= self.config.max_text_length {
247 println!("文本较短,直接翻译");
248 return self.translate_chunk(text).await;
249 }
250
251 let chunks = self.split_text_into_chunks(text);
252 println!("文本较长,分为 {} 块进行翻译", chunks.len());
253
254 let mut translated_chunks = Vec::new();
255
256 let mut futures = Vec::new();
258
259 for (i, chunk) in chunks.iter().enumerate() {
260 println!("准备翻译第 {} 块,长度: {} 字符", i + 1, chunk.len());
261
262 if self.is_code_block_chunk(chunk) {
263 let result = chunk.strip_prefix("__CODE_BLOCK__").unwrap_or(chunk).to_string();
265 futures.push(Box::pin(async move { Ok(result) }) as std::pin::Pin<Box<dyn std::future::Future<Output = Result<String>> + Send>>);
266 } else {
267 let chunk_clone = chunk.clone();
269 let translator_clone = self.clone();
270 futures.push(Box::pin(async move {
271 translator_clone.translate_chunk(&chunk_clone).await
272 }) as std::pin::Pin<Box<dyn std::future::Future<Output = Result<String>> + Send>>);
273 }
274 }
275
276 let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(5)); let mut handles = Vec::new();
279
280 for (i, future) in futures.into_iter().enumerate() {
281 let semaphore_clone = semaphore.clone();
282 let handle = tokio::spawn(async move {
283 let _permit = semaphore_clone.acquire().await.unwrap();
284 println!("开始翻译第 {} 块", i + 1);
285 let result = future.await;
286 println!("完成翻译第 {} 块", i + 1);
287 result
288 });
289 handles.push(handle);
290 }
291
292 for handle in handles {
294 let result = handle.await.map_err(|e| TranslationError::Custom(e.to_string()))??;
295 translated_chunks.push(result);
296 }
297
298 Ok(translated_chunks.join("\n\n"))
299 }
300
301 fn split_text_into_chunks(&self, text: &str) -> Vec<String> {
302 let mut chunks = Vec::new();
303 let max_length = self.config.max_text_length;
304
305 if text.len() <= max_length {
306 chunks.push(text.to_string());
307 return chunks;
308 }
309
310 let protected_sections = self.identify_code_blocks(text);
311 let segments = self.split_by_code_blocks(text, &protected_sections);
312
313 let mut current_chunk = String::new();
314
315 for segment in segments {
316 if segment.is_code_block {
317 if !current_chunk.is_empty() {
319 chunks.push(current_chunk.clone());
320 current_chunk.clear();
321 }
322 chunks.push(format!("__CODE_BLOCK__{}", segment.content));
324 } else {
325 let paragraphs = self.split_text_by_empty_lines(&segment.content);
326
327 for paragraph in paragraphs {
328 if paragraph.trim().is_empty() {
329 continue;
330 }
331
332 let potential_length = if current_chunk.is_empty() {
333 paragraph.len()
334 } else {
335 current_chunk.len() + 2 + paragraph.len()
336 };
337
338 if potential_length <= max_length {
339 if !current_chunk.is_empty() {
340 current_chunk.push_str("\n\n");
341 }
342 current_chunk.push_str(¶graph);
343 } else {
344 if !current_chunk.is_empty() {
345 chunks.push(current_chunk.clone());
346 current_chunk.clear();
347 }
348
349 if paragraph.len() > max_length {
350 let sub_chunks = self.split_long_paragraph(¶graph, max_length);
351 chunks.extend(sub_chunks);
352 } else {
353 current_chunk = paragraph;
354 }
355 }
356 }
357 }
358 }
359
360 if !current_chunk.is_empty() {
361 chunks.push(current_chunk);
362 }
363
364 if chunks.is_empty() {
365 chunks.push(text.to_string());
366 }
367
368 chunks
369 }
370
371 fn identify_code_blocks(&self, text: &str) -> Vec<(usize, usize)> {
372 let mut code_blocks = Vec::new();
373 let mut in_code_block = false;
374 let mut current_start = 0;
375
376 let lines: Vec<&str> = text.lines().collect();
377 let mut char_pos = 0;
378
379 for (_i, line) in lines.iter().enumerate() {
380 if line.starts_with("```") {
381 if in_code_block {
382 let end_pos = char_pos + line.len();
383 code_blocks.push((current_start, end_pos));
384 in_code_block = false;
385 } else {
386 current_start = char_pos;
387 in_code_block = true;
388 }
389 }
390 char_pos += line.len() + 1;
391 }
392
393 if in_code_block {
394 code_blocks.push((current_start, text.len()));
395 }
396
397 code_blocks
398 }
399
400 fn split_by_code_blocks(&self, text: &str, code_blocks: &[(usize, usize)]) -> Vec<TextSegment> {
401 let mut segments = Vec::new();
402 let mut last_end = 0;
403
404 for &(start, end) in code_blocks {
405 if start > last_end {
406 let content = text[last_end..start].to_string();
407 if !content.trim().is_empty() {
408 segments.push(TextSegment {
409 content,
410 is_code_block: false,
411 });
412 }
413 }
414
415 let content = text[start..end].to_string();
416 segments.push(TextSegment {
417 content,
418 is_code_block: true,
419 });
420
421 last_end = end;
422 }
423
424 if last_end < text.len() {
425 let content = text[last_end..].to_string();
426 if !content.trim().is_empty() {
427 segments.push(TextSegment {
428 content,
429 is_code_block: false,
430 });
431 }
432 }
433
434 if segments.is_empty() {
435 segments.push(TextSegment {
436 content: text.to_string(),
437 is_code_block: false,
438 });
439 }
440
441 segments
442 }
443
444 fn split_text_by_empty_lines(&self, text: &str) -> Vec<String> {
445 let max_length = self.config.max_text_length;
446
447 if text.len() <= max_length {
448 return vec![text.to_string()];
449 }
450
451 let paragraphs: Vec<&str> = text.split("\n\n").collect();
452 let mut result = Vec::new();
453 let mut current_group = Vec::new();
454 let mut current_length = 0;
455
456 for paragraph in paragraphs {
457 let paragraph = paragraph.trim();
458 if paragraph.is_empty() {
459 continue;
460 }
461
462 let para_len = paragraph.len();
463
464 let potential_length = if current_group.is_empty() {
465 para_len
466 } else {
467 current_length + 2 + para_len
468 };
469
470 if potential_length <= max_length {
471 current_group.push(paragraph);
472 current_length = potential_length;
473 } else {
474 if !current_group.is_empty() {
475 result.push(current_group.join("\n\n"));
476 current_group.clear();
477 }
478
479 if para_len > max_length {
480 let sub_parts = self.split_long_paragraph(paragraph, max_length);
481 result.extend(sub_parts);
482 current_length = 0;
483 } else {
484 current_group.push(paragraph);
485 current_length = para_len;
486 }
487 }
488 }
489
490 if !current_group.is_empty() {
491 result.push(current_group.join("\n\n"));
492 }
493
494 result
495 }
496
497 fn split_long_paragraph(&self, paragraph: &str, max_length: usize) -> Vec<String> {
498 let mut chunks = Vec::new();
499 let mut start = 0;
500
501 while start < paragraph.len() {
502 let end = std::cmp::min(start + max_length, paragraph.len());
503 let mut actual_end = end;
504
505 if end < paragraph.len() {
506 for i in (start..end).rev() {
507 let ch = paragraph.chars().nth(i).unwrap_or(' ');
508 if ch == '.' || ch == '!' || ch == '?' || ch == '。' || ch == '!' || ch == '?' {
509 actual_end = i + 1;
510 break;
511 }
512 }
513
514 if actual_end == end {
515 for i in (start..end).rev() {
516 let ch = paragraph.chars().nth(i).unwrap_or(' ');
517 if ch == ' ' || ch == '\n' || ch == '\t' {
518 actual_end = i + 1;
519 break;
520 }
521 }
522 }
523
524 if actual_end == end && end - start < max_length / 2 {
525 actual_end = end;
526 }
527 }
528
529 let chunk = paragraph[start..actual_end].trim().to_string();
530 if !chunk.is_empty() {
531 chunks.push(chunk);
532 }
533
534 start = actual_end;
535 }
536
537 chunks
538 }
539
540 async fn translate_chunk(&self, text: &str) -> Result<String> {
541 println!("发送翻译请求到: {}", self.config.deeplx_api_url);
542 println!("翻译文本长度: {} 字符", text.len());
543
544 let retry_config = RetryConfig::default();
545 let client = &self.client;
546 let config = &self.config;
547 let text_clone = text.to_string();
548
549 let result = retry_with_backoff(
550 || {
551 let client = client.clone();
552 let config = config.clone();
553 let text = text_clone.clone();
554
555 Box::pin(async move {
556 let response = if config.deeplx_api_url.contains("dptrans") {
557 println!("使用dptrans API格式请求");
558
559 let request = DpTransRequest {
560 text: text.clone(),
561 source_lang: if config.source_lang == "auto" { "auto".to_string() } else { config.source_lang.clone() },
562 target_lang: config.target_lang.clone(),
563 };
564
565 client
566 .post(&config.deeplx_api_url)
567 .header("Content-Type", "application/json")
568 .header("Accept", "application/json, text/plain, */*")
569 .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
570 .json(&request)
571 .send()
572 .await
573 .map_err(|e| {
574 TranslationError::Custom(format!("DeepLX网络请求失败: {}", e))
575 })?
576 } else {
577 println!("使用标准DeepLX API格式请求");
578
579 let request = DeepLXRequest {
580 text: text.clone(),
581 source_lang: config.source_lang.clone(),
582 target_lang: config.target_lang.clone(),
583 };
584
585 client
586 .post(&config.deeplx_api_url)
587 .header("Content-Type", "application/json")
588 .header("Accept", "application/json")
589 .json(&request)
590 .send()
591 .await
592 .map_err(|e| {
593 TranslationError::Custom(format!("DeepLX网络请求失败: {}", e))
594 })?
595 };
596
597 let status = response.status();
598 println!("DeepLX响应状态: {}", status);
599
600 if response.status().is_success() {
601 let response_text = response
602 .text()
603 .await
604 .map_err(|e| TranslationError::Custom(format!("读取响应文本失败: {}", e)))?;
605
606 if let Ok(result) = serde_json::from_str::<DeepLXResponse>(&response_text) {
607 if result.code == 200 {
608 if result.data.is_empty() {
609 Err(TranslationError::Custom("DeepLX返回了空的翻译结果".to_string()))
610 } else {
611 Ok(result.data)
612 }
613 } else {
614 Err(TranslationError::ApiError {
615 code: result.code,
616 message: format!("DeepLX翻译失败,返回代码: {}", result.code)
617 })
618 }
619 } else {
620 if response_text.trim().is_empty() {
621 Err(TranslationError::Custom("API返回了空的翻译结果".to_string()))
622 } else if response_text.starts_with("{") {
623 if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(&response_text) {
624 if let Some(translated) = json_value
625 .get("translated_text")
626 .or_else(|| json_value.get("result"))
627 .or_else(|| json_value.get("translation"))
628 .or_else(|| json_value.get("data"))
629 .and_then(|v| v.as_str())
630 {
631 Ok(translated.to_string())
632 } else {
633 Err(TranslationError::ParseError(format!(
634 "无法从JSON响应中提取翻译结果: {}",
635 response_text
636 )))
637 }
638 } else {
639 Err(TranslationError::ParseError(format!("无法解析JSON响应: {}", response_text)))
640 }
641 } else {
642 println!("假设响应是纯文本翻译结果");
643 Ok(response_text)
644 }
645 }
646 } else {
647 let error_text = response
648 .text()
649 .await
650 .unwrap_or_else(|_| "无法读取错误信息".to_string());
651 Err(TranslationError::ApiError {
652 code: status.as_u16() as i32,
653 message: format!("DeepLX API请求失败: {} - {}", status, error_text)
654 })
655 }
656 })
657 },
658 &retry_config,
659 &self.rate_limiter,
660 )
661 .await?;
662
663 Ok(result)
664 }
665
666 fn is_code_block_chunk(&self, chunk: &str) -> bool {
668 chunk.starts_with("__CODE_BLOCK__") || chunk.trim_start().starts_with("```")
669 }
670}