1use crate::error::{Result, TranslationError};
6use crate::types::{
7 DeepLXRequest, DeepLXResponse, DpTransRequest, RetryConfig, TextSegment, TranslationConfig,
8};
9use reqwest::Client;
10use std::sync::Arc;
11use std::time::Duration;
12use tokio::sync::Semaphore;
13use tokio::time::sleep;
14
15#[derive(Clone)]
20pub struct RateLimiter {
21 semaphore: Arc<Semaphore>,
23 delay: Duration,
25}
26
27impl RateLimiter {
28 pub fn new(requests_per_second: f64) -> Self {
42 let permits = (requests_per_second * 2.0).ceil() as usize;
44 let delay = Duration::from_millis((500.0 / requests_per_second) as u64); Self {
47 semaphore: Arc::new(Semaphore::new(permits)),
48 delay,
49 }
50 }
51
52 pub async fn acquire(&self) -> Result<()> {
61 let _permit =
62 self.semaphore.acquire().await.map_err(|e| {
63 TranslationError::RateLimitError(format!("Rate limiter error: {}", e))
64 })?;
65 if self.delay > Duration::from_millis(100) {
67 sleep(self.delay).await;
68 }
69 Ok(())
70 }
71}
72
73pub async fn retry_with_backoff<F, Fut, T>(
88 mut operation: F,
89 config: &RetryConfig,
90 rate_limiter: &RateLimiter,
91) -> Result<T>
92where
93 F: FnMut() -> Fut,
94 Fut: std::future::Future<Output = Result<T>>,
95{
96 let mut delay = config.initial_delay_ms;
97
98 for attempt in 0..=config.max_retries {
99 rate_limiter.acquire().await?;
100
101 match operation().await {
102 Ok(result) => return Ok(result),
103 Err(e) if attempt == config.max_retries => return Err(e),
104 Err(e) => {
105 eprintln!(
106 "Attempt {} failed: {}. Retrying in {}ms...",
107 attempt + 1,
108 e,
109 delay
110 );
111 sleep(Duration::from_millis(delay)).await;
112 delay = std::cmp::min(
113 (delay as f64 * config.backoff_multiplier) as u64,
114 config.max_delay_ms,
115 );
116 }
117 }
118 }
119
120 unreachable!()
121}
122
123#[derive(Clone)]
145pub struct TranslationService {
146 client: Client,
148 rate_limiter: RateLimiter,
150 config: TranslationConfig,
152}
153
154impl TranslationService {
155 pub fn new(config: TranslationConfig) -> Self {
179 let client = Client::builder()
180 .timeout(std::time::Duration::from_secs(30))
181 .pool_idle_timeout(std::time::Duration::from_secs(30))
182 .pool_max_idle_per_host(5)
183 .tcp_keepalive(std::time::Duration::from_secs(60))
184 .http1_title_case_headers()
185 .http2_keep_alive_interval(None)
186 .user_agent("Mozilla/5.0 (compatible; MarkdownDownloader/1.0)")
187 .build()
188 .unwrap_or_else(|e| {
189 eprintln!("Failed to create optimized client: {}, using default", e);
190 Client::new()
191 });
192
193 Self {
194 client,
195 rate_limiter: RateLimiter::new(config.max_requests_per_second),
196 config,
197 }
198 }
199
200 pub async fn translate(&self, text: &str) -> Result<String> {
249 if !self.config.enabled {
250 return Ok(text.to_string());
251 }
252
253 println!("文本总长度: {} 字符", text.len());
254
255 if text.len() <= self.config.max_text_length {
256 println!("文本较短,直接翻译");
257 return self.translate_chunk(text).await;
258 }
259
260 let chunks = self.split_text_into_chunks(text);
261 println!("文本较长,分为 {} 块进行翻译", chunks.len());
262
263 let mut translated_chunks = Vec::new();
264
265 let mut futures = Vec::new();
267
268 for (i, chunk) in chunks.iter().enumerate() {
269 println!("准备翻译第 {} 块,长度: {} 字符", i + 1, chunk.len());
270
271 if self.is_code_block_chunk(chunk) {
272 let result = chunk
274 .strip_prefix("__CODE_BLOCK__")
275 .unwrap_or(chunk)
276 .to_string();
277 futures.push(Box::pin(async move { Ok(result) })
278 as std::pin::Pin<
279 Box<dyn std::future::Future<Output = Result<String>> + Send>,
280 >);
281 } else {
282 let chunk_clone = chunk.clone();
284 let translator_clone = self.clone();
285 futures.push(Box::pin(async move {
286 translator_clone.translate_chunk(&chunk_clone).await
287 })
288 as std::pin::Pin<
289 Box<dyn std::future::Future<Output = Result<String>> + Send>,
290 >);
291 }
292 }
293
294 let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(5)); let mut handles = Vec::new();
297
298 for (i, future) in futures.into_iter().enumerate() {
299 let semaphore_clone = semaphore.clone();
300 let handle = tokio::spawn(async move {
301 let _permit = semaphore_clone.acquire().await.unwrap();
302 println!("开始翻译第 {} 块", i + 1);
303 let result = future.await;
304 println!("完成翻译第 {} 块", i + 1);
305 result
306 });
307 handles.push(handle);
308 }
309
310 for handle in handles {
312 let result = handle
313 .await
314 .map_err(|e| TranslationError::Custom(e.to_string()))??;
315 translated_chunks.push(result);
316 }
317
318 Ok(translated_chunks.join("\n\n"))
319 }
320
321 fn split_text_into_chunks(&self, text: &str) -> Vec<String> {
322 let mut chunks = Vec::new();
323 let max_length = self.config.max_text_length;
324
325 if text.len() <= max_length {
326 chunks.push(text.to_string());
327 return chunks;
328 }
329
330 let protected_sections = self.identify_code_blocks(text);
331 let segments = self.split_by_code_blocks(text, &protected_sections);
332
333 let mut current_chunk = String::new();
334
335 for segment in segments {
336 if segment.is_code_block {
337 if !current_chunk.is_empty() {
339 chunks.push(current_chunk.clone());
340 current_chunk.clear();
341 }
342 chunks.push(format!("__CODE_BLOCK__{}", segment.content));
344 } else {
345 let paragraphs = self.split_text_by_empty_lines(&segment.content);
346
347 for paragraph in paragraphs {
348 if paragraph.trim().is_empty() {
349 continue;
350 }
351
352 let potential_length = if current_chunk.is_empty() {
353 paragraph.len()
354 } else {
355 current_chunk.len() + 2 + paragraph.len()
356 };
357
358 if potential_length <= max_length {
359 if !current_chunk.is_empty() {
360 current_chunk.push_str("\n\n");
361 }
362 current_chunk.push_str(¶graph);
363 } else {
364 if !current_chunk.is_empty() {
365 chunks.push(current_chunk.clone());
366 current_chunk.clear();
367 }
368
369 if paragraph.len() > max_length {
370 let sub_chunks = self.split_long_paragraph(¶graph, max_length);
371 chunks.extend(sub_chunks);
372 } else {
373 current_chunk = paragraph;
374 }
375 }
376 }
377 }
378 }
379
380 if !current_chunk.is_empty() {
381 chunks.push(current_chunk);
382 }
383
384 if chunks.is_empty() {
385 chunks.push(text.to_string());
386 }
387
388 chunks
389 }
390
391 fn identify_code_blocks(&self, text: &str) -> Vec<(usize, usize)> {
392 let mut code_blocks = Vec::new();
393 let mut in_code_block = false;
394 let mut current_start = 0;
395
396 let lines: Vec<&str> = text.lines().collect();
397 let mut char_pos = 0;
398
399 for (_i, line) in lines.iter().enumerate() {
400 if line.starts_with("```") {
401 if in_code_block {
402 let end_pos = char_pos + line.len();
403 code_blocks.push((current_start, end_pos));
404 in_code_block = false;
405 } else {
406 current_start = char_pos;
407 in_code_block = true;
408 }
409 }
410 char_pos += line.len() + 1;
411 }
412
413 if in_code_block {
414 code_blocks.push((current_start, text.len()));
415 }
416
417 code_blocks
418 }
419
420 fn split_by_code_blocks(&self, text: &str, code_blocks: &[(usize, usize)]) -> Vec<TextSegment> {
421 let mut segments = Vec::new();
422 let mut last_end = 0;
423
424 for &(start, end) in code_blocks {
425 if start > last_end {
426 let content = text[last_end..start].to_string();
427 if !content.trim().is_empty() {
428 segments.push(TextSegment {
429 content,
430 is_code_block: false,
431 });
432 }
433 }
434
435 let content = text[start..end].to_string();
436 segments.push(TextSegment {
437 content,
438 is_code_block: true,
439 });
440
441 last_end = end;
442 }
443
444 if last_end < text.len() {
445 let content = text[last_end..].to_string();
446 if !content.trim().is_empty() {
447 segments.push(TextSegment {
448 content,
449 is_code_block: false,
450 });
451 }
452 }
453
454 if segments.is_empty() {
455 segments.push(TextSegment {
456 content: text.to_string(),
457 is_code_block: false,
458 });
459 }
460
461 segments
462 }
463
464 fn split_text_by_empty_lines(&self, text: &str) -> Vec<String> {
465 let max_length = self.config.max_text_length;
466
467 if text.len() <= max_length {
468 return vec![text.to_string()];
469 }
470
471 let paragraphs: Vec<&str> = text.split("\n\n").collect();
472 let mut result = Vec::new();
473 let mut current_group = Vec::new();
474 let mut current_length = 0;
475
476 for paragraph in paragraphs {
477 let paragraph = paragraph.trim();
478 if paragraph.is_empty() {
479 continue;
480 }
481
482 let para_len = paragraph.len();
483
484 let potential_length = if current_group.is_empty() {
485 para_len
486 } else {
487 current_length + 2 + para_len
488 };
489
490 if potential_length <= max_length {
491 current_group.push(paragraph);
492 current_length = potential_length;
493 } else {
494 if !current_group.is_empty() {
495 result.push(current_group.join("\n\n"));
496 current_group.clear();
497 }
498
499 if para_len > max_length {
500 let sub_parts = self.split_long_paragraph(paragraph, max_length);
501 result.extend(sub_parts);
502 current_length = 0;
503 } else {
504 current_group.push(paragraph);
505 current_length = para_len;
506 }
507 }
508 }
509
510 if !current_group.is_empty() {
511 result.push(current_group.join("\n\n"));
512 }
513
514 result
515 }
516
517 fn split_long_paragraph(&self, paragraph: &str, max_length: usize) -> Vec<String> {
518 let mut chunks = Vec::new();
519 let mut start = 0;
520
521 while start < paragraph.len() {
522 let end = std::cmp::min(start + max_length, paragraph.len());
523 let mut actual_end = end;
524
525 if end < paragraph.len() {
526 for i in (start..end).rev() {
527 let ch = paragraph.chars().nth(i).unwrap_or(' ');
528 if ch == '.' || ch == '!' || ch == '?' || ch == '。' || ch == '!' || ch == '?'
529 {
530 actual_end = i + 1;
531 break;
532 }
533 }
534
535 if actual_end == end {
536 for i in (start..end).rev() {
537 let ch = paragraph.chars().nth(i).unwrap_or(' ');
538 if ch == ' ' || ch == '\n' || ch == '\t' {
539 actual_end = i + 1;
540 break;
541 }
542 }
543 }
544
545 if actual_end == end && end - start < max_length / 2 {
546 actual_end = end;
547 }
548 }
549
550 let chunk = paragraph[start..actual_end].trim().to_string();
551 if !chunk.is_empty() {
552 chunks.push(chunk);
553 }
554
555 start = actual_end;
556 }
557
558 chunks
559 }
560
561 async fn translate_chunk(&self, text: &str) -> Result<String> {
562 println!("发送翻译请求到: {}", self.config.deeplx_api_url);
563 println!("翻译文本长度: {} 字符", text.len());
564
565 let retry_config = RetryConfig::default();
566 let client = &self.client;
567 let config = &self.config;
568 let text_clone = text.to_string();
569
570 let result = retry_with_backoff(
571 || {
572 let client = client.clone();
573 let config = config.clone();
574 let text = text_clone.clone();
575
576 Box::pin(async move {
577 let response = if config.deeplx_api_url.contains("dptrans") {
578 println!("使用dptrans API格式请求");
579
580 let request = DpTransRequest {
581 text: text.clone(),
582 source_lang: if config.source_lang == "auto" {
583 "auto".to_string()
584 } else {
585 config.source_lang.clone()
586 },
587 target_lang: config.target_lang.clone(),
588 };
589
590 client
591 .post(&config.deeplx_api_url)
592 .header("Content-Type", "application/json")
593 .header("Accept", "application/json, text/plain, */*")
594 .header(
595 "User-Agent",
596 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
597 )
598 .json(&request)
599 .send()
600 .await
601 .map_err(|e| {
602 TranslationError::Custom(format!("DeepLX网络请求失败: {}", e))
603 })?
604 } else {
605 println!("使用标准DeepLX API格式请求");
606
607 let request = DeepLXRequest {
608 text: text.clone(),
609 source_lang: config.source_lang.clone(),
610 target_lang: config.target_lang.clone(),
611 };
612
613 client
614 .post(&config.deeplx_api_url)
615 .header("Content-Type", "application/json")
616 .header("Accept", "application/json")
617 .json(&request)
618 .send()
619 .await
620 .map_err(|e| {
621 TranslationError::Custom(format!("DeepLX网络请求失败: {}", e))
622 })?
623 };
624
625 let status = response.status();
626 println!("DeepLX响应状态: {}", status);
627
628 if response.status().is_success() {
629 let response_text = response.text().await.map_err(|e| {
630 TranslationError::Custom(format!("读取响应文本失败: {}", e))
631 })?;
632
633 if let Ok(result) = serde_json::from_str::<DeepLXResponse>(&response_text) {
634 if result.code == 200 {
635 if result.data.is_empty() {
636 Err(TranslationError::Custom(
637 "DeepLX返回了空的翻译结果".to_string(),
638 ))
639 } else {
640 Ok(result.data)
641 }
642 } else {
643 Err(TranslationError::ApiError {
644 code: result.code,
645 message: format!("DeepLX翻译失败,返回代码: {}", result.code),
646 })
647 }
648 } else {
649 if response_text.trim().is_empty() {
650 Err(TranslationError::Custom(
651 "API返回了空的翻译结果".to_string(),
652 ))
653 } else if response_text.starts_with("{") {
654 if let Ok(json_value) =
655 serde_json::from_str::<serde_json::Value>(&response_text)
656 {
657 if let Some(translated) = json_value
658 .get("translated_text")
659 .or_else(|| json_value.get("result"))
660 .or_else(|| json_value.get("translation"))
661 .or_else(|| json_value.get("data"))
662 .and_then(|v| v.as_str())
663 {
664 Ok(translated.to_string())
665 } else {
666 Err(TranslationError::ParseError(format!(
667 "无法从JSON响应中提取翻译结果: {}",
668 response_text
669 )))
670 }
671 } else {
672 Err(TranslationError::ParseError(format!(
673 "无法解析JSON响应: {}",
674 response_text
675 )))
676 }
677 } else {
678 println!("假设响应是纯文本翻译结果");
679 Ok(response_text)
680 }
681 }
682 } else {
683 let error_text = response
684 .text()
685 .await
686 .unwrap_or_else(|_| "无法读取错误信息".to_string());
687 Err(TranslationError::ApiError {
688 code: status.as_u16() as i32,
689 message: format!("DeepLX API请求失败: {} - {}", status, error_text),
690 })
691 }
692 })
693 },
694 &retry_config,
695 &self.rate_limiter,
696 )
697 .await?;
698
699 Ok(result)
700 }
701
702 fn is_code_block_chunk(&self, chunk: &str) -> bool {
704 chunk.starts_with("__CODE_BLOCK__") || chunk.trim_start().starts_with("```")
705 }
706}