1use std::path::PathBuf;
2
3use crate::scheduler::SchedulerConfig;
4
/// Settings describing a single translation job.
///
/// NOTE(review): nothing in this file constructs or reads this struct, so the
/// per-field notes are based on names and types only — confirm with callers.
#[derive(Debug, Clone)]
pub struct TranslationConfig {
    /// Optional source language; presumably `None` means it was not
    /// specified — TODO confirm how callers treat the missing case.
    pub source_language: Option<String>,
    /// Language to translate into.
    pub target_language: String,
    /// Provider name as a free-form string.
    pub provider: String,
    /// Optional model name.
    pub model: Option<String>,
    /// Concurrency setting for the job (units/semantics defined by the caller).
    pub concurrency: usize,
    /// Attempt limit for the job (semantics defined by the caller).
    pub max_attempts: usize,
    /// Output path.
    pub output: PathBuf,
}
15
/// Token budgets that control how text is segmented.
///
/// Each translation profile supplies its own pair of values; the `Default`
/// impl carries 1 200 / 160.
#[derive(Debug, Clone)]
pub struct SegmentationConfig {
    /// Token limit for one segment.
    pub max_segment_tokens: usize,
    /// Token budget reserved for context.
    pub context_tokens: usize,
}

impl Default for SegmentationConfig {
    /// Returns a 1 200-token segment limit with 160 context tokens.
    fn default() -> Self {
        let max_segment_tokens = 1_200;
        let context_tokens = 160;

        SegmentationConfig {
            max_segment_tokens,
            context_tokens,
        }
    }
}
30
/// Identifies which prompt version is in use.
///
/// Each variant has a fixed snake_case name returned by `as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum PromptVersion {
    /// Named `"v1"`.
    V1,
    /// Named `"batch_v1"`.
    BatchV1,
    /// Named `"v2"`.
    V2,
    /// Named `"batch_v2"`.
    BatchV2,
}
38
39impl PromptVersion {
40 pub fn as_str(self) -> &'static str {
41 match self {
42 PromptVersion::V1 => "v1",
43 PromptVersion::BatchV1 => "batch_v1",
44 PromptVersion::V2 => "v2",
45 PromptVersion::BatchV2 => "batch_v2",
46 }
47 }
48}
49
/// Named bundle of run settings; `resolve` expands a profile into concrete
/// values and `namespace_str` gives its snake_case name.
///
/// With the `cli` feature enabled the enum also derives `clap::ValueEnum`.
// Variant notes use `//` rather than `///` so they cannot leak into text that
// the clap derive generates from doc comments.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TranslationProfile {
    // Batching disabled, scheduler concurrency 4, most retries of any profile.
    Safe,
    // Batching enabled, scheduler concurrency 16.
    Balanced,
    // Batching enabled, scheduler concurrency 64, single scheduler attempt.
    Fastest,
    // Scheduler concurrency 1, longest timeout, honours the retry-after header.
    FreeTier,
    // Scheduler concurrency 96, no context tokens, item repair disabled.
    TurboTextOnly,
    // Scheduler concurrency 32, single provider attempt, thinking disabled.
    V1Fast,
}
60
61impl TranslationProfile {
62 pub fn namespace_str(self) -> &'static str {
63 match self {
64 TranslationProfile::Safe => "safe",
65 TranslationProfile::Balanced => "balanced",
66 TranslationProfile::Fastest => "fastest",
67 TranslationProfile::FreeTier => "free_tier",
68 TranslationProfile::TurboTextOnly => "turbo_text_only",
69 TranslationProfile::V1Fast => "v1_fast",
70 }
71 }
72
73 pub fn resolve(self) -> ResolvedRunSettings {
74 match self {
75 Self::Safe => ResolvedRunSettings {
76 profile: self,
77 segmentation: SegmentationConfig {
78 max_segment_tokens: 1_200,
79 context_tokens: 160,
80 },
81 batch: BatchConfig {
82 enabled: false,
83 target_tokens: 0,
84 max_items: 0,
85 adaptive_sizing: false,
86 split_on_json_failure: true,
87 repair_invalid_items: true,
88 },
89 scheduler: SchedulerConfig {
90 concurrency: 4,
91 max_attempts: 3,
92 },
93 compact_prompts: false,
94 retry_failed_only: false,
95 adaptive_concurrency: false,
96 provider: ProviderRuntimeConfig {
97 timeout_seconds: 120,
98 provider_max_attempts: 6,
99 validation_max_attempts: 3,
100 retry_after_policy: RetryAfterPolicy::JitteredExponential,
101 max_backoff_seconds: 60,
102 thinking_disabled: false,
103 model_context_tokens: None,
104 max_output_tokens: None,
105 batch_max_output_tokens: None,
106 json_mode: JsonMode::Auto,
107 max_idle_per_host: 32,
108 },
109 qa: QaRunConfig {
110 concurrency: 4,
111 batch_target_tokens: 4_000,
112 model: None,
113 provider: None,
114 base_url: None,
115 api_key_env: None,
116 },
117 double_check: DoubleCheckConfig {
118 mode: DoubleCheckMode::Off,
119 model: None,
120 provider: None,
121 base_url: None,
122 api_key_env: None,
123 concurrency: 4,
124 batch_target_tokens: 8_000,
125 auto_correct: false,
126 correction_rounds: 1,
127 },
128 },
129 Self::Balanced => ResolvedRunSettings {
130 profile: self,
131 segmentation: SegmentationConfig {
132 max_segment_tokens: 2_500,
133 context_tokens: 80,
134 },
135 batch: BatchConfig {
136 enabled: true,
137 target_tokens: 8_000,
138 max_items: 64,
139 adaptive_sizing: false,
140 split_on_json_failure: true,
141 repair_invalid_items: true,
142 },
143 scheduler: SchedulerConfig {
144 concurrency: 16,
145 max_attempts: 2,
146 },
147 compact_prompts: true,
148 retry_failed_only: true,
149 adaptive_concurrency: true,
150 provider: ProviderRuntimeConfig {
151 timeout_seconds: 120,
152 provider_max_attempts: 2,
153 validation_max_attempts: 1,
154 retry_after_policy: RetryAfterPolicy::JitteredExponential,
155 max_backoff_seconds: 30,
156 thinking_disabled: false,
157 model_context_tokens: None,
158 max_output_tokens: None,
159 batch_max_output_tokens: None,
160 json_mode: JsonMode::Auto,
161 max_idle_per_host: 32,
162 },
163 qa: QaRunConfig {
164 concurrency: 8,
165 batch_target_tokens: 8_000,
166 model: None,
167 provider: None,
168 base_url: None,
169 api_key_env: None,
170 },
171 double_check: DoubleCheckConfig {
172 mode: DoubleCheckMode::Off,
173 model: None,
174 provider: None,
175 base_url: None,
176 api_key_env: None,
177 concurrency: 4,
178 batch_target_tokens: 8_000,
179 auto_correct: false,
180 correction_rounds: 1,
181 },
182 },
183 Self::Fastest => ResolvedRunSettings {
184 profile: self,
185 segmentation: SegmentationConfig {
186 max_segment_tokens: 6_000,
187 context_tokens: 20,
188 },
189 batch: BatchConfig {
190 enabled: true,
191 target_tokens: 16_000,
192 max_items: 160,
193 adaptive_sizing: true,
194 split_on_json_failure: true,
195 repair_invalid_items: true,
196 },
197 scheduler: SchedulerConfig {
198 concurrency: 64,
199 max_attempts: 1,
200 },
201 compact_prompts: true,
202 retry_failed_only: true,
203 adaptive_concurrency: true,
204 provider: ProviderRuntimeConfig {
205 timeout_seconds: 120,
206 provider_max_attempts: 2,
207 validation_max_attempts: 1,
208 retry_after_policy: RetryAfterPolicy::JitteredExponential,
209 max_backoff_seconds: 10,
210 thinking_disabled: false,
211 model_context_tokens: None,
212 max_output_tokens: None,
213 batch_max_output_tokens: None,
214 json_mode: JsonMode::Auto,
215 max_idle_per_host: 32,
216 },
217 qa: QaRunConfig {
218 concurrency: 16,
219 batch_target_tokens: 12_000,
220 model: None,
221 provider: None,
222 base_url: None,
223 api_key_env: None,
224 },
225 double_check: DoubleCheckConfig {
226 mode: DoubleCheckMode::Off,
227 model: None,
228 provider: None,
229 base_url: None,
230 api_key_env: None,
231 concurrency: 4,
232 batch_target_tokens: 12_000,
233 auto_correct: false,
234 correction_rounds: 1,
235 },
236 },
237 Self::FreeTier => ResolvedRunSettings {
238 profile: self,
239 segmentation: SegmentationConfig {
240 max_segment_tokens: 2_500,
241 context_tokens: 80,
242 },
243 batch: BatchConfig {
244 enabled: true,
245 target_tokens: 8_000,
246 max_items: 64,
247 adaptive_sizing: false,
248 split_on_json_failure: false,
249 repair_invalid_items: true,
250 },
251 scheduler: SchedulerConfig {
252 concurrency: 1,
253 max_attempts: 2,
254 },
255 compact_prompts: true,
256 retry_failed_only: true,
257 adaptive_concurrency: true,
258 provider: ProviderRuntimeConfig {
259 timeout_seconds: 300,
260 provider_max_attempts: 2,
261 validation_max_attempts: 1,
262 retry_after_policy: RetryAfterPolicy::RespectHeader,
263 max_backoff_seconds: 90,
264 thinking_disabled: false,
265 model_context_tokens: None,
266 max_output_tokens: None,
267 batch_max_output_tokens: None,
268 json_mode: JsonMode::Auto,
269 max_idle_per_host: 8,
270 },
271 qa: QaRunConfig {
272 concurrency: 1,
273 batch_target_tokens: 4_000,
274 model: None,
275 provider: None,
276 base_url: None,
277 api_key_env: None,
278 },
279 double_check: DoubleCheckConfig {
280 mode: DoubleCheckMode::Off,
281 model: None,
282 provider: None,
283 base_url: None,
284 api_key_env: None,
285 concurrency: 1,
286 batch_target_tokens: 4_000,
287 auto_correct: false,
288 correction_rounds: 1,
289 },
290 },
291 Self::TurboTextOnly => ResolvedRunSettings {
292 profile: self,
293 segmentation: SegmentationConfig {
294 max_segment_tokens: 12_000,
295 context_tokens: 0,
296 },
297 batch: BatchConfig {
298 enabled: true,
299 target_tokens: 24_000,
300 max_items: 250,
301 adaptive_sizing: true,
302 split_on_json_failure: true,
303 repair_invalid_items: false,
304 },
305 scheduler: SchedulerConfig {
306 concurrency: 96,
307 max_attempts: 1,
308 },
309 compact_prompts: true,
310 retry_failed_only: true,
311 adaptive_concurrency: true,
312 provider: ProviderRuntimeConfig {
313 timeout_seconds: 120,
314 provider_max_attempts: 1,
315 validation_max_attempts: 1,
316 retry_after_policy: RetryAfterPolicy::None,
317 max_backoff_seconds: 5,
318 thinking_disabled: false,
319 model_context_tokens: None,
320 max_output_tokens: None,
321 batch_max_output_tokens: None,
322 json_mode: JsonMode::Auto,
323 max_idle_per_host: 64,
324 },
325 qa: QaRunConfig {
326 concurrency: 16,
327 batch_target_tokens: 16_000,
328 model: None,
329 provider: None,
330 base_url: None,
331 api_key_env: None,
332 },
333 double_check: DoubleCheckConfig {
334 mode: DoubleCheckMode::Off,
335 model: None,
336 provider: None,
337 base_url: None,
338 api_key_env: None,
339 concurrency: 4,
340 batch_target_tokens: 16_000,
341 auto_correct: false,
342 correction_rounds: 1,
343 },
344 },
345 Self::V1Fast => ResolvedRunSettings {
346 profile: self,
347 segmentation: SegmentationConfig {
348 max_segment_tokens: 12_000,
349 context_tokens: 20,
350 },
351 batch: BatchConfig {
352 enabled: true,
353 target_tokens: 16_000,
354 max_items: 128,
355 adaptive_sizing: true,
356 split_on_json_failure: true,
357 repair_invalid_items: true,
358 },
359 scheduler: SchedulerConfig {
360 concurrency: 32,
361 max_attempts: 1,
362 },
363 compact_prompts: true,
364 retry_failed_only: true,
365 adaptive_concurrency: true,
366 provider: ProviderRuntimeConfig {
367 timeout_seconds: 120,
368 provider_max_attempts: 1,
369 validation_max_attempts: 1,
370 retry_after_policy: RetryAfterPolicy::None,
371 max_backoff_seconds: 5,
372 thinking_disabled: true,
373 model_context_tokens: None,
374 max_output_tokens: None,
375 batch_max_output_tokens: None,
376 json_mode: JsonMode::Auto,
377 max_idle_per_host: 64,
378 },
379 qa: QaRunConfig {
380 concurrency: 4,
381 batch_target_tokens: 4_000,
382 model: None,
383 provider: None,
384 base_url: None,
385 api_key_env: None,
386 },
387 double_check: DoubleCheckConfig {
388 mode: DoubleCheckMode::Off,
389 model: None,
390 provider: None,
391 base_url: None,
392 api_key_env: None,
393 concurrency: 4,
394 batch_target_tokens: 8_000,
395 auto_correct: false,
396 correction_rounds: 1,
397 },
398 },
399 }
400 }
401}
402
/// Concrete run settings produced by `TranslationProfile::resolve`.
///
/// `apply_provider_preset_runtime` can overlay provider-preset overrides on
/// top of the profile's values.
#[derive(Debug, Clone)]
pub struct ResolvedRunSettings {
    /// Profile these settings were resolved from.
    pub profile: TranslationProfile,
    /// Segment and context token budgets.
    pub segmentation: SegmentationConfig,
    /// Batching settings.
    pub batch: BatchConfig,
    /// Scheduler concurrency and attempt limit.
    pub scheduler: SchedulerConfig,
    /// Provider runtime settings (timeouts, retries, token limits, JSON mode).
    pub provider: ProviderRuntimeConfig,
    /// `false` only for the `Safe` profile.
    pub compact_prompts: bool,
    /// `false` only for the `Safe` profile.
    pub retry_failed_only: bool,
    /// `false` only for the `Safe` profile; presets may override it.
    pub adaptive_concurrency: bool,
    /// QA pass settings.
    pub qa: QaRunConfig,
    /// Double-check pass settings.
    pub double_check: DoubleCheckConfig,
}
416
417impl ResolvedRunSettings {
418 pub fn apply_provider_preset_runtime(&mut self, overrides: ProviderPresetRuntimeOverrides) {
419 if let Some(v) = overrides.scheduler_concurrency {
420 self.scheduler.concurrency = v.max(1);
421 }
422 if let Some(v) = overrides.provider_max_attempts {
423 self.provider.provider_max_attempts = v.max(1);
424 }
425 if let Some(v) = overrides.validation_max_attempts {
426 self.provider.validation_max_attempts = v.max(1);
427 }
428 if let Some(v) = overrides.retry_after_policy {
429 self.provider.retry_after_policy = v;
430 }
431 if let Some(v) = overrides.max_backoff_seconds {
432 self.provider.max_backoff_seconds = v;
433 }
434 if let Some(v) = overrides.timeout_seconds {
435 self.provider.timeout_seconds = v;
436 }
437 if let Some(v) = overrides.batch_enabled {
438 self.batch.enabled = v;
439 }
440 if let Some(v) = overrides.batch_target_tokens {
441 self.batch.target_tokens = v;
442 }
443 if let Some(v) = overrides.batch_max_items {
444 self.batch.max_items = v;
445 }
446 if let Some(v) = overrides.adaptive_batch_sizing {
447 self.batch.adaptive_sizing = v;
448 }
449 if let Some(v) = overrides.compact_prompts {
450 self.compact_prompts = v;
451 }
452 if let Some(v) = overrides.adaptive_concurrency {
453 self.adaptive_concurrency = v;
454 }
455 if let Some(v) = overrides.thinking_disabled {
456 self.provider.thinking_disabled = v;
457 }
458 if let Some(v) = overrides.model_context_tokens {
459 self.provider.model_context_tokens = Some(v);
460 }
461 if let Some(v) = overrides.max_output_tokens {
462 self.provider.max_output_tokens = Some(v);
463 }
464 if let Some(v) = overrides.batch_max_output_tokens {
465 self.provider.batch_max_output_tokens = Some(v);
466 }
467 if let Some(v) = overrides.json_mode {
468 self.provider.json_mode = v;
469 }
470 if let Some(v) = overrides.max_idle_per_host {
471 self.provider.max_idle_per_host = v;
472 }
473 }
474}
475
/// Batching settings carried by `ResolvedRunSettings`.
///
/// NOTE(review): the code that consumes these values is not in this file; the
/// field notes restate names and the values the profiles assign.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether batching is used; only the `Safe` profile sets this to `false`.
    pub enabled: bool,
    /// Token budget for one batch (0 in the `Safe` profile, where batching is off).
    pub target_tokens: usize,
    /// Item limit for one batch (0 in the `Safe` profile, where batching is off).
    pub max_items: usize,
    /// Adaptive batch sizing switch.
    pub adaptive_sizing: bool,
    /// Presumably splits a batch again when its JSON response cannot be
    /// used — TODO confirm against the batch runner.
    pub split_on_json_failure: bool,
    /// Presumably repairs individual invalid items instead of failing the
    /// batch — TODO confirm against the batch runner.
    pub repair_invalid_items: bool,
}
485
/// Settings for the QA pass.
///
/// Every profile in this file leaves the four optional endpoint fields as
/// `None`. NOTE(review): presumably `None` means "reuse the translation
/// endpoint" — confirm with the code that reads this struct.
#[derive(Debug, Clone)]
pub struct QaRunConfig {
    /// Concurrency for the QA pass.
    pub concurrency: usize,
    /// Token budget for one QA batch.
    pub batch_target_tokens: usize,
    /// Optional model name.
    pub model: Option<String>,
    /// Optional provider name.
    pub provider: Option<String>,
    /// Optional base URL.
    pub base_url: Option<String>,
    /// Optional name of the environment variable holding the API key.
    pub api_key_env: Option<String>,
}
495
/// Selects the double-check mode.
///
/// Every profile in this file resolves to `Off`. With the `cli` feature the
/// enum also derives `clap::ValueEnum`.
// Variant notes use `//` rather than `///` so they cannot leak into text that
// the clap derive generates from doc comments.
// NOTE(review): what each non-`Off` mode checks is defined by the consumer,
// which is not in this file.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum DoubleCheckMode {
    // Value used by every built-in profile.
    Off,
    Formatting,
    Semantic,
    Full,
}
504
/// Settings for the double-check pass.
///
/// Every profile in this file uses `DoubleCheckMode::Off`, leaves the four
/// optional endpoint fields as `None`, disables `auto_correct` and sets
/// `correction_rounds` to 1.
#[derive(Debug, Clone)]
pub struct DoubleCheckConfig {
    /// Selected double-check mode.
    pub mode: DoubleCheckMode,
    /// Optional model name.
    pub model: Option<String>,
    /// Optional provider name.
    pub provider: Option<String>,
    /// Optional base URL.
    pub base_url: Option<String>,
    /// Optional name of the environment variable holding the API key.
    pub api_key_env: Option<String>,
    /// Concurrency for the double-check pass.
    pub concurrency: usize,
    /// Token budget for one double-check batch.
    pub batch_target_tokens: usize,
    /// Automatic correction switch.
    pub auto_correct: bool,
    /// Number of correction rounds.
    pub correction_rounds: usize,
}
517
/// Provider-facing runtime settings: timeout, retry behaviour, token limits,
/// JSON mode and connection pooling.
///
/// Profiles set every field; provider presets may overwrite any of them via
/// `ResolvedRunSettings::apply_provider_preset_runtime`.
#[derive(Debug, Clone)]
pub struct ProviderRuntimeConfig {
    /// Request timeout in seconds.
    pub timeout_seconds: u64,
    /// Provider attempt limit; preset overrides are raised to at least 1.
    pub provider_max_attempts: usize,
    /// Validation attempt limit; preset overrides are raised to at least 1.
    pub validation_max_attempts: usize,
    /// Retry/backoff policy.
    pub retry_after_policy: RetryAfterPolicy,
    /// Upper bound for backoff, in seconds.
    pub max_backoff_seconds: u64,
    /// Thinking-disabled switch; among the profiles only `V1Fast` sets it.
    pub thinking_disabled: bool,
    /// Optional model context size in tokens; no profile sets it.
    pub model_context_tokens: Option<u32>,
    /// Optional output token cap; no profile sets it.
    pub max_output_tokens: Option<u32>,
    /// Optional output token cap for batch requests; no profile sets it.
    pub batch_max_output_tokens: Option<u32>,
    /// JSON mode; every profile uses `JsonMode::Auto`.
    pub json_mode: JsonMode,
    /// Idle-connection limit per host (presumably handed to the HTTP client
    /// pool — TODO confirm).
    pub max_idle_per_host: usize,
}
532
/// Selects the JSON mode used for provider requests.
///
/// Every profile and preset in this file uses `Auto`. With the `cli` feature
/// the enum also derives `clap::ValueEnum`.
// Variant notes use `//` rather than `///` so they cannot leak into text that
// the clap derive generates from doc comments.
// NOTE(review): judging by the names, `ResponseFormat` uses a provider-side
// response-format option and `PromptOnly` relies on prompt instructions
// alone — confirm with the provider client.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum JsonMode {
    // Value used by every built-in profile and preset.
    Auto,
    ResponseFormat,
    PromptOnly,
}
540
/// Scope of context, either a chapter or the whole book.
///
/// `Chapter` is the `Default`. `as_str` returns the lowercase name. With the
/// `cli` feature the enum also derives `clap::ValueEnum`.
// Variant notes use `//` rather than `///` so they cannot leak into text that
// the clap derive generates from doc comments.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ContextScope {
    // Default scope; named "chapter".
    #[default]
    Chapter,
    // Named "book".
    Book,
}
551
552impl ContextScope {
553 pub fn as_str(self) -> &'static str {
554 match self {
555 ContextScope::Chapter => "chapter",
556 ContextScope::Book => "book",
557 }
558 }
559}
560
/// Clamps a computed output-token budget to the model context and user cap.
///
/// Steps, in order:
/// 1. If `model_context_tokens` is known, the budget is limited to the context
///    left after `estimated_prompt_tokens` minus a 256-token safety margin.
///    That ceiling is never taken below 512, even when less room remains.
/// 2. If `user_cap` is given, the budget is limited to it.
/// 3. The result is raised to at least 256, even if `user_cap` is lower.
///
/// An `estimated_prompt_tokens` larger than `u32::MAX` is treated as
/// `u32::MAX` rather than being truncated, so an enormous prompt can never
/// look small and inflate the budget.
pub fn cap_output_tokens(
    computed: u32,
    estimated_prompt_tokens: usize,
    model_context_tokens: Option<u32>,
    user_cap: Option<u32>,
) -> u32 {
    /// Tokens kept free between prompt + output and the context limit.
    const CONTEXT_SAFETY_MARGIN: u32 = 256;
    /// Lowest ceiling the context check may impose.
    const MIN_CONTEXT_CEILING: u32 = 512;
    /// Lowest value this function ever returns.
    const MIN_OUTPUT_TOKENS: u32 = 256;

    let mut out = computed;

    if let Some(context) = model_context_tokens {
        // Clamp before narrowing: after `min` the value fits in `u32`, so the
        // cast is lossless. A bare `as u32` would wrap large estimates.
        let prompt = estimated_prompt_tokens.min(u32::MAX as usize) as u32;
        let headroom = context
            .saturating_sub(prompt)
            .saturating_sub(CONTEXT_SAFETY_MARGIN);
        out = out.min(headroom.max(MIN_CONTEXT_CEILING));
    }

    if let Some(cap) = user_cap {
        out = out.min(cap);
    }

    out.max(MIN_OUTPUT_TOKENS)
}
582
/// Named provider preset.
///
/// `resolve` returns an endpoint plus runtime overrides for every variant
/// except `Auto` and `Custom`, which resolve to `None`. With the `cli` feature
/// the enum also derives `clap::ValueEnum`.
// Variant notes use `//` rather than `///` so they cannot leak into text that
// the clap derive generates from doc comments.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ProviderPreset {
    // No preset overrides; `endpoint_or_default` falls back to the DeepSeek endpoint.
    Auto,
    // OpenRouter, model "google/gemini-2.5-flash-lite", scheduler concurrency 2.
    OpenRouterFree,
    // OpenRouter, model "google/gemini-2.5-flash", scheduler concurrency 32.
    OpenRouterPaidFast,
    // DeepSeek, model "deepseek-v4-flash", scheduler concurrency 1.
    DeepSeekFree,
    // DeepSeek, model "deepseek-v4-flash", scheduler concurrency 8.
    DeepSeekPaid,
    // OpenRouter, model "google/gemini-2.5-flash-lite", scheduler concurrency 40.
    GeminiFlashLite,
    // No preset overrides; `endpoint_or_default` uses the caller's endpoint if given.
    Custom,
}
594
595impl ProviderPreset {
596 pub fn resolve(self) -> Option<ProviderPresetResolved> {
597 match self {
598 ProviderPreset::Auto | ProviderPreset::Custom => None,
599 ProviderPreset::OpenRouterFree => Some(ProviderPresetResolved {
600 endpoint: ModelEndpoint {
601 provider: "openrouter".to_string(),
602 model: "google/gemini-2.5-flash-lite".to_string(),
603 base_url: Some("https://openrouter.ai/api/v1".to_string()),
604 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
605 },
606 runtime: ProviderPresetRuntimeOverrides {
607 scheduler_concurrency: Some(2),
608 provider_max_attempts: Some(1),
609 validation_max_attempts: Some(1),
610 retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
611 max_backoff_seconds: Some(90),
612 timeout_seconds: Some(180),
613 batch_enabled: Some(true),
614 batch_target_tokens: Some(6_000),
615 batch_max_items: Some(48),
616 compact_prompts: Some(true),
617 adaptive_concurrency: Some(true),
618 thinking_disabled: Some(true),
619 json_mode: Some(JsonMode::Auto),
620 max_idle_per_host: Some(8),
621 ..Default::default()
622 },
623 }),
624 ProviderPreset::OpenRouterPaidFast => Some(ProviderPresetResolved {
625 endpoint: ModelEndpoint {
626 provider: "openrouter".to_string(),
627 model: "google/gemini-2.5-flash".to_string(),
628 base_url: Some("https://openrouter.ai/api/v1".to_string()),
629 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
630 },
631 runtime: ProviderPresetRuntimeOverrides {
632 scheduler_concurrency: Some(32),
633 provider_max_attempts: Some(1),
634 validation_max_attempts: Some(1),
635 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
636 max_backoff_seconds: Some(15),
637 timeout_seconds: Some(120),
638 batch_enabled: Some(true),
639 batch_target_tokens: Some(16_000),
640 batch_max_items: Some(128),
641 adaptive_batch_sizing: Some(true),
642 compact_prompts: Some(true),
643 adaptive_concurrency: Some(true),
644 thinking_disabled: Some(true),
645 json_mode: Some(JsonMode::Auto),
646 max_idle_per_host: Some(64),
647 ..Default::default()
648 },
649 }),
650 ProviderPreset::DeepSeekFree => Some(ProviderPresetResolved {
651 endpoint: ModelEndpoint {
652 provider: "deepseek".to_string(),
653 model: "deepseek-v4-flash".to_string(),
654 base_url: Some("https://api.deepseek.com/v1".to_string()),
655 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
656 },
657 runtime: ProviderPresetRuntimeOverrides {
658 scheduler_concurrency: Some(1),
659 provider_max_attempts: Some(1),
660 validation_max_attempts: Some(1),
661 retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
662 max_backoff_seconds: Some(120),
663 timeout_seconds: Some(240),
664 batch_enabled: Some(true),
665 batch_target_tokens: Some(4_000),
666 batch_max_items: Some(32),
667 compact_prompts: Some(true),
668 adaptive_concurrency: Some(false),
669 thinking_disabled: Some(true),
670 json_mode: Some(JsonMode::Auto),
671 max_idle_per_host: Some(4),
672 ..Default::default()
673 },
674 }),
675 ProviderPreset::DeepSeekPaid => Some(ProviderPresetResolved {
676 endpoint: ModelEndpoint {
677 provider: "deepseek".to_string(),
678 model: "deepseek-v4-flash".to_string(),
679 base_url: Some("https://api.deepseek.com/v1".to_string()),
680 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
681 },
682 runtime: ProviderPresetRuntimeOverrides {
683 scheduler_concurrency: Some(8),
684 provider_max_attempts: Some(2),
685 validation_max_attempts: Some(1),
686 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
687 max_backoff_seconds: Some(30),
688 timeout_seconds: Some(180),
689 batch_enabled: Some(true),
690 batch_target_tokens: Some(12_000),
691 batch_max_items: Some(96),
692 adaptive_batch_sizing: Some(true),
693 compact_prompts: Some(true),
694 adaptive_concurrency: Some(true),
695 thinking_disabled: Some(false),
696 json_mode: Some(JsonMode::Auto),
697 max_idle_per_host: Some(16),
698 ..Default::default()
699 },
700 }),
701 ProviderPreset::GeminiFlashLite => Some(ProviderPresetResolved {
702 endpoint: ModelEndpoint {
703 provider: "openrouter".to_string(),
704 model: "google/gemini-2.5-flash-lite".to_string(),
705 base_url: Some("https://openrouter.ai/api/v1".to_string()),
706 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
707 },
708 runtime: ProviderPresetRuntimeOverrides {
709 scheduler_concurrency: Some(40),
710 provider_max_attempts: Some(1),
711 validation_max_attempts: Some(1),
712 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
713 max_backoff_seconds: Some(15),
714 timeout_seconds: Some(120),
715 batch_enabled: Some(true),
716 batch_target_tokens: Some(20_000),
717 batch_max_items: Some(160),
718 adaptive_batch_sizing: Some(true),
719 compact_prompts: Some(true),
720 adaptive_concurrency: Some(true),
721 thinking_disabled: Some(true),
722 json_mode: Some(JsonMode::Auto),
723 max_idle_per_host: Some(64),
724 ..Default::default()
725 },
726 }),
727 }
728 }
729
730 pub fn endpoint_or_default(self, custom: Option<ModelEndpoint>) -> ModelEndpoint {
731 if let Some(resolved) = self.resolve() {
732 return resolved.endpoint;
733 }
734 match self {
735 ProviderPreset::Auto => ModelEndpoint {
736 provider: "deepseek".to_string(),
737 model: "deepseek-v4-flash".to_string(),
738 base_url: Some("https://api.deepseek.com/v1".to_string()),
739 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
740 },
741 ProviderPreset::Custom => custom.unwrap_or_else(|| ModelEndpoint {
742 provider: "deepseek".to_string(),
743 model: "deepseek-v4-flash".to_string(),
744 base_url: Some("https://api.deepseek.com/v1".to_string()),
745 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
746 }),
747 _ => unreachable!("resolved presets returned above"),
748 }
749 }
750}
751
/// Result of `ProviderPreset::resolve`: the endpoint to talk to plus the
/// runtime overrides the preset wants applied.
#[derive(Debug, Clone)]
pub struct ProviderPresetResolved {
    /// Provider, model, base URL and API-key variable of the preset.
    pub endpoint: ModelEndpoint,
    /// Overrides to apply through
    /// `ResolvedRunSettings::apply_provider_preset_runtime`.
    pub runtime: ProviderPresetRuntimeOverrides,
}
757
/// Optional overrides a provider preset applies on top of profile settings.
///
/// Each field mirrors one setting in `ResolvedRunSettings`; `None` (the
/// `Default` for every field) means "keep the profile's value". The mapping
/// and any clamping are implemented in
/// `ResolvedRunSettings::apply_provider_preset_runtime`.
#[derive(Debug, Clone, Default)]
pub struct ProviderPresetRuntimeOverrides {
    /// Overrides `scheduler.concurrency`; raised to at least 1 when applied.
    pub scheduler_concurrency: Option<usize>,
    /// Overrides `provider.provider_max_attempts`; raised to at least 1 when applied.
    pub provider_max_attempts: Option<usize>,
    /// Overrides `provider.validation_max_attempts`; raised to at least 1 when applied.
    pub validation_max_attempts: Option<usize>,
    /// Overrides `provider.retry_after_policy`.
    pub retry_after_policy: Option<RetryAfterPolicy>,
    /// Overrides `provider.max_backoff_seconds`.
    pub max_backoff_seconds: Option<u64>,
    /// Overrides `provider.timeout_seconds`.
    pub timeout_seconds: Option<u64>,
    /// Overrides `batch.enabled`.
    pub batch_enabled: Option<bool>,
    /// Overrides `batch.target_tokens`.
    pub batch_target_tokens: Option<usize>,
    /// Overrides `batch.max_items`.
    pub batch_max_items: Option<usize>,
    /// Overrides `batch.adaptive_sizing`.
    pub adaptive_batch_sizing: Option<bool>,
    /// Overrides `compact_prompts`.
    pub compact_prompts: Option<bool>,
    /// Overrides `adaptive_concurrency`.
    pub adaptive_concurrency: Option<bool>,
    /// Overrides `provider.thinking_disabled`.
    pub thinking_disabled: Option<bool>,
    /// Sets `provider.model_context_tokens` to `Some(value)`.
    pub model_context_tokens: Option<u32>,
    /// Sets `provider.max_output_tokens` to `Some(value)`.
    pub max_output_tokens: Option<u32>,
    /// Sets `provider.batch_max_output_tokens` to `Some(value)`.
    pub batch_max_output_tokens: Option<u32>,
    /// Overrides `provider.json_mode`.
    pub json_mode: Option<JsonMode>,
    /// Overrides `provider.max_idle_per_host`.
    pub max_idle_per_host: Option<usize>,
}
779
/// Retry/backoff policy selected by profiles and provider presets.
///
/// NOTE(review): the retry loop that interprets these values is not in this
/// file; the variant notes are inferred from names and from which
/// profiles/presets pick them — confirm against the provider client.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum RetryAfterPolicy {
    /// Chosen by the free-tier profile and the free presets; presumably waits
    /// as instructed by the server's retry-after header.
    RespectHeader,
    /// Chosen by the `Safe`, `Balanced` and `Fastest` profiles and the paid
    /// presets; presumably exponential backoff with jitter.
    JitteredExponential,
    /// Not selected anywhere in this file; presumably a constant delay.
    Fixed,
    /// Chosen by `TurboTextOnly` and `V1Fast`, which also allow only a single
    /// provider attempt.
    None,
}
787
/// Coarse classification of a provider error, recorded in
/// `ProviderRequestMetric::error_kind`.
///
/// NOTE(review): the code that maps concrete failures onto these variants is
/// not in this file; the variant names are the only description available.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ProviderErrorKind {
    RateLimit,
    Timeout,
    Server,
    Client,
    InvalidResponse,
    Unknown,
}
797
/// Metric record for one provider request.
///
/// The struct is serialize-only (it derives `serde::Serialize` but not
/// `Deserialize`) and `Default` yields an all-empty record.
/// NOTE(review): the code that fills these fields is not in this file; units
/// are taken from the field names.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct ProviderRequestMetric {
    /// Identifier of the request.
    pub request_id: String,
    /// Identifier of the batch, when the request belongs to one.
    pub batch_id: Option<String>,
    /// Provider name.
    pub provider: String,
    /// Model name.
    pub model: String,
    /// Profile name (presumably `TranslationProfile::namespace_str` — TODO confirm).
    pub profile: String,
    /// Number of items in the request.
    pub items: usize,
    /// Input size estimated before sending, in tokens.
    pub estimated_input_tokens: usize,
    /// Output token cap sent with the request, if any.
    pub max_output_tokens: Option<u32>,
    /// Input tokens, when known (presumably as reported by the provider).
    pub input_tokens: Option<u64>,
    /// Output tokens, when known (presumably as reported by the provider).
    pub output_tokens: Option<u64>,
    /// Request latency in milliseconds.
    pub latency_ms: u64,
    /// Finish reason, when available.
    pub finish_reason: Option<String>,
    /// Free-form status string.
    pub status: String,
    /// Numeric status code (presumably HTTP), when available.
    pub status_code: Option<u16>,
    /// Number of retries.
    pub retry_count: usize,
    /// Backoff time in milliseconds.
    pub backoff_ms: u64,
    /// Error classification, when the request failed.
    pub error_kind: Option<ProviderErrorKind>,
}
818
/// Identifies a model endpoint: provider, model and optional connection
/// details.
#[derive(Debug, Clone)]
pub struct ModelEndpoint {
    /// Provider name, e.g. `"openrouter"` or `"deepseek"` in the built-in presets.
    pub provider: String,
    /// Model name as passed to the provider.
    pub model: String,
    /// Base URL of the API; the built-in presets always set it.
    pub base_url: Option<String>,
    /// Name of the environment variable that holds the API key (the variable
    /// name, not the key itself); the built-in presets always set it.
    pub api_key_env: Option<String>,
}
826
/// Endpoints per pipeline role.
///
/// Only `translation` is mandatory. NOTE(review): presumably a missing
/// optional route falls back to the translation endpoint — confirm with the
/// code that reads this struct, which is not in this file.
#[derive(Debug, Clone)]
pub struct ModelRouteConfig {
    /// Endpoint used for translation.
    pub translation: ModelEndpoint,
    /// Optional endpoint for repair.
    pub repair: Option<ModelEndpoint>,
    /// Optional endpoint for QA.
    pub qa: Option<ModelEndpoint>,
    /// Optional endpoint for double-check.
    pub double_check: Option<ModelEndpoint>,
    /// Optional fallback endpoint.
    pub fallback: Option<ModelEndpoint>,
}
835
/// Selects which items a fallback applies to.
///
/// With the `cli` feature the enum also derives `clap::ValueEnum`.
// Variant notes use `//` rather than `///` so they cannot leak into text that
// the clap derive generates from doc comments.
// NOTE(review): the consumer is not in this file; judging by the names the
// scope covers failed items, items flagged as needing review, or both.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum FallbackScope {
    Failed,
    NeedsReview,
    FailedAndNeedsReview,
}
843
// Unit tests pinning preset and profile values that other parts of the
// pipeline rely on.
#[cfg(test)]
mod tests {
    use super::*;

    /// The paid OpenRouter preset resolves to the OpenRouter provider with
    /// high concurrency, a single provider attempt and adaptive batching.
    #[test]
    fn openrouter_paid_fast_preset_sets_runtime_overrides() {
        let resolved = ProviderPreset::OpenRouterPaidFast
            .resolve()
            .expect("preset should resolve");
        assert_eq!(resolved.endpoint.provider, "openrouter");
        assert_eq!(resolved.runtime.scheduler_concurrency, Some(32));
        assert_eq!(resolved.runtime.provider_max_attempts, Some(1));
        assert_eq!(resolved.runtime.batch_target_tokens, Some(16_000));
        assert_eq!(resolved.runtime.adaptive_batch_sizing, Some(true));
        assert_eq!(resolved.runtime.max_idle_per_host, Some(64));
    }

    /// The free OpenRouter preset keeps concurrency low and selects the
    /// `RespectHeader` retry policy.
    #[test]
    fn openrouter_free_preset_uses_low_concurrency_and_respect_retry_after() {
        let resolved = ProviderPreset::OpenRouterFree
            .resolve()
            .expect("preset should resolve");
        assert_eq!(resolved.runtime.scheduler_concurrency, Some(2));
        assert_eq!(resolved.runtime.provider_max_attempts, Some(1));
        assert_eq!(
            resolved.runtime.retry_after_policy,
            Some(RetryAfterPolicy::RespectHeader)
        );
        assert_eq!(resolved.runtime.max_idle_per_host, Some(8));
    }

    /// Builds a `RuntimeConfigResolved` progress event by hand and reads a few
    /// fields back. This pins the shape of the event variant (field names and
    /// types); it does not exercise the code that emits the event.
    #[test]
    fn runtime_config_event_includes_provider_preset_values() {
        let event = crate::ProgressEvent::RuntimeConfigResolved {
            profile: "v1_fast".to_string(),
            provider_preset: Some("OpenRouterPaidFast".to_string()),
            provider: "openrouter".to_string(),
            model: "google/gemini-2.5-flash".to_string(),
            concurrency: 32,
            max_attempts: 1,
            provider_max_attempts: 1,
            validation_max_attempts: 1,
            retry_after_policy: "JitteredExponential".to_string(),
            max_backoff_seconds: 15,
            timeout_seconds: 120,
            batch_enabled: true,
            batch_target_tokens: 16_000,
            batch_max_items: 128,
            adaptive_batch_sizing: true,
            adaptive_concurrency: true,
            compact_prompts: true,
            thinking_disabled: true,
            json_mode: "Auto".to_string(),
            model_context_tokens: None,
            max_output_tokens: None,
            batch_max_output_tokens: None,
            timestamp_ms: 0,
        };
        match event {
            crate::ProgressEvent::RuntimeConfigResolved {
                provider_preset,
                batch_target_tokens,
                adaptive_batch_sizing,
                provider_max_attempts,
                ..
            } => {
                assert_eq!(provider_preset.as_deref(), Some("OpenRouterPaidFast"));
                assert_eq!(batch_target_tokens, 16_000);
                assert!(adaptive_batch_sizing);
                assert_eq!(provider_max_attempts, 1);
            }
            // The event was constructed as `RuntimeConfigResolved` just above.
            _ => unreachable!("constructed runtime event"),
        }
    }

    /// `V1Fast` allows exactly one scheduler, provider and validation attempt
    /// while keeping item repair, adaptive concurrency and adaptive batch
    /// sizing switched on.
    #[test]
    fn v1_fast_uses_single_provider_attempt() {
        let settings = TranslationProfile::V1Fast.resolve();
        assert_eq!(settings.scheduler.max_attempts, 1);
        assert_eq!(settings.provider.provider_max_attempts, 1);
        assert_eq!(settings.provider.validation_max_attempts, 1);
        assert!(settings.batch.repair_invalid_items);
        assert!(settings.adaptive_concurrency);
        assert!(settings.batch.adaptive_sizing);
    }
}