1use std::path::PathBuf;
2
3use crate::scheduler::SchedulerConfig;
4
/// Top-level settings describing a single translation run.
///
/// This is a plain data holder: the file defines no constructor, defaults or
/// validation for it, so every field must be supplied by the caller.
#[derive(Debug, Clone)]
pub struct TranslationConfig {
    /// Source language, when one was supplied (`None` otherwise).
    pub source_language: Option<String>,
    /// Language to translate into.
    pub target_language: String,
    /// Provider identifier.
    pub provider: String,
    /// Optional model name.
    pub model: Option<String>,
    /// Concurrency setting for the run.
    pub concurrency: usize,
    /// Attempt limit for the run.
    pub max_attempts: usize,
    /// Output path for the run.
    pub output: PathBuf,
}
15
/// Token budgets used when splitting input into segments.
#[derive(Debug, Clone)]
pub struct SegmentationConfig {
    /// Upper bound, in tokens, for a single segment.
    pub max_segment_tokens: usize,
    /// Token budget for context accompanying a segment.
    pub context_tokens: usize,
}

/// The defaults are `max_segment_tokens = 1_200` and `context_tokens = 160`,
/// the same pair that `TranslationProfile::Safe` resolves to.
impl Default for SegmentationConfig {
    fn default() -> Self {
        SegmentationConfig {
            context_tokens: 160,
            max_segment_tokens: 1_200,
        }
    }
}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
32pub enum PromptVersion {
33 V1,
34 BatchV1,
35}
36
37impl PromptVersion {
38 pub fn as_str(self) -> &'static str {
39 match self {
40 PromptVersion::V1 => "v1",
41 PromptVersion::BatchV1 => "batch_v1",
42 }
43 }
44}
45
46#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
47#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
48pub enum TranslationProfile {
49 Safe,
50 Balanced,
51 Fastest,
52 FreeTier,
53 TurboTextOnly,
54 V1Fast,
55}
56
57impl TranslationProfile {
58 pub fn namespace_str(self) -> &'static str {
59 match self {
60 TranslationProfile::Safe => "safe",
61 TranslationProfile::Balanced => "balanced",
62 TranslationProfile::Fastest => "fastest",
63 TranslationProfile::FreeTier => "free_tier",
64 TranslationProfile::TurboTextOnly => "turbo_text_only",
65 TranslationProfile::V1Fast => "v1_fast",
66 }
67 }
68
69 pub fn resolve(self) -> ResolvedRunSettings {
70 match self {
71 Self::Safe => ResolvedRunSettings {
72 profile: self,
73 segmentation: SegmentationConfig {
74 max_segment_tokens: 1_200,
75 context_tokens: 160,
76 },
77 batch: BatchConfig {
78 enabled: false,
79 target_tokens: 0,
80 max_items: 0,
81 adaptive_sizing: false,
82 split_on_json_failure: true,
83 repair_invalid_items: true,
84 },
85 scheduler: SchedulerConfig {
86 concurrency: 4,
87 max_attempts: 3,
88 },
89 compact_prompts: false,
90 retry_failed_only: false,
91 adaptive_concurrency: false,
92 provider: ProviderRuntimeConfig {
93 timeout_seconds: 120,
94 provider_max_attempts: 6,
95 validation_max_attempts: 3,
96 retry_after_policy: RetryAfterPolicy::JitteredExponential,
97 max_backoff_seconds: 60,
98 thinking_disabled: false,
99 model_context_tokens: None,
100 max_output_tokens: None,
101 batch_max_output_tokens: None,
102 json_mode: JsonMode::Auto,
103 max_idle_per_host: 32,
104 },
105 qa: QaRunConfig {
106 concurrency: 4,
107 batch_target_tokens: 4_000,
108 model: None,
109 provider: None,
110 base_url: None,
111 api_key_env: None,
112 },
113 double_check: DoubleCheckConfig {
114 mode: DoubleCheckMode::Off,
115 model: None,
116 provider: None,
117 base_url: None,
118 api_key_env: None,
119 concurrency: 4,
120 batch_target_tokens: 8_000,
121 auto_correct: false,
122 correction_rounds: 1,
123 },
124 },
125 Self::Balanced => ResolvedRunSettings {
126 profile: self,
127 segmentation: SegmentationConfig {
128 max_segment_tokens: 2_500,
129 context_tokens: 80,
130 },
131 batch: BatchConfig {
132 enabled: true,
133 target_tokens: 8_000,
134 max_items: 64,
135 adaptive_sizing: false,
136 split_on_json_failure: true,
137 repair_invalid_items: true,
138 },
139 scheduler: SchedulerConfig {
140 concurrency: 16,
141 max_attempts: 2,
142 },
143 compact_prompts: true,
144 retry_failed_only: true,
145 adaptive_concurrency: true,
146 provider: ProviderRuntimeConfig {
147 timeout_seconds: 120,
148 provider_max_attempts: 2,
149 validation_max_attempts: 1,
150 retry_after_policy: RetryAfterPolicy::JitteredExponential,
151 max_backoff_seconds: 30,
152 thinking_disabled: false,
153 model_context_tokens: None,
154 max_output_tokens: None,
155 batch_max_output_tokens: None,
156 json_mode: JsonMode::Auto,
157 max_idle_per_host: 32,
158 },
159 qa: QaRunConfig {
160 concurrency: 8,
161 batch_target_tokens: 8_000,
162 model: None,
163 provider: None,
164 base_url: None,
165 api_key_env: None,
166 },
167 double_check: DoubleCheckConfig {
168 mode: DoubleCheckMode::Off,
169 model: None,
170 provider: None,
171 base_url: None,
172 api_key_env: None,
173 concurrency: 4,
174 batch_target_tokens: 8_000,
175 auto_correct: false,
176 correction_rounds: 1,
177 },
178 },
179 Self::Fastest => ResolvedRunSettings {
180 profile: self,
181 segmentation: SegmentationConfig {
182 max_segment_tokens: 6_000,
183 context_tokens: 20,
184 },
185 batch: BatchConfig {
186 enabled: true,
187 target_tokens: 16_000,
188 max_items: 160,
189 adaptive_sizing: true,
190 split_on_json_failure: true,
191 repair_invalid_items: true,
192 },
193 scheduler: SchedulerConfig {
194 concurrency: 64,
195 max_attempts: 1,
196 },
197 compact_prompts: true,
198 retry_failed_only: true,
199 adaptive_concurrency: true,
200 provider: ProviderRuntimeConfig {
201 timeout_seconds: 120,
202 provider_max_attempts: 2,
203 validation_max_attempts: 1,
204 retry_after_policy: RetryAfterPolicy::JitteredExponential,
205 max_backoff_seconds: 10,
206 thinking_disabled: false,
207 model_context_tokens: None,
208 max_output_tokens: None,
209 batch_max_output_tokens: None,
210 json_mode: JsonMode::Auto,
211 max_idle_per_host: 32,
212 },
213 qa: QaRunConfig {
214 concurrency: 16,
215 batch_target_tokens: 12_000,
216 model: None,
217 provider: None,
218 base_url: None,
219 api_key_env: None,
220 },
221 double_check: DoubleCheckConfig {
222 mode: DoubleCheckMode::Off,
223 model: None,
224 provider: None,
225 base_url: None,
226 api_key_env: None,
227 concurrency: 4,
228 batch_target_tokens: 12_000,
229 auto_correct: false,
230 correction_rounds: 1,
231 },
232 },
233 Self::FreeTier => ResolvedRunSettings {
234 profile: self,
235 segmentation: SegmentationConfig {
236 max_segment_tokens: 2_500,
237 context_tokens: 80,
238 },
239 batch: BatchConfig {
240 enabled: true,
241 target_tokens: 8_000,
242 max_items: 64,
243 adaptive_sizing: false,
244 split_on_json_failure: false,
245 repair_invalid_items: true,
246 },
247 scheduler: SchedulerConfig {
248 concurrency: 1,
249 max_attempts: 2,
250 },
251 compact_prompts: true,
252 retry_failed_only: true,
253 adaptive_concurrency: true,
254 provider: ProviderRuntimeConfig {
255 timeout_seconds: 300,
256 provider_max_attempts: 2,
257 validation_max_attempts: 1,
258 retry_after_policy: RetryAfterPolicy::RespectHeader,
259 max_backoff_seconds: 90,
260 thinking_disabled: false,
261 model_context_tokens: None,
262 max_output_tokens: None,
263 batch_max_output_tokens: None,
264 json_mode: JsonMode::Auto,
265 max_idle_per_host: 8,
266 },
267 qa: QaRunConfig {
268 concurrency: 1,
269 batch_target_tokens: 4_000,
270 model: None,
271 provider: None,
272 base_url: None,
273 api_key_env: None,
274 },
275 double_check: DoubleCheckConfig {
276 mode: DoubleCheckMode::Off,
277 model: None,
278 provider: None,
279 base_url: None,
280 api_key_env: None,
281 concurrency: 1,
282 batch_target_tokens: 4_000,
283 auto_correct: false,
284 correction_rounds: 1,
285 },
286 },
287 Self::TurboTextOnly => ResolvedRunSettings {
288 profile: self,
289 segmentation: SegmentationConfig {
290 max_segment_tokens: 12_000,
291 context_tokens: 0,
292 },
293 batch: BatchConfig {
294 enabled: true,
295 target_tokens: 24_000,
296 max_items: 250,
297 adaptive_sizing: true,
298 split_on_json_failure: true,
299 repair_invalid_items: false,
300 },
301 scheduler: SchedulerConfig {
302 concurrency: 96,
303 max_attempts: 1,
304 },
305 compact_prompts: true,
306 retry_failed_only: true,
307 adaptive_concurrency: true,
308 provider: ProviderRuntimeConfig {
309 timeout_seconds: 120,
310 provider_max_attempts: 1,
311 validation_max_attempts: 1,
312 retry_after_policy: RetryAfterPolicy::None,
313 max_backoff_seconds: 5,
314 thinking_disabled: false,
315 model_context_tokens: None,
316 max_output_tokens: None,
317 batch_max_output_tokens: None,
318 json_mode: JsonMode::Auto,
319 max_idle_per_host: 64,
320 },
321 qa: QaRunConfig {
322 concurrency: 16,
323 batch_target_tokens: 16_000,
324 model: None,
325 provider: None,
326 base_url: None,
327 api_key_env: None,
328 },
329 double_check: DoubleCheckConfig {
330 mode: DoubleCheckMode::Off,
331 model: None,
332 provider: None,
333 base_url: None,
334 api_key_env: None,
335 concurrency: 4,
336 batch_target_tokens: 16_000,
337 auto_correct: false,
338 correction_rounds: 1,
339 },
340 },
341 Self::V1Fast => ResolvedRunSettings {
342 profile: self,
343 segmentation: SegmentationConfig {
344 max_segment_tokens: 12_000,
345 context_tokens: 20,
346 },
347 batch: BatchConfig {
348 enabled: true,
349 target_tokens: 16_000,
350 max_items: 128,
351 adaptive_sizing: true,
352 split_on_json_failure: true,
353 repair_invalid_items: true,
354 },
355 scheduler: SchedulerConfig {
356 concurrency: 32,
357 max_attempts: 1,
358 },
359 compact_prompts: true,
360 retry_failed_only: true,
361 adaptive_concurrency: true,
362 provider: ProviderRuntimeConfig {
363 timeout_seconds: 120,
364 provider_max_attempts: 1,
365 validation_max_attempts: 1,
366 retry_after_policy: RetryAfterPolicy::None,
367 max_backoff_seconds: 5,
368 thinking_disabled: true,
369 model_context_tokens: None,
370 max_output_tokens: None,
371 batch_max_output_tokens: None,
372 json_mode: JsonMode::Auto,
373 max_idle_per_host: 64,
374 },
375 qa: QaRunConfig {
376 concurrency: 4,
377 batch_target_tokens: 4_000,
378 model: None,
379 provider: None,
380 base_url: None,
381 api_key_env: None,
382 },
383 double_check: DoubleCheckConfig {
384 mode: DoubleCheckMode::Off,
385 model: None,
386 provider: None,
387 base_url: None,
388 api_key_env: None,
389 concurrency: 4,
390 batch_target_tokens: 8_000,
391 auto_correct: false,
392 correction_rounds: 1,
393 },
394 },
395 }
396 }
397}
398
399#[derive(Debug, Clone)]
400pub struct ResolvedRunSettings {
401 pub profile: TranslationProfile,
402 pub segmentation: SegmentationConfig,
403 pub batch: BatchConfig,
404 pub scheduler: SchedulerConfig,
405 pub provider: ProviderRuntimeConfig,
406 pub compact_prompts: bool,
407 pub retry_failed_only: bool,
408 pub adaptive_concurrency: bool,
409 pub qa: QaRunConfig,
410 pub double_check: DoubleCheckConfig,
411}
412
413impl ResolvedRunSettings {
414 pub fn apply_provider_preset_runtime(&mut self, overrides: ProviderPresetRuntimeOverrides) {
415 if let Some(v) = overrides.scheduler_concurrency {
416 self.scheduler.concurrency = v.max(1);
417 }
418 if let Some(v) = overrides.provider_max_attempts {
419 self.provider.provider_max_attempts = v.max(1);
420 }
421 if let Some(v) = overrides.validation_max_attempts {
422 self.provider.validation_max_attempts = v.max(1);
423 }
424 if let Some(v) = overrides.retry_after_policy {
425 self.provider.retry_after_policy = v;
426 }
427 if let Some(v) = overrides.max_backoff_seconds {
428 self.provider.max_backoff_seconds = v;
429 }
430 if let Some(v) = overrides.timeout_seconds {
431 self.provider.timeout_seconds = v;
432 }
433 if let Some(v) = overrides.batch_enabled {
434 self.batch.enabled = v;
435 }
436 if let Some(v) = overrides.batch_target_tokens {
437 self.batch.target_tokens = v;
438 }
439 if let Some(v) = overrides.batch_max_items {
440 self.batch.max_items = v;
441 }
442 if let Some(v) = overrides.adaptive_batch_sizing {
443 self.batch.adaptive_sizing = v;
444 }
445 if let Some(v) = overrides.compact_prompts {
446 self.compact_prompts = v;
447 }
448 if let Some(v) = overrides.adaptive_concurrency {
449 self.adaptive_concurrency = v;
450 }
451 if let Some(v) = overrides.thinking_disabled {
452 self.provider.thinking_disabled = v;
453 }
454 if let Some(v) = overrides.model_context_tokens {
455 self.provider.model_context_tokens = Some(v);
456 }
457 if let Some(v) = overrides.max_output_tokens {
458 self.provider.max_output_tokens = Some(v);
459 }
460 if let Some(v) = overrides.batch_max_output_tokens {
461 self.provider.batch_max_output_tokens = Some(v);
462 }
463 if let Some(v) = overrides.json_mode {
464 self.provider.json_mode = v;
465 }
466 if let Some(v) = overrides.max_idle_per_host {
467 self.provider.max_idle_per_host = v;
468 }
469 }
470}
471
/// Settings that govern batching.
///
/// The `Safe` profile disables batching and sets both size limits to 0.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether batching is turned on.
    pub enabled: bool,
    /// Token target for one batch.
    pub target_tokens: usize,
    /// Item limit for one batch.
    pub max_items: usize,
    /// Adaptive batch sizing flag.
    pub adaptive_sizing: bool,
    /// Flag for splitting a batch after a JSON failure.
    pub split_on_json_failure: bool,
    /// Flag for repairing invalid items.
    pub repair_invalid_items: bool,
}
481
/// Settings for the QA pass.
///
/// Every built-in profile leaves the four optional endpoint fields `None`.
#[derive(Debug, Clone)]
pub struct QaRunConfig {
    /// Concurrency for QA work.
    pub concurrency: usize,
    /// Token target for one QA batch.
    pub batch_target_tokens: usize,
    /// Optional model override.
    pub model: Option<String>,
    /// Optional provider override.
    pub provider: Option<String>,
    /// Optional base URL override.
    pub base_url: Option<String>,
    /// Optional name of the environment variable holding the API key.
    pub api_key_env: Option<String>,
}
491
492#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
493#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
494pub enum DoubleCheckMode {
495 Off,
496 Formatting,
497 Semantic,
498 Full,
499}
500
501#[derive(Debug, Clone)]
502pub struct DoubleCheckConfig {
503 pub mode: DoubleCheckMode,
504 pub model: Option<String>,
505 pub provider: Option<String>,
506 pub base_url: Option<String>,
507 pub api_key_env: Option<String>,
508 pub concurrency: usize,
509 pub batch_target_tokens: usize,
510 pub auto_correct: bool,
511 pub correction_rounds: usize,
512}
513
514#[derive(Debug, Clone)]
515pub struct ProviderRuntimeConfig {
516 pub timeout_seconds: u64,
517 pub provider_max_attempts: usize,
518 pub validation_max_attempts: usize,
519 pub retry_after_policy: RetryAfterPolicy,
520 pub max_backoff_seconds: u64,
521 pub thinking_disabled: bool,
522 pub model_context_tokens: Option<u32>,
523 pub max_output_tokens: Option<u32>,
524 pub batch_max_output_tokens: Option<u32>,
525 pub json_mode: JsonMode,
526 pub max_idle_per_host: usize,
527}
528
529#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
530#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
531pub enum JsonMode {
532 Auto,
533 ResponseFormat,
534 PromptOnly,
535}
536
/// Clamps a computed output-token budget to the model context and user cap.
///
/// The limits are applied in this order:
/// 1. When `model_context_tokens` is known, the budget may not exceed
///    `context - estimated_prompt_tokens - 256`, and that bound itself never
///    drops below 512.
/// 2. When `user_cap` is given, the budget may not exceed it.
/// 3. The final result is never below 256, even if `user_cap` is smaller.
///
/// `estimated_prompt_tokens` values above `u32::MAX` are treated as
/// `u32::MAX`. The previous `as u32` cast wrapped such values around, so an
/// enormous prompt estimate looked small and inflated the budget.
pub fn cap_output_tokens(
    computed: u32,
    estimated_prompt_tokens: usize,
    model_context_tokens: Option<u32>,
    user_cap: Option<u32>,
) -> u32 {
    /// Headroom kept free between prompt + output and the context size.
    const CONTEXT_SAFETY_MARGIN: u32 = 256;
    /// Lowest value the context-derived bound may take.
    const MIN_CONTEXT_BOUND: u32 = 512;
    /// Lowest value this function ever returns.
    const MIN_OUTPUT_TOKENS: u32 = 256;

    let mut out = computed;

    if let Some(context) = model_context_tokens {
        // Clamp before narrowing so the cast cannot truncate.
        let prompt = estimated_prompt_tokens.min(u32::MAX as usize) as u32;
        let context_bound = context
            .saturating_sub(prompt)
            .saturating_sub(CONTEXT_SAFETY_MARGIN)
            .max(MIN_CONTEXT_BOUND);
        out = out.min(context_bound);
    }

    if let Some(cap) = user_cap {
        out = out.min(cap);
    }

    out.max(MIN_OUTPUT_TOKENS)
}
558
559#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
560#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
561pub enum ProviderPreset {
562 Auto,
563 OpenRouterFree,
564 OpenRouterPaidFast,
565 DeepSeekFree,
566 DeepSeekPaid,
567 GeminiFlashLite,
568 Custom,
569}
570
571impl ProviderPreset {
572 pub fn resolve(self) -> Option<ProviderPresetResolved> {
573 match self {
574 ProviderPreset::Auto | ProviderPreset::Custom => None,
575 ProviderPreset::OpenRouterFree => Some(ProviderPresetResolved {
576 endpoint: ModelEndpoint {
577 provider: "openrouter".to_string(),
578 model: "google/gemini-2.5-flash-lite".to_string(),
579 base_url: Some("https://openrouter.ai/api/v1".to_string()),
580 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
581 },
582 runtime: ProviderPresetRuntimeOverrides {
583 scheduler_concurrency: Some(2),
584 provider_max_attempts: Some(1),
585 validation_max_attempts: Some(1),
586 retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
587 max_backoff_seconds: Some(90),
588 timeout_seconds: Some(180),
589 batch_enabled: Some(true),
590 batch_target_tokens: Some(6_000),
591 batch_max_items: Some(48),
592 compact_prompts: Some(true),
593 adaptive_concurrency: Some(true),
594 thinking_disabled: Some(true),
595 json_mode: Some(JsonMode::Auto),
596 max_idle_per_host: Some(8),
597 ..Default::default()
598 },
599 }),
600 ProviderPreset::OpenRouterPaidFast => Some(ProviderPresetResolved {
601 endpoint: ModelEndpoint {
602 provider: "openrouter".to_string(),
603 model: "google/gemini-2.5-flash".to_string(),
604 base_url: Some("https://openrouter.ai/api/v1".to_string()),
605 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
606 },
607 runtime: ProviderPresetRuntimeOverrides {
608 scheduler_concurrency: Some(32),
609 provider_max_attempts: Some(1),
610 validation_max_attempts: Some(1),
611 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
612 max_backoff_seconds: Some(15),
613 timeout_seconds: Some(120),
614 batch_enabled: Some(true),
615 batch_target_tokens: Some(16_000),
616 batch_max_items: Some(128),
617 adaptive_batch_sizing: Some(true),
618 compact_prompts: Some(true),
619 adaptive_concurrency: Some(true),
620 thinking_disabled: Some(true),
621 json_mode: Some(JsonMode::Auto),
622 max_idle_per_host: Some(64),
623 ..Default::default()
624 },
625 }),
626 ProviderPreset::DeepSeekFree => Some(ProviderPresetResolved {
627 endpoint: ModelEndpoint {
628 provider: "deepseek".to_string(),
629 model: "deepseek-v4-flash".to_string(),
630 base_url: Some("https://api.deepseek.com/v1".to_string()),
631 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
632 },
633 runtime: ProviderPresetRuntimeOverrides {
634 scheduler_concurrency: Some(1),
635 provider_max_attempts: Some(1),
636 validation_max_attempts: Some(1),
637 retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
638 max_backoff_seconds: Some(120),
639 timeout_seconds: Some(240),
640 batch_enabled: Some(true),
641 batch_target_tokens: Some(4_000),
642 batch_max_items: Some(32),
643 compact_prompts: Some(true),
644 adaptive_concurrency: Some(false),
645 thinking_disabled: Some(true),
646 json_mode: Some(JsonMode::Auto),
647 max_idle_per_host: Some(4),
648 ..Default::default()
649 },
650 }),
651 ProviderPreset::DeepSeekPaid => Some(ProviderPresetResolved {
652 endpoint: ModelEndpoint {
653 provider: "deepseek".to_string(),
654 model: "deepseek-v4-flash".to_string(),
655 base_url: Some("https://api.deepseek.com/v1".to_string()),
656 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
657 },
658 runtime: ProviderPresetRuntimeOverrides {
659 scheduler_concurrency: Some(8),
660 provider_max_attempts: Some(2),
661 validation_max_attempts: Some(1),
662 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
663 max_backoff_seconds: Some(30),
664 timeout_seconds: Some(180),
665 batch_enabled: Some(true),
666 batch_target_tokens: Some(12_000),
667 batch_max_items: Some(96),
668 adaptive_batch_sizing: Some(true),
669 compact_prompts: Some(true),
670 adaptive_concurrency: Some(true),
671 thinking_disabled: Some(false),
672 json_mode: Some(JsonMode::Auto),
673 max_idle_per_host: Some(16),
674 ..Default::default()
675 },
676 }),
677 ProviderPreset::GeminiFlashLite => Some(ProviderPresetResolved {
678 endpoint: ModelEndpoint {
679 provider: "openrouter".to_string(),
680 model: "google/gemini-2.5-flash-lite".to_string(),
681 base_url: Some("https://openrouter.ai/api/v1".to_string()),
682 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
683 },
684 runtime: ProviderPresetRuntimeOverrides {
685 scheduler_concurrency: Some(40),
686 provider_max_attempts: Some(1),
687 validation_max_attempts: Some(1),
688 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
689 max_backoff_seconds: Some(15),
690 timeout_seconds: Some(120),
691 batch_enabled: Some(true),
692 batch_target_tokens: Some(20_000),
693 batch_max_items: Some(160),
694 adaptive_batch_sizing: Some(true),
695 compact_prompts: Some(true),
696 adaptive_concurrency: Some(true),
697 thinking_disabled: Some(true),
698 json_mode: Some(JsonMode::Auto),
699 max_idle_per_host: Some(64),
700 ..Default::default()
701 },
702 }),
703 }
704 }
705
706 pub fn endpoint_or_default(self, custom: Option<ModelEndpoint>) -> ModelEndpoint {
707 if let Some(resolved) = self.resolve() {
708 return resolved.endpoint;
709 }
710 match self {
711 ProviderPreset::Auto => ModelEndpoint {
712 provider: "deepseek".to_string(),
713 model: "deepseek-v4-flash".to_string(),
714 base_url: Some("https://api.deepseek.com/v1".to_string()),
715 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
716 },
717 ProviderPreset::Custom => custom.unwrap_or_else(|| ModelEndpoint {
718 provider: "deepseek".to_string(),
719 model: "deepseek-v4-flash".to_string(),
720 base_url: Some("https://api.deepseek.com/v1".to_string()),
721 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
722 }),
723 _ => unreachable!("resolved presets returned above"),
724 }
725 }
726}
727
728#[derive(Debug, Clone)]
729pub struct ProviderPresetResolved {
730 pub endpoint: ModelEndpoint,
731 pub runtime: ProviderPresetRuntimeOverrides,
732}
733
734#[derive(Debug, Clone, Default)]
735pub struct ProviderPresetRuntimeOverrides {
736 pub scheduler_concurrency: Option<usize>,
737 pub provider_max_attempts: Option<usize>,
738 pub validation_max_attempts: Option<usize>,
739 pub retry_after_policy: Option<RetryAfterPolicy>,
740 pub max_backoff_seconds: Option<u64>,
741 pub timeout_seconds: Option<u64>,
742 pub batch_enabled: Option<bool>,
743 pub batch_target_tokens: Option<usize>,
744 pub batch_max_items: Option<usize>,
745 pub adaptive_batch_sizing: Option<bool>,
746 pub compact_prompts: Option<bool>,
747 pub adaptive_concurrency: Option<bool>,
748 pub thinking_disabled: Option<bool>,
749 pub model_context_tokens: Option<u32>,
750 pub max_output_tokens: Option<u32>,
751 pub batch_max_output_tokens: Option<u32>,
752 pub json_mode: Option<JsonMode>,
753 pub max_idle_per_host: Option<usize>,
754}
755
756#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
757pub enum RetryAfterPolicy {
758 RespectHeader,
759 JitteredExponential,
760 Fixed,
761 None,
762}
763
764#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
765pub enum ProviderErrorKind {
766 RateLimit,
767 Timeout,
768 Server,
769 Client,
770 InvalidResponse,
771 Unknown,
772}
773
774#[derive(Debug, Clone, Default, serde::Serialize)]
775pub struct ProviderRequestMetric {
776 pub request_id: String,
777 pub batch_id: Option<String>,
778 pub provider: String,
779 pub model: String,
780 pub profile: String,
781 pub items: usize,
782 pub estimated_input_tokens: usize,
783 pub max_output_tokens: Option<u32>,
784 pub input_tokens: Option<u64>,
785 pub output_tokens: Option<u64>,
786 pub latency_ms: u64,
787 pub finish_reason: Option<String>,
788 pub status: String,
789 pub status_code: Option<u16>,
790 pub retry_count: usize,
791 pub backoff_ms: u64,
792 pub error_kind: Option<ProviderErrorKind>,
793}
794
/// Identifies a provider, a model and how to reach it.
#[derive(Debug, Clone)]
pub struct ModelEndpoint {
    /// Provider name, e.g. `"openrouter"` or `"deepseek"`.
    pub provider: String,
    /// Model name, e.g. `"deepseek-v4-flash"`.
    pub model: String,
    /// Base URL of the API, when set.
    pub base_url: Option<String>,
    /// Name of the environment variable holding the API key, when set.
    pub api_key_env: Option<String>,
}
802
803#[derive(Debug, Clone)]
804pub struct ModelRouteConfig {
805 pub translation: ModelEndpoint,
806 pub repair: Option<ModelEndpoint>,
807 pub qa: Option<ModelEndpoint>,
808 pub double_check: Option<ModelEndpoint>,
809 pub fallback: Option<ModelEndpoint>,
810}
811
812#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
813#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
814pub enum FallbackScope {
815 Failed,
816 NeedsReview,
817 FailedAndNeedsReview,
818}
819
#[cfg(test)]
mod tests {
    use super::*;

    /// The paid OpenRouter preset targets OpenRouter and sets the
    /// high-throughput overrides.
    #[test]
    fn openrouter_paid_fast_preset_sets_runtime_overrides() {
        let preset = ProviderPreset::OpenRouterPaidFast
            .resolve()
            .expect("preset should resolve");
        assert_eq!(preset.endpoint.provider, "openrouter");

        let runtime = &preset.runtime;
        assert_eq!(runtime.scheduler_concurrency, Some(32));
        assert_eq!(runtime.provider_max_attempts, Some(1));
        assert_eq!(runtime.batch_target_tokens, Some(16_000));
        assert_eq!(runtime.adaptive_batch_sizing, Some(true));
        assert_eq!(runtime.max_idle_per_host, Some(64));
    }

    /// The free OpenRouter preset keeps concurrency low and uses the
    /// `RespectHeader` retry policy.
    #[test]
    fn openrouter_free_preset_uses_low_concurrency_and_respect_retry_after() {
        let runtime = ProviderPreset::OpenRouterFree
            .resolve()
            .expect("preset should resolve")
            .runtime;
        assert_eq!(runtime.scheduler_concurrency, Some(2));
        assert_eq!(runtime.provider_max_attempts, Some(1));
        assert_eq!(
            runtime.retry_after_policy,
            Some(RetryAfterPolicy::RespectHeader)
        );
        assert_eq!(runtime.max_idle_per_host, Some(8));
    }

    /// The runtime-config progress event carries the preset-derived values.
    #[test]
    fn runtime_config_event_includes_provider_preset_values() {
        let event = crate::ProgressEvent::RuntimeConfigResolved {
            profile: "v1_fast".to_string(),
            provider_preset: Some("OpenRouterPaidFast".to_string()),
            provider: "openrouter".to_string(),
            model: "google/gemini-2.5-flash".to_string(),
            concurrency: 32,
            max_attempts: 1,
            provider_max_attempts: 1,
            validation_max_attempts: 1,
            retry_after_policy: "JitteredExponential".to_string(),
            max_backoff_seconds: 15,
            timeout_seconds: 120,
            batch_enabled: true,
            batch_target_tokens: 16_000,
            batch_max_items: 128,
            adaptive_batch_sizing: true,
            adaptive_concurrency: true,
            compact_prompts: true,
            thinking_disabled: true,
            json_mode: "Auto".to_string(),
            model_context_tokens: None,
            max_output_tokens: None,
            batch_max_output_tokens: None,
            timestamp_ms: 0,
        };

        // Read back only the fields under test.
        match event {
            crate::ProgressEvent::RuntimeConfigResolved {
                provider_preset: preset_name,
                batch_target_tokens: target_tokens,
                adaptive_batch_sizing: adaptive,
                provider_max_attempts: attempts,
                ..
            } => {
                assert_eq!(preset_name.as_deref(), Some("OpenRouterPaidFast"));
                assert_eq!(target_tokens, 16_000);
                assert!(adaptive);
                assert_eq!(attempts, 1);
            }
            _ => unreachable!("constructed runtime event"),
        }
    }

    /// `V1Fast` makes a single attempt at every level and keeps item repair
    /// and the adaptive features enabled.
    #[test]
    fn v1_fast_uses_single_provider_attempt() {
        let resolved = TranslationProfile::V1Fast.resolve();
        assert_eq!(resolved.scheduler.max_attempts, 1);
        assert_eq!(resolved.provider.provider_max_attempts, 1);
        assert_eq!(resolved.provider.validation_max_attempts, 1);
        assert!(resolved.batch.repair_invalid_items);
        assert!(resolved.adaptive_concurrency);
        assert!(resolved.batch.adaptive_sizing);
    }
}