1use std::path::PathBuf;
2
3use crate::scheduler::SchedulerConfig;
4
/// Caller-facing options describing a single translation run.
///
/// This type is a plain data carrier: the file only declares the fields, and
/// how each one is interpreted is decided by code outside this file.
#[derive(Debug, Clone)]
pub struct TranslationConfig {
    /// Language of the input text, when the caller supplied one.
    pub source_language: Option<String>,
    /// Language the text is translated into.
    pub target_language: String,
    /// Identifier of the translation provider.
    pub provider: String,
    /// Model name, when the caller supplied one.
    pub model: Option<String>,
    /// Requested level of concurrency.
    pub concurrency: usize,
    /// Requested attempt limit.
    pub max_attempts: usize,
    /// Path of the output.
    pub output: PathBuf,
}
15
/// Token budgets used when splitting text into segments.
#[derive(Debug, Clone)]
pub struct SegmentationConfig {
    /// Upper bound, in tokens, for a single segment.
    pub max_segment_tokens: usize,
    /// Token budget for context accompanying a segment.
    /// NOTE(review): exact use of the context budget is not visible in this file.
    pub context_tokens: usize,
}

impl Default for SegmentationConfig {
    /// Returns the default budgets: 1,200 tokens per segment and 160 context
    /// tokens. These are the same numbers `TranslationProfile::Safe` resolves to.
    fn default() -> Self {
        SegmentationConfig {
            max_segment_tokens: 1_200,
            context_tokens: 160,
        }
    }
}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
32pub enum PromptVersion {
33 V1,
34 BatchV1,
35 V2,
36 BatchV2,
37}
38
39impl PromptVersion {
40 pub fn as_str(self) -> &'static str {
41 match self {
42 PromptVersion::V1 => "v1",
43 PromptVersion::BatchV1 => "batch_v1",
44 PromptVersion::V2 => "v2",
45 PromptVersion::BatchV2 => "batch_v2",
46 }
47 }
48}
49
50#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
51#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
52pub enum TranslationProfile {
53 Safe,
54 Balanced,
55 Fastest,
56 FreeTier,
57 TurboTextOnly,
58 V1Fast,
59}
60
61impl TranslationProfile {
62 pub fn namespace_str(self) -> &'static str {
63 match self {
64 TranslationProfile::Safe => "safe",
65 TranslationProfile::Balanced => "balanced",
66 TranslationProfile::Fastest => "fastest",
67 TranslationProfile::FreeTier => "free_tier",
68 TranslationProfile::TurboTextOnly => "turbo_text_only",
69 TranslationProfile::V1Fast => "v1_fast",
70 }
71 }
72
73 pub fn resolve(self) -> ResolvedRunSettings {
74 match self {
75 Self::Safe => ResolvedRunSettings {
76 profile: self,
77 segmentation: SegmentationConfig {
78 max_segment_tokens: 1_200,
79 context_tokens: 160,
80 },
81 batch: BatchConfig {
82 enabled: false,
83 target_tokens: 0,
84 max_items: 0,
85 adaptive_sizing: false,
86 split_on_json_failure: true,
87 repair_invalid_items: true,
88 },
89 scheduler: SchedulerConfig {
90 concurrency: 4,
91 max_attempts: 3,
92 },
93 compact_prompts: false,
94 retry_failed_only: false,
95 adaptive_concurrency: false,
96 provider: ProviderRuntimeConfig {
97 timeout_seconds: 120,
98 provider_max_attempts: 6,
99 validation_max_attempts: 3,
100 retry_after_policy: RetryAfterPolicy::JitteredExponential,
101 max_backoff_seconds: 60,
102 thinking_disabled: false,
103 model_context_tokens: None,
104 max_output_tokens: None,
105 batch_max_output_tokens: None,
106 json_mode: JsonMode::Auto,
107 max_idle_per_host: 32,
108 },
109 qa: QaRunConfig {
110 concurrency: 4,
111 batch_target_tokens: 4_000,
112 model: None,
113 provider: None,
114 base_url: None,
115 api_key_env: None,
116 },
117 double_check: DoubleCheckConfig {
118 mode: DoubleCheckMode::Off,
119 model: None,
120 provider: None,
121 base_url: None,
122 api_key_env: None,
123 concurrency: 4,
124 batch_target_tokens: 8_000,
125 auto_correct: false,
126 correction_rounds: 1,
127 },
128 },
129 Self::Balanced => ResolvedRunSettings {
130 profile: self,
131 segmentation: SegmentationConfig {
132 max_segment_tokens: 2_500,
133 context_tokens: 80,
134 },
135 batch: BatchConfig {
136 enabled: true,
137 target_tokens: 8_000,
138 max_items: 64,
139 adaptive_sizing: false,
140 split_on_json_failure: true,
141 repair_invalid_items: true,
142 },
143 scheduler: SchedulerConfig {
144 concurrency: 16,
145 max_attempts: 2,
146 },
147 compact_prompts: true,
148 retry_failed_only: true,
149 adaptive_concurrency: true,
150 provider: ProviderRuntimeConfig {
151 timeout_seconds: 120,
152 provider_max_attempts: 2,
153 validation_max_attempts: 1,
154 retry_after_policy: RetryAfterPolicy::JitteredExponential,
155 max_backoff_seconds: 30,
156 thinking_disabled: false,
157 model_context_tokens: None,
158 max_output_tokens: None,
159 batch_max_output_tokens: None,
160 json_mode: JsonMode::Auto,
161 max_idle_per_host: 32,
162 },
163 qa: QaRunConfig {
164 concurrency: 8,
165 batch_target_tokens: 8_000,
166 model: None,
167 provider: None,
168 base_url: None,
169 api_key_env: None,
170 },
171 double_check: DoubleCheckConfig {
172 mode: DoubleCheckMode::Off,
173 model: None,
174 provider: None,
175 base_url: None,
176 api_key_env: None,
177 concurrency: 4,
178 batch_target_tokens: 8_000,
179 auto_correct: false,
180 correction_rounds: 1,
181 },
182 },
183 Self::Fastest => ResolvedRunSettings {
184 profile: self,
185 segmentation: SegmentationConfig {
186 max_segment_tokens: 6_000,
187 context_tokens: 20,
188 },
189 batch: BatchConfig {
190 enabled: true,
191 target_tokens: 16_000,
192 max_items: 160,
193 adaptive_sizing: true,
194 split_on_json_failure: true,
195 repair_invalid_items: true,
196 },
197 scheduler: SchedulerConfig {
198 concurrency: 64,
199 max_attempts: 1,
200 },
201 compact_prompts: true,
202 retry_failed_only: true,
203 adaptive_concurrency: true,
204 provider: ProviderRuntimeConfig {
205 timeout_seconds: 120,
206 provider_max_attempts: 2,
207 validation_max_attempts: 1,
208 retry_after_policy: RetryAfterPolicy::JitteredExponential,
209 max_backoff_seconds: 10,
210 thinking_disabled: false,
211 model_context_tokens: None,
212 max_output_tokens: None,
213 batch_max_output_tokens: None,
214 json_mode: JsonMode::Auto,
215 max_idle_per_host: 32,
216 },
217 qa: QaRunConfig {
218 concurrency: 16,
219 batch_target_tokens: 12_000,
220 model: None,
221 provider: None,
222 base_url: None,
223 api_key_env: None,
224 },
225 double_check: DoubleCheckConfig {
226 mode: DoubleCheckMode::Off,
227 model: None,
228 provider: None,
229 base_url: None,
230 api_key_env: None,
231 concurrency: 4,
232 batch_target_tokens: 12_000,
233 auto_correct: false,
234 correction_rounds: 1,
235 },
236 },
237 Self::FreeTier => ResolvedRunSettings {
238 profile: self,
239 segmentation: SegmentationConfig {
240 max_segment_tokens: 2_500,
241 context_tokens: 80,
242 },
243 batch: BatchConfig {
244 enabled: true,
245 target_tokens: 8_000,
246 max_items: 64,
247 adaptive_sizing: false,
248 split_on_json_failure: false,
249 repair_invalid_items: true,
250 },
251 scheduler: SchedulerConfig {
252 concurrency: 1,
253 max_attempts: 2,
254 },
255 compact_prompts: true,
256 retry_failed_only: true,
257 adaptive_concurrency: true,
258 provider: ProviderRuntimeConfig {
259 timeout_seconds: 300,
260 provider_max_attempts: 2,
261 validation_max_attempts: 1,
262 retry_after_policy: RetryAfterPolicy::RespectHeader,
263 max_backoff_seconds: 90,
264 thinking_disabled: false,
265 model_context_tokens: None,
266 max_output_tokens: None,
267 batch_max_output_tokens: None,
268 json_mode: JsonMode::Auto,
269 max_idle_per_host: 8,
270 },
271 qa: QaRunConfig {
272 concurrency: 1,
273 batch_target_tokens: 4_000,
274 model: None,
275 provider: None,
276 base_url: None,
277 api_key_env: None,
278 },
279 double_check: DoubleCheckConfig {
280 mode: DoubleCheckMode::Off,
281 model: None,
282 provider: None,
283 base_url: None,
284 api_key_env: None,
285 concurrency: 1,
286 batch_target_tokens: 4_000,
287 auto_correct: false,
288 correction_rounds: 1,
289 },
290 },
291 Self::TurboTextOnly => ResolvedRunSettings {
292 profile: self,
293 segmentation: SegmentationConfig {
294 max_segment_tokens: 12_000,
295 context_tokens: 0,
296 },
297 batch: BatchConfig {
298 enabled: true,
299 target_tokens: 24_000,
300 max_items: 250,
301 adaptive_sizing: true,
302 split_on_json_failure: true,
303 repair_invalid_items: false,
304 },
305 scheduler: SchedulerConfig {
306 concurrency: 96,
307 max_attempts: 1,
308 },
309 compact_prompts: true,
310 retry_failed_only: true,
311 adaptive_concurrency: true,
312 provider: ProviderRuntimeConfig {
313 timeout_seconds: 120,
314 provider_max_attempts: 1,
315 validation_max_attempts: 1,
316 retry_after_policy: RetryAfterPolicy::None,
317 max_backoff_seconds: 5,
318 thinking_disabled: false,
319 model_context_tokens: None,
320 max_output_tokens: None,
321 batch_max_output_tokens: None,
322 json_mode: JsonMode::Auto,
323 max_idle_per_host: 64,
324 },
325 qa: QaRunConfig {
326 concurrency: 16,
327 batch_target_tokens: 16_000,
328 model: None,
329 provider: None,
330 base_url: None,
331 api_key_env: None,
332 },
333 double_check: DoubleCheckConfig {
334 mode: DoubleCheckMode::Off,
335 model: None,
336 provider: None,
337 base_url: None,
338 api_key_env: None,
339 concurrency: 4,
340 batch_target_tokens: 16_000,
341 auto_correct: false,
342 correction_rounds: 1,
343 },
344 },
345 Self::V1Fast => ResolvedRunSettings {
346 profile: self,
347 segmentation: SegmentationConfig {
348 max_segment_tokens: 12_000,
349 context_tokens: 20,
350 },
351 batch: BatchConfig {
352 enabled: true,
353 target_tokens: 16_000,
354 max_items: 128,
355 adaptive_sizing: true,
356 split_on_json_failure: true,
357 repair_invalid_items: true,
358 },
359 scheduler: SchedulerConfig {
360 concurrency: 32,
361 max_attempts: 1,
362 },
363 compact_prompts: true,
364 retry_failed_only: true,
365 adaptive_concurrency: true,
366 provider: ProviderRuntimeConfig {
367 timeout_seconds: 120,
368 provider_max_attempts: 1,
369 validation_max_attempts: 1,
370 retry_after_policy: RetryAfterPolicy::None,
371 max_backoff_seconds: 5,
372 thinking_disabled: true,
373 model_context_tokens: None,
374 max_output_tokens: None,
375 batch_max_output_tokens: None,
376 json_mode: JsonMode::Auto,
377 max_idle_per_host: 64,
378 },
379 qa: QaRunConfig {
380 concurrency: 4,
381 batch_target_tokens: 4_000,
382 model: None,
383 provider: None,
384 base_url: None,
385 api_key_env: None,
386 },
387 double_check: DoubleCheckConfig {
388 mode: DoubleCheckMode::Off,
389 model: None,
390 provider: None,
391 base_url: None,
392 api_key_env: None,
393 concurrency: 4,
394 batch_target_tokens: 8_000,
395 auto_correct: false,
396 correction_rounds: 1,
397 },
398 },
399 }
400 }
401}
402
403#[derive(Debug, Clone)]
404pub struct ResolvedRunSettings {
405 pub profile: TranslationProfile,
406 pub segmentation: SegmentationConfig,
407 pub batch: BatchConfig,
408 pub scheduler: SchedulerConfig,
409 pub provider: ProviderRuntimeConfig,
410 pub compact_prompts: bool,
411 pub retry_failed_only: bool,
412 pub adaptive_concurrency: bool,
413 pub qa: QaRunConfig,
414 pub double_check: DoubleCheckConfig,
415}
416
417impl ResolvedRunSettings {
418 pub fn apply_provider_preset_runtime(&mut self, overrides: ProviderPresetRuntimeOverrides) {
419 if let Some(v) = overrides.scheduler_concurrency {
420 self.scheduler.concurrency = v.max(1);
421 }
422 if let Some(v) = overrides.provider_max_attempts {
423 self.provider.provider_max_attempts = v.max(1);
424 }
425 if let Some(v) = overrides.validation_max_attempts {
426 self.provider.validation_max_attempts = v.max(1);
427 }
428 if let Some(v) = overrides.retry_after_policy {
429 self.provider.retry_after_policy = v;
430 }
431 if let Some(v) = overrides.max_backoff_seconds {
432 self.provider.max_backoff_seconds = v;
433 }
434 if let Some(v) = overrides.timeout_seconds {
435 self.provider.timeout_seconds = v;
436 }
437 if let Some(v) = overrides.batch_enabled {
438 self.batch.enabled = v;
439 }
440 if let Some(v) = overrides.batch_target_tokens {
441 self.batch.target_tokens = v;
442 }
443 if let Some(v) = overrides.batch_max_items {
444 self.batch.max_items = v;
445 }
446 if let Some(v) = overrides.adaptive_batch_sizing {
447 self.batch.adaptive_sizing = v;
448 }
449 if let Some(v) = overrides.compact_prompts {
450 self.compact_prompts = v;
451 }
452 if let Some(v) = overrides.adaptive_concurrency {
453 self.adaptive_concurrency = v;
454 }
455 if let Some(v) = overrides.thinking_disabled {
456 self.provider.thinking_disabled = v;
457 }
458 if let Some(v) = overrides.model_context_tokens {
459 self.provider.model_context_tokens = Some(v);
460 }
461 if let Some(v) = overrides.max_output_tokens {
462 self.provider.max_output_tokens = Some(v);
463 }
464 if let Some(v) = overrides.batch_max_output_tokens {
465 self.provider.batch_max_output_tokens = Some(v);
466 }
467 if let Some(v) = overrides.json_mode {
468 self.provider.json_mode = v;
469 }
470 if let Some(v) = overrides.max_idle_per_host {
471 self.provider.max_idle_per_host = v;
472 }
473 }
474}
475
/// Batching behavior for a run.
///
/// The `Safe` profile resolves to `enabled: false` with zeroed sizes; every
/// other profile enables batching.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether batching is used at all.
    pub enabled: bool,
    /// Target size of a batch, in tokens.
    pub target_tokens: usize,
    /// Maximum number of items in a batch.
    pub max_items: usize,
    /// Toggle for adaptive batch sizing.
    pub adaptive_sizing: bool,
    /// Toggle for splitting a batch after a JSON failure.
    /// NOTE(review): the splitting strategy is implemented outside this file.
    pub split_on_json_failure: bool,
    /// Toggle for repairing invalid items.
    /// NOTE(review): the repair strategy is implemented outside this file.
    pub repair_invalid_items: bool,
}
485
/// Settings for the QA pass.
///
/// All profiles in this file leave the four routing fields (`model`,
/// `provider`, `base_url`, `api_key_env`) as `None`.
#[derive(Debug, Clone)]
pub struct QaRunConfig {
    /// Concurrency used for QA work.
    pub concurrency: usize,
    /// Target batch size, in tokens, for QA work.
    pub batch_target_tokens: usize,
    /// Model name for QA, if one is set.
    pub model: Option<String>,
    /// Provider identifier for QA, if one is set.
    pub provider: Option<String>,
    /// Base URL for QA requests, if one is set.
    pub base_url: Option<String>,
    /// Name of the environment variable holding the API key, if one is set.
    pub api_key_env: Option<String>,
}
495
496#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
497#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
498pub enum DoubleCheckMode {
499 Off,
500 Formatting,
501 Semantic,
502 Full,
503}
504
505#[derive(Debug, Clone)]
506pub struct DoubleCheckConfig {
507 pub mode: DoubleCheckMode,
508 pub model: Option<String>,
509 pub provider: Option<String>,
510 pub base_url: Option<String>,
511 pub api_key_env: Option<String>,
512 pub concurrency: usize,
513 pub batch_target_tokens: usize,
514 pub auto_correct: bool,
515 pub correction_rounds: usize,
516}
517
518#[derive(Debug, Clone)]
519pub struct ProviderRuntimeConfig {
520 pub timeout_seconds: u64,
521 pub provider_max_attempts: usize,
522 pub validation_max_attempts: usize,
523 pub retry_after_policy: RetryAfterPolicy,
524 pub max_backoff_seconds: u64,
525 pub thinking_disabled: bool,
526 pub model_context_tokens: Option<u32>,
527 pub max_output_tokens: Option<u32>,
528 pub batch_max_output_tokens: Option<u32>,
529 pub json_mode: JsonMode,
530 pub max_idle_per_host: usize,
531}
532
533#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
534#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
535pub enum JsonMode {
536 Auto,
537 ResponseFormat,
538 PromptOnly,
539}
540
541#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
545#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
546pub enum ContextScope {
547 #[default]
548 Chapter,
549 Book,
550}
551
552impl ContextScope {
553 pub fn as_str(self) -> &'static str {
554 match self {
555 ContextScope::Chapter => "chapter",
556 ContextScope::Book => "book",
557 }
558 }
559}
560
/// Clamps a computed output-token budget to the model context and user cap.
///
/// Steps, in order:
/// 1. If `model_context_tokens` is known, the budget is limited to
///    `context - estimated_prompt_tokens - 256`, but that limit is never taken
///    below 512.
/// 2. If `user_cap` is set, the budget is limited to it.
/// 3. The result is raised to at least 256.
///
/// Because step 3 runs last, a `user_cap` below 256 is effectively raised to
/// 256, and a `computed` value below 256 is raised as well.
///
/// `estimated_prompt_tokens` is converted to `u32` with saturation: an
/// estimate larger than `u32::MAX` counts as `u32::MAX` rather than wrapping
/// around to a small number.
pub fn cap_output_tokens(
    computed: u32,
    estimated_prompt_tokens: usize,
    model_context_tokens: Option<u32>,
    user_cap: Option<u32>,
) -> u32 {
    /// Tokens held back from the remaining context as a safety margin.
    const CONTEXT_SAFETY_MARGIN: u32 = 256;
    /// Lowest value the context-derived limit may take.
    const MIN_CONTEXT_LIMIT: u32 = 512;
    /// Lowest value this function ever returns.
    const MIN_OUTPUT_TOKENS: u32 = 256;

    let mut out = computed;

    if let Some(context) = model_context_tokens {
        // A plain `as u32` cast would truncate on 64-bit targets and make a
        // huge prompt look tiny; saturate instead. The fully qualified call
        // keeps this independent of whether `TryFrom` is in the prelude.
        let prompt = <u32 as std::convert::TryFrom<usize>>::try_from(estimated_prompt_tokens)
            .unwrap_or(u32::MAX);
        let context_limit = context
            .saturating_sub(prompt)
            .saturating_sub(CONTEXT_SAFETY_MARGIN)
            .max(MIN_CONTEXT_LIMIT);
        out = out.min(context_limit);
    }

    if let Some(cap) = user_cap {
        out = out.min(cap);
    }

    out.max(MIN_OUTPUT_TOKENS)
}
582
583#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
584#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
585pub enum ProviderPreset {
586 Auto,
587 OpenRouterFree,
588 OpenRouterPaidFast,
589 DeepSeekFree,
590 DeepSeekPaid,
591 GeminiFlashLite,
592 LocalOllama,
593 LocalLlamacpp,
594 Custom,
595}
596
597impl ProviderPreset {
598 pub fn resolve(self) -> Option<ProviderPresetResolved> {
599 match self {
600 ProviderPreset::Auto | ProviderPreset::Custom => None,
601 ProviderPreset::OpenRouterFree => Some(ProviderPresetResolved {
602 endpoint: ModelEndpoint {
603 provider: "openrouter".to_string(),
604 model: "google/gemini-2.5-flash-lite".to_string(),
605 base_url: Some("https://openrouter.ai/api/v1".to_string()),
606 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
607 },
608 runtime: ProviderPresetRuntimeOverrides {
609 scheduler_concurrency: Some(2),
610 provider_max_attempts: Some(1),
611 validation_max_attempts: Some(1),
612 retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
613 max_backoff_seconds: Some(90),
614 timeout_seconds: Some(180),
615 batch_enabled: Some(true),
616 batch_target_tokens: Some(6_000),
617 batch_max_items: Some(48),
618 compact_prompts: Some(true),
619 adaptive_concurrency: Some(true),
620 thinking_disabled: Some(true),
621 json_mode: Some(JsonMode::Auto),
622 max_idle_per_host: Some(8),
623 ..Default::default()
624 },
625 }),
626 ProviderPreset::OpenRouterPaidFast => Some(ProviderPresetResolved {
627 endpoint: ModelEndpoint {
628 provider: "openrouter".to_string(),
629 model: "google/gemini-2.5-flash".to_string(),
630 base_url: Some("https://openrouter.ai/api/v1".to_string()),
631 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
632 },
633 runtime: ProviderPresetRuntimeOverrides {
634 scheduler_concurrency: Some(32),
635 provider_max_attempts: Some(1),
636 validation_max_attempts: Some(1),
637 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
638 max_backoff_seconds: Some(15),
639 timeout_seconds: Some(120),
640 batch_enabled: Some(true),
641 batch_target_tokens: Some(16_000),
642 batch_max_items: Some(128),
643 adaptive_batch_sizing: Some(true),
644 compact_prompts: Some(true),
645 adaptive_concurrency: Some(true),
646 thinking_disabled: Some(true),
647 json_mode: Some(JsonMode::Auto),
648 max_idle_per_host: Some(64),
649 ..Default::default()
650 },
651 }),
652 ProviderPreset::DeepSeekFree => Some(ProviderPresetResolved {
653 endpoint: ModelEndpoint {
654 provider: "deepseek".to_string(),
655 model: "deepseek-v4-flash".to_string(),
656 base_url: Some("https://api.deepseek.com/v1".to_string()),
657 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
658 },
659 runtime: ProviderPresetRuntimeOverrides {
660 scheduler_concurrency: Some(1),
661 provider_max_attempts: Some(1),
662 validation_max_attempts: Some(1),
663 retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
664 max_backoff_seconds: Some(120),
665 timeout_seconds: Some(240),
666 batch_enabled: Some(true),
667 batch_target_tokens: Some(4_000),
668 batch_max_items: Some(32),
669 compact_prompts: Some(true),
670 adaptive_concurrency: Some(false),
671 thinking_disabled: Some(true),
672 json_mode: Some(JsonMode::Auto),
673 max_idle_per_host: Some(4),
674 ..Default::default()
675 },
676 }),
677 ProviderPreset::DeepSeekPaid => Some(ProviderPresetResolved {
678 endpoint: ModelEndpoint {
679 provider: "deepseek".to_string(),
680 model: "deepseek-v4-flash".to_string(),
681 base_url: Some("https://api.deepseek.com/v1".to_string()),
682 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
683 },
684 runtime: ProviderPresetRuntimeOverrides {
685 scheduler_concurrency: Some(8),
686 provider_max_attempts: Some(2),
687 validation_max_attempts: Some(1),
688 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
689 max_backoff_seconds: Some(30),
690 timeout_seconds: Some(180),
691 batch_enabled: Some(true),
692 batch_target_tokens: Some(12_000),
693 batch_max_items: Some(96),
694 adaptive_batch_sizing: Some(true),
695 compact_prompts: Some(true),
696 adaptive_concurrency: Some(true),
697 thinking_disabled: Some(true),
698 json_mode: Some(JsonMode::Auto),
699 max_idle_per_host: Some(16),
700 ..Default::default()
701 },
702 }),
703 ProviderPreset::GeminiFlashLite => Some(ProviderPresetResolved {
704 endpoint: ModelEndpoint {
705 provider: "openrouter".to_string(),
706 model: "google/gemini-2.5-flash-lite".to_string(),
707 base_url: Some("https://openrouter.ai/api/v1".to_string()),
708 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
709 },
710 runtime: ProviderPresetRuntimeOverrides {
711 scheduler_concurrency: Some(40),
712 provider_max_attempts: Some(1),
713 validation_max_attempts: Some(1),
714 retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
715 max_backoff_seconds: Some(15),
716 timeout_seconds: Some(120),
717 batch_enabled: Some(true),
718 batch_target_tokens: Some(20_000),
719 batch_max_items: Some(160),
720 adaptive_batch_sizing: Some(true),
721 compact_prompts: Some(true),
722 adaptive_concurrency: Some(true),
723 thinking_disabled: Some(true),
724 json_mode: Some(JsonMode::Auto),
725 max_idle_per_host: Some(64),
726 ..Default::default()
727 },
728 }),
729 ProviderPreset::LocalOllama => Some(ProviderPresetResolved {
730 endpoint: ModelEndpoint {
731 provider: "openai-compatible".to_string(),
732 model: "qwen2.5:14b".to_string(),
733 base_url: Some("http://localhost:11434/v1".to_string()),
734 api_key_env: Some("OLLAMA_API_KEY".to_string()),
735 },
736 runtime: ProviderPresetRuntimeOverrides {
737 scheduler_concurrency: Some(1),
738 provider_max_attempts: Some(1),
739 validation_max_attempts: Some(1),
740 retry_after_policy: Some(RetryAfterPolicy::None),
741 timeout_seconds: Some(300),
742 batch_enabled: Some(true),
743 batch_target_tokens: Some(4_000),
744 batch_max_items: Some(24),
745 compact_prompts: Some(true),
746 adaptive_concurrency: Some(false),
747 thinking_disabled: Some(true),
748 json_mode: Some(JsonMode::Auto),
749 max_idle_per_host: Some(2),
750 ..Default::default()
751 },
752 }),
753 ProviderPreset::LocalLlamacpp => Some(ProviderPresetResolved {
754 endpoint: ModelEndpoint {
755 provider: "openai-compatible".to_string(),
756 model: "local-model".to_string(),
757 base_url: Some("http://localhost:8080/v1".to_string()),
758 api_key_env: Some("LLAMACPP_API_KEY".to_string()),
759 },
760 runtime: ProviderPresetRuntimeOverrides {
761 scheduler_concurrency: Some(1),
762 provider_max_attempts: Some(1),
763 validation_max_attempts: Some(1),
764 retry_after_policy: Some(RetryAfterPolicy::None),
765 timeout_seconds: Some(300),
766 batch_enabled: Some(true),
767 batch_target_tokens: Some(4_000),
768 batch_max_items: Some(24),
769 compact_prompts: Some(true),
770 adaptive_concurrency: Some(false),
771 thinking_disabled: Some(true),
772 json_mode: Some(JsonMode::Auto),
773 max_idle_per_host: Some(2),
774 ..Default::default()
775 },
776 }),
777 }
778 }
779
780 pub fn endpoint_or_default(self, custom: Option<ModelEndpoint>) -> ModelEndpoint {
781 if let Some(resolved) = self.resolve() {
782 return resolved.endpoint;
783 }
784 match self {
785 ProviderPreset::Auto => ModelEndpoint {
786 provider: "deepseek".to_string(),
787 model: "deepseek-v4-flash".to_string(),
788 base_url: Some("https://api.deepseek.com/v1".to_string()),
789 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
790 },
791 ProviderPreset::Custom => custom.unwrap_or_else(|| ModelEndpoint {
792 provider: "deepseek".to_string(),
793 model: "deepseek-v4-flash".to_string(),
794 base_url: Some("https://api.deepseek.com/v1".to_string()),
795 api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
796 }),
797 _ => unreachable!("resolved presets returned above"),
798 }
799 }
800}
801
802#[derive(Debug, Clone)]
803pub struct ProviderPresetResolved {
804 pub endpoint: ModelEndpoint,
805 pub runtime: ProviderPresetRuntimeOverrides,
806}
807
808#[derive(Debug, Clone, Default)]
809pub struct ProviderPresetRuntimeOverrides {
810 pub scheduler_concurrency: Option<usize>,
811 pub provider_max_attempts: Option<usize>,
812 pub validation_max_attempts: Option<usize>,
813 pub retry_after_policy: Option<RetryAfterPolicy>,
814 pub max_backoff_seconds: Option<u64>,
815 pub timeout_seconds: Option<u64>,
816 pub batch_enabled: Option<bool>,
817 pub batch_target_tokens: Option<usize>,
818 pub batch_max_items: Option<usize>,
819 pub adaptive_batch_sizing: Option<bool>,
820 pub compact_prompts: Option<bool>,
821 pub adaptive_concurrency: Option<bool>,
822 pub thinking_disabled: Option<bool>,
823 pub model_context_tokens: Option<u32>,
824 pub max_output_tokens: Option<u32>,
825 pub batch_max_output_tokens: Option<u32>,
826 pub json_mode: Option<JsonMode>,
827 pub max_idle_per_host: Option<usize>,
828}
829
830#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
831pub enum RetryAfterPolicy {
832 RespectHeader,
833 JitteredExponential,
834 Fixed,
835 None,
836}
837
838#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
839pub enum ProviderErrorKind {
840 RateLimit,
841 Timeout,
842 Server,
843 Client,
844 InvalidResponse,
845 Unknown,
846}
847
848#[derive(Debug, Clone, Default, serde::Serialize)]
849pub struct ProviderRequestMetric {
850 pub request_id: String,
851 pub batch_id: Option<String>,
852 pub provider: String,
853 pub model: String,
854 pub profile: String,
855 pub items: usize,
856 pub estimated_input_tokens: usize,
857 pub max_output_tokens: Option<u32>,
858 pub input_tokens: Option<u64>,
859 pub output_tokens: Option<u64>,
860 pub latency_ms: u64,
861 pub finish_reason: Option<String>,
862 pub status: String,
863 pub status_code: Option<u16>,
864 pub retry_count: usize,
865 pub backoff_ms: u64,
866 pub error_kind: Option<ProviderErrorKind>,
867}
868
/// Identifies a model and how to reach it.
#[derive(Debug, Clone)]
pub struct ModelEndpoint {
    /// Provider identifier, e.g. `"openrouter"`, `"deepseek"` or
    /// `"openai-compatible"` in the presets of this file.
    pub provider: String,
    /// Model name as the provider expects it.
    pub model: String,
    /// Base URL of the API, if one is set.
    pub base_url: Option<String>,
    /// Name of the environment variable that holds the API key, if one is set.
    pub api_key_env: Option<String>,
}
876
877#[derive(Debug, Clone)]
878pub struct ModelRouteConfig {
879 pub translation: ModelEndpoint,
880 pub repair: Option<ModelEndpoint>,
881 pub qa: Option<ModelEndpoint>,
882 pub double_check: Option<ModelEndpoint>,
883 pub fallback: Option<ModelEndpoint>,
884}
885
886#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
887#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
888pub enum FallbackScope {
889 Failed,
890 NeedsReview,
891 FailedAndNeedsReview,
892}
893
#[cfg(test)]
mod tests {
    use super::*;

    // The paid OpenRouter preset should carry its high-throughput overrides.
    #[test]
    fn openrouter_paid_fast_preset_sets_runtime_overrides() {
        let ProviderPresetResolved { endpoint, runtime } = ProviderPreset::OpenRouterPaidFast
            .resolve()
            .expect("preset should resolve");

        assert_eq!(endpoint.provider, "openrouter");
        assert_eq!(runtime.scheduler_concurrency, Some(32));
        assert_eq!(runtime.provider_max_attempts, Some(1));
        assert_eq!(runtime.batch_target_tokens, Some(16_000));
        assert_eq!(runtime.adaptive_batch_sizing, Some(true));
        assert_eq!(runtime.max_idle_per_host, Some(64));
    }

    // The free OpenRouter preset should stay gentle on the provider.
    #[test]
    fn openrouter_free_preset_uses_low_concurrency_and_respect_retry_after() {
        let runtime = ProviderPreset::OpenRouterFree
            .resolve()
            .expect("preset should resolve")
            .runtime;

        assert_eq!(runtime.scheduler_concurrency, Some(2));
        assert_eq!(runtime.provider_max_attempts, Some(1));
        assert_eq!(
            runtime.retry_after_policy,
            Some(RetryAfterPolicy::RespectHeader)
        );
        assert_eq!(runtime.max_idle_per_host, Some(8));
    }

    // Both local presets should point at loopback OpenAI-compatible servers.
    #[test]
    fn local_presets_use_openai_compatible_loopback_endpoints() {
        let ProviderPresetResolved {
            endpoint: ollama_endpoint,
            runtime: ollama_runtime,
        } = ProviderPreset::LocalOllama
            .resolve()
            .expect("Ollama preset should resolve");
        assert_eq!(ollama_endpoint.provider, "openai-compatible");
        assert_eq!(
            ollama_endpoint.base_url.as_deref(),
            Some("http://localhost:11434/v1")
        );
        assert_eq!(
            ollama_endpoint.api_key_env.as_deref(),
            Some("OLLAMA_API_KEY")
        );
        assert_eq!(ollama_runtime.scheduler_concurrency, Some(1));

        let llamacpp_endpoint = ProviderPreset::LocalLlamacpp
            .resolve()
            .expect("llama.cpp preset should resolve")
            .endpoint;
        assert_eq!(llamacpp_endpoint.provider, "openai-compatible");
        assert_eq!(
            llamacpp_endpoint.base_url.as_deref(),
            Some("http://localhost:8080/v1")
        );
        assert_eq!(
            llamacpp_endpoint.api_key_env.as_deref(),
            Some("LLAMACPP_API_KEY")
        );
    }

    // Both DeepSeek presets should disable thinking.
    #[test]
    fn deepseek_translation_presets_disable_thinking() {
        for preset in [ProviderPreset::DeepSeekFree, ProviderPreset::DeepSeekPaid] {
            let thinking_disabled = preset
                .resolve()
                .expect("preset should resolve")
                .runtime
                .thinking_disabled;
            assert_eq!(
                thinking_disabled,
                Some(true),
                "translation presets should reserve output tokens for translated prose"
            );
        }
    }

    // The runtime-config progress event should carry preset-derived values
    // through construction and destructuring unchanged.
    #[test]
    fn runtime_config_event_includes_provider_preset_values() {
        let resolved_event = crate::ProgressEvent::RuntimeConfigResolved {
            profile: "v1_fast".to_string(),
            provider_preset: Some("OpenRouterPaidFast".to_string()),
            provider: "openrouter".to_string(),
            model: "google/gemini-2.5-flash".to_string(),
            concurrency: 32,
            max_attempts: 1,
            provider_max_attempts: 1,
            validation_max_attempts: 1,
            retry_after_policy: "JitteredExponential".to_string(),
            max_backoff_seconds: 15,
            timeout_seconds: 120,
            batch_enabled: true,
            batch_target_tokens: 16_000,
            batch_max_items: 128,
            adaptive_batch_sizing: true,
            adaptive_concurrency: true,
            compact_prompts: true,
            thinking_disabled: true,
            json_mode: "Auto".to_string(),
            model_context_tokens: None,
            max_output_tokens: None,
            batch_max_output_tokens: None,
            timestamp_ms: 0,
        };

        if let crate::ProgressEvent::RuntimeConfigResolved {
            provider_preset,
            batch_target_tokens,
            adaptive_batch_sizing,
            provider_max_attempts,
            ..
        } = resolved_event
        {
            assert_eq!(provider_preset.as_deref(), Some("OpenRouterPaidFast"));
            assert_eq!(batch_target_tokens, 16_000);
            assert!(adaptive_batch_sizing);
            assert_eq!(provider_max_attempts, 1);
        } else {
            unreachable!("constructed runtime event");
        }
    }

    // `V1Fast` should make exactly one attempt at every level while keeping
    // item repair and the adaptive toggles on.
    #[test]
    fn v1_fast_uses_single_provider_attempt() {
        let ResolvedRunSettings {
            scheduler,
            provider,
            batch,
            adaptive_concurrency,
            ..
        } = TranslationProfile::V1Fast.resolve();

        assert_eq!(scheduler.max_attempts, 1);
        assert_eq!(provider.provider_max_attempts, 1);
        assert_eq!(provider.validation_max_attempts, 1);
        assert!(batch.repair_invalid_items);
        assert!(adaptive_concurrency);
        assert!(batch.adaptive_sizing);
    }
}