Skip to main content

bookforge_core/
config.rs

1use std::path::PathBuf;
2
3use crate::scheduler::SchedulerConfig;
4
/// Caller-supplied settings describing a translation job.
///
/// Plain data holder: nothing in this file validates or interprets the fields.
#[derive(Debug, Clone)]
pub struct TranslationConfig {
    /// Language of the input text, when one was specified.
    // NOTE(review): `None` presumably means "auto-detect" — confirm with the consumer.
    pub source_language: Option<String>,
    /// Language to translate into.
    pub target_language: String,
    /// Provider identifier, kept as a free-form string.
    pub provider: String,
    /// Model name, when one was specified.
    pub model: Option<String>,
    /// Requested concurrency level.
    pub concurrency: usize,
    /// Requested maximum number of attempts.
    pub max_attempts: usize,
    /// Output path for the job.
    pub output: PathBuf,
}
15
/// Token budgets used for segmentation.
///
/// The `Default` impl yields 1,200 segment tokens and 160 context tokens.
#[derive(Debug, Clone)]
pub struct SegmentationConfig {
    /// Upper bound on the size of one segment, in tokens.
    pub max_segment_tokens: usize,
    /// Token budget reserved for context.
    // NOTE(review): presumably surrounding text passed along with each segment — confirm
    // against the segmenter.
    pub context_tokens: usize,
}
21
22impl Default for SegmentationConfig {
23    fn default() -> Self {
24        Self {
25            max_segment_tokens: 1_200,
26            context_tokens: 160,
27        }
28    }
29}
30
/// Identifies a prompt format; `PromptVersion::as_str` yields the matching string tag.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum PromptVersion {
    /// Tagged `"v1"` by `as_str`.
    V1,
    /// Tagged `"batch_v1"` by `as_str`.
    BatchV1,
}
36
37impl PromptVersion {
38    pub fn as_str(self) -> &'static str {
39        match self {
40            PromptVersion::V1 => "v1",
41            PromptVersion::BatchV1 => "batch_v1",
42        }
43    }
44}
45
/// Named bundles of run settings, expanded by `TranslationProfile::resolve`.
// Variant notes below use plain `//` comments on purpose: with the `cli` feature the
// `clap::ValueEnum` derive would surface `///` variant docs as CLI help text.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TranslationProfile {
    // The only profile that resolves with batching disabled and all tuning flags off.
    Safe,
    // Batching on; resolves with a scheduler concurrency of 16.
    Balanced,
    // Batching on with adaptive sizing; resolves with a scheduler concurrency of 64.
    Fastest,
    // Resolves with a scheduler concurrency of 1 and `RetryAfterPolicy::RespectHeader`.
    FreeTier,
    // Resolves with zero context tokens and item repair disabled.
    TurboTextOnly,
    // The only profile that resolves with `thinking_disabled` set.
    V1Fast,
}
56
57impl TranslationProfile {
58    pub fn namespace_str(self) -> &'static str {
59        match self {
60            TranslationProfile::Safe => "safe",
61            TranslationProfile::Balanced => "balanced",
62            TranslationProfile::Fastest => "fastest",
63            TranslationProfile::FreeTier => "free_tier",
64            TranslationProfile::TurboTextOnly => "turbo_text_only",
65            TranslationProfile::V1Fast => "v1_fast",
66        }
67    }
68
69    pub fn resolve(self) -> ResolvedRunSettings {
70        match self {
71            Self::Safe => ResolvedRunSettings {
72                profile: self,
73                segmentation: SegmentationConfig {
74                    max_segment_tokens: 1_200,
75                    context_tokens: 160,
76                },
77                batch: BatchConfig {
78                    enabled: false,
79                    target_tokens: 0,
80                    max_items: 0,
81                    adaptive_sizing: false,
82                    split_on_json_failure: true,
83                    repair_invalid_items: true,
84                },
85                scheduler: SchedulerConfig {
86                    concurrency: 4,
87                    max_attempts: 3,
88                },
89                compact_prompts: false,
90                retry_failed_only: false,
91                adaptive_concurrency: false,
92                provider: ProviderRuntimeConfig {
93                    timeout_seconds: 120,
94                    provider_max_attempts: 6,
95                    validation_max_attempts: 3,
96                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
97                    max_backoff_seconds: 60,
98                    thinking_disabled: false,
99                    model_context_tokens: None,
100                    max_output_tokens: None,
101                    batch_max_output_tokens: None,
102                    json_mode: JsonMode::Auto,
103                    max_idle_per_host: 32,
104                },
105                qa: QaRunConfig {
106                    concurrency: 4,
107                    batch_target_tokens: 4_000,
108                    model: None,
109                    provider: None,
110                    base_url: None,
111                    api_key_env: None,
112                },
113                double_check: DoubleCheckConfig {
114                    mode: DoubleCheckMode::Off,
115                    model: None,
116                    provider: None,
117                    base_url: None,
118                    api_key_env: None,
119                    concurrency: 4,
120                    batch_target_tokens: 8_000,
121                    auto_correct: false,
122                    correction_rounds: 1,
123                },
124            },
125            Self::Balanced => ResolvedRunSettings {
126                profile: self,
127                segmentation: SegmentationConfig {
128                    max_segment_tokens: 2_500,
129                    context_tokens: 80,
130                },
131                batch: BatchConfig {
132                    enabled: true,
133                    target_tokens: 8_000,
134                    max_items: 64,
135                    adaptive_sizing: false,
136                    split_on_json_failure: true,
137                    repair_invalid_items: true,
138                },
139                scheduler: SchedulerConfig {
140                    concurrency: 16,
141                    max_attempts: 2,
142                },
143                compact_prompts: true,
144                retry_failed_only: true,
145                adaptive_concurrency: true,
146                provider: ProviderRuntimeConfig {
147                    timeout_seconds: 120,
148                    provider_max_attempts: 2,
149                    validation_max_attempts: 1,
150                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
151                    max_backoff_seconds: 30,
152                    thinking_disabled: false,
153                    model_context_tokens: None,
154                    max_output_tokens: None,
155                    batch_max_output_tokens: None,
156                    json_mode: JsonMode::Auto,
157                    max_idle_per_host: 32,
158                },
159                qa: QaRunConfig {
160                    concurrency: 8,
161                    batch_target_tokens: 8_000,
162                    model: None,
163                    provider: None,
164                    base_url: None,
165                    api_key_env: None,
166                },
167                double_check: DoubleCheckConfig {
168                    mode: DoubleCheckMode::Off,
169                    model: None,
170                    provider: None,
171                    base_url: None,
172                    api_key_env: None,
173                    concurrency: 4,
174                    batch_target_tokens: 8_000,
175                    auto_correct: false,
176                    correction_rounds: 1,
177                },
178            },
179            Self::Fastest => ResolvedRunSettings {
180                profile: self,
181                segmentation: SegmentationConfig {
182                    max_segment_tokens: 6_000,
183                    context_tokens: 20,
184                },
185                batch: BatchConfig {
186                    enabled: true,
187                    target_tokens: 16_000,
188                    max_items: 160,
189                    adaptive_sizing: true,
190                    split_on_json_failure: true,
191                    repair_invalid_items: true,
192                },
193                scheduler: SchedulerConfig {
194                    concurrency: 64,
195                    max_attempts: 1,
196                },
197                compact_prompts: true,
198                retry_failed_only: true,
199                adaptive_concurrency: true,
200                provider: ProviderRuntimeConfig {
201                    timeout_seconds: 120,
202                    provider_max_attempts: 2,
203                    validation_max_attempts: 1,
204                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
205                    max_backoff_seconds: 10,
206                    thinking_disabled: false,
207                    model_context_tokens: None,
208                    max_output_tokens: None,
209                    batch_max_output_tokens: None,
210                    json_mode: JsonMode::Auto,
211                    max_idle_per_host: 32,
212                },
213                qa: QaRunConfig {
214                    concurrency: 16,
215                    batch_target_tokens: 12_000,
216                    model: None,
217                    provider: None,
218                    base_url: None,
219                    api_key_env: None,
220                },
221                double_check: DoubleCheckConfig {
222                    mode: DoubleCheckMode::Off,
223                    model: None,
224                    provider: None,
225                    base_url: None,
226                    api_key_env: None,
227                    concurrency: 4,
228                    batch_target_tokens: 12_000,
229                    auto_correct: false,
230                    correction_rounds: 1,
231                },
232            },
233            Self::FreeTier => ResolvedRunSettings {
234                profile: self,
235                segmentation: SegmentationConfig {
236                    max_segment_tokens: 2_500,
237                    context_tokens: 80,
238                },
239                batch: BatchConfig {
240                    enabled: true,
241                    target_tokens: 8_000,
242                    max_items: 64,
243                    adaptive_sizing: false,
244                    split_on_json_failure: false,
245                    repair_invalid_items: true,
246                },
247                scheduler: SchedulerConfig {
248                    concurrency: 1,
249                    max_attempts: 2,
250                },
251                compact_prompts: true,
252                retry_failed_only: true,
253                adaptive_concurrency: true,
254                provider: ProviderRuntimeConfig {
255                    timeout_seconds: 300,
256                    provider_max_attempts: 2,
257                    validation_max_attempts: 1,
258                    retry_after_policy: RetryAfterPolicy::RespectHeader,
259                    max_backoff_seconds: 90,
260                    thinking_disabled: false,
261                    model_context_tokens: None,
262                    max_output_tokens: None,
263                    batch_max_output_tokens: None,
264                    json_mode: JsonMode::Auto,
265                    max_idle_per_host: 8,
266                },
267                qa: QaRunConfig {
268                    concurrency: 1,
269                    batch_target_tokens: 4_000,
270                    model: None,
271                    provider: None,
272                    base_url: None,
273                    api_key_env: None,
274                },
275                double_check: DoubleCheckConfig {
276                    mode: DoubleCheckMode::Off,
277                    model: None,
278                    provider: None,
279                    base_url: None,
280                    api_key_env: None,
281                    concurrency: 1,
282                    batch_target_tokens: 4_000,
283                    auto_correct: false,
284                    correction_rounds: 1,
285                },
286            },
287            Self::TurboTextOnly => ResolvedRunSettings {
288                profile: self,
289                segmentation: SegmentationConfig {
290                    max_segment_tokens: 12_000,
291                    context_tokens: 0,
292                },
293                batch: BatchConfig {
294                    enabled: true,
295                    target_tokens: 24_000,
296                    max_items: 250,
297                    adaptive_sizing: true,
298                    split_on_json_failure: true,
299                    repair_invalid_items: false,
300                },
301                scheduler: SchedulerConfig {
302                    concurrency: 96,
303                    max_attempts: 1,
304                },
305                compact_prompts: true,
306                retry_failed_only: true,
307                adaptive_concurrency: true,
308                provider: ProviderRuntimeConfig {
309                    timeout_seconds: 120,
310                    provider_max_attempts: 1,
311                    validation_max_attempts: 1,
312                    retry_after_policy: RetryAfterPolicy::None,
313                    max_backoff_seconds: 5,
314                    thinking_disabled: false,
315                    model_context_tokens: None,
316                    max_output_tokens: None,
317                    batch_max_output_tokens: None,
318                    json_mode: JsonMode::Auto,
319                    max_idle_per_host: 64,
320                },
321                qa: QaRunConfig {
322                    concurrency: 16,
323                    batch_target_tokens: 16_000,
324                    model: None,
325                    provider: None,
326                    base_url: None,
327                    api_key_env: None,
328                },
329                double_check: DoubleCheckConfig {
330                    mode: DoubleCheckMode::Off,
331                    model: None,
332                    provider: None,
333                    base_url: None,
334                    api_key_env: None,
335                    concurrency: 4,
336                    batch_target_tokens: 16_000,
337                    auto_correct: false,
338                    correction_rounds: 1,
339                },
340            },
341            Self::V1Fast => ResolvedRunSettings {
342                profile: self,
343                segmentation: SegmentationConfig {
344                    max_segment_tokens: 12_000,
345                    context_tokens: 20,
346                },
347                batch: BatchConfig {
348                    enabled: true,
349                    target_tokens: 16_000,
350                    max_items: 128,
351                    adaptive_sizing: true,
352                    split_on_json_failure: true,
353                    repair_invalid_items: true,
354                },
355                scheduler: SchedulerConfig {
356                    concurrency: 32,
357                    max_attempts: 1,
358                },
359                compact_prompts: true,
360                retry_failed_only: true,
361                adaptive_concurrency: true,
362                provider: ProviderRuntimeConfig {
363                    timeout_seconds: 120,
364                    provider_max_attempts: 1,
365                    validation_max_attempts: 1,
366                    retry_after_policy: RetryAfterPolicy::None,
367                    max_backoff_seconds: 5,
368                    thinking_disabled: true,
369                    model_context_tokens: None,
370                    max_output_tokens: None,
371                    batch_max_output_tokens: None,
372                    json_mode: JsonMode::Auto,
373                    max_idle_per_host: 64,
374                },
375                qa: QaRunConfig {
376                    concurrency: 4,
377                    batch_target_tokens: 4_000,
378                    model: None,
379                    provider: None,
380                    base_url: None,
381                    api_key_env: None,
382                },
383                double_check: DoubleCheckConfig {
384                    mode: DoubleCheckMode::Off,
385                    model: None,
386                    provider: None,
387                    base_url: None,
388                    api_key_env: None,
389                    concurrency: 4,
390                    batch_target_tokens: 8_000,
391                    auto_correct: false,
392                    correction_rounds: 1,
393                },
394            },
395        }
396    }
397}
398
/// Fully expanded settings for a run.
///
/// Produced by `TranslationProfile::resolve` and optionally adjusted afterwards by
/// `ResolvedRunSettings::apply_provider_preset_runtime`.
#[derive(Debug, Clone)]
pub struct ResolvedRunSettings {
    /// Profile these settings were resolved from.
    pub profile: TranslationProfile,
    /// Segment and context token budgets.
    pub segmentation: SegmentationConfig,
    /// Batching settings.
    pub batch: BatchConfig,
    /// Scheduler concurrency and attempt count.
    pub scheduler: SchedulerConfig,
    /// Provider-facing runtime settings (timeouts, retries, output limits, ...).
    pub provider: ProviderRuntimeConfig,
    /// Compact-prompts flag; `false` only for the `Safe` profile.
    pub compact_prompts: bool,
    /// Retry-failed-only flag; `false` only for the `Safe` profile.
    pub retry_failed_only: bool,
    /// Adaptive-concurrency flag; `false` only for the `Safe` profile, and
    /// overridable by a provider preset.
    pub adaptive_concurrency: bool,
    /// QA pass settings.
    pub qa: QaRunConfig,
    /// Double-check pass settings.
    pub double_check: DoubleCheckConfig,
}
412
413impl ResolvedRunSettings {
414    pub fn apply_provider_preset_runtime(&mut self, overrides: ProviderPresetRuntimeOverrides) {
415        if let Some(v) = overrides.scheduler_concurrency {
416            self.scheduler.concurrency = v.max(1);
417        }
418        if let Some(v) = overrides.provider_max_attempts {
419            self.provider.provider_max_attempts = v.max(1);
420        }
421        if let Some(v) = overrides.validation_max_attempts {
422            self.provider.validation_max_attempts = v.max(1);
423        }
424        if let Some(v) = overrides.retry_after_policy {
425            self.provider.retry_after_policy = v;
426        }
427        if let Some(v) = overrides.max_backoff_seconds {
428            self.provider.max_backoff_seconds = v;
429        }
430        if let Some(v) = overrides.timeout_seconds {
431            self.provider.timeout_seconds = v;
432        }
433        if let Some(v) = overrides.batch_enabled {
434            self.batch.enabled = v;
435        }
436        if let Some(v) = overrides.batch_target_tokens {
437            self.batch.target_tokens = v;
438        }
439        if let Some(v) = overrides.batch_max_items {
440            self.batch.max_items = v;
441        }
442        if let Some(v) = overrides.adaptive_batch_sizing {
443            self.batch.adaptive_sizing = v;
444        }
445        if let Some(v) = overrides.compact_prompts {
446            self.compact_prompts = v;
447        }
448        if let Some(v) = overrides.adaptive_concurrency {
449            self.adaptive_concurrency = v;
450        }
451        if let Some(v) = overrides.thinking_disabled {
452            self.provider.thinking_disabled = v;
453        }
454        if let Some(v) = overrides.model_context_tokens {
455            self.provider.model_context_tokens = Some(v);
456        }
457        if let Some(v) = overrides.max_output_tokens {
458            self.provider.max_output_tokens = Some(v);
459        }
460        if let Some(v) = overrides.batch_max_output_tokens {
461            self.provider.batch_max_output_tokens = Some(v);
462        }
463        if let Some(v) = overrides.json_mode {
464            self.provider.json_mode = v;
465        }
466        if let Some(v) = overrides.max_idle_per_host {
467            self.provider.max_idle_per_host = v;
468        }
469    }
470}
471
/// Batching settings.
///
/// NOTE(review): the field notes are inferred from the names and from the values the
/// profiles assign; the consuming code is not visible here — confirm before relying
/// on them.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether batching is turned on. Only the `Safe` profile resolves to `false`.
    pub enabled: bool,
    /// Target size of one batch, in tokens (0 in the `Safe` profile, where batching is off).
    pub target_tokens: usize,
    /// Maximum number of items in one batch (0 in the `Safe` profile).
    pub max_items: usize,
    /// Adaptive batch sizing flag.
    pub adaptive_sizing: bool,
    /// Presumably: split a batch and retry when its JSON response cannot be used.
    pub split_on_json_failure: bool,
    /// Presumably: attempt to repair individual invalid items.
    pub repair_invalid_items: bool,
}
481
/// Settings for the QA pass.
///
/// Every profile resolves the four optional endpoint fields to `None`.
#[derive(Debug, Clone)]
pub struct QaRunConfig {
    /// Concurrency of the QA pass.
    pub concurrency: usize,
    /// Target batch size for QA, in tokens.
    pub batch_target_tokens: usize,
    /// Model override for QA, if any.
    pub model: Option<String>,
    /// Provider override for QA, if any.
    pub provider: Option<String>,
    /// Base URL override for QA, if any.
    pub base_url: Option<String>,
    /// Name of the environment variable holding the QA API key, if any.
    // NOTE(review): "environment variable name" is inferred from the field name and from
    // values such as "OPENROUTER_API_KEY" used for `ModelEndpoint::api_key_env`.
    pub api_key_env: Option<String>,
}
491
/// Selects which double-check pass to run, if any.
///
/// Every profile in this file resolves to `Off`.
// Variant notes use plain `//` comments on purpose: with the `cli` feature the
// `clap::ValueEnum` derive would surface `///` variant docs as CLI help text.
// NOTE(review): the meanings below are inferred from the variant names only.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum DoubleCheckMode {
    // No double-check pass.
    Off,
    // Presumably formatting checks only.
    Formatting,
    // Presumably semantic checks only.
    Semantic,
    // Presumably both formatting and semantic checks.
    Full,
}
500
/// Settings for the double-check pass.
///
/// Every profile resolves `mode` to `Off`, the endpoint fields to `None`,
/// `auto_correct` to `false` and `correction_rounds` to 1.
#[derive(Debug, Clone)]
pub struct DoubleCheckConfig {
    /// Which double-check pass to run.
    pub mode: DoubleCheckMode,
    /// Model override for the pass, if any.
    pub model: Option<String>,
    /// Provider override for the pass, if any.
    pub provider: Option<String>,
    /// Base URL override for the pass, if any.
    pub base_url: Option<String>,
    /// Presumably the name of the environment variable holding the API key, if any.
    pub api_key_env: Option<String>,
    /// Concurrency of the pass.
    pub concurrency: usize,
    /// Target batch size for the pass, in tokens.
    pub batch_target_tokens: usize,
    /// Auto-correction flag.
    // NOTE(review): presumably applies corrections suggested by the check — confirm.
    pub auto_correct: bool,
    /// Number of correction rounds.
    pub correction_rounds: usize,
}
513
/// Provider-facing runtime settings.
///
/// Initial values come from `TranslationProfile::resolve`; a provider preset may
/// overwrite any of them through `ResolvedRunSettings::apply_provider_preset_runtime`.
#[derive(Debug, Clone)]
pub struct ProviderRuntimeConfig {
    /// Timeout, in seconds.
    pub timeout_seconds: u64,
    /// Maximum provider attempts; preset overrides are floored at 1.
    pub provider_max_attempts: usize,
    /// Maximum validation attempts; preset overrides are floored at 1.
    pub validation_max_attempts: usize,
    /// Retry/backoff policy.
    pub retry_after_policy: RetryAfterPolicy,
    /// Upper bound on backoff, in seconds.
    pub max_backoff_seconds: u64,
    /// Thinking-disabled flag; among the profiles only `V1Fast` sets it.
    pub thinking_disabled: bool,
    /// Model context size in tokens, when known. No profile sets it.
    pub model_context_tokens: Option<u32>,
    /// Output token limit, when set. No profile sets it.
    pub max_output_tokens: Option<u32>,
    /// Output token limit for batch requests, when set. No profile sets it.
    pub batch_max_output_tokens: Option<u32>,
    /// JSON handling mode; every profile and preset in this file uses `Auto`.
    pub json_mode: JsonMode,
    /// Idle-connection limit per host.
    // NOTE(review): presumably forwarded to the HTTP client's connection pool — confirm.
    pub max_idle_per_host: usize,
}
528
/// Selects how JSON output is requested from a provider.
// Variant notes use plain `//` comments on purpose: with the `cli` feature the
// `clap::ValueEnum` derive would surface `///` variant docs as CLI help text.
// NOTE(review): the meanings below are inferred from the variant names only.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum JsonMode {
    // Used by every profile and preset in this file; presumably lets the client decide.
    Auto,
    // Presumably requests JSON through the provider's response-format option.
    ResponseFormat,
    // Presumably relies on prompt instructions alone.
    PromptOnly,
}
536
/// Clamps a computed output-token budget to the model context and an optional user cap.
///
/// Steps, in order:
/// 1. If `model_context_tokens` is known, the budget is limited to
///    `context - estimated_prompt_tokens - 256`, but that limit is never taken
///    below 512 tokens.
/// 2. If `user_cap` is set, the budget is limited to it.
/// 3. The final result is never below 256 tokens, so a `user_cap` smaller than 256
///    is raised to 256.
///
/// `estimated_prompt_tokens` values above `u32::MAX` saturate rather than wrap, so an
/// oversized estimate is treated as "no room left" instead of as a small prompt.
pub fn cap_output_tokens(
    computed: u32,
    estimated_prompt_tokens: usize,
    model_context_tokens: Option<u32>,
    user_cap: Option<u32>,
) -> u32 {
    /// Head-room kept free between the prompt and the end of the context.
    const SAFETY_MARGIN: u32 = 256;
    /// Lowest value the context-derived limit may take.
    const CONTEXT_LIMIT_FLOOR: u32 = 512;
    /// Lowest budget this function ever returns.
    const OUTPUT_FLOOR: u32 = 256;

    let mut out = computed;

    if let Some(context) = model_context_tokens {
        // A plain `as u32` cast would silently wrap on 64-bit targets; saturate instead.
        let prompt = <u32 as std::convert::TryFrom<usize>>::try_from(estimated_prompt_tokens)
            .unwrap_or(u32::MAX);
        let safe_remaining = context
            .saturating_sub(prompt)
            .saturating_sub(SAFETY_MARGIN);
        out = out.min(safe_remaining.max(CONTEXT_LIMIT_FLOOR));
    }

    if let Some(cap) = user_cap {
        out = out.min(cap);
    }

    out.max(OUTPUT_FLOOR)
}
558
/// Named provider presets; `ProviderPreset::resolve` maps a preset to an endpoint plus
/// runtime overrides.
// Variant notes use plain `//` comments on purpose: with the `cli` feature the
// `clap::ValueEnum` derive would surface `///` variant docs as CLI help text.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ProviderPreset {
    // Resolves to `None`; `endpoint_or_default` falls back to the DeepSeek endpoint.
    Auto,
    // OpenRouter, model "google/gemini-2.5-flash-lite", scheduler concurrency 2.
    OpenRouterFree,
    // OpenRouter, model "google/gemini-2.5-flash", scheduler concurrency 32.
    OpenRouterPaidFast,
    // DeepSeek, model "deepseek-v4-flash", scheduler concurrency 1.
    DeepSeekFree,
    // DeepSeek, model "deepseek-v4-flash", scheduler concurrency 8.
    DeepSeekPaid,
    // OpenRouter, model "google/gemini-2.5-flash-lite", scheduler concurrency 40.
    GeminiFlashLite,
    // Resolves to `None`; `endpoint_or_default` uses the caller-supplied endpoint,
    // or the DeepSeek endpoint when none is given.
    Custom,
}
570
571impl ProviderPreset {
572    pub fn resolve(self) -> Option<ProviderPresetResolved> {
573        match self {
574            ProviderPreset::Auto | ProviderPreset::Custom => None,
575            ProviderPreset::OpenRouterFree => Some(ProviderPresetResolved {
576                endpoint: ModelEndpoint {
577                    provider: "openrouter".to_string(),
578                    model: "google/gemini-2.5-flash-lite".to_string(),
579                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
580                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
581                },
582                runtime: ProviderPresetRuntimeOverrides {
583                    scheduler_concurrency: Some(2),
584                    provider_max_attempts: Some(1),
585                    validation_max_attempts: Some(1),
586                    retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
587                    max_backoff_seconds: Some(90),
588                    timeout_seconds: Some(180),
589                    batch_enabled: Some(true),
590                    batch_target_tokens: Some(6_000),
591                    batch_max_items: Some(48),
592                    compact_prompts: Some(true),
593                    adaptive_concurrency: Some(true),
594                    thinking_disabled: Some(true),
595                    json_mode: Some(JsonMode::Auto),
596                    max_idle_per_host: Some(8),
597                    ..Default::default()
598                },
599            }),
600            ProviderPreset::OpenRouterPaidFast => Some(ProviderPresetResolved {
601                endpoint: ModelEndpoint {
602                    provider: "openrouter".to_string(),
603                    model: "google/gemini-2.5-flash".to_string(),
604                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
605                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
606                },
607                runtime: ProviderPresetRuntimeOverrides {
608                    scheduler_concurrency: Some(32),
609                    provider_max_attempts: Some(1),
610                    validation_max_attempts: Some(1),
611                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
612                    max_backoff_seconds: Some(15),
613                    timeout_seconds: Some(120),
614                    batch_enabled: Some(true),
615                    batch_target_tokens: Some(16_000),
616                    batch_max_items: Some(128),
617                    adaptive_batch_sizing: Some(true),
618                    compact_prompts: Some(true),
619                    adaptive_concurrency: Some(true),
620                    thinking_disabled: Some(true),
621                    json_mode: Some(JsonMode::Auto),
622                    max_idle_per_host: Some(64),
623                    ..Default::default()
624                },
625            }),
626            ProviderPreset::DeepSeekFree => Some(ProviderPresetResolved {
627                endpoint: ModelEndpoint {
628                    provider: "deepseek".to_string(),
629                    model: "deepseek-v4-flash".to_string(),
630                    base_url: Some("https://api.deepseek.com/v1".to_string()),
631                    api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
632                },
633                runtime: ProviderPresetRuntimeOverrides {
634                    scheduler_concurrency: Some(1),
635                    provider_max_attempts: Some(1),
636                    validation_max_attempts: Some(1),
637                    retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
638                    max_backoff_seconds: Some(120),
639                    timeout_seconds: Some(240),
640                    batch_enabled: Some(true),
641                    batch_target_tokens: Some(4_000),
642                    batch_max_items: Some(32),
643                    compact_prompts: Some(true),
644                    adaptive_concurrency: Some(false),
645                    thinking_disabled: Some(true),
646                    json_mode: Some(JsonMode::Auto),
647                    max_idle_per_host: Some(4),
648                    ..Default::default()
649                },
650            }),
651            ProviderPreset::DeepSeekPaid => Some(ProviderPresetResolved {
652                endpoint: ModelEndpoint {
653                    provider: "deepseek".to_string(),
654                    model: "deepseek-v4-flash".to_string(),
655                    base_url: Some("https://api.deepseek.com/v1".to_string()),
656                    api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
657                },
658                runtime: ProviderPresetRuntimeOverrides {
659                    scheduler_concurrency: Some(8),
660                    provider_max_attempts: Some(2),
661                    validation_max_attempts: Some(1),
662                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
663                    max_backoff_seconds: Some(30),
664                    timeout_seconds: Some(180),
665                    batch_enabled: Some(true),
666                    batch_target_tokens: Some(12_000),
667                    batch_max_items: Some(96),
668                    adaptive_batch_sizing: Some(true),
669                    compact_prompts: Some(true),
670                    adaptive_concurrency: Some(true),
671                    thinking_disabled: Some(false),
672                    json_mode: Some(JsonMode::Auto),
673                    max_idle_per_host: Some(16),
674                    ..Default::default()
675                },
676            }),
677            ProviderPreset::GeminiFlashLite => Some(ProviderPresetResolved {
678                endpoint: ModelEndpoint {
679                    provider: "openrouter".to_string(),
680                    model: "google/gemini-2.5-flash-lite".to_string(),
681                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
682                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
683                },
684                runtime: ProviderPresetRuntimeOverrides {
685                    scheduler_concurrency: Some(40),
686                    provider_max_attempts: Some(1),
687                    validation_max_attempts: Some(1),
688                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
689                    max_backoff_seconds: Some(15),
690                    timeout_seconds: Some(120),
691                    batch_enabled: Some(true),
692                    batch_target_tokens: Some(20_000),
693                    batch_max_items: Some(160),
694                    adaptive_batch_sizing: Some(true),
695                    compact_prompts: Some(true),
696                    adaptive_concurrency: Some(true),
697                    thinking_disabled: Some(true),
698                    json_mode: Some(JsonMode::Auto),
699                    max_idle_per_host: Some(64),
700                    ..Default::default()
701                },
702            }),
703        }
704    }
705
706    pub fn endpoint_or_default(self, custom: Option<ModelEndpoint>) -> ModelEndpoint {
707        if let Some(resolved) = self.resolve() {
708            return resolved.endpoint;
709        }
710        match self {
711            ProviderPreset::Auto => ModelEndpoint {
712                provider: "deepseek".to_string(),
713                model: "deepseek-v4-flash".to_string(),
714                base_url: Some("https://api.deepseek.com/v1".to_string()),
715                api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
716            },
717            ProviderPreset::Custom => custom.unwrap_or_else(|| ModelEndpoint {
718                provider: "deepseek".to_string(),
719                model: "deepseek-v4-flash".to_string(),
720                base_url: Some("https://api.deepseek.com/v1".to_string()),
721                api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
722            }),
723            _ => unreachable!("resolved presets returned above"),
724        }
725    }
726}
727
/// Result of `ProviderPreset::resolve`: the endpoint a preset points at together with
/// the runtime overrides it carries.
#[derive(Debug, Clone)]
pub struct ProviderPresetResolved {
    /// Provider, model, base URL and API-key variable of the preset.
    pub endpoint: ModelEndpoint,
    /// Settings the preset overrides; consumed by
    /// `ResolvedRunSettings::apply_provider_preset_runtime`.
    pub runtime: ProviderPresetRuntimeOverrides,
}
733
/// Optional overrides a provider preset applies on top of resolved run settings.
///
/// A `None` field leaves the matching setting unchanged; the derived `Default` is
/// all-`None`. See `ResolvedRunSettings::apply_provider_preset_runtime` for the target
/// of each field.
#[derive(Debug, Clone, Default)]
pub struct ProviderPresetRuntimeOverrides {
    /// Overrides `scheduler.concurrency`; floored at 1 when applied.
    pub scheduler_concurrency: Option<usize>,
    /// Overrides `provider.provider_max_attempts`; floored at 1 when applied.
    pub provider_max_attempts: Option<usize>,
    /// Overrides `provider.validation_max_attempts`; floored at 1 when applied.
    pub validation_max_attempts: Option<usize>,
    /// Overrides `provider.retry_after_policy`.
    pub retry_after_policy: Option<RetryAfterPolicy>,
    /// Overrides `provider.max_backoff_seconds`.
    pub max_backoff_seconds: Option<u64>,
    /// Overrides `provider.timeout_seconds`.
    pub timeout_seconds: Option<u64>,
    /// Overrides `batch.enabled`.
    pub batch_enabled: Option<bool>,
    /// Overrides `batch.target_tokens`.
    pub batch_target_tokens: Option<usize>,
    /// Overrides `batch.max_items`.
    pub batch_max_items: Option<usize>,
    /// Overrides `batch.adaptive_sizing`.
    pub adaptive_batch_sizing: Option<bool>,
    /// Overrides `compact_prompts`.
    pub compact_prompts: Option<bool>,
    /// Overrides `adaptive_concurrency`.
    pub adaptive_concurrency: Option<bool>,
    /// Overrides `provider.thinking_disabled`.
    pub thinking_disabled: Option<bool>,
    /// Sets `provider.model_context_tokens` to `Some(value)`.
    pub model_context_tokens: Option<u32>,
    /// Sets `provider.max_output_tokens` to `Some(value)`.
    pub max_output_tokens: Option<u32>,
    /// Sets `provider.batch_max_output_tokens` to `Some(value)`.
    pub batch_max_output_tokens: Option<u32>,
    /// Overrides `provider.json_mode`.
    pub json_mode: Option<JsonMode>,
    /// Overrides `provider.max_idle_per_host`.
    pub max_idle_per_host: Option<usize>,
}
755
/// Retry/backoff policy selector.
///
/// NOTE(review): the retry logic is not visible in this file; the variant notes are
/// inferred from the names — confirm against the provider client.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum RetryAfterPolicy {
    /// Presumably honours a server-provided retry delay. Used by the `FreeTier` profile
    /// and the free presets.
    RespectHeader,
    /// Presumably exponential backoff with jitter.
    JitteredExponential,
    /// Presumably a fixed delay. Not used by any profile or preset in this file.
    Fixed,
    /// Presumably no delay between retries. Used by `TurboTextOnly` and `V1Fast`.
    None,
}
763
/// Coarse classification of a provider error, recorded in
/// `ProviderRequestMetric::error_kind`.
///
/// NOTE(review): the classification logic is not visible in this file; the variant
/// notes are inferred from the names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ProviderErrorKind {
    /// Presumably a rate-limit rejection.
    RateLimit,
    /// Presumably a request timeout.
    Timeout,
    /// Presumably a server-side failure.
    Server,
    /// Presumably a client-side (request) failure.
    Client,
    /// Presumably a response that could not be used.
    InvalidResponse,
    /// Anything not covered by the other variants.
    Unknown,
}
773
/// Serializable metrics record describing one provider request.
///
/// NOTE(review): the code that fills this record is not visible here; the field notes
/// are inferred from names and types.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct ProviderRequestMetric {
    /// Identifier of the request.
    pub request_id: String,
    /// Identifier of the batch the request belongs to, if any.
    pub batch_id: Option<String>,
    /// Provider name.
    pub provider: String,
    /// Model name.
    pub model: String,
    /// Profile name.
    pub profile: String,
    /// Number of items in the request.
    pub items: usize,
    /// Estimated input size, in tokens.
    pub estimated_input_tokens: usize,
    /// Output token limit sent with the request, if any.
    pub max_output_tokens: Option<u32>,
    /// Input token count, when available.
    pub input_tokens: Option<u64>,
    /// Output token count, when available.
    pub output_tokens: Option<u64>,
    /// Request latency, in milliseconds.
    pub latency_ms: u64,
    /// Finish reason, when available.
    pub finish_reason: Option<String>,
    /// Status of the request, as free-form text.
    pub status: String,
    /// Numeric status code, when available (presumably the HTTP status).
    pub status_code: Option<u16>,
    /// Number of retries.
    pub retry_count: usize,
    /// Backoff time, in milliseconds.
    pub backoff_ms: u64,
    /// Error classification, when the request failed.
    pub error_kind: Option<ProviderErrorKind>,
}
794
/// Identifies a model and where to reach it.
#[derive(Debug, Clone)]
pub struct ModelEndpoint {
    /// Provider name, e.g. `"openrouter"` or `"deepseek"`.
    pub provider: String,
    /// Model name, e.g. `"deepseek-v4-flash"`.
    pub model: String,
    /// Base URL of the provider API, e.g. `"https://api.deepseek.com/v1"`.
    pub base_url: Option<String>,
    /// Name of the environment variable holding the API key, e.g. `"DEEPSEEK_API_KEY"`.
    // NOTE(review): "environment variable name" is inferred from the field name and the
    // preset values; the lookup itself is not visible in this file.
    pub api_key_env: Option<String>,
}
802
803#[derive(Debug, Clone)]
804pub struct ModelRouteConfig {
805    pub translation: ModelEndpoint,
806    pub repair: Option<ModelEndpoint>,
807    pub qa: Option<ModelEndpoint>,
808    pub double_check: Option<ModelEndpoint>,
809    pub fallback: Option<ModelEndpoint>,
810}
811
812#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
813#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
814pub enum FallbackScope {
815    Failed,
816    NeedsReview,
817    FailedAndNeedsReview,
818}
819
#[cfg(test)]
mod tests {
    use super::*;

    /// The paid-fast OpenRouter preset resolves to the `openrouter` provider
    /// and carries its expected runtime overrides.
    #[test]
    fn openrouter_paid_fast_preset_sets_runtime_overrides() {
        let resolved = ProviderPreset::OpenRouterPaidFast
            .resolve()
            .expect("preset should resolve");
        assert_eq!(resolved.endpoint.provider, "openrouter");
        assert_eq!(resolved.runtime.scheduler_concurrency, Some(32));
        assert_eq!(resolved.runtime.provider_max_attempts, Some(1));
        assert_eq!(resolved.runtime.batch_target_tokens, Some(16_000));
        assert_eq!(resolved.runtime.adaptive_batch_sizing, Some(true));
        assert_eq!(resolved.runtime.max_idle_per_host, Some(64));
    }

    /// The free OpenRouter preset uses low concurrency, a single provider
    /// attempt, and the `RespectHeader` retry policy.
    #[test]
    fn openrouter_free_preset_uses_low_concurrency_and_respect_retry_after() {
        let resolved = ProviderPreset::OpenRouterFree
            .resolve()
            .expect("preset should resolve");
        assert_eq!(resolved.runtime.scheduler_concurrency, Some(2));
        assert_eq!(resolved.runtime.provider_max_attempts, Some(1));
        assert_eq!(
            resolved.runtime.retry_after_policy,
            Some(RetryAfterPolicy::RespectHeader)
        );
        assert_eq!(resolved.runtime.max_idle_per_host, Some(8));
    }

    /// A `RuntimeConfigResolved` event can be built with preset-derived values
    /// and those values can be read back by destructuring it.
    #[test]
    fn runtime_config_event_includes_provider_preset_values() {
        let event = crate::ProgressEvent::RuntimeConfigResolved {
            profile: "v1_fast".to_string(),
            provider_preset: Some("OpenRouterPaidFast".to_string()),
            provider: "openrouter".to_string(),
            model: "google/gemini-2.5-flash".to_string(),
            concurrency: 32,
            max_attempts: 1,
            provider_max_attempts: 1,
            validation_max_attempts: 1,
            retry_after_policy: "JitteredExponential".to_string(),
            max_backoff_seconds: 15,
            timeout_seconds: 120,
            batch_enabled: true,
            batch_target_tokens: 16_000,
            batch_max_items: 128,
            adaptive_batch_sizing: true,
            adaptive_concurrency: true,
            compact_prompts: true,
            thinking_disabled: true,
            json_mode: "Auto".to_string(),
            model_context_tokens: None,
            max_output_tokens: None,
            batch_max_output_tokens: None,
            timestamp_ms: 0,
        };

        // The event was constructed just above, so any other variant is
        // impossible here; `let … else` states that without a catch-all arm.
        let crate::ProgressEvent::RuntimeConfigResolved {
            provider_preset,
            batch_target_tokens,
            adaptive_batch_sizing,
            provider_max_attempts,
            ..
        } = event
        else {
            unreachable!("constructed runtime event");
        };

        assert_eq!(provider_preset.as_deref(), Some("OpenRouterPaidFast"));
        assert_eq!(batch_target_tokens, 16_000);
        assert!(adaptive_batch_sizing);
        assert_eq!(provider_max_attempts, 1);
    }

    /// The `V1Fast` profile resolves to single-attempt settings while keeping
    /// invalid-item repair and the adaptive features switched on.
    #[test]
    fn v1_fast_uses_single_provider_attempt() {
        let settings = TranslationProfile::V1Fast.resolve();
        assert_eq!(settings.scheduler.max_attempts, 1);
        assert_eq!(settings.provider.provider_max_attempts, 1);
        assert_eq!(settings.provider.validation_max_attempts, 1);
        assert!(settings.batch.repair_invalid_items);
        assert!(settings.adaptive_concurrency);
        assert!(settings.batch.adaptive_sizing);
    }
}