Skip to main content

bookforge_core/
config.rs

1use std::path::PathBuf;
2
3use crate::scheduler::SchedulerConfig;
4
/// User-facing parameters for a translation run.
///
/// NOTE(review): this struct is only declared in this file — it is never
/// built or read here — so the per-field notes are inferred from names and
/// types. Confirm them against the call sites.
#[derive(Debug, Clone)]
pub struct TranslationConfig {
    /// Source language; `None` presumably means "not specified" — TODO confirm.
    pub source_language: Option<String>,
    /// Target language (always required).
    pub target_language: String,
    /// Provider identifier.
    pub provider: String,
    /// Optional model identifier.
    pub model: Option<String>,
    /// Concurrency limit — presumably parallel requests; verify against caller.
    pub concurrency: usize,
    /// Attempt limit — presumably per unit of work; verify against caller.
    pub max_attempts: usize,
    /// Output path.
    pub output: PathBuf,
}
15
/// Token budgets for segmentation.
///
/// The `Default` impl yields 1 200 / 160, and every `TranslationProfile`
/// supplies its own pair when resolved.
#[derive(Debug, Clone)]
pub struct SegmentationConfig {
    /// Upper bound on a segment's size, in tokens (per the field name).
    pub max_segment_tokens: usize,
    /// Context budget in tokens — presumably the sliding context attached to a
    /// segment; verify against the segmenter.
    pub context_tokens: usize,
}
21
22impl Default for SegmentationConfig {
23    fn default() -> Self {
24        Self {
25            max_segment_tokens: 1_200,
26            context_tokens: 160,
27        }
28    }
29}
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
32pub enum PromptVersion {
33    V1,
34    BatchV1,
35    V2,
36    BatchV2,
37}
38
39impl PromptVersion {
40    pub fn as_str(self) -> &'static str {
41        match self {
42            PromptVersion::V1 => "v1",
43            PromptVersion::BatchV1 => "batch_v1",
44            PromptVersion::V2 => "v2",
45            PromptVersion::BatchV2 => "batch_v2",
46        }
47    }
48}
49
50#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
51#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
52pub enum TranslationProfile {
53    Safe,
54    Balanced,
55    Fastest,
56    FreeTier,
57    TurboTextOnly,
58    V1Fast,
59}
60
61impl TranslationProfile {
62    pub fn namespace_str(self) -> &'static str {
63        match self {
64            TranslationProfile::Safe => "safe",
65            TranslationProfile::Balanced => "balanced",
66            TranslationProfile::Fastest => "fastest",
67            TranslationProfile::FreeTier => "free_tier",
68            TranslationProfile::TurboTextOnly => "turbo_text_only",
69            TranslationProfile::V1Fast => "v1_fast",
70        }
71    }
72
73    pub fn resolve(self) -> ResolvedRunSettings {
74        match self {
75            Self::Safe => ResolvedRunSettings {
76                profile: self,
77                segmentation: SegmentationConfig {
78                    max_segment_tokens: 1_200,
79                    context_tokens: 160,
80                },
81                batch: BatchConfig {
82                    enabled: false,
83                    target_tokens: 0,
84                    max_items: 0,
85                    adaptive_sizing: false,
86                    split_on_json_failure: true,
87                    repair_invalid_items: true,
88                },
89                scheduler: SchedulerConfig {
90                    concurrency: 4,
91                    max_attempts: 3,
92                },
93                compact_prompts: false,
94                retry_failed_only: false,
95                adaptive_concurrency: false,
96                provider: ProviderRuntimeConfig {
97                    timeout_seconds: 120,
98                    provider_max_attempts: 6,
99                    validation_max_attempts: 3,
100                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
101                    max_backoff_seconds: 60,
102                    thinking_disabled: false,
103                    model_context_tokens: None,
104                    max_output_tokens: None,
105                    batch_max_output_tokens: None,
106                    json_mode: JsonMode::Auto,
107                    max_idle_per_host: 32,
108                },
109                qa: QaRunConfig {
110                    concurrency: 4,
111                    batch_target_tokens: 4_000,
112                    model: None,
113                    provider: None,
114                    base_url: None,
115                    api_key_env: None,
116                },
117                double_check: DoubleCheckConfig {
118                    mode: DoubleCheckMode::Off,
119                    model: None,
120                    provider: None,
121                    base_url: None,
122                    api_key_env: None,
123                    concurrency: 4,
124                    batch_target_tokens: 8_000,
125                    auto_correct: false,
126                    correction_rounds: 1,
127                },
128            },
129            Self::Balanced => ResolvedRunSettings {
130                profile: self,
131                segmentation: SegmentationConfig {
132                    max_segment_tokens: 2_500,
133                    context_tokens: 80,
134                },
135                batch: BatchConfig {
136                    enabled: true,
137                    target_tokens: 8_000,
138                    max_items: 64,
139                    adaptive_sizing: false,
140                    split_on_json_failure: true,
141                    repair_invalid_items: true,
142                },
143                scheduler: SchedulerConfig {
144                    concurrency: 16,
145                    max_attempts: 2,
146                },
147                compact_prompts: true,
148                retry_failed_only: true,
149                adaptive_concurrency: true,
150                provider: ProviderRuntimeConfig {
151                    timeout_seconds: 120,
152                    provider_max_attempts: 2,
153                    validation_max_attempts: 1,
154                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
155                    max_backoff_seconds: 30,
156                    thinking_disabled: false,
157                    model_context_tokens: None,
158                    max_output_tokens: None,
159                    batch_max_output_tokens: None,
160                    json_mode: JsonMode::Auto,
161                    max_idle_per_host: 32,
162                },
163                qa: QaRunConfig {
164                    concurrency: 8,
165                    batch_target_tokens: 8_000,
166                    model: None,
167                    provider: None,
168                    base_url: None,
169                    api_key_env: None,
170                },
171                double_check: DoubleCheckConfig {
172                    mode: DoubleCheckMode::Off,
173                    model: None,
174                    provider: None,
175                    base_url: None,
176                    api_key_env: None,
177                    concurrency: 4,
178                    batch_target_tokens: 8_000,
179                    auto_correct: false,
180                    correction_rounds: 1,
181                },
182            },
183            Self::Fastest => ResolvedRunSettings {
184                profile: self,
185                segmentation: SegmentationConfig {
186                    max_segment_tokens: 6_000,
187                    context_tokens: 20,
188                },
189                batch: BatchConfig {
190                    enabled: true,
191                    target_tokens: 16_000,
192                    max_items: 160,
193                    adaptive_sizing: true,
194                    split_on_json_failure: true,
195                    repair_invalid_items: true,
196                },
197                scheduler: SchedulerConfig {
198                    concurrency: 64,
199                    max_attempts: 1,
200                },
201                compact_prompts: true,
202                retry_failed_only: true,
203                adaptive_concurrency: true,
204                provider: ProviderRuntimeConfig {
205                    timeout_seconds: 120,
206                    provider_max_attempts: 2,
207                    validation_max_attempts: 1,
208                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
209                    max_backoff_seconds: 10,
210                    thinking_disabled: false,
211                    model_context_tokens: None,
212                    max_output_tokens: None,
213                    batch_max_output_tokens: None,
214                    json_mode: JsonMode::Auto,
215                    max_idle_per_host: 32,
216                },
217                qa: QaRunConfig {
218                    concurrency: 16,
219                    batch_target_tokens: 12_000,
220                    model: None,
221                    provider: None,
222                    base_url: None,
223                    api_key_env: None,
224                },
225                double_check: DoubleCheckConfig {
226                    mode: DoubleCheckMode::Off,
227                    model: None,
228                    provider: None,
229                    base_url: None,
230                    api_key_env: None,
231                    concurrency: 4,
232                    batch_target_tokens: 12_000,
233                    auto_correct: false,
234                    correction_rounds: 1,
235                },
236            },
237            Self::FreeTier => ResolvedRunSettings {
238                profile: self,
239                segmentation: SegmentationConfig {
240                    max_segment_tokens: 2_500,
241                    context_tokens: 80,
242                },
243                batch: BatchConfig {
244                    enabled: true,
245                    target_tokens: 8_000,
246                    max_items: 64,
247                    adaptive_sizing: false,
248                    split_on_json_failure: false,
249                    repair_invalid_items: true,
250                },
251                scheduler: SchedulerConfig {
252                    concurrency: 1,
253                    max_attempts: 2,
254                },
255                compact_prompts: true,
256                retry_failed_only: true,
257                adaptive_concurrency: true,
258                provider: ProviderRuntimeConfig {
259                    timeout_seconds: 300,
260                    provider_max_attempts: 2,
261                    validation_max_attempts: 1,
262                    retry_after_policy: RetryAfterPolicy::RespectHeader,
263                    max_backoff_seconds: 90,
264                    thinking_disabled: false,
265                    model_context_tokens: None,
266                    max_output_tokens: None,
267                    batch_max_output_tokens: None,
268                    json_mode: JsonMode::Auto,
269                    max_idle_per_host: 8,
270                },
271                qa: QaRunConfig {
272                    concurrency: 1,
273                    batch_target_tokens: 4_000,
274                    model: None,
275                    provider: None,
276                    base_url: None,
277                    api_key_env: None,
278                },
279                double_check: DoubleCheckConfig {
280                    mode: DoubleCheckMode::Off,
281                    model: None,
282                    provider: None,
283                    base_url: None,
284                    api_key_env: None,
285                    concurrency: 1,
286                    batch_target_tokens: 4_000,
287                    auto_correct: false,
288                    correction_rounds: 1,
289                },
290            },
291            Self::TurboTextOnly => ResolvedRunSettings {
292                profile: self,
293                segmentation: SegmentationConfig {
294                    max_segment_tokens: 12_000,
295                    context_tokens: 0,
296                },
297                batch: BatchConfig {
298                    enabled: true,
299                    target_tokens: 24_000,
300                    max_items: 250,
301                    adaptive_sizing: true,
302                    split_on_json_failure: true,
303                    repair_invalid_items: false,
304                },
305                scheduler: SchedulerConfig {
306                    concurrency: 96,
307                    max_attempts: 1,
308                },
309                compact_prompts: true,
310                retry_failed_only: true,
311                adaptive_concurrency: true,
312                provider: ProviderRuntimeConfig {
313                    timeout_seconds: 120,
314                    provider_max_attempts: 1,
315                    validation_max_attempts: 1,
316                    retry_after_policy: RetryAfterPolicy::None,
317                    max_backoff_seconds: 5,
318                    thinking_disabled: false,
319                    model_context_tokens: None,
320                    max_output_tokens: None,
321                    batch_max_output_tokens: None,
322                    json_mode: JsonMode::Auto,
323                    max_idle_per_host: 64,
324                },
325                qa: QaRunConfig {
326                    concurrency: 16,
327                    batch_target_tokens: 16_000,
328                    model: None,
329                    provider: None,
330                    base_url: None,
331                    api_key_env: None,
332                },
333                double_check: DoubleCheckConfig {
334                    mode: DoubleCheckMode::Off,
335                    model: None,
336                    provider: None,
337                    base_url: None,
338                    api_key_env: None,
339                    concurrency: 4,
340                    batch_target_tokens: 16_000,
341                    auto_correct: false,
342                    correction_rounds: 1,
343                },
344            },
345            Self::V1Fast => ResolvedRunSettings {
346                profile: self,
347                segmentation: SegmentationConfig {
348                    max_segment_tokens: 12_000,
349                    context_tokens: 20,
350                },
351                batch: BatchConfig {
352                    enabled: true,
353                    target_tokens: 16_000,
354                    max_items: 128,
355                    adaptive_sizing: true,
356                    split_on_json_failure: true,
357                    repair_invalid_items: true,
358                },
359                scheduler: SchedulerConfig {
360                    concurrency: 32,
361                    max_attempts: 1,
362                },
363                compact_prompts: true,
364                retry_failed_only: true,
365                adaptive_concurrency: true,
366                provider: ProviderRuntimeConfig {
367                    timeout_seconds: 120,
368                    provider_max_attempts: 1,
369                    validation_max_attempts: 1,
370                    retry_after_policy: RetryAfterPolicy::None,
371                    max_backoff_seconds: 5,
372                    thinking_disabled: true,
373                    model_context_tokens: None,
374                    max_output_tokens: None,
375                    batch_max_output_tokens: None,
376                    json_mode: JsonMode::Auto,
377                    max_idle_per_host: 64,
378                },
379                qa: QaRunConfig {
380                    concurrency: 4,
381                    batch_target_tokens: 4_000,
382                    model: None,
383                    provider: None,
384                    base_url: None,
385                    api_key_env: None,
386                },
387                double_check: DoubleCheckConfig {
388                    mode: DoubleCheckMode::Off,
389                    model: None,
390                    provider: None,
391                    base_url: None,
392                    api_key_env: None,
393                    concurrency: 4,
394                    batch_target_tokens: 8_000,
395                    auto_correct: false,
396                    correction_rounds: 1,
397                },
398            },
399        }
400    }
401}
402
403#[derive(Debug, Clone)]
404pub struct ResolvedRunSettings {
405    pub profile: TranslationProfile,
406    pub segmentation: SegmentationConfig,
407    pub batch: BatchConfig,
408    pub scheduler: SchedulerConfig,
409    pub provider: ProviderRuntimeConfig,
410    pub compact_prompts: bool,
411    pub retry_failed_only: bool,
412    pub adaptive_concurrency: bool,
413    pub qa: QaRunConfig,
414    pub double_check: DoubleCheckConfig,
415}
416
417impl ResolvedRunSettings {
418    pub fn apply_provider_preset_runtime(&mut self, overrides: ProviderPresetRuntimeOverrides) {
419        if let Some(v) = overrides.scheduler_concurrency {
420            self.scheduler.concurrency = v.max(1);
421        }
422        if let Some(v) = overrides.provider_max_attempts {
423            self.provider.provider_max_attempts = v.max(1);
424        }
425        if let Some(v) = overrides.validation_max_attempts {
426            self.provider.validation_max_attempts = v.max(1);
427        }
428        if let Some(v) = overrides.retry_after_policy {
429            self.provider.retry_after_policy = v;
430        }
431        if let Some(v) = overrides.max_backoff_seconds {
432            self.provider.max_backoff_seconds = v;
433        }
434        if let Some(v) = overrides.timeout_seconds {
435            self.provider.timeout_seconds = v;
436        }
437        if let Some(v) = overrides.batch_enabled {
438            self.batch.enabled = v;
439        }
440        if let Some(v) = overrides.batch_target_tokens {
441            self.batch.target_tokens = v;
442        }
443        if let Some(v) = overrides.batch_max_items {
444            self.batch.max_items = v;
445        }
446        if let Some(v) = overrides.adaptive_batch_sizing {
447            self.batch.adaptive_sizing = v;
448        }
449        if let Some(v) = overrides.compact_prompts {
450            self.compact_prompts = v;
451        }
452        if let Some(v) = overrides.adaptive_concurrency {
453            self.adaptive_concurrency = v;
454        }
455        if let Some(v) = overrides.thinking_disabled {
456            self.provider.thinking_disabled = v;
457        }
458        if let Some(v) = overrides.model_context_tokens {
459            self.provider.model_context_tokens = Some(v);
460        }
461        if let Some(v) = overrides.max_output_tokens {
462            self.provider.max_output_tokens = Some(v);
463        }
464        if let Some(v) = overrides.batch_max_output_tokens {
465            self.provider.batch_max_output_tokens = Some(v);
466        }
467        if let Some(v) = overrides.json_mode {
468            self.provider.json_mode = v;
469        }
470        if let Some(v) = overrides.max_idle_per_host {
471            self.provider.max_idle_per_host = v;
472        }
473    }
474}
475
/// Batching switches and size limits.
///
/// NOTE(review): the batching logic itself lives outside this file; the
/// behavioural notes below are inferred from field names — confirm there.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether batching is on; the `Safe` profile resolves with it off.
    pub enabled: bool,
    /// Target batch size in tokens (0 in the `Safe` profile).
    pub target_tokens: usize,
    /// Item limit per batch (0 in the `Safe` profile).
    pub max_items: usize,
    /// Presumably lets batch sizes adapt at run time — TODO confirm.
    pub adaptive_sizing: bool,
    /// Presumably splits a batch after a JSON failure — TODO confirm.
    pub split_on_json_failure: bool,
    /// Presumably repairs invalid items instead of failing them — TODO confirm.
    pub repair_invalid_items: bool,
}
485
/// Settings for the QA pass.
///
/// The four optional routing fields are `None` in every profile resolved in
/// this file — presumably they are filled in by the caller when QA should use
/// a different endpoint; verify against the call sites.
#[derive(Debug, Clone)]
pub struct QaRunConfig {
    /// Concurrency limit for QA work.
    pub concurrency: usize,
    /// Target QA batch size in tokens.
    pub batch_target_tokens: usize,
    /// Optional model identifier.
    pub model: Option<String>,
    /// Optional provider identifier.
    pub provider: Option<String>,
    /// Optional API base URL.
    pub base_url: Option<String>,
    /// Optional API-key environment variable name (per the field name).
    pub api_key_env: Option<String>,
}
495
496#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
497#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
498pub enum DoubleCheckMode {
499    Off,
500    Formatting,
501    Semantic,
502    Full,
503}
504
505#[derive(Debug, Clone)]
506pub struct DoubleCheckConfig {
507    pub mode: DoubleCheckMode,
508    pub model: Option<String>,
509    pub provider: Option<String>,
510    pub base_url: Option<String>,
511    pub api_key_env: Option<String>,
512    pub concurrency: usize,
513    pub batch_target_tokens: usize,
514    pub auto_correct: bool,
515    pub correction_rounds: usize,
516}
517
518#[derive(Debug, Clone)]
519pub struct ProviderRuntimeConfig {
520    pub timeout_seconds: u64,
521    pub provider_max_attempts: usize,
522    pub validation_max_attempts: usize,
523    pub retry_after_policy: RetryAfterPolicy,
524    pub max_backoff_seconds: u64,
525    pub thinking_disabled: bool,
526    pub model_context_tokens: Option<u32>,
527    pub max_output_tokens: Option<u32>,
528    pub batch_max_output_tokens: Option<u32>,
529    pub json_mode: JsonMode,
530    pub max_idle_per_host: usize,
531}
532
533#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
534#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
535pub enum JsonMode {
536    Auto,
537    ResponseFormat,
538    PromptOnly,
539}
540
541/// Whether sliding-context injection considers all prior segments or only
542/// those in the same chapter. Chapter scope keeps concurrency high; book
543/// scope serializes across the whole document.
544#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
545#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
546pub enum ContextScope {
547    #[default]
548    Chapter,
549    Book,
550}
551
552impl ContextScope {
553    pub fn as_str(self) -> &'static str {
554        match self {
555            ContextScope::Chapter => "chapter",
556            ContextScope::Book => "book",
557        }
558    }
559}
560
/// Clamps a computed output-token budget against the model's context window
/// and an optional user cap.
///
/// * `computed` — the budget the caller would like to use.
/// * `estimated_prompt_tokens` — estimated size of the prompt, in tokens.
/// * `model_context_tokens` — total context window; `None` skips the
///   context-based clamp.
/// * `user_cap` — explicit upper bound; `None` skips it.
///
/// With a known context window the budget is limited to
/// `context - prompt - 256`, but that limit itself never drops below 512.
/// The user cap is applied next. The returned value is never below 256, even
/// when `user_cap` is smaller than that.
pub fn cap_output_tokens(
    computed: u32,
    estimated_prompt_tokens: usize,
    model_context_tokens: Option<u32>,
    user_cap: Option<u32>,
) -> u32 {
    let mut out = computed;

    if let Some(context) = model_context_tokens {
        // Saturate rather than cast with a bare `as u32`: a wrapping cast
        // makes an estimate above `u32::MAX` look small, which would inflate
        // the remaining budget.
        let prompt = estimated_prompt_tokens.min(u32::MAX as usize) as u32;
        let remaining = context.saturating_sub(prompt);
        // Keep 256 tokens of headroom below the context limit.
        let safe_remaining = remaining.saturating_sub(256);
        // Never let the context-derived limit fall below 512 tokens.
        out = out.min(safe_remaining.max(512));
    }

    if let Some(cap) = user_cap {
        out = out.min(cap);
    }

    // Final floor, applied after the user cap.
    out.max(256)
}
582
583#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
584#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
585pub enum ProviderPreset {
586    Auto,
587    OpenRouterFree,
588    OpenRouterPaidFast,
589    DeepSeekFree,
590    DeepSeekPaid,
591    GeminiFlashLite,
592    Custom,
593}
594
595impl ProviderPreset {
596    pub fn resolve(self) -> Option<ProviderPresetResolved> {
597        match self {
598            ProviderPreset::Auto | ProviderPreset::Custom => None,
599            ProviderPreset::OpenRouterFree => Some(ProviderPresetResolved {
600                endpoint: ModelEndpoint {
601                    provider: "openrouter".to_string(),
602                    model: "google/gemini-2.5-flash-lite".to_string(),
603                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
604                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
605                },
606                runtime: ProviderPresetRuntimeOverrides {
607                    scheduler_concurrency: Some(2),
608                    provider_max_attempts: Some(1),
609                    validation_max_attempts: Some(1),
610                    retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
611                    max_backoff_seconds: Some(90),
612                    timeout_seconds: Some(180),
613                    batch_enabled: Some(true),
614                    batch_target_tokens: Some(6_000),
615                    batch_max_items: Some(48),
616                    compact_prompts: Some(true),
617                    adaptive_concurrency: Some(true),
618                    thinking_disabled: Some(true),
619                    json_mode: Some(JsonMode::Auto),
620                    max_idle_per_host: Some(8),
621                    ..Default::default()
622                },
623            }),
624            ProviderPreset::OpenRouterPaidFast => Some(ProviderPresetResolved {
625                endpoint: ModelEndpoint {
626                    provider: "openrouter".to_string(),
627                    model: "google/gemini-2.5-flash".to_string(),
628                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
629                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
630                },
631                runtime: ProviderPresetRuntimeOverrides {
632                    scheduler_concurrency: Some(32),
633                    provider_max_attempts: Some(1),
634                    validation_max_attempts: Some(1),
635                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
636                    max_backoff_seconds: Some(15),
637                    timeout_seconds: Some(120),
638                    batch_enabled: Some(true),
639                    batch_target_tokens: Some(16_000),
640                    batch_max_items: Some(128),
641                    adaptive_batch_sizing: Some(true),
642                    compact_prompts: Some(true),
643                    adaptive_concurrency: Some(true),
644                    thinking_disabled: Some(true),
645                    json_mode: Some(JsonMode::Auto),
646                    max_idle_per_host: Some(64),
647                    ..Default::default()
648                },
649            }),
650            ProviderPreset::DeepSeekFree => Some(ProviderPresetResolved {
651                endpoint: ModelEndpoint {
652                    provider: "deepseek".to_string(),
653                    model: "deepseek-v4-flash".to_string(),
654                    base_url: Some("https://api.deepseek.com/v1".to_string()),
655                    api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
656                },
657                runtime: ProviderPresetRuntimeOverrides {
658                    scheduler_concurrency: Some(1),
659                    provider_max_attempts: Some(1),
660                    validation_max_attempts: Some(1),
661                    retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
662                    max_backoff_seconds: Some(120),
663                    timeout_seconds: Some(240),
664                    batch_enabled: Some(true),
665                    batch_target_tokens: Some(4_000),
666                    batch_max_items: Some(32),
667                    compact_prompts: Some(true),
668                    adaptive_concurrency: Some(false),
669                    thinking_disabled: Some(true),
670                    json_mode: Some(JsonMode::Auto),
671                    max_idle_per_host: Some(4),
672                    ..Default::default()
673                },
674            }),
675            ProviderPreset::DeepSeekPaid => Some(ProviderPresetResolved {
676                endpoint: ModelEndpoint {
677                    provider: "deepseek".to_string(),
678                    model: "deepseek-v4-flash".to_string(),
679                    base_url: Some("https://api.deepseek.com/v1".to_string()),
680                    api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
681                },
682                runtime: ProviderPresetRuntimeOverrides {
683                    scheduler_concurrency: Some(8),
684                    provider_max_attempts: Some(2),
685                    validation_max_attempts: Some(1),
686                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
687                    max_backoff_seconds: Some(30),
688                    timeout_seconds: Some(180),
689                    batch_enabled: Some(true),
690                    batch_target_tokens: Some(12_000),
691                    batch_max_items: Some(96),
692                    adaptive_batch_sizing: Some(true),
693                    compact_prompts: Some(true),
694                    adaptive_concurrency: Some(true),
695                    thinking_disabled: Some(false),
696                    json_mode: Some(JsonMode::Auto),
697                    max_idle_per_host: Some(16),
698                    ..Default::default()
699                },
700            }),
701            ProviderPreset::GeminiFlashLite => Some(ProviderPresetResolved {
702                endpoint: ModelEndpoint {
703                    provider: "openrouter".to_string(),
704                    model: "google/gemini-2.5-flash-lite".to_string(),
705                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
706                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
707                },
708                runtime: ProviderPresetRuntimeOverrides {
709                    scheduler_concurrency: Some(40),
710                    provider_max_attempts: Some(1),
711                    validation_max_attempts: Some(1),
712                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
713                    max_backoff_seconds: Some(15),
714                    timeout_seconds: Some(120),
715                    batch_enabled: Some(true),
716                    batch_target_tokens: Some(20_000),
717                    batch_max_items: Some(160),
718                    adaptive_batch_sizing: Some(true),
719                    compact_prompts: Some(true),
720                    adaptive_concurrency: Some(true),
721                    thinking_disabled: Some(true),
722                    json_mode: Some(JsonMode::Auto),
723                    max_idle_per_host: Some(64),
724                    ..Default::default()
725                },
726            }),
727        }
728    }
729
730    pub fn endpoint_or_default(self, custom: Option<ModelEndpoint>) -> ModelEndpoint {
731        if let Some(resolved) = self.resolve() {
732            return resolved.endpoint;
733        }
734        match self {
735            ProviderPreset::Auto => ModelEndpoint {
736                provider: "deepseek".to_string(),
737                model: "deepseek-v4-flash".to_string(),
738                base_url: Some("https://api.deepseek.com/v1".to_string()),
739                api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
740            },
741            ProviderPreset::Custom => custom.unwrap_or_else(|| ModelEndpoint {
742                provider: "deepseek".to_string(),
743                model: "deepseek-v4-flash".to_string(),
744                base_url: Some("https://api.deepseek.com/v1".to_string()),
745                api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
746            }),
747            _ => unreachable!("resolved presets returned above"),
748        }
749    }
750}
751
752#[derive(Debug, Clone)]
753pub struct ProviderPresetResolved {
754    pub endpoint: ModelEndpoint,
755    pub runtime: ProviderPresetRuntimeOverrides,
756}
757
758#[derive(Debug, Clone, Default)]
759pub struct ProviderPresetRuntimeOverrides {
760    pub scheduler_concurrency: Option<usize>,
761    pub provider_max_attempts: Option<usize>,
762    pub validation_max_attempts: Option<usize>,
763    pub retry_after_policy: Option<RetryAfterPolicy>,
764    pub max_backoff_seconds: Option<u64>,
765    pub timeout_seconds: Option<u64>,
766    pub batch_enabled: Option<bool>,
767    pub batch_target_tokens: Option<usize>,
768    pub batch_max_items: Option<usize>,
769    pub adaptive_batch_sizing: Option<bool>,
770    pub compact_prompts: Option<bool>,
771    pub adaptive_concurrency: Option<bool>,
772    pub thinking_disabled: Option<bool>,
773    pub model_context_tokens: Option<u32>,
774    pub max_output_tokens: Option<u32>,
775    pub batch_max_output_tokens: Option<u32>,
776    pub json_mode: Option<JsonMode>,
777    pub max_idle_per_host: Option<usize>,
778}
779
780#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
781pub enum RetryAfterPolicy {
782    RespectHeader,
783    JitteredExponential,
784    Fixed,
785    None,
786}
787
788#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
789pub enum ProviderErrorKind {
790    RateLimit,
791    Timeout,
792    Server,
793    Client,
794    InvalidResponse,
795    Unknown,
796}
797
798#[derive(Debug, Clone, Default, serde::Serialize)]
799pub struct ProviderRequestMetric {
800    pub request_id: String,
801    pub batch_id: Option<String>,
802    pub provider: String,
803    pub model: String,
804    pub profile: String,
805    pub items: usize,
806    pub estimated_input_tokens: usize,
807    pub max_output_tokens: Option<u32>,
808    pub input_tokens: Option<u64>,
809    pub output_tokens: Option<u64>,
810    pub latency_ms: u64,
811    pub finish_reason: Option<String>,
812    pub status: String,
813    pub status_code: Option<u16>,
814    pub retry_count: usize,
815    pub backoff_ms: u64,
816    pub error_kind: Option<ProviderErrorKind>,
817}
818
/// Identifies one model at one provider.
///
/// Derives `PartialEq`/`Eq` so endpoints can be compared as whole values
/// (for example in tests or when de-duplicating routes) instead of field by
/// field. All fields are plain strings, so equality is a straightforward
/// per-field comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ModelEndpoint {
    /// Provider identifier, e.g. `"deepseek"` or `"openrouter"`.
    pub provider: String,
    /// Model identifier as the provider names it.
    pub model: String,
    /// Base URL of the provider API, when one is set explicitly.
    pub base_url: Option<String>,
    /// Name of the environment variable expected to hold the API key
    /// (e.g. `"DEEPSEEK_API_KEY"`), not the key itself.
    // NOTE(review): behaviour when this is `None` is not visible here.
    pub api_key_env: Option<String>,
}
826
827#[derive(Debug, Clone)]
828pub struct ModelRouteConfig {
829    pub translation: ModelEndpoint,
830    pub repair: Option<ModelEndpoint>,
831    pub qa: Option<ModelEndpoint>,
832    pub double_check: Option<ModelEndpoint>,
833    pub fallback: Option<ModelEndpoint>,
834}
835
836#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
837#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
838pub enum FallbackScope {
839    Failed,
840    NeedsReview,
841    FailedAndNeedsReview,
842}
843
#[cfg(test)]
mod tests {
    use super::*;

    /// The paid OpenRouter "fast" preset must ship its tuned runtime overrides.
    #[test]
    fn openrouter_paid_fast_preset_sets_runtime_overrides() {
        let preset = ProviderPreset::OpenRouterPaidFast
            .resolve()
            .expect("preset should resolve");
        let overrides = &preset.runtime;

        assert_eq!(preset.endpoint.provider, "openrouter");
        assert_eq!(overrides.scheduler_concurrency, Some(32));
        assert_eq!(overrides.provider_max_attempts, Some(1));
        assert_eq!(overrides.batch_target_tokens, Some(16_000));
        assert_eq!(overrides.adaptive_batch_sizing, Some(true));
        assert_eq!(overrides.max_idle_per_host, Some(64));
    }

    /// The free OpenRouter preset must use low concurrency and the
    /// `RespectHeader` retry-after policy.
    #[test]
    fn openrouter_free_preset_uses_low_concurrency_and_respect_retry_after() {
        let preset = ProviderPreset::OpenRouterFree
            .resolve()
            .expect("preset should resolve");
        let overrides = &preset.runtime;

        assert_eq!(overrides.scheduler_concurrency, Some(2));
        assert_eq!(overrides.provider_max_attempts, Some(1));
        assert_eq!(overrides.retry_after_policy, Some(RetryAfterPolicy::RespectHeader));
        assert_eq!(overrides.max_idle_per_host, Some(8));
    }

    /// A `RuntimeConfigResolved` event exposes the preset-related values it
    /// was built with.
    #[test]
    fn runtime_config_event_includes_provider_preset_values() {
        let runtime_event = crate::ProgressEvent::RuntimeConfigResolved {
            profile: "v1_fast".to_string(),
            provider_preset: Some("OpenRouterPaidFast".to_string()),
            provider: "openrouter".to_string(),
            model: "google/gemini-2.5-flash".to_string(),
            concurrency: 32,
            max_attempts: 1,
            provider_max_attempts: 1,
            validation_max_attempts: 1,
            retry_after_policy: "JitteredExponential".to_string(),
            max_backoff_seconds: 15,
            timeout_seconds: 120,
            batch_enabled: true,
            batch_target_tokens: 16_000,
            batch_max_items: 128,
            adaptive_batch_sizing: true,
            adaptive_concurrency: true,
            compact_prompts: true,
            thinking_disabled: true,
            json_mode: "Auto".to_string(),
            model_context_tokens: None,
            max_output_tokens: None,
            batch_max_output_tokens: None,
            timestamp_ms: 0,
        };

        // Pull out only the fields under test; the rest are ignored via `..`.
        if let crate::ProgressEvent::RuntimeConfigResolved {
            provider_preset,
            batch_target_tokens,
            adaptive_batch_sizing,
            provider_max_attempts,
            ..
        } = runtime_event
        {
            assert_eq!(provider_preset.as_deref(), Some("OpenRouterPaidFast"));
            assert_eq!(batch_target_tokens, 16_000);
            assert!(adaptive_batch_sizing);
            assert_eq!(provider_max_attempts, 1);
        } else {
            unreachable!("constructed runtime event");
        }
    }

    /// The `V1Fast` profile resolves to a single attempt at every level while
    /// keeping repair and the adaptive features switched on.
    #[test]
    fn v1_fast_uses_single_provider_attempt() {
        let resolved = TranslationProfile::V1Fast.resolve();

        assert_eq!(resolved.scheduler.max_attempts, 1);
        assert_eq!(resolved.provider.provider_max_attempts, 1);
        assert_eq!(resolved.provider.validation_max_attempts, 1);
        assert!(resolved.batch.repair_invalid_items);
        assert!(resolved.adaptive_concurrency);
        assert!(resolved.batch.adaptive_sizing);
    }
}