// bookforge_core/config.rs

1use std::path::PathBuf;
2
3use crate::scheduler::SchedulerConfig;
4
/// Top-level options describing a single translation run.
///
/// NOTE(review): this file only declares the fields; how each one is consumed
/// is decided by callers that are not visible here.
#[derive(Debug, Clone)]
pub struct TranslationConfig {
    /// Language of the input text, if specified.
    /// NOTE(review): presumably auto-detected when `None` — confirm with the caller.
    pub source_language: Option<String>,
    /// Language to translate into.
    pub target_language: String,
    /// Provider identifier as a free-form string.
    pub provider: String,
    /// Optional model name.
    /// NOTE(review): the fallback used when `None` is not defined in this file.
    pub model: Option<String>,
    /// Requested concurrency level.
    pub concurrency: usize,
    /// Maximum number of attempts.
    pub max_attempts: usize,
    /// Output path for the run.
    pub output: PathBuf,
}
15
/// Token budgets used when splitting text into segments.
#[derive(Debug, Clone)]
pub struct SegmentationConfig {
    /// Upper bound on the size of one segment, in tokens.
    pub max_segment_tokens: usize,
    /// Number of context tokens that accompany a segment.
    pub context_tokens: usize,
}

impl Default for SegmentationConfig {
    /// Returns the baseline budgets: 1,200 segment tokens and 160 context tokens.
    fn default() -> Self {
        const DEFAULT_MAX_SEGMENT_TOKENS: usize = 1_200;
        const DEFAULT_CONTEXT_TOKENS: usize = 160;

        SegmentationConfig {
            max_segment_tokens: DEFAULT_MAX_SEGMENT_TOKENS,
            context_tokens: DEFAULT_CONTEXT_TOKENS,
        }
    }
}
30
/// Identifies which prompt template version is in use.
///
/// Each variant has a stable lowercase tag exposed by `PromptVersion::as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum PromptVersion {
    /// Tagged `"v1"`.
    V1,
    /// Tagged `"batch_v1"`.
    BatchV1,
    /// Tagged `"v2"`.
    V2,
    /// Tagged `"batch_v2"`.
    BatchV2,
}
38
39impl PromptVersion {
40    pub fn as_str(self) -> &'static str {
41        match self {
42            PromptVersion::V1 => "v1",
43            PromptVersion::BatchV1 => "batch_v1",
44            PromptVersion::V2 => "v2",
45            PromptVersion::BatchV2 => "batch_v2",
46        }
47    }
48}
49
/// Named bundles of run settings; `TranslationProfile::resolve` expands a
/// profile into concrete values.
///
/// The per-variant notes below summarise what `resolve` currently returns.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TranslationProfile {
    /// Batching disabled; scheduler concurrency 4 with up to 3 attempts.
    Safe,
    /// Batching enabled (8,000-token target, 64 items); scheduler concurrency 16.
    Balanced,
    /// Adaptive batching (16,000-token target, 160 items); scheduler concurrency 64,
    /// single scheduler attempt.
    Fastest,
    /// Scheduler concurrency 1, 300-second timeout, `RetryAfterPolicy::RespectHeader`.
    FreeTier,
    /// Largest batches (24,000-token target, 250 items), scheduler concurrency 96,
    /// zero context tokens, invalid-item repair disabled.
    TurboTextOnly,
    /// Scheduler concurrency 32, single attempt, `thinking_disabled` set to `true`.
    V1Fast,
}
60
61impl TranslationProfile {
62    pub fn namespace_str(self) -> &'static str {
63        match self {
64            TranslationProfile::Safe => "safe",
65            TranslationProfile::Balanced => "balanced",
66            TranslationProfile::Fastest => "fastest",
67            TranslationProfile::FreeTier => "free_tier",
68            TranslationProfile::TurboTextOnly => "turbo_text_only",
69            TranslationProfile::V1Fast => "v1_fast",
70        }
71    }
72
73    pub fn resolve(self) -> ResolvedRunSettings {
74        match self {
75            Self::Safe => ResolvedRunSettings {
76                profile: self,
77                segmentation: SegmentationConfig {
78                    max_segment_tokens: 1_200,
79                    context_tokens: 160,
80                },
81                batch: BatchConfig {
82                    enabled: false,
83                    target_tokens: 0,
84                    max_items: 0,
85                    adaptive_sizing: false,
86                    split_on_json_failure: true,
87                    repair_invalid_items: true,
88                },
89                scheduler: SchedulerConfig {
90                    concurrency: 4,
91                    max_attempts: 3,
92                },
93                compact_prompts: false,
94                retry_failed_only: false,
95                adaptive_concurrency: false,
96                provider: ProviderRuntimeConfig {
97                    timeout_seconds: 120,
98                    provider_max_attempts: 6,
99                    validation_max_attempts: 3,
100                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
101                    max_backoff_seconds: 60,
102                    thinking_disabled: false,
103                    model_context_tokens: None,
104                    max_output_tokens: None,
105                    batch_max_output_tokens: None,
106                    json_mode: JsonMode::Auto,
107                    max_idle_per_host: 32,
108                },
109                qa: QaRunConfig {
110                    concurrency: 4,
111                    batch_target_tokens: 4_000,
112                    model: None,
113                    provider: None,
114                    base_url: None,
115                    api_key_env: None,
116                },
117                double_check: DoubleCheckConfig {
118                    mode: DoubleCheckMode::Off,
119                    model: None,
120                    provider: None,
121                    base_url: None,
122                    api_key_env: None,
123                    concurrency: 4,
124                    batch_target_tokens: 8_000,
125                    auto_correct: false,
126                    correction_rounds: 1,
127                },
128            },
129            Self::Balanced => ResolvedRunSettings {
130                profile: self,
131                segmentation: SegmentationConfig {
132                    max_segment_tokens: 2_500,
133                    context_tokens: 80,
134                },
135                batch: BatchConfig {
136                    enabled: true,
137                    target_tokens: 8_000,
138                    max_items: 64,
139                    adaptive_sizing: false,
140                    split_on_json_failure: true,
141                    repair_invalid_items: true,
142                },
143                scheduler: SchedulerConfig {
144                    concurrency: 16,
145                    max_attempts: 2,
146                },
147                compact_prompts: true,
148                retry_failed_only: true,
149                adaptive_concurrency: true,
150                provider: ProviderRuntimeConfig {
151                    timeout_seconds: 120,
152                    provider_max_attempts: 2,
153                    validation_max_attempts: 1,
154                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
155                    max_backoff_seconds: 30,
156                    thinking_disabled: false,
157                    model_context_tokens: None,
158                    max_output_tokens: None,
159                    batch_max_output_tokens: None,
160                    json_mode: JsonMode::Auto,
161                    max_idle_per_host: 32,
162                },
163                qa: QaRunConfig {
164                    concurrency: 8,
165                    batch_target_tokens: 8_000,
166                    model: None,
167                    provider: None,
168                    base_url: None,
169                    api_key_env: None,
170                },
171                double_check: DoubleCheckConfig {
172                    mode: DoubleCheckMode::Off,
173                    model: None,
174                    provider: None,
175                    base_url: None,
176                    api_key_env: None,
177                    concurrency: 4,
178                    batch_target_tokens: 8_000,
179                    auto_correct: false,
180                    correction_rounds: 1,
181                },
182            },
183            Self::Fastest => ResolvedRunSettings {
184                profile: self,
185                segmentation: SegmentationConfig {
186                    max_segment_tokens: 6_000,
187                    context_tokens: 20,
188                },
189                batch: BatchConfig {
190                    enabled: true,
191                    target_tokens: 16_000,
192                    max_items: 160,
193                    adaptive_sizing: true,
194                    split_on_json_failure: true,
195                    repair_invalid_items: true,
196                },
197                scheduler: SchedulerConfig {
198                    concurrency: 64,
199                    max_attempts: 1,
200                },
201                compact_prompts: true,
202                retry_failed_only: true,
203                adaptive_concurrency: true,
204                provider: ProviderRuntimeConfig {
205                    timeout_seconds: 120,
206                    provider_max_attempts: 2,
207                    validation_max_attempts: 1,
208                    retry_after_policy: RetryAfterPolicy::JitteredExponential,
209                    max_backoff_seconds: 10,
210                    thinking_disabled: false,
211                    model_context_tokens: None,
212                    max_output_tokens: None,
213                    batch_max_output_tokens: None,
214                    json_mode: JsonMode::Auto,
215                    max_idle_per_host: 32,
216                },
217                qa: QaRunConfig {
218                    concurrency: 16,
219                    batch_target_tokens: 12_000,
220                    model: None,
221                    provider: None,
222                    base_url: None,
223                    api_key_env: None,
224                },
225                double_check: DoubleCheckConfig {
226                    mode: DoubleCheckMode::Off,
227                    model: None,
228                    provider: None,
229                    base_url: None,
230                    api_key_env: None,
231                    concurrency: 4,
232                    batch_target_tokens: 12_000,
233                    auto_correct: false,
234                    correction_rounds: 1,
235                },
236            },
237            Self::FreeTier => ResolvedRunSettings {
238                profile: self,
239                segmentation: SegmentationConfig {
240                    max_segment_tokens: 2_500,
241                    context_tokens: 80,
242                },
243                batch: BatchConfig {
244                    enabled: true,
245                    target_tokens: 8_000,
246                    max_items: 64,
247                    adaptive_sizing: false,
248                    split_on_json_failure: false,
249                    repair_invalid_items: true,
250                },
251                scheduler: SchedulerConfig {
252                    concurrency: 1,
253                    max_attempts: 2,
254                },
255                compact_prompts: true,
256                retry_failed_only: true,
257                adaptive_concurrency: true,
258                provider: ProviderRuntimeConfig {
259                    timeout_seconds: 300,
260                    provider_max_attempts: 2,
261                    validation_max_attempts: 1,
262                    retry_after_policy: RetryAfterPolicy::RespectHeader,
263                    max_backoff_seconds: 90,
264                    thinking_disabled: false,
265                    model_context_tokens: None,
266                    max_output_tokens: None,
267                    batch_max_output_tokens: None,
268                    json_mode: JsonMode::Auto,
269                    max_idle_per_host: 8,
270                },
271                qa: QaRunConfig {
272                    concurrency: 1,
273                    batch_target_tokens: 4_000,
274                    model: None,
275                    provider: None,
276                    base_url: None,
277                    api_key_env: None,
278                },
279                double_check: DoubleCheckConfig {
280                    mode: DoubleCheckMode::Off,
281                    model: None,
282                    provider: None,
283                    base_url: None,
284                    api_key_env: None,
285                    concurrency: 1,
286                    batch_target_tokens: 4_000,
287                    auto_correct: false,
288                    correction_rounds: 1,
289                },
290            },
291            Self::TurboTextOnly => ResolvedRunSettings {
292                profile: self,
293                segmentation: SegmentationConfig {
294                    max_segment_tokens: 12_000,
295                    context_tokens: 0,
296                },
297                batch: BatchConfig {
298                    enabled: true,
299                    target_tokens: 24_000,
300                    max_items: 250,
301                    adaptive_sizing: true,
302                    split_on_json_failure: true,
303                    repair_invalid_items: false,
304                },
305                scheduler: SchedulerConfig {
306                    concurrency: 96,
307                    max_attempts: 1,
308                },
309                compact_prompts: true,
310                retry_failed_only: true,
311                adaptive_concurrency: true,
312                provider: ProviderRuntimeConfig {
313                    timeout_seconds: 120,
314                    provider_max_attempts: 1,
315                    validation_max_attempts: 1,
316                    retry_after_policy: RetryAfterPolicy::None,
317                    max_backoff_seconds: 5,
318                    thinking_disabled: false,
319                    model_context_tokens: None,
320                    max_output_tokens: None,
321                    batch_max_output_tokens: None,
322                    json_mode: JsonMode::Auto,
323                    max_idle_per_host: 64,
324                },
325                qa: QaRunConfig {
326                    concurrency: 16,
327                    batch_target_tokens: 16_000,
328                    model: None,
329                    provider: None,
330                    base_url: None,
331                    api_key_env: None,
332                },
333                double_check: DoubleCheckConfig {
334                    mode: DoubleCheckMode::Off,
335                    model: None,
336                    provider: None,
337                    base_url: None,
338                    api_key_env: None,
339                    concurrency: 4,
340                    batch_target_tokens: 16_000,
341                    auto_correct: false,
342                    correction_rounds: 1,
343                },
344            },
345            Self::V1Fast => ResolvedRunSettings {
346                profile: self,
347                segmentation: SegmentationConfig {
348                    max_segment_tokens: 12_000,
349                    context_tokens: 20,
350                },
351                batch: BatchConfig {
352                    enabled: true,
353                    target_tokens: 16_000,
354                    max_items: 128,
355                    adaptive_sizing: true,
356                    split_on_json_failure: true,
357                    repair_invalid_items: true,
358                },
359                scheduler: SchedulerConfig {
360                    concurrency: 32,
361                    max_attempts: 1,
362                },
363                compact_prompts: true,
364                retry_failed_only: true,
365                adaptive_concurrency: true,
366                provider: ProviderRuntimeConfig {
367                    timeout_seconds: 120,
368                    provider_max_attempts: 1,
369                    validation_max_attempts: 1,
370                    retry_after_policy: RetryAfterPolicy::None,
371                    max_backoff_seconds: 5,
372                    thinking_disabled: true,
373                    model_context_tokens: None,
374                    max_output_tokens: None,
375                    batch_max_output_tokens: None,
376                    json_mode: JsonMode::Auto,
377                    max_idle_per_host: 64,
378                },
379                qa: QaRunConfig {
380                    concurrency: 4,
381                    batch_target_tokens: 4_000,
382                    model: None,
383                    provider: None,
384                    base_url: None,
385                    api_key_env: None,
386                },
387                double_check: DoubleCheckConfig {
388                    mode: DoubleCheckMode::Off,
389                    model: None,
390                    provider: None,
391                    base_url: None,
392                    api_key_env: None,
393                    concurrency: 4,
394                    batch_target_tokens: 8_000,
395                    auto_correct: false,
396                    correction_rounds: 1,
397                },
398            },
399        }
400    }
401}
402
/// Concrete settings for a run, as produced by `TranslationProfile::resolve`
/// and optionally adjusted by `apply_provider_preset_runtime`.
#[derive(Debug, Clone)]
pub struct ResolvedRunSettings {
    /// Profile these settings were resolved from.
    pub profile: TranslationProfile,
    /// Segment and context token budgets.
    pub segmentation: SegmentationConfig,
    /// Batching options.
    pub batch: BatchConfig,
    /// Scheduler concurrency and attempt limits.
    pub scheduler: SchedulerConfig,
    /// Provider-level runtime options (timeouts, retries, token limits).
    pub provider: ProviderRuntimeConfig,
    /// Whether compact prompts are requested.
    pub compact_prompts: bool,
    /// Flag named for retrying only failed work.
    /// NOTE(review): exact semantics live in the consumer — confirm.
    pub retry_failed_only: bool,
    /// Whether adaptive concurrency is requested.
    pub adaptive_concurrency: bool,
    /// Settings for the QA pass.
    pub qa: QaRunConfig,
    /// Settings for the double-check pass.
    pub double_check: DoubleCheckConfig,
}
416
417impl ResolvedRunSettings {
418    pub fn apply_provider_preset_runtime(&mut self, overrides: ProviderPresetRuntimeOverrides) {
419        if let Some(v) = overrides.scheduler_concurrency {
420            self.scheduler.concurrency = v.max(1);
421        }
422        if let Some(v) = overrides.provider_max_attempts {
423            self.provider.provider_max_attempts = v.max(1);
424        }
425        if let Some(v) = overrides.validation_max_attempts {
426            self.provider.validation_max_attempts = v.max(1);
427        }
428        if let Some(v) = overrides.retry_after_policy {
429            self.provider.retry_after_policy = v;
430        }
431        if let Some(v) = overrides.max_backoff_seconds {
432            self.provider.max_backoff_seconds = v;
433        }
434        if let Some(v) = overrides.timeout_seconds {
435            self.provider.timeout_seconds = v;
436        }
437        if let Some(v) = overrides.batch_enabled {
438            self.batch.enabled = v;
439        }
440        if let Some(v) = overrides.batch_target_tokens {
441            self.batch.target_tokens = v;
442        }
443        if let Some(v) = overrides.batch_max_items {
444            self.batch.max_items = v;
445        }
446        if let Some(v) = overrides.adaptive_batch_sizing {
447            self.batch.adaptive_sizing = v;
448        }
449        if let Some(v) = overrides.compact_prompts {
450            self.compact_prompts = v;
451        }
452        if let Some(v) = overrides.adaptive_concurrency {
453            self.adaptive_concurrency = v;
454        }
455        if let Some(v) = overrides.thinking_disabled {
456            self.provider.thinking_disabled = v;
457        }
458        if let Some(v) = overrides.model_context_tokens {
459            self.provider.model_context_tokens = Some(v);
460        }
461        if let Some(v) = overrides.max_output_tokens {
462            self.provider.max_output_tokens = Some(v);
463        }
464        if let Some(v) = overrides.batch_max_output_tokens {
465            self.provider.batch_max_output_tokens = Some(v);
466        }
467        if let Some(v) = overrides.json_mode {
468            self.provider.json_mode = v;
469        }
470        if let Some(v) = overrides.max_idle_per_host {
471            self.provider.max_idle_per_host = v;
472        }
473    }
474}
475
/// Options controlling request batching.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether batching is turned on.
    pub enabled: bool,
    /// Target batch size in tokens (0 in the `Safe` profile, where batching is off).
    pub target_tokens: usize,
    /// Maximum number of items per batch (0 in the `Safe` profile).
    pub max_items: usize,
    /// Whether adaptive batch sizing is requested.
    pub adaptive_sizing: bool,
    /// Flag named for splitting a batch after a JSON failure.
    /// NOTE(review): the split strategy is implemented elsewhere — confirm.
    pub split_on_json_failure: bool,
    /// Flag named for repairing invalid items.
    /// NOTE(review): the repair behaviour is implemented elsewhere — confirm.
    pub repair_invalid_items: bool,
}
485
/// Settings for the QA pass.
///
/// All built-in profiles leave the four optional fields as `None`.
/// NOTE(review): presumably `None` means "reuse the main translation
/// endpoint" — confirm against the QA runner.
#[derive(Debug, Clone)]
pub struct QaRunConfig {
    /// Concurrency level for QA work.
    pub concurrency: usize,
    /// Target batch size in tokens for QA requests.
    pub batch_target_tokens: usize,
    /// Optional model override.
    pub model: Option<String>,
    /// Optional provider override.
    pub provider: Option<String>,
    /// Optional base URL override.
    pub base_url: Option<String>,
    /// Optional name of the environment variable holding the API key.
    pub api_key_env: Option<String>,
}
495
/// Selects which double-check pass, if any, is requested.
///
/// Every built-in profile resolves to `Off`.
/// NOTE(review): what each non-`Off` mode checks is defined by the consumer of
/// this enum, not in this file.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum DoubleCheckMode {
    /// No double-check pass.
    Off,
    /// Formatting-oriented check (by name).
    Formatting,
    /// Semantic-oriented check (by name).
    Semantic,
    /// Presumably both formatting and semantic checks — confirm.
    Full,
}
504
/// Settings for the double-check pass.
///
/// All built-in profiles use `DoubleCheckMode::Off`, leave the optional
/// overrides as `None`, disable `auto_correct`, and set `correction_rounds` to 1.
#[derive(Debug, Clone)]
pub struct DoubleCheckConfig {
    /// Which double-check mode to run.
    pub mode: DoubleCheckMode,
    /// Optional model override.
    pub model: Option<String>,
    /// Optional provider override.
    pub provider: Option<String>,
    /// Optional base URL override.
    pub base_url: Option<String>,
    /// Optional name of the environment variable holding the API key.
    pub api_key_env: Option<String>,
    /// Concurrency level for double-check work.
    pub concurrency: usize,
    /// Target batch size in tokens for double-check requests.
    pub batch_target_tokens: usize,
    /// Whether automatic correction is requested.
    pub auto_correct: bool,
    /// Number of correction rounds.
    pub correction_rounds: usize,
}
517
/// Provider-level runtime options: timeouts, retry limits, token limits and
/// connection pooling.
#[derive(Debug, Clone)]
pub struct ProviderRuntimeConfig {
    /// Timeout in seconds.
    /// NOTE(review): presumably per request — confirm with the HTTP client setup.
    pub timeout_seconds: u64,
    /// Maximum provider-level attempts.
    pub provider_max_attempts: usize,
    /// Maximum validation-level attempts.
    pub validation_max_attempts: usize,
    /// Retry/back-off policy to apply.
    pub retry_after_policy: RetryAfterPolicy,
    /// Upper bound on back-off, in seconds.
    pub max_backoff_seconds: u64,
    /// Whether "thinking" is disabled for the model.
    /// NOTE(review): how this maps onto provider request parameters is not shown here.
    pub thinking_disabled: bool,
    /// Model context window in tokens, when known; `cap_output_tokens` accepts
    /// a value of this shape.
    pub model_context_tokens: Option<u32>,
    /// Optional cap on output tokens.
    pub max_output_tokens: Option<u32>,
    /// Optional cap on output tokens for batch requests.
    pub batch_max_output_tokens: Option<u32>,
    /// How JSON output is requested from the provider.
    pub json_mode: JsonMode,
    /// Maximum idle connections kept per host.
    /// NOTE(review): presumably forwarded to the HTTP client pool — confirm.
    pub max_idle_per_host: usize,
}
532
/// Strategy for obtaining JSON output from a provider.
///
/// Every built-in profile and provider preset in this file uses `Auto`.
/// NOTE(review): the variant notes below are inferred from the names only;
/// confirm against the provider client.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum JsonMode {
    /// Let the implementation choose.
    Auto,
    /// Presumably uses the provider's response-format request parameter.
    ResponseFormat,
    /// Presumably relies on prompt instructions alone.
    PromptOnly,
}
540
/// Whether sliding-context injection considers all prior segments or only
/// those in the same chapter. Chapter scope keeps concurrency high; book
/// scope serializes across the whole document.
///
/// Defaults to `Chapter`.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ContextScope {
    /// Only prior segments from the same chapter are considered (default).
    #[default]
    Chapter,
    /// All prior segments in the book are considered.
    Book,
}
551
552impl ContextScope {
553    pub fn as_str(self) -> &'static str {
554        match self {
555            ContextScope::Chapter => "chapter",
556            ContextScope::Book => "book",
557        }
558    }
559}
560
/// Clamps a computed output-token budget to the model context and user cap.
///
/// The limits are applied in this order:
///
/// 1. When `model_context_tokens` is known, the budget is limited to the
///    context left after the prompt minus a 256-token safety margin. That
///    limit itself never drops below 512, even when the prompt nearly fills
///    the context.
/// 2. When `user_cap` is given, the budget is limited to it.
/// 3. The result is finally raised to at least 256, so a `user_cap` below 256
///    is not honoured exactly.
///
/// `estimated_prompt_tokens` values larger than `u32::MAX` are treated as
/// `u32::MAX` rather than being truncated.
pub fn cap_output_tokens(
    computed: u32,
    estimated_prompt_tokens: usize,
    model_context_tokens: Option<u32>,
    user_cap: Option<u32>,
) -> u32 {
    // Headroom kept free below the context limit.
    const CONTEXT_SAFETY_MARGIN: u32 = 256;
    // Smallest bound the context check is allowed to impose.
    const MIN_CONTEXT_BOUND: u32 = 512;
    // Smallest budget this function ever returns.
    const MIN_OUTPUT_TOKENS: u32 = 256;

    let mut out = computed;

    if let Some(context) = model_context_tokens {
        // Clamp before narrowing: a bare `as u32` would wrap a huge estimate
        // around to a small number and make the context look almost empty.
        let prompt = estimated_prompt_tokens.min(u32::MAX as usize) as u32;
        let safe_remaining = context
            .saturating_sub(prompt)
            .saturating_sub(CONTEXT_SAFETY_MARGIN);
        out = out.min(safe_remaining.max(MIN_CONTEXT_BOUND));
    }

    if let Some(cap) = user_cap {
        out = out.min(cap);
    }

    out.max(MIN_OUTPUT_TOKENS)
}
582
/// Named provider presets; `ProviderPreset::resolve` maps most of them to an
/// endpoint plus runtime overrides.
///
/// The per-variant notes below summarise what `resolve` currently returns.
#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ProviderPreset {
    /// No preset data (`resolve` returns `None`); `endpoint_or_default` falls
    /// back to the DeepSeek endpoint.
    Auto,
    /// OpenRouter, model `google/gemini-2.5-flash-lite`, scheduler concurrency 2.
    OpenRouterFree,
    /// OpenRouter, model `google/gemini-2.5-flash`, scheduler concurrency 32.
    OpenRouterPaidFast,
    /// DeepSeek API, model `deepseek-v4-flash`, scheduler concurrency 1.
    DeepSeekFree,
    /// DeepSeek API, model `deepseek-v4-flash`, scheduler concurrency 8.
    DeepSeekPaid,
    /// OpenRouter, model `google/gemini-2.5-flash-lite`, scheduler concurrency 40.
    GeminiFlashLite,
    /// OpenAI-compatible server at `http://localhost:11434/v1`, model `qwen2.5:14b`.
    LocalOllama,
    /// OpenAI-compatible server at `http://localhost:8080/v1`, model `local-model`.
    LocalLlamacpp,
    /// No preset data (`resolve` returns `None`); `endpoint_or_default` uses
    /// the caller-supplied endpoint when one is given.
    Custom,
}
596
597impl ProviderPreset {
598    pub fn resolve(self) -> Option<ProviderPresetResolved> {
599        match self {
600            ProviderPreset::Auto | ProviderPreset::Custom => None,
601            ProviderPreset::OpenRouterFree => Some(ProviderPresetResolved {
602                endpoint: ModelEndpoint {
603                    provider: "openrouter".to_string(),
604                    model: "google/gemini-2.5-flash-lite".to_string(),
605                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
606                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
607                },
608                runtime: ProviderPresetRuntimeOverrides {
609                    scheduler_concurrency: Some(2),
610                    provider_max_attempts: Some(1),
611                    validation_max_attempts: Some(1),
612                    retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
613                    max_backoff_seconds: Some(90),
614                    timeout_seconds: Some(180),
615                    batch_enabled: Some(true),
616                    batch_target_tokens: Some(6_000),
617                    batch_max_items: Some(48),
618                    compact_prompts: Some(true),
619                    adaptive_concurrency: Some(true),
620                    thinking_disabled: Some(true),
621                    json_mode: Some(JsonMode::Auto),
622                    max_idle_per_host: Some(8),
623                    ..Default::default()
624                },
625            }),
626            ProviderPreset::OpenRouterPaidFast => Some(ProviderPresetResolved {
627                endpoint: ModelEndpoint {
628                    provider: "openrouter".to_string(),
629                    model: "google/gemini-2.5-flash".to_string(),
630                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
631                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
632                },
633                runtime: ProviderPresetRuntimeOverrides {
634                    scheduler_concurrency: Some(32),
635                    provider_max_attempts: Some(1),
636                    validation_max_attempts: Some(1),
637                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
638                    max_backoff_seconds: Some(15),
639                    timeout_seconds: Some(120),
640                    batch_enabled: Some(true),
641                    batch_target_tokens: Some(16_000),
642                    batch_max_items: Some(128),
643                    adaptive_batch_sizing: Some(true),
644                    compact_prompts: Some(true),
645                    adaptive_concurrency: Some(true),
646                    thinking_disabled: Some(true),
647                    json_mode: Some(JsonMode::Auto),
648                    max_idle_per_host: Some(64),
649                    ..Default::default()
650                },
651            }),
652            ProviderPreset::DeepSeekFree => Some(ProviderPresetResolved {
653                endpoint: ModelEndpoint {
654                    provider: "deepseek".to_string(),
655                    model: "deepseek-v4-flash".to_string(),
656                    base_url: Some("https://api.deepseek.com/v1".to_string()),
657                    api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
658                },
659                runtime: ProviderPresetRuntimeOverrides {
660                    scheduler_concurrency: Some(1),
661                    provider_max_attempts: Some(1),
662                    validation_max_attempts: Some(1),
663                    retry_after_policy: Some(RetryAfterPolicy::RespectHeader),
664                    max_backoff_seconds: Some(120),
665                    timeout_seconds: Some(240),
666                    batch_enabled: Some(true),
667                    batch_target_tokens: Some(4_000),
668                    batch_max_items: Some(32),
669                    compact_prompts: Some(true),
670                    adaptive_concurrency: Some(false),
671                    thinking_disabled: Some(true),
672                    json_mode: Some(JsonMode::Auto),
673                    max_idle_per_host: Some(4),
674                    ..Default::default()
675                },
676            }),
677            ProviderPreset::DeepSeekPaid => Some(ProviderPresetResolved {
678                endpoint: ModelEndpoint {
679                    provider: "deepseek".to_string(),
680                    model: "deepseek-v4-flash".to_string(),
681                    base_url: Some("https://api.deepseek.com/v1".to_string()),
682                    api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
683                },
684                runtime: ProviderPresetRuntimeOverrides {
685                    scheduler_concurrency: Some(8),
686                    provider_max_attempts: Some(2),
687                    validation_max_attempts: Some(1),
688                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
689                    max_backoff_seconds: Some(30),
690                    timeout_seconds: Some(180),
691                    batch_enabled: Some(true),
692                    batch_target_tokens: Some(12_000),
693                    batch_max_items: Some(96),
694                    adaptive_batch_sizing: Some(true),
695                    compact_prompts: Some(true),
696                    adaptive_concurrency: Some(true),
697                    thinking_disabled: Some(true),
698                    json_mode: Some(JsonMode::Auto),
699                    max_idle_per_host: Some(16),
700                    ..Default::default()
701                },
702            }),
703            ProviderPreset::GeminiFlashLite => Some(ProviderPresetResolved {
704                endpoint: ModelEndpoint {
705                    provider: "openrouter".to_string(),
706                    model: "google/gemini-2.5-flash-lite".to_string(),
707                    base_url: Some("https://openrouter.ai/api/v1".to_string()),
708                    api_key_env: Some("OPENROUTER_API_KEY".to_string()),
709                },
710                runtime: ProviderPresetRuntimeOverrides {
711                    scheduler_concurrency: Some(40),
712                    provider_max_attempts: Some(1),
713                    validation_max_attempts: Some(1),
714                    retry_after_policy: Some(RetryAfterPolicy::JitteredExponential),
715                    max_backoff_seconds: Some(15),
716                    timeout_seconds: Some(120),
717                    batch_enabled: Some(true),
718                    batch_target_tokens: Some(20_000),
719                    batch_max_items: Some(160),
720                    adaptive_batch_sizing: Some(true),
721                    compact_prompts: Some(true),
722                    adaptive_concurrency: Some(true),
723                    thinking_disabled: Some(true),
724                    json_mode: Some(JsonMode::Auto),
725                    max_idle_per_host: Some(64),
726                    ..Default::default()
727                },
728            }),
729            ProviderPreset::LocalOllama => Some(ProviderPresetResolved {
730                endpoint: ModelEndpoint {
731                    provider: "openai-compatible".to_string(),
732                    model: "qwen2.5:14b".to_string(),
733                    base_url: Some("http://localhost:11434/v1".to_string()),
734                    api_key_env: Some("OLLAMA_API_KEY".to_string()),
735                },
736                runtime: ProviderPresetRuntimeOverrides {
737                    scheduler_concurrency: Some(1),
738                    provider_max_attempts: Some(1),
739                    validation_max_attempts: Some(1),
740                    retry_after_policy: Some(RetryAfterPolicy::None),
741                    timeout_seconds: Some(300),
742                    batch_enabled: Some(true),
743                    batch_target_tokens: Some(4_000),
744                    batch_max_items: Some(24),
745                    compact_prompts: Some(true),
746                    adaptive_concurrency: Some(false),
747                    thinking_disabled: Some(true),
748                    json_mode: Some(JsonMode::Auto),
749                    max_idle_per_host: Some(2),
750                    ..Default::default()
751                },
752            }),
753            ProviderPreset::LocalLlamacpp => Some(ProviderPresetResolved {
754                endpoint: ModelEndpoint {
755                    provider: "openai-compatible".to_string(),
756                    model: "local-model".to_string(),
757                    base_url: Some("http://localhost:8080/v1".to_string()),
758                    api_key_env: Some("LLAMACPP_API_KEY".to_string()),
759                },
760                runtime: ProviderPresetRuntimeOverrides {
761                    scheduler_concurrency: Some(1),
762                    provider_max_attempts: Some(1),
763                    validation_max_attempts: Some(1),
764                    retry_after_policy: Some(RetryAfterPolicy::None),
765                    timeout_seconds: Some(300),
766                    batch_enabled: Some(true),
767                    batch_target_tokens: Some(4_000),
768                    batch_max_items: Some(24),
769                    compact_prompts: Some(true),
770                    adaptive_concurrency: Some(false),
771                    thinking_disabled: Some(true),
772                    json_mode: Some(JsonMode::Auto),
773                    max_idle_per_host: Some(2),
774                    ..Default::default()
775                },
776            }),
777        }
778    }
779
780    pub fn endpoint_or_default(self, custom: Option<ModelEndpoint>) -> ModelEndpoint {
781        if let Some(resolved) = self.resolve() {
782            return resolved.endpoint;
783        }
784        match self {
785            ProviderPreset::Auto => ModelEndpoint {
786                provider: "deepseek".to_string(),
787                model: "deepseek-v4-flash".to_string(),
788                base_url: Some("https://api.deepseek.com/v1".to_string()),
789                api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
790            },
791            ProviderPreset::Custom => custom.unwrap_or_else(|| ModelEndpoint {
792                provider: "deepseek".to_string(),
793                model: "deepseek-v4-flash".to_string(),
794                base_url: Some("https://api.deepseek.com/v1".to_string()),
795                api_key_env: Some("DEEPSEEK_API_KEY".to_string()),
796            }),
797            _ => unreachable!("resolved presets returned above"),
798        }
799    }
800}
801
802#[derive(Debug, Clone)]
803pub struct ProviderPresetResolved {
804    pub endpoint: ModelEndpoint,
805    pub runtime: ProviderPresetRuntimeOverrides,
806}
807
808#[derive(Debug, Clone, Default)]
809pub struct ProviderPresetRuntimeOverrides {
810    pub scheduler_concurrency: Option<usize>,
811    pub provider_max_attempts: Option<usize>,
812    pub validation_max_attempts: Option<usize>,
813    pub retry_after_policy: Option<RetryAfterPolicy>,
814    pub max_backoff_seconds: Option<u64>,
815    pub timeout_seconds: Option<u64>,
816    pub batch_enabled: Option<bool>,
817    pub batch_target_tokens: Option<usize>,
818    pub batch_max_items: Option<usize>,
819    pub adaptive_batch_sizing: Option<bool>,
820    pub compact_prompts: Option<bool>,
821    pub adaptive_concurrency: Option<bool>,
822    pub thinking_disabled: Option<bool>,
823    pub model_context_tokens: Option<u32>,
824    pub max_output_tokens: Option<u32>,
825    pub batch_max_output_tokens: Option<u32>,
826    pub json_mode: Option<JsonMode>,
827    pub max_idle_per_host: Option<usize>,
828}
829
830#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
831pub enum RetryAfterPolicy {
832    RespectHeader,
833    JitteredExponential,
834    Fixed,
835    None,
836}
837
838#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
839pub enum ProviderErrorKind {
840    RateLimit,
841    Timeout,
842    Server,
843    Client,
844    InvalidResponse,
845    Unknown,
846}
847
848#[derive(Debug, Clone, Default, serde::Serialize)]
849pub struct ProviderRequestMetric {
850    pub request_id: String,
851    pub batch_id: Option<String>,
852    pub provider: String,
853    pub model: String,
854    pub profile: String,
855    pub items: usize,
856    pub estimated_input_tokens: usize,
857    pub max_output_tokens: Option<u32>,
858    pub input_tokens: Option<u64>,
859    pub output_tokens: Option<u64>,
860    pub latency_ms: u64,
861    pub finish_reason: Option<String>,
862    pub status: String,
863    pub status_code: Option<u16>,
864    pub retry_count: usize,
865    pub backoff_ms: u64,
866    pub error_kind: Option<ProviderErrorKind>,
867}
868
/// Identifies a model and where to reach it.
#[derive(Debug, Clone)]
pub struct ModelEndpoint {
    /// Provider identifier, e.g. `"deepseek"` or `"openai-compatible"`.
    pub provider: String,
    /// Model identifier, e.g. `"deepseek-v4-flash"` or `"local-model"`.
    pub model: String,
    /// Base URL of the API, e.g. `"http://localhost:8080/v1"`, if one is set.
    pub base_url: Option<String>,
    /// Name of the environment variable holding the API key, e.g.
    /// `"DEEPSEEK_API_KEY"`, if one is set. This is the variable's name, not
    /// the key itself.
    pub api_key_env: Option<String>,
}
876
877#[derive(Debug, Clone)]
878pub struct ModelRouteConfig {
879    pub translation: ModelEndpoint,
880    pub repair: Option<ModelEndpoint>,
881    pub qa: Option<ModelEndpoint>,
882    pub double_check: Option<ModelEndpoint>,
883    pub fallback: Option<ModelEndpoint>,
884}
885
886#[cfg_attr(feature = "cli", derive(clap::ValueEnum))]
887#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
888pub enum FallbackScope {
889    Failed,
890    NeedsReview,
891    FailedAndNeedsReview,
892}
893
#[cfg(test)]
mod tests {
    use super::*;

    /// The paid OpenRouter "fast" preset carries its endpoint and the expected
    /// runtime overrides.
    #[test]
    fn openrouter_paid_fast_preset_sets_runtime_overrides() {
        let ProviderPresetResolved { endpoint, runtime } = ProviderPreset::OpenRouterPaidFast
            .resolve()
            .expect("preset should resolve");

        assert_eq!(endpoint.provider, "openrouter");
        assert_eq!(runtime.scheduler_concurrency, Some(32));
        assert_eq!(runtime.provider_max_attempts, Some(1));
        assert_eq!(runtime.batch_target_tokens, Some(16_000));
        assert_eq!(runtime.adaptive_batch_sizing, Some(true));
        assert_eq!(runtime.max_idle_per_host, Some(64));
    }

    /// The free OpenRouter preset runs at low concurrency and respects the
    /// retry-after header.
    #[test]
    fn openrouter_free_preset_uses_low_concurrency_and_respect_retry_after() {
        let runtime = ProviderPreset::OpenRouterFree
            .resolve()
            .expect("preset should resolve")
            .runtime;

        assert_eq!(runtime.scheduler_concurrency, Some(2));
        assert_eq!(runtime.provider_max_attempts, Some(1));
        assert_eq!(
            runtime.retry_after_policy,
            Some(RetryAfterPolicy::RespectHeader)
        );
        assert_eq!(runtime.max_idle_per_host, Some(8));
    }

    /// Both local presets point at an OpenAI-compatible server on localhost.
    #[test]
    fn local_presets_use_openai_compatible_loopback_endpoints() {
        // Ollama: endpoint details plus single-request concurrency.
        let ollama = ProviderPreset::LocalOllama
            .resolve()
            .expect("Ollama preset should resolve");
        let ollama_endpoint = &ollama.endpoint;
        assert_eq!(ollama_endpoint.provider, "openai-compatible");
        assert_eq!(
            ollama_endpoint.base_url.as_deref(),
            Some("http://localhost:11434/v1")
        );
        assert_eq!(
            ollama_endpoint.api_key_env.as_deref(),
            Some("OLLAMA_API_KEY")
        );
        assert_eq!(ollama.runtime.scheduler_concurrency, Some(1));

        // llama.cpp: endpoint details only.
        let llamacpp_endpoint = ProviderPreset::LocalLlamacpp
            .resolve()
            .expect("llama.cpp preset should resolve")
            .endpoint;
        assert_eq!(llamacpp_endpoint.provider, "openai-compatible");
        assert_eq!(
            llamacpp_endpoint.base_url.as_deref(),
            Some("http://localhost:8080/v1")
        );
        assert_eq!(
            llamacpp_endpoint.api_key_env.as_deref(),
            Some("LLAMACPP_API_KEY")
        );
    }

    /// Both DeepSeek presets disable thinking.
    #[test]
    fn deepseek_translation_presets_disable_thinking() {
        for preset in [ProviderPreset::DeepSeekFree, ProviderPreset::DeepSeekPaid] {
            let runtime = preset.resolve().expect("preset should resolve").runtime;
            assert_eq!(
                runtime.thinking_disabled,
                Some(true),
                "translation presets should reserve output tokens for translated prose"
            );
        }
    }

    /// A `RuntimeConfigResolved` event exposes the provider-preset values it
    /// was constructed with.
    #[test]
    fn runtime_config_event_includes_provider_preset_values() {
        let event = crate::ProgressEvent::RuntimeConfigResolved {
            profile: "v1_fast".to_string(),
            provider_preset: Some("OpenRouterPaidFast".to_string()),
            provider: "openrouter".to_string(),
            model: "google/gemini-2.5-flash".to_string(),
            concurrency: 32,
            max_attempts: 1,
            provider_max_attempts: 1,
            validation_max_attempts: 1,
            retry_after_policy: "JitteredExponential".to_string(),
            max_backoff_seconds: 15,
            timeout_seconds: 120,
            batch_enabled: true,
            batch_target_tokens: 16_000,
            batch_max_items: 128,
            adaptive_batch_sizing: true,
            adaptive_concurrency: true,
            compact_prompts: true,
            thinking_disabled: true,
            json_mode: "Auto".to_string(),
            model_context_tokens: None,
            max_output_tokens: None,
            batch_max_output_tokens: None,
            timestamp_ms: 0,
        };

        if let crate::ProgressEvent::RuntimeConfigResolved {
            provider_preset,
            batch_target_tokens,
            adaptive_batch_sizing,
            provider_max_attempts,
            ..
        } = event
        {
            assert_eq!(provider_preset.as_deref(), Some("OpenRouterPaidFast"));
            assert_eq!(batch_target_tokens, 16_000);
            assert!(adaptive_batch_sizing);
            assert_eq!(provider_max_attempts, 1);
        } else {
            unreachable!("constructed runtime event");
        }
    }

    /// The `V1Fast` profile uses a single attempt at every level while keeping
    /// item repair and the adaptive features switched on.
    #[test]
    fn v1_fast_uses_single_provider_attempt() {
        let resolved = TranslationProfile::V1Fast.resolve();

        assert_eq!(resolved.scheduler.max_attempts, 1);
        assert_eq!(resolved.provider.provider_max_attempts, 1);
        assert_eq!(resolved.provider.validation_max_attempts, 1);
        assert!(resolved.batch.repair_invalid_items);
        assert!(resolved.adaptive_concurrency);
        assert!(resolved.batch.adaptive_sizing);
    }
}