1use std::path::PathBuf;
2
3use crate::{
4 BatchConfig, DoubleCheckConfig, JsonMode, ProviderPreset, ProviderRuntimeConfig, QaRunConfig,
5 ResolvedRunSettings, RetryAfterPolicy, SchedulerConfig, SegmentationConfig, TranslationProfile,
6 config::ContextScope,
7 glossary::{GlossaryFormat, GlossaryTerm},
8};
9
10#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
11pub struct RunConfigSnapshot {
12 pub input_path: PathBuf,
13 #[serde(default)]
14 pub input_snapshot_path: Option<PathBuf>,
15 #[serde(default)]
16 pub input_sha256: Option<String>,
17 pub output_path: PathBuf,
18 pub events_path: Option<PathBuf>,
19 pub report_json_path: Option<PathBuf>,
20 pub report_markdown_path: Option<PathBuf>,
21 pub source_language: Option<String>,
22 pub target_language: String,
23 pub provider: String,
24 pub model: String,
25 pub base_url: Option<String>,
26 pub api_key_env: Option<String>,
27 pub profile: TranslationProfile,
28 pub provider_preset: Option<ProviderPreset>,
29 pub prompt_version: String,
30 pub cache_namespace: String,
31 #[serde(default)]
32 pub book_id: Option<String>,
33 #[serde(default)]
34 pub series_id: Option<String>,
35 #[serde(default = "default_glossary_budget_tokens")]
36 pub glossary_budget_tokens: usize,
37 #[serde(default = "default_glossary_format")]
38 pub glossary_format: GlossaryFormat,
39 #[serde(default)]
40 pub prompt_extra: Option<String>,
41 #[serde(default)]
42 pub glossary_fingerprint: String,
43 #[serde(default)]
44 pub glossary_terms: Vec<GlossaryTerm>,
45 #[serde(default)]
46 pub context_window: usize,
47 #[serde(default = "default_context_budget_tokens")]
48 pub context_budget_tokens: usize,
49 #[serde(default)]
50 pub context_scope: ContextScope,
51 #[serde(default)]
54 pub style_fingerprint: String,
55 #[serde(default)]
59 pub style_rendered_block: String,
60 #[serde(default)]
64 pub entities_fingerprint: String,
65 #[serde(default)]
67 pub entities_rendered_block: String,
68 pub settings: ResolvedRunSettingsSnapshot,
69}
70
/// Serde fallback for `RunConfigSnapshot::context_budget_tokens`, used when
/// the field is missing from the serialized snapshot.
fn default_context_budget_tokens() -> usize {
    const FALLBACK_CONTEXT_BUDGET_TOKENS: usize = 1_200;
    FALLBACK_CONTEXT_BUDGET_TOKENS
}
74
/// Serde fallback for `RunConfigSnapshot::glossary_budget_tokens`, used when
/// the field is missing from the serialized snapshot.
fn default_glossary_budget_tokens() -> usize {
    const FALLBACK_GLOSSARY_BUDGET_TOKENS: usize = 800;
    FALLBACK_GLOSSARY_BUDGET_TOKENS
}
78
79fn default_glossary_format() -> GlossaryFormat {
80 GlossaryFormat::Json
81}
82
83#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
84pub struct ResolvedRunSettingsSnapshot {
85 pub profile: TranslationProfile,
86 pub segmentation: SegmentationConfigSnapshot,
87 pub batch: BatchConfigSnapshot,
88 pub scheduler: SchedulerConfigSnapshot,
89 pub provider: ProviderRuntimeConfigSnapshot,
90 pub compact_prompts: bool,
91 pub retry_failed_only: bool,
92 pub adaptive_concurrency: bool,
93 pub qa: QaRunConfigSnapshot,
94 pub double_check: DoubleCheckConfigSnapshot,
95}
96
97#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
98pub struct SegmentationConfigSnapshot {
99 pub max_segment_tokens: usize,
100 pub context_tokens: usize,
101}
102
103#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
104pub struct BatchConfigSnapshot {
105 pub enabled: bool,
106 pub target_tokens: usize,
107 pub max_items: usize,
108 pub adaptive_sizing: bool,
109 pub split_on_json_failure: bool,
110 pub repair_invalid_items: bool,
111}
112
113#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
114pub struct SchedulerConfigSnapshot {
115 pub concurrency: usize,
116 pub max_attempts: usize,
117}
118
119#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
120pub struct ProviderRuntimeConfigSnapshot {
121 pub timeout_seconds: u64,
122 pub provider_max_attempts: usize,
123 pub validation_max_attempts: usize,
124 pub retry_after_policy: RetryAfterPolicy,
125 pub max_backoff_seconds: u64,
126 pub thinking_disabled: bool,
127 pub model_context_tokens: Option<u32>,
128 pub max_output_tokens: Option<u32>,
129 pub batch_max_output_tokens: Option<u32>,
130 pub json_mode: JsonMode,
131 pub max_idle_per_host: usize,
132}
133
134#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
135pub struct QaRunConfigSnapshot {
136 pub concurrency: usize,
137 pub batch_target_tokens: usize,
138 pub model: Option<String>,
139 pub provider: Option<String>,
140 pub base_url: Option<String>,
141 pub api_key_env: Option<String>,
142}
143
144#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
145pub struct DoubleCheckConfigSnapshot {
146 pub mode: crate::DoubleCheckMode,
147 pub model: Option<String>,
148 pub provider: Option<String>,
149 pub base_url: Option<String>,
150 pub api_key_env: Option<String>,
151 pub concurrency: usize,
152 pub batch_target_tokens: usize,
153 pub auto_correct: bool,
154 pub correction_rounds: usize,
155}
156
157impl ResolvedRunSettingsSnapshot {
158 pub fn from_settings(settings: &ResolvedRunSettings) -> Self {
159 Self {
160 profile: settings.profile,
161 segmentation: SegmentationConfigSnapshot {
162 max_segment_tokens: settings.segmentation.max_segment_tokens,
163 context_tokens: settings.segmentation.context_tokens,
164 },
165 batch: BatchConfigSnapshot {
166 enabled: settings.batch.enabled,
167 target_tokens: settings.batch.target_tokens,
168 max_items: settings.batch.max_items,
169 adaptive_sizing: settings.batch.adaptive_sizing,
170 split_on_json_failure: settings.batch.split_on_json_failure,
171 repair_invalid_items: settings.batch.repair_invalid_items,
172 },
173 scheduler: SchedulerConfigSnapshot {
174 concurrency: settings.scheduler.concurrency,
175 max_attempts: settings.scheduler.max_attempts,
176 },
177 provider: ProviderRuntimeConfigSnapshot {
178 timeout_seconds: settings.provider.timeout_seconds,
179 provider_max_attempts: settings.provider.provider_max_attempts,
180 validation_max_attempts: settings.provider.validation_max_attempts,
181 retry_after_policy: settings.provider.retry_after_policy,
182 max_backoff_seconds: settings.provider.max_backoff_seconds,
183 thinking_disabled: settings.provider.thinking_disabled,
184 model_context_tokens: settings.provider.model_context_tokens,
185 max_output_tokens: settings.provider.max_output_tokens,
186 batch_max_output_tokens: settings.provider.batch_max_output_tokens,
187 json_mode: settings.provider.json_mode,
188 max_idle_per_host: settings.provider.max_idle_per_host,
189 },
190 compact_prompts: settings.compact_prompts,
191 retry_failed_only: settings.retry_failed_only,
192 adaptive_concurrency: settings.adaptive_concurrency,
193 qa: QaRunConfigSnapshot {
194 concurrency: settings.qa.concurrency,
195 batch_target_tokens: settings.qa.batch_target_tokens,
196 model: settings.qa.model.clone(),
197 provider: settings.qa.provider.clone(),
198 base_url: settings.qa.base_url.clone(),
199 api_key_env: settings.qa.api_key_env.clone(),
200 },
201 double_check: DoubleCheckConfigSnapshot {
202 mode: settings.double_check.mode,
203 model: settings.double_check.model.clone(),
204 provider: settings.double_check.provider.clone(),
205 base_url: settings.double_check.base_url.clone(),
206 api_key_env: settings.double_check.api_key_env.clone(),
207 concurrency: settings.double_check.concurrency,
208 batch_target_tokens: settings.double_check.batch_target_tokens,
209 auto_correct: settings.double_check.auto_correct,
210 correction_rounds: settings.double_check.correction_rounds,
211 },
212 }
213 }
214
215 pub fn to_settings(&self) -> ResolvedRunSettings {
216 ResolvedRunSettings {
217 profile: self.profile,
218 segmentation: SegmentationConfig {
219 max_segment_tokens: self.segmentation.max_segment_tokens,
220 context_tokens: self.segmentation.context_tokens,
221 },
222 batch: BatchConfig {
223 enabled: self.batch.enabled,
224 target_tokens: self.batch.target_tokens,
225 max_items: self.batch.max_items,
226 adaptive_sizing: self.batch.adaptive_sizing,
227 split_on_json_failure: self.batch.split_on_json_failure,
228 repair_invalid_items: self.batch.repair_invalid_items,
229 },
230 scheduler: SchedulerConfig {
231 concurrency: self.scheduler.concurrency,
232 max_attempts: self.scheduler.max_attempts,
233 },
234 provider: ProviderRuntimeConfig {
235 timeout_seconds: self.provider.timeout_seconds,
236 provider_max_attempts: self.provider.provider_max_attempts,
237 validation_max_attempts: self.provider.validation_max_attempts,
238 retry_after_policy: self.provider.retry_after_policy,
239 max_backoff_seconds: self.provider.max_backoff_seconds,
240 thinking_disabled: self.provider.thinking_disabled,
241 model_context_tokens: self.provider.model_context_tokens,
242 max_output_tokens: self.provider.max_output_tokens,
243 batch_max_output_tokens: self.provider.batch_max_output_tokens,
244 json_mode: self.provider.json_mode,
245 max_idle_per_host: self.provider.max_idle_per_host,
246 },
247 compact_prompts: self.compact_prompts,
248 retry_failed_only: self.retry_failed_only,
249 adaptive_concurrency: self.adaptive_concurrency,
250 qa: QaRunConfig {
251 concurrency: self.qa.concurrency,
252 batch_target_tokens: self.qa.batch_target_tokens,
253 model: self.qa.model.clone(),
254 provider: self.qa.provider.clone(),
255 base_url: self.qa.base_url.clone(),
256 api_key_env: self.qa.api_key_env.clone(),
257 },
258 double_check: DoubleCheckConfig {
259 mode: self.double_check.mode,
260 model: self.double_check.model.clone(),
261 provider: self.double_check.provider.clone(),
262 base_url: self.double_check.base_url.clone(),
263 api_key_env: self.double_check.api_key_env.clone(),
264 concurrency: self.double_check.concurrency,
265 batch_target_tokens: self.double_check.batch_target_tokens,
266 auto_correct: self.double_check.auto_correct,
267 correction_rounds: self.double_check.correction_rounds,
268 },
269 }
270 }
271}