Skip to main content

bookforge_core/
run_snapshot.rs

1use std::path::PathBuf;
2
3use crate::{
4    BatchConfig, DoubleCheckConfig, JsonMode, ProviderPreset, ProviderRuntimeConfig, QaRunConfig,
5    ResolvedRunSettings, RetryAfterPolicy, SchedulerConfig, SegmentationConfig, TranslationProfile,
6    config::ContextScope,
7    glossary::{GlossaryFormat, GlossaryTerm},
8};
9
10#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
11pub struct RunConfigSnapshot {
12    pub input_path: PathBuf,
13    #[serde(default)]
14    pub input_snapshot_path: Option<PathBuf>,
15    #[serde(default)]
16    pub input_sha256: Option<String>,
17    pub output_path: PathBuf,
18    pub events_path: Option<PathBuf>,
19    pub report_json_path: Option<PathBuf>,
20    pub report_markdown_path: Option<PathBuf>,
21    pub source_language: Option<String>,
22    pub target_language: String,
23    pub provider: String,
24    pub model: String,
25    pub base_url: Option<String>,
26    pub api_key_env: Option<String>,
27    pub profile: TranslationProfile,
28    pub provider_preset: Option<ProviderPreset>,
29    pub prompt_version: String,
30    pub cache_namespace: String,
31    #[serde(default)]
32    pub book_id: Option<String>,
33    #[serde(default)]
34    pub series_id: Option<String>,
35    #[serde(default = "default_glossary_budget_tokens")]
36    pub glossary_budget_tokens: usize,
37    #[serde(default = "default_glossary_format")]
38    pub glossary_format: GlossaryFormat,
39    #[serde(default)]
40    pub prompt_extra: Option<String>,
41    #[serde(default)]
42    pub glossary_fingerprint: String,
43    #[serde(default)]
44    pub glossary_terms: Vec<GlossaryTerm>,
45    #[serde(default)]
46    pub context_window: usize,
47    #[serde(default = "default_context_budget_tokens")]
48    pub context_budget_tokens: usize,
49    #[serde(default)]
50    pub context_scope: ContextScope,
51    /// SHA-256 of the merged style sheet's normalized JSON form. Stable
52    /// for users without `--style` (fingerprint of `None`).
53    #[serde(default)]
54    pub style_fingerprint: String,
55    /// Pre-rendered style guide block — captured so resume reproduces the
56    /// exact prompt the original run sent, even if the source TOML files
57    /// have moved or been edited.
58    #[serde(default)]
59    pub style_rendered_block: String,
60    /// SHA-256 of the merged entity set. Same opt-in stance as
61    /// `style_fingerprint`: empty rendered block means the cache
62    /// namespace ignores this field.
63    #[serde(default)]
64    pub entities_fingerprint: String,
65    /// Pre-rendered entity grammatical-agreement block.
66    #[serde(default)]
67    pub entities_rendered_block: String,
68    pub settings: ResolvedRunSettingsSnapshot,
69}
70
/// Serde fallback for `RunConfigSnapshot::context_budget_tokens`, used when
/// the field is absent from serialized input.
fn default_context_budget_tokens() -> usize {
    const FALLBACK_CONTEXT_BUDGET_TOKENS: usize = 1_200;
    FALLBACK_CONTEXT_BUDGET_TOKENS
}
74
/// Serde fallback for `RunConfigSnapshot::glossary_budget_tokens`, used when
/// the field is absent from serialized input.
fn default_glossary_budget_tokens() -> usize {
    const FALLBACK_GLOSSARY_BUDGET_TOKENS: usize = 800;
    FALLBACK_GLOSSARY_BUDGET_TOKENS
}
78
79fn default_glossary_format() -> GlossaryFormat {
80    GlossaryFormat::Json
81}
82
83#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
84pub struct ResolvedRunSettingsSnapshot {
85    pub profile: TranslationProfile,
86    pub segmentation: SegmentationConfigSnapshot,
87    pub batch: BatchConfigSnapshot,
88    pub scheduler: SchedulerConfigSnapshot,
89    pub provider: ProviderRuntimeConfigSnapshot,
90    pub compact_prompts: bool,
91    pub retry_failed_only: bool,
92    pub adaptive_concurrency: bool,
93    pub qa: QaRunConfigSnapshot,
94    pub double_check: DoubleCheckConfigSnapshot,
95}
96
97#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
98pub struct SegmentationConfigSnapshot {
99    pub max_segment_tokens: usize,
100    pub context_tokens: usize,
101}
102
103#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
104pub struct BatchConfigSnapshot {
105    pub enabled: bool,
106    pub target_tokens: usize,
107    pub max_items: usize,
108    pub adaptive_sizing: bool,
109    pub split_on_json_failure: bool,
110    pub repair_invalid_items: bool,
111}
112
113#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
114pub struct SchedulerConfigSnapshot {
115    pub concurrency: usize,
116    pub max_attempts: usize,
117}
118
119#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
120pub struct ProviderRuntimeConfigSnapshot {
121    pub timeout_seconds: u64,
122    pub provider_max_attempts: usize,
123    pub validation_max_attempts: usize,
124    pub retry_after_policy: RetryAfterPolicy,
125    pub max_backoff_seconds: u64,
126    pub thinking_disabled: bool,
127    pub model_context_tokens: Option<u32>,
128    pub max_output_tokens: Option<u32>,
129    pub batch_max_output_tokens: Option<u32>,
130    pub json_mode: JsonMode,
131    pub max_idle_per_host: usize,
132}
133
134#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
135pub struct QaRunConfigSnapshot {
136    pub concurrency: usize,
137    pub batch_target_tokens: usize,
138    pub model: Option<String>,
139    pub provider: Option<String>,
140    pub base_url: Option<String>,
141    pub api_key_env: Option<String>,
142}
143
144#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
145pub struct DoubleCheckConfigSnapshot {
146    pub mode: crate::DoubleCheckMode,
147    pub model: Option<String>,
148    pub provider: Option<String>,
149    pub base_url: Option<String>,
150    pub api_key_env: Option<String>,
151    pub concurrency: usize,
152    pub batch_target_tokens: usize,
153    pub auto_correct: bool,
154    pub correction_rounds: usize,
155}
156
157impl ResolvedRunSettingsSnapshot {
158    pub fn from_settings(settings: &ResolvedRunSettings) -> Self {
159        Self {
160            profile: settings.profile,
161            segmentation: SegmentationConfigSnapshot {
162                max_segment_tokens: settings.segmentation.max_segment_tokens,
163                context_tokens: settings.segmentation.context_tokens,
164            },
165            batch: BatchConfigSnapshot {
166                enabled: settings.batch.enabled,
167                target_tokens: settings.batch.target_tokens,
168                max_items: settings.batch.max_items,
169                adaptive_sizing: settings.batch.adaptive_sizing,
170                split_on_json_failure: settings.batch.split_on_json_failure,
171                repair_invalid_items: settings.batch.repair_invalid_items,
172            },
173            scheduler: SchedulerConfigSnapshot {
174                concurrency: settings.scheduler.concurrency,
175                max_attempts: settings.scheduler.max_attempts,
176            },
177            provider: ProviderRuntimeConfigSnapshot {
178                timeout_seconds: settings.provider.timeout_seconds,
179                provider_max_attempts: settings.provider.provider_max_attempts,
180                validation_max_attempts: settings.provider.validation_max_attempts,
181                retry_after_policy: settings.provider.retry_after_policy,
182                max_backoff_seconds: settings.provider.max_backoff_seconds,
183                thinking_disabled: settings.provider.thinking_disabled,
184                model_context_tokens: settings.provider.model_context_tokens,
185                max_output_tokens: settings.provider.max_output_tokens,
186                batch_max_output_tokens: settings.provider.batch_max_output_tokens,
187                json_mode: settings.provider.json_mode,
188                max_idle_per_host: settings.provider.max_idle_per_host,
189            },
190            compact_prompts: settings.compact_prompts,
191            retry_failed_only: settings.retry_failed_only,
192            adaptive_concurrency: settings.adaptive_concurrency,
193            qa: QaRunConfigSnapshot {
194                concurrency: settings.qa.concurrency,
195                batch_target_tokens: settings.qa.batch_target_tokens,
196                model: settings.qa.model.clone(),
197                provider: settings.qa.provider.clone(),
198                base_url: settings.qa.base_url.clone(),
199                api_key_env: settings.qa.api_key_env.clone(),
200            },
201            double_check: DoubleCheckConfigSnapshot {
202                mode: settings.double_check.mode,
203                model: settings.double_check.model.clone(),
204                provider: settings.double_check.provider.clone(),
205                base_url: settings.double_check.base_url.clone(),
206                api_key_env: settings.double_check.api_key_env.clone(),
207                concurrency: settings.double_check.concurrency,
208                batch_target_tokens: settings.double_check.batch_target_tokens,
209                auto_correct: settings.double_check.auto_correct,
210                correction_rounds: settings.double_check.correction_rounds,
211            },
212        }
213    }
214
215    pub fn to_settings(&self) -> ResolvedRunSettings {
216        ResolvedRunSettings {
217            profile: self.profile,
218            segmentation: SegmentationConfig {
219                max_segment_tokens: self.segmentation.max_segment_tokens,
220                context_tokens: self.segmentation.context_tokens,
221            },
222            batch: BatchConfig {
223                enabled: self.batch.enabled,
224                target_tokens: self.batch.target_tokens,
225                max_items: self.batch.max_items,
226                adaptive_sizing: self.batch.adaptive_sizing,
227                split_on_json_failure: self.batch.split_on_json_failure,
228                repair_invalid_items: self.batch.repair_invalid_items,
229            },
230            scheduler: SchedulerConfig {
231                concurrency: self.scheduler.concurrency,
232                max_attempts: self.scheduler.max_attempts,
233            },
234            provider: ProviderRuntimeConfig {
235                timeout_seconds: self.provider.timeout_seconds,
236                provider_max_attempts: self.provider.provider_max_attempts,
237                validation_max_attempts: self.provider.validation_max_attempts,
238                retry_after_policy: self.provider.retry_after_policy,
239                max_backoff_seconds: self.provider.max_backoff_seconds,
240                thinking_disabled: self.provider.thinking_disabled,
241                model_context_tokens: self.provider.model_context_tokens,
242                max_output_tokens: self.provider.max_output_tokens,
243                batch_max_output_tokens: self.provider.batch_max_output_tokens,
244                json_mode: self.provider.json_mode,
245                max_idle_per_host: self.provider.max_idle_per_host,
246            },
247            compact_prompts: self.compact_prompts,
248            retry_failed_only: self.retry_failed_only,
249            adaptive_concurrency: self.adaptive_concurrency,
250            qa: QaRunConfig {
251                concurrency: self.qa.concurrency,
252                batch_target_tokens: self.qa.batch_target_tokens,
253                model: self.qa.model.clone(),
254                provider: self.qa.provider.clone(),
255                base_url: self.qa.base_url.clone(),
256                api_key_env: self.qa.api_key_env.clone(),
257            },
258            double_check: DoubleCheckConfig {
259                mode: self.double_check.mode,
260                model: self.double_check.model.clone(),
261                provider: self.double_check.provider.clone(),
262                base_url: self.double_check.base_url.clone(),
263                api_key_env: self.double_check.api_key_env.clone(),
264                concurrency: self.double_check.concurrency,
265                batch_target_tokens: self.double_check.batch_target_tokens,
266                auto_correct: self.double_check.auto_correct,
267                correction_rounds: self.double_check.correction_rounds,
268            },
269        }
270    }
271}