1use std::{
2 cell::RefCell,
3 collections::HashMap,
4 fs,
5 path::{Path, PathBuf},
6 time::{Duration, SystemTime, UNIX_EPOCH},
7};
8
9use bookforge_core::{
10 Result as CoreResult,
11 entity::EntityGender,
12 glossary::{GlossaryCategory, GlossaryScopeKind, GlossaryStatus, GlossaryTerm},
13 ir::BlockId,
14 run_snapshot::RunConfigSnapshot,
15 segment::{BlockTranslation, Segment},
16};
17use rusqlite::{Connection, OptionalExtension, params, types::Type};
18use sha2::{Digest, Sha256};
19use std::str::FromStr;
20
21pub type Result<T> = std::result::Result<T, StoreError>;
22
23#[derive(Debug, thiserror::Error)]
24pub enum StoreError {
25 #[error("I/O error: {0}")]
26 Io(#[from] std::io::Error),
27
28 #[error("SQLite error: {0}")]
29 Sqlite(#[from] rusqlite::Error),
30
31 #[error("core error: {0}")]
32 Core(#[from] bookforge_core::BookforgeError),
33
34 #[error("serialization error: {0}")]
35 Serialization(String),
36}
37
38pub struct JobStore {
39 conn: RefCell<Connection>,
40 path: PathBuf,
41}
42
43#[derive(Debug, Clone)]
44pub struct JobRecord {
45 pub id: String,
46 pub input_path: PathBuf,
47 pub input_snapshot_path: Option<PathBuf>,
48 pub input_sha256: Option<String>,
49 pub output_path: PathBuf,
50 pub input_hash: String,
51 pub source_lang: Option<String>,
52 pub target_lang: String,
53 pub provider: String,
54 pub model: String,
55 pub base_url: Option<String>,
56 pub api_key_env: Option<String>,
57 pub status: String,
58 pub events_path: Option<PathBuf>,
59 pub report_json_path: Option<PathBuf>,
60 pub report_markdown_path: Option<PathBuf>,
61 pub book_id: Option<String>,
62 pub series_id: Option<String>,
63}
64
65#[derive(Debug, Clone, Default)]
66pub struct JobSummary {
67 pub id: String,
68 pub status: String,
69 pub total_segments: usize,
70 pub succeeded: usize,
71 pub failed: usize,
72 pub needs_review: usize,
73 pub retry_pending: usize,
74 pub cached: usize,
75 pub retried: usize,
76 pub input_tokens: u64,
77 pub input_cached_tokens: u64,
78 pub output_tokens: u64,
79}
80
81#[derive(Debug, Clone, Copy)]
82pub struct CreateJob<'a> {
83 pub input: &'a Path,
84 pub output: &'a Path,
85 pub source_lang: Option<&'a str>,
86 pub target_lang: &'a str,
87 pub provider: &'a str,
88 pub model: &'a str,
89 pub base_url: Option<&'a str>,
90 pub api_key_env: Option<&'a str>,
91 pub book_id: Option<&'a str>,
92 pub series_id: Option<&'a str>,
93}
94
95#[derive(Debug, Clone)]
96pub struct SegmentRecord {
97 pub id: String,
98 pub status: String,
99 pub attempts: usize,
100 pub error: Option<String>,
101 pub input_tokens: Option<u64>,
102 pub input_cached_tokens: Option<u64>,
103 pub output_tokens: Option<u64>,
104 pub tokens_estimated: bool,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108pub struct StoredBlockTranslation {
109 pub segment_id: String,
110 pub block_id: String,
111 pub text: String,
112}
113
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub struct StoredSegmentTranslation {
116 pub segment_id: String,
117 pub ordinal: usize,
118 pub status: String,
119 pub error: Option<String>,
120 pub translated_text: String,
121 pub blocks: Vec<BlockTranslation>,
122}
123
124#[derive(Debug, Clone, PartialEq, Eq)]
125pub struct CachedTranslation {
126 pub translated_text: String,
127 pub blocks: Vec<BlockTranslation>,
128}
129
130#[derive(Debug, Clone, Copy)]
131pub struct SaveTranslation<'a> {
132 pub job_id: &'a str,
133 pub segment_id: &'a str,
134 pub translated_text: &'a str,
135 pub blocks: &'a [BlockTranslation],
136 pub provider: &'a str,
137 pub model: &'a str,
138 pub prompt_version: &'a str,
139 pub input_tokens: Option<u64>,
140 pub input_cached_tokens: Option<u64>,
141 pub output_tokens: Option<u64>,
142 pub tokens_estimated: bool,
143}
144
145#[derive(Debug, Clone, Copy)]
146pub struct SaveNeedsReview<'a> {
147 pub job_id: &'a str,
148 pub segment_id: &'a str,
149 pub preserved_text: &'a str,
150 pub blocks: &'a [BlockTranslation],
151 pub provider: &'a str,
152 pub model: &'a str,
153 pub prompt_version: &'a str,
154 pub error: &'a str,
155 pub input_tokens: Option<u64>,
156 pub input_cached_tokens: Option<u64>,
157 pub output_tokens: Option<u64>,
158 pub tokens_estimated: bool,
159}
160
161#[derive(Debug, Clone, Copy)]
162pub struct SaveCachedTranslation<'a> {
163 pub job_id: &'a str,
164 pub segment_id: &'a str,
165 pub translated_text: &'a str,
166 pub blocks: &'a [BlockTranslation],
167 pub provider: &'a str,
168 pub model: &'a str,
169 pub prompt_version: &'a str,
170}
171
172#[derive(Debug, Clone, Copy)]
173pub struct CacheLookupRequest<'a> {
174 pub prompt_version: &'a str,
175 pub provider: &'a str,
176 pub model: &'a str,
177 pub source_lang: Option<&'a str>,
178 pub target_lang: &'a str,
179 pub cache_namespace: &'a str,
180}
181
182#[derive(Debug, Clone, Copy)]
183pub struct NewSegmentFlag<'a> {
184 pub job_id: &'a str,
185 pub segment_id: &'a str,
186 pub kind: &'a str,
187 pub note: Option<&'a str>,
188 pub suggested_source: Option<&'a str>,
189 pub suggested_target: Option<&'a str>,
190 pub consumed: bool,
191}
192
193#[derive(Debug, Clone, Copy, Default)]
194pub struct GlossaryFilter<'a> {
195 pub scope_kind: Option<GlossaryScopeKind>,
196 pub scope_id: Option<&'a str>,
197 pub source_language: Option<&'a str>,
198 pub target_language: Option<&'a str>,
199 pub active_only: bool,
200}
201
202#[derive(Debug, Clone, Copy)]
203pub struct NewGlossaryCandidate<'a> {
204 pub source_text: &'a str,
205 pub category: GlossaryCategory,
206 pub source_count: usize,
207}
208
209#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
210pub struct GlossaryCandidateUpsertResult {
211 pub inserted: usize,
212 pub updated: usize,
213 pub skipped: usize,
214}
215
216#[derive(Debug, Clone, PartialEq, Eq)]
217pub struct StoredGlossaryCandidate {
218 pub id: i64,
219 pub source_text: String,
220 pub target_text: Option<String>,
221 pub category: GlossaryCategory,
222 pub notes: Option<String>,
223 pub case_sensitive: bool,
224 pub always_active: bool,
225 pub status: GlossaryStatus,
226 pub source_language: String,
227 pub target_language: String,
228 pub source_count: usize,
229}
230
231#[derive(Debug, Clone, Copy)]
232pub struct NewStyleSheet<'a> {
233 pub scope_kind: GlossaryScopeKind,
234 pub scope_id: Option<&'a str>,
235 pub target_language: &'a str,
236 pub content_toml: &'a str,
237 pub fingerprint: &'a str,
238}
239
240#[derive(Debug, Clone, PartialEq, Eq)]
241pub struct StoredStyleSheet {
242 pub id: i64,
243 pub scope_kind: GlossaryScopeKind,
244 pub scope_id: Option<String>,
245 pub target_language: String,
246 pub content_toml: String,
247 pub fingerprint: String,
248}
249
250#[derive(Debug, Clone, Copy)]
251pub struct NewEntity<'a> {
252 pub scope_kind: GlossaryScopeKind,
253 pub scope_id: Option<&'a str>,
254 pub source_name: &'a str,
255 pub target_name: &'a str,
256 pub gender_target: Option<EntityGender>,
257 pub role: Option<&'a str>,
258 pub notes: Option<&'a str>,
259 pub source_language: &'a str,
260 pub target_language: &'a str,
261}
262
263#[derive(Debug, Clone, PartialEq, Eq)]
264pub struct StoredEntity {
265 pub id: i64,
266 pub scope_kind: GlossaryScopeKind,
267 pub scope_id: Option<String>,
268 pub source_name: String,
269 pub target_name: String,
270 pub gender_target: Option<EntityGender>,
271 pub role: Option<String>,
272 pub notes: Option<String>,
273 pub source_language: String,
274 pub target_language: String,
275}
276
277#[derive(Debug, Clone)]
278pub struct StorageDoctor {
279 pub database_path: PathBuf,
280 pub database_exists: bool,
281 pub wal_present: bool,
282 pub shm_present: bool,
283 pub journal_mode: String,
284 pub integrity_check: String,
285 pub wal_sidecars_normal: bool,
286 pub note: String,
287}
288
289pub fn run_doctor(db_path: Option<PathBuf>) -> Result<StorageDoctor> {
290 let path = db_path.unwrap_or_else(|| PathBuf::from(".bookforge/jobs.sqlite"));
291 let database_exists = path.exists();
292 let wal_path = path.with_extension("sqlite-wal");
293 let shm_path = path.with_extension("sqlite-shm");
294 let wal_present = wal_path.exists();
295 let shm_present = shm_path.exists();
296
297 let (journal_mode, integrity_check, wal_sidecars_normal, note) = if database_exists {
298 let conn = Connection::open(&path)?;
299 let journal_mode: String = conn
300 .pragma_query_value(None, "journal_mode", |row| row.get(0))
301 .unwrap_or_else(|_| "unknown".to_string());
302 let integrity_check: String = conn
303 .pragma_query_value(None, "integrity_check", |row| row.get(0))
304 .unwrap_or_else(|_| "error".to_string());
305 let _ = conn.execute_batch("PRAGMA wal_checkpoint(PASSIVE);");
306
307 let wal_sidecars_normal = if wal_present || shm_present {
308 integrity_check == "ok"
309 } else {
310 true
311 };
312
313 let note = if wal_present || shm_present {
314 "WAL sidecar files are normal. SQLite will recover them automatically. \
315 Do not delete them manually while BookForge is running."
316 .to_string()
317 } else {
318 String::new()
319 };
320
321 (journal_mode, integrity_check, wal_sidecars_normal, note)
322 } else {
323 ("unknown".to_string(), String::new(), true, String::new())
324 };
325
326 Ok(StorageDoctor {
327 database_path: path,
328 database_exists,
329 wal_present,
330 shm_present,
331 journal_mode,
332 integrity_check,
333 wal_sidecars_normal,
334 note,
335 })
336}
337
338impl JobStore {
339 pub fn open_default() -> Result<Self> {
340 Self::open(".bookforge/jobs.sqlite")
341 }
342
343 pub fn open(path: impl Into<PathBuf>) -> Result<Self> {
344 let path = path.into();
345 if let Some(parent) = path.parent() {
346 fs::create_dir_all(parent)?;
347 }
348 let conn = Connection::open(&path)?;
349
350 conn.busy_timeout(Duration::from_secs(5))?;
351 conn.pragma_update(None, "journal_mode", "WAL")?;
352 conn.pragma_update(None, "synchronous", "NORMAL")?;
353 conn.pragma_update(None, "foreign_keys", "ON")?;
354
355 let store = Self {
356 conn: RefCell::new(conn),
357 path,
358 };
359 store.migrate()?;
360 Ok(store)
361 }
362
363 pub fn path(&self) -> &Path {
364 &self.path
365 }
366
367 pub fn create_job(&self, request: CreateJob<'_>) -> Result<JobRecord> {
368 let input_hash = file_hash(request.input)?;
369 let id = format!("job_{}_{}", unix_timestamp_nanos(), &input_hash[..12]);
370 let now = timestamp_string();
371 let input_path = request.input.to_path_buf();
372 let output_path = request.output.to_path_buf();
373 let conn = self.conn.borrow();
374 conn.execute(
375 "INSERT INTO jobs
376 (id, input_path, output_path, input_hash, source_lang, target_lang, provider, model, base_url, api_key_env, book_id, series_id, status, created_at, updated_at)
377 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, 'running', ?13, ?13)",
378 params![
379 id,
380 input_path.to_string_lossy(),
381 output_path.to_string_lossy(),
382 input_hash,
383 request.source_lang,
384 request.target_lang,
385 request.provider,
386 request.model,
387 request.base_url,
388 request.api_key_env,
389 request.book_id,
390 request.series_id,
391 now,
392 ],
393 )?;
394
395 Ok(JobRecord {
396 id,
397 input_path,
398 input_snapshot_path: None,
399 input_sha256: None,
400 output_path,
401 input_hash,
402 source_lang: request.source_lang.map(ToOwned::to_owned),
403 target_lang: request.target_lang.to_string(),
404 provider: request.provider.to_string(),
405 model: request.model.to_string(),
406 base_url: request.base_url.map(ToOwned::to_owned),
407 api_key_env: request.api_key_env.map(ToOwned::to_owned),
408 status: "running".to_string(),
409 events_path: None,
410 report_json_path: None,
411 report_markdown_path: None,
412 book_id: request.book_id.map(ToOwned::to_owned),
413 series_id: request.series_id.map(ToOwned::to_owned),
414 })
415 }
416
417 pub fn update_job_config_snapshot(
418 &self,
419 job_id: &str,
420 snapshot: &RunConfigSnapshot,
421 ) -> Result<()> {
422 let json = serde_json::to_string(snapshot)
423 .map_err(|e| StoreError::Serialization(e.to_string()))?;
424 let conn = self.conn.borrow();
425 conn.execute(
426 "UPDATE jobs
427 SET config_json = ?1,
428 events_path = ?2,
429 report_json_path = ?3,
430 report_markdown_path = ?4,
431 input_snapshot_path = ?5,
432 input_sha256 = ?6,
433 updated_at = ?7
434 WHERE id = ?8",
435 params![
436 json,
437 snapshot
438 .events_path
439 .as_ref()
440 .map(|path| path.to_string_lossy().to_string()),
441 snapshot
442 .report_json_path
443 .as_ref()
444 .map(|path| path.to_string_lossy().to_string()),
445 snapshot
446 .report_markdown_path
447 .as_ref()
448 .map(|path| path.to_string_lossy().to_string()),
449 snapshot
450 .input_snapshot_path
451 .as_ref()
452 .map(|path| path.to_string_lossy().to_string()),
453 snapshot.input_sha256.as_deref(),
454 timestamp_string(),
455 job_id,
456 ],
457 )?;
458 Ok(())
459 }
460
461 pub fn update_job_input_snapshot(
462 &self,
463 job_id: &str,
464 snapshot_path: &Path,
465 input_sha256: &str,
466 ) -> Result<()> {
467 let conn = self.conn.borrow();
468 conn.execute(
469 "UPDATE jobs
470 SET input_snapshot_path = ?1,
471 input_sha256 = ?2,
472 updated_at = ?3
473 WHERE id = ?4",
474 params![
475 snapshot_path.to_string_lossy(),
476 input_sha256,
477 timestamp_string(),
478 job_id
479 ],
480 )?;
481 Ok(())
482 }
483
484 pub fn load_job_config_snapshot(&self, job_id: &str) -> Result<Option<RunConfigSnapshot>> {
485 let conn = self.conn.borrow();
486 let Some(json) = conn
487 .query_row(
488 "SELECT config_json FROM jobs WHERE id = ?1",
489 params![job_id],
490 |row| row.get::<_, Option<String>>(0),
491 )
492 .optional()?
493 .flatten()
494 else {
495 return Ok(None);
496 };
497
498 serde_json::from_str(&json)
499 .map(Some)
500 .map_err(|e| StoreError::Serialization(e.to_string()))
501 }
502
503 pub fn update_job_event_path(&self, job_id: &str, path: &Path) -> Result<()> {
504 let conn = self.conn.borrow();
505 conn.execute(
506 "UPDATE jobs SET events_path = ?1, updated_at = ?2 WHERE id = ?3",
507 params![path.to_string_lossy(), timestamp_string(), job_id],
508 )?;
509 Ok(())
510 }
511
512 pub fn update_job_report_paths(
513 &self,
514 job_id: &str,
515 json_path: &Path,
516 markdown_path: &Path,
517 ) -> Result<()> {
518 let conn = self.conn.borrow();
519 conn.execute(
520 "UPDATE jobs
521 SET report_json_path = ?1, report_markdown_path = ?2, updated_at = ?3
522 WHERE id = ?4",
523 params![
524 json_path.to_string_lossy(),
525 markdown_path.to_string_lossy(),
526 timestamp_string(),
527 job_id
528 ],
529 )?;
530 Ok(())
531 }
532
533 pub fn update_job_output_path(&self, job_id: &str, path: &Path) -> Result<()> {
534 let conn = self.conn.borrow();
535 conn.execute(
536 "UPDATE jobs SET output_path = ?1, updated_at = ?2 WHERE id = ?3",
537 params![path.to_string_lossy(), timestamp_string(), job_id],
538 )?;
539 Ok(())
540 }
541
542 pub fn insert_segments(
543 &self,
544 job_id: &str,
545 segments: &[Segment],
546 prompt_version: &str,
547 provider: &str,
548 model: &str,
549 cache_namespace: &str,
550 ) -> Result<()> {
551 let mut conn = self.conn.borrow_mut();
552 let tx = conn.transaction()?;
553 for segment in segments {
554 tx.execute(
555 "INSERT OR IGNORE INTO segments
556 (id, job_id, section_id, ordinal, source_hash, prompt_version, provider, model, status, attempts, cache_namespace)
557 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'queued', 0, ?9)",
558 params![
559 segment.id.0,
560 job_id,
561 segment.section_id.0,
562 segment.ordinal as i64,
563 segment.checksum,
564 prompt_version,
565 provider,
566 model,
567 cache_namespace,
568 ],
569 )?;
570 }
571 tx.commit()?;
572 Ok(())
573 }
574
575 pub fn save_translation(&self, request: SaveTranslation<'_>) -> Result<()> {
576 let now = timestamp_string();
577 let translated_hash = stable_hash(request.translated_text);
578 {
579 let conn = self.conn.borrow();
580 conn.execute(
581 "INSERT OR REPLACE INTO translations
582 (segment_id, job_id, translated_text, provider, model, prompt_version, created_at)
583 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
584 params![
585 request.segment_id,
586 request.job_id,
587 request.translated_text,
588 request.provider,
589 request.model,
590 request.prompt_version,
591 now
592 ],
593 )?;
594 replace_block_translations(&conn, request.job_id, request.segment_id, request.blocks)?;
595 conn.execute(
596 "UPDATE segments
597 SET status = 'succeeded',
598 attempts = attempts + 1,
599 tokens_input = ?1,
600 tokens_input_cached = ?2,
601 tokens_output = ?3,
602 tokens_estimated = ?4,
603 translated_hash = ?5,
604 error = NULL
605 WHERE job_id = ?6 AND id = ?7",
606 params![
607 request.input_tokens.map(|value| value as i64),
608 request.input_cached_tokens.map(|value| value as i64),
609 request.output_tokens.map(|value| value as i64),
610 if request.tokens_estimated {
611 1_i64
612 } else {
613 0_i64
614 },
615 translated_hash,
616 request.job_id,
617 request.segment_id,
618 ],
619 )?;
620 }
621 self.touch_job(request.job_id, "running")?;
622 Ok(())
623 }
624
625 pub fn save_needs_review(&self, request: SaveNeedsReview<'_>) -> Result<()> {
626 let now = timestamp_string();
627 let translated_hash = stable_hash(request.preserved_text);
628 {
629 let conn = self.conn.borrow();
630 conn.execute(
631 "INSERT OR REPLACE INTO translations
632 (segment_id, job_id, translated_text, provider, model, prompt_version, created_at)
633 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
634 params![
635 request.segment_id,
636 request.job_id,
637 request.preserved_text,
638 request.provider,
639 request.model,
640 request.prompt_version,
641 now
642 ],
643 )?;
644 replace_block_translations(&conn, request.job_id, request.segment_id, request.blocks)?;
645 conn.execute(
646 "UPDATE segments
647 SET status = 'needs_review',
648 attempts = attempts + 1,
649 tokens_input = ?1,
650 tokens_input_cached = ?2,
651 tokens_output = ?3,
652 tokens_estimated = ?4,
653 translated_hash = ?5,
654 error = ?6
655 WHERE job_id = ?7 AND id = ?8",
656 params![
657 request.input_tokens.map(|value| value as i64),
658 request.input_cached_tokens.map(|value| value as i64),
659 request.output_tokens.map(|value| value as i64),
660 if request.tokens_estimated {
661 1_i64
662 } else {
663 0_i64
664 },
665 translated_hash,
666 request.error,
667 request.job_id,
668 request.segment_id
669 ],
670 )?;
671 }
672 self.touch_job(request.job_id, "needs_review")?;
673 Ok(())
674 }
675
676 pub fn save_cached_translation(&self, request: SaveCachedTranslation<'_>) -> Result<()> {
677 let now = timestamp_string();
678 let translated_hash = stable_hash(request.translated_text);
679 {
680 let conn = self.conn.borrow();
681 conn.execute(
682 "INSERT OR REPLACE INTO translations
683 (segment_id, job_id, translated_text, provider, model, prompt_version, created_at)
684 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
685 params![
686 request.segment_id,
687 request.job_id,
688 request.translated_text,
689 request.provider,
690 request.model,
691 request.prompt_version,
692 now
693 ],
694 )?;
695 replace_block_translations(&conn, request.job_id, request.segment_id, request.blocks)?;
696 conn.execute(
697 "UPDATE segments
698 SET status = 'skipped_cached',
699 tokens_input = NULL,
700 tokens_input_cached = NULL,
701 tokens_output = NULL,
702 tokens_estimated = 0,
703 translated_hash = ?1,
704 error = NULL
705 WHERE job_id = ?2 AND id = ?3",
706 params![translated_hash, request.job_id, request.segment_id],
707 )?;
708 }
709 self.touch_job(request.job_id, "running")?;
710 Ok(())
711 }
712
713 pub fn mark_job_complete(&self, job_id: &str) -> Result<()> {
714 self.touch_job(job_id, "succeeded")
715 }
716
717 pub fn mark_job_running(&self, job_id: &str) -> Result<()> {
718 self.touch_job(job_id, "running")
719 }
720
721 pub fn mark_job_succeeded(&self, job_id: &str) -> Result<()> {
722 self.mark_job_complete(job_id)
723 }
724
725 pub fn mark_job_needs_review(&self, job_id: &str) -> Result<()> {
726 self.touch_job(job_id, "needs_review")
727 }
728
729 pub fn mark_job_interrupted(&self, job_id: &str) -> Result<()> {
730 self.touch_job(job_id, "interrupted")
731 }
732
733 pub fn mark_job_failed(&self, job_id: &str) -> Result<()> {
734 self.touch_job(job_id, "failed")
735 }
736
737 pub fn mark_segment_failed(&self, job_id: &str, segment_id: &str, error: &str) -> Result<()> {
738 {
739 let conn = self.conn.borrow();
740 conn.execute(
741 "UPDATE segments SET status = 'failed', attempts = attempts + 1, error = ?1 WHERE job_id = ?2 AND id = ?3",
742 params![error, job_id, segment_id],
743 )?;
744 }
745 self.touch_job(job_id, "failed")?;
746 Ok(())
747 }
748
749 pub fn mark_segment_failed_if_unfinished(
750 &self,
751 job_id: &str,
752 segment_id: &str,
753 error: &str,
754 ) -> Result<()> {
755 {
756 let conn = self.conn.borrow();
757 conn.execute(
758 "UPDATE segments
759 SET status = 'failed', attempts = attempts + 1, error = ?1
760 WHERE job_id = ?2
761 AND id = ?3
762 AND status NOT IN ('succeeded', 'skipped_cached', 'needs_review')",
763 params![error, job_id, segment_id],
764 )?;
765 }
766 self.touch_job(job_id, "failed")?;
767 Ok(())
768 }
769
770 pub fn mark_unfinished_segments_failed(
771 &self,
772 job_id: &str,
773 candidate_segment_ids: &[String],
774 error: &str,
775 ) -> Result<usize> {
776 const SQLITE_IN_CHUNK_SIZE: usize = 900;
777 let mut updated = 0;
778
779 for chunk in candidate_segment_ids.chunks(SQLITE_IN_CHUNK_SIZE) {
780 if chunk.is_empty() {
781 continue;
782 }
783
784 let placeholders = std::iter::repeat_n("?", chunk.len())
785 .collect::<Vec<_>>()
786 .join(", ");
787 let sql = format!(
788 "UPDATE segments
789 SET status = 'failed', attempts = attempts + 1, error = ?
790 WHERE job_id = ?
791 AND id IN ({placeholders})
792 AND status NOT IN ('succeeded', 'skipped_cached', 'needs_review')"
793 );
794
795 let conn = self.conn.borrow();
796 let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(chunk.len() + 2);
797 params.push(&error);
798 params.push(&job_id);
799 for id in chunk {
800 params.push(id);
801 }
802 updated += conn.execute(&sql, params.as_slice())?;
803 }
804
805 if updated > 0 {
806 self.touch_job(job_id, "failed")?;
807 }
808 Ok(updated)
809 }
810
811 pub fn get_job(&self, job_id: &str) -> Result<Option<JobRecord>> {
812 let conn = self.conn.borrow();
813 conn.query_row(
814 "SELECT id, input_path, input_snapshot_path, input_sha256, output_path, input_hash, source_lang, target_lang, provider, model, base_url, api_key_env, status,
815 events_path, report_json_path, report_markdown_path, book_id, series_id
816 FROM jobs WHERE id = ?1",
817 params![job_id],
818 |row| {
819 Ok(JobRecord {
820 id: row.get(0)?,
821 input_path: PathBuf::from(row.get::<_, String>(1)?),
822 input_snapshot_path: row.get::<_, Option<String>>(2)?.map(PathBuf::from),
823 input_sha256: row.get(3)?,
824 output_path: PathBuf::from(row.get::<_, String>(4)?),
825 input_hash: row.get(5)?,
826 source_lang: row.get(6)?,
827 target_lang: row.get(7)?,
828 provider: row.get(8)?,
829 model: row.get(9)?,
830 base_url: row.get(10)?,
831 api_key_env: row.get(11)?,
832 status: row.get(12)?,
833 events_path: row.get::<_, Option<String>>(13)?.map(PathBuf::from),
834 report_json_path: row.get::<_, Option<String>>(14)?.map(PathBuf::from),
835 report_markdown_path: row.get::<_, Option<String>>(15)?.map(PathBuf::from),
836 book_id: row.get(16)?,
837 series_id: row.get(17)?,
838 })
839 },
840 )
841 .optional()
842 .map_err(StoreError::from)
843 }
844
845 pub fn summary(&self, job_id: &str) -> Result<Option<JobSummary>> {
846 let Some(job) = self.get_job(job_id)? else {
847 return Ok(None);
848 };
849 let conn = self.conn.borrow();
850 let mut summary = JobSummary {
851 id: job.id,
852 status: job.status,
853 ..JobSummary::default()
854 };
855
856 let mut stmt = conn.prepare(
857 "SELECT status,
858 COUNT(*),
859 COALESCE(SUM(COALESCE(tokens_input, input_tokens)), 0),
860 COALESCE(SUM(tokens_input_cached), 0),
861 COALESCE(SUM(COALESCE(tokens_output, output_tokens)), 0)
862 FROM segments WHERE job_id = ?1 GROUP BY status",
863 )?;
864 let rows = stmt.query_map(params![job_id], |row| {
865 Ok((
866 row.get::<_, String>(0)?,
867 row.get::<_, i64>(1)?,
868 row.get::<_, i64>(2)?,
869 row.get::<_, i64>(3)?,
870 row.get::<_, i64>(4)?,
871 ))
872 })?;
873
874 for row in rows {
875 let (status, count, input_tokens, input_cached_tokens, output_tokens) = row?;
876 let count = count as usize;
877 summary.total_segments += count;
878 summary.input_tokens += input_tokens as u64;
879 summary.input_cached_tokens += input_cached_tokens as u64;
880 summary.output_tokens += output_tokens as u64;
881 match status.as_str() {
882 "succeeded" => summary.succeeded += count,
883 "failed" => summary.failed += count,
884 "needs_review" => summary.needs_review += count,
885 "retry_pending" => summary.retry_pending += count,
886 "skipped_cached" => summary.cached += count,
887 _ => {}
888 }
889 }
890
891 summary.retried = conn.query_row(
892 "SELECT COUNT(*) FROM segments WHERE job_id = ?1 AND attempts > 1",
893 params![job_id],
894 |row| row.get::<_, i64>(0),
895 )? as usize;
896
897 Ok(Some(summary))
898 }
899
900 pub fn retry_segments(&self, job_id: &str, scope: RetryScope) -> Result<usize> {
901 let where_status = match scope {
902 RetryScope::Failed => "status = 'failed'",
903 RetryScope::NeedsReview => "status = 'needs_review'",
904 RetryScope::All => "status IN ('failed', 'needs_review')",
905 };
906 let sql = format!(
907 "UPDATE segments SET status = 'retry_pending', error = NULL WHERE job_id = ?1 AND {where_status}"
908 );
909 let count = {
910 let conn = self.conn.borrow();
911 conn.execute(&sql, params![job_id])?
912 };
913 self.touch_job(job_id, "retry_pending")?;
914 Ok(count)
915 }
916
917 pub fn insert_segment_flags(&self, flags: &[NewSegmentFlag<'_>]) -> Result<usize> {
918 let mut conn = self.conn.borrow_mut();
919 let tx = conn.transaction()?;
920 let ingested_at = timestamp_string();
921 let mut inserted = 0usize;
922 for flag in flags {
923 inserted += tx.execute(
924 "INSERT INTO segment_flags
925 (job_id, segment_id, kind, note, suggested_source, suggested_target, ingested_at, consumed)
926 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
927 params![
928 flag.job_id,
929 flag.segment_id,
930 flag.kind,
931 flag.note,
932 flag.suggested_source,
933 flag.suggested_target,
934 ingested_at,
935 if flag.consumed { 1_i64 } else { 0_i64 },
936 ],
937 )?;
938 }
939 tx.commit()?;
940 Ok(inserted)
941 }
942
943 pub fn mark_segments_needs_review(
944 &self,
945 job_id: &str,
946 segment_ids: &[String],
947 reason: &str,
948 ) -> Result<usize> {
949 const SQLITE_IN_CHUNK_SIZE: usize = 900;
950 let mut updated = 0usize;
951 for chunk in segment_ids.chunks(SQLITE_IN_CHUNK_SIZE) {
952 if chunk.is_empty() {
953 continue;
954 }
955 let placeholders = std::iter::repeat_n("?", chunk.len())
956 .collect::<Vec<_>>()
957 .join(", ");
958 let sql = format!(
959 "UPDATE segments
960 SET status = 'needs_review',
961 error = ?
962 WHERE job_id = ?
963 AND id IN ({placeholders})"
964 );
965 let conn = self.conn.borrow();
966 let mut params: Vec<&dyn rusqlite::types::ToSql> = Vec::with_capacity(chunk.len() + 2);
967 params.push(&reason);
968 params.push(&job_id);
969 for id in chunk {
970 params.push(id);
971 }
972 updated += conn.execute(&sql, params.as_slice())?;
973 }
974 if updated > 0 {
975 self.touch_job(job_id, "needs_review")?;
976 }
977 Ok(updated)
978 }
979
980 pub fn segment_flag_count(&self, job_id: &str) -> Result<usize> {
981 let conn = self.conn.borrow();
982 let count = conn.query_row(
983 "SELECT COUNT(*) FROM segment_flags WHERE job_id = ?1",
984 params![job_id],
985 |row| row.get::<_, i64>(0),
986 )?;
987 Ok(count as usize)
988 }
989
990 pub fn upsert_glossary_terms(&self, terms: &[GlossaryTerm]) -> Result<usize> {
991 let mut conn = self.conn.borrow_mut();
992 let tx = conn.transaction()?;
993 let now = timestamp_string();
994 let mut changed = 0usize;
995 for term in terms {
996 let existing_id = tx
997 .query_row(
998 "SELECT id FROM glossary_terms
999 WHERE scope_kind = ?1
1000 AND ((?2 IS NULL AND scope_id IS NULL) OR scope_id = ?2)
1001 AND source_text = ?3
1002 AND source_language = ?4
1003 AND target_language = ?5",
1004 params![
1005 term.scope_kind.as_str(),
1006 term.scope_id.as_deref(),
1007 term.source_text,
1008 term.source_language,
1009 term.target_language,
1010 ],
1011 |row| row.get::<_, i64>(0),
1012 )
1013 .optional()?;
1014 if let Some(id) = existing_id {
1015 changed += tx.execute(
1016 "UPDATE glossary_terms
1017 SET target_text = ?1,
1018 category = ?2,
1019 notes = ?3,
1020 case_sensitive = ?4,
1021 always_active = ?5,
1022 status = ?6,
1023 source_count = ?7,
1024 updated_at = ?8
1025 WHERE id = ?9",
1026 params![
1027 term.target_text,
1028 term.category.as_str(),
1029 term.notes.as_deref(),
1030 if term.case_sensitive { 1_i64 } else { 0_i64 },
1031 if term.always_active { 1_i64 } else { 0_i64 },
1032 term.status.as_str(),
1033 term.source_count as i64,
1034 now,
1035 id,
1036 ],
1037 )?;
1038 } else {
1039 changed += tx.execute(
1040 "INSERT INTO glossary_terms
1041 (scope_kind, scope_id, source_text, target_text, category, notes,
1042 case_sensitive, always_active, status, source_language, target_language,
1043 source_count, created_at, updated_at)
1044 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?13)",
1045 params![
1046 term.scope_kind.as_str(),
1047 term.scope_id.as_deref(),
1048 term.source_text,
1049 term.target_text,
1050 term.category.as_str(),
1051 term.notes.as_deref(),
1052 if term.case_sensitive { 1_i64 } else { 0_i64 },
1053 if term.always_active { 1_i64 } else { 0_i64 },
1054 term.status.as_str(),
1055 term.source_language,
1056 term.target_language,
1057 term.source_count as i64,
1058 now,
1059 ],
1060 )?;
1061 }
1062 }
1063 tx.commit()?;
1064 Ok(changed)
1065 }
1066
1067 pub fn add_glossary_term(&self, term: &GlossaryTerm) -> Result<i64> {
1068 self.upsert_glossary_terms(std::slice::from_ref(term))?;
1069 let conn = self.conn.borrow();
1070 let id = conn.query_row(
1071 "SELECT id FROM glossary_terms
1072 WHERE scope_kind = ?1
1073 AND ((?2 IS NULL AND scope_id IS NULL) OR scope_id = ?2)
1074 AND source_text = ?3
1075 AND source_language = ?4
1076 AND target_language = ?5",
1077 params![
1078 term.scope_kind.as_str(),
1079 term.scope_id.as_deref(),
1080 term.source_text,
1081 term.source_language,
1082 term.target_language,
1083 ],
1084 |row| row.get::<_, i64>(0),
1085 )?;
1086 Ok(id)
1087 }
1088
1089 pub fn upsert_glossary_candidates(
1090 &self,
1091 book_id: &str,
1092 source_language: &str,
1093 target_language: &str,
1094 candidates: &[NewGlossaryCandidate<'_>],
1095 ) -> Result<GlossaryCandidateUpsertResult> {
1096 let mut conn = self.conn.borrow_mut();
1097 let tx = conn.transaction()?;
1098 let now = timestamp_string();
1099 let mut result = GlossaryCandidateUpsertResult::default();
1100
1101 for candidate in candidates {
1102 let existing = tx
1103 .query_row(
1104 "SELECT id, status FROM glossary_terms
1105 WHERE scope_kind = 'book'
1106 AND scope_id = ?1
1107 AND source_text = ?2
1108 AND source_language = ?3
1109 AND target_language = ?4",
1110 params![
1111 book_id,
1112 candidate.source_text,
1113 source_language,
1114 target_language
1115 ],
1116 |row| Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)),
1117 )
1118 .optional()?;
1119
1120 match existing {
1121 Some((id, status)) if status == GlossaryStatus::AutoCandidate.as_str() => {
1122 tx.execute(
1123 "UPDATE glossary_terms
1124 SET category = ?1,
1125 source_count = ?2,
1126 updated_at = ?3
1127 WHERE id = ?4",
1128 params![
1129 candidate.category.as_str(),
1130 candidate.source_count as i64,
1131 now,
1132 id,
1133 ],
1134 )?;
1135 result.updated += 1;
1136 }
1137 Some(_) => {
1138 result.skipped += 1;
1139 }
1140 None => {
1141 tx.execute(
1142 "INSERT INTO glossary_terms
1143 (scope_kind, scope_id, source_text, target_text, category, notes,
1144 case_sensitive, always_active, status, source_language, target_language,
1145 source_count, created_at, updated_at)
1146 VALUES ('book', ?1, ?2, NULL, ?3, NULL, 1, 0, 'auto_candidate',
1147 ?4, ?5, ?6, ?7, ?7)",
1148 params![
1149 book_id,
1150 candidate.source_text,
1151 candidate.category.as_str(),
1152 source_language,
1153 target_language,
1154 candidate.source_count as i64,
1155 now,
1156 ],
1157 )?;
1158 result.inserted += 1;
1159 }
1160 }
1161 }
1162
1163 tx.commit()?;
1164 Ok(result)
1165 }
1166
1167 pub fn list_glossary_candidate_language_pairs(
1168 &self,
1169 book_id: &str,
1170 ) -> Result<Vec<(String, String)>> {
1171 let conn = self.conn.borrow();
1172 let mut stmt = conn.prepare(
1173 "SELECT DISTINCT source_language, target_language
1174 FROM glossary_terms
1175 WHERE scope_kind = 'book'
1176 AND scope_id = ?1
1177 AND status = 'auto_candidate'
1178 ORDER BY source_language, target_language",
1179 )?;
1180 let rows = stmt.query_map(params![book_id], |row| {
1181 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1182 })?;
1183 rows.collect::<std::result::Result<Vec<_>, _>>()
1184 .map_err(StoreError::from)
1185 }
1186
1187 pub fn list_glossary_candidates(
1188 &self,
1189 book_id: &str,
1190 source_language: &str,
1191 target_language: &str,
1192 ) -> Result<Vec<StoredGlossaryCandidate>> {
1193 let conn = self.conn.borrow();
1194 let mut stmt = conn.prepare(
1195 "SELECT id, source_text, target_text, category, notes, case_sensitive,
1196 always_active, status, source_language, target_language, source_count
1197 FROM glossary_terms
1198 WHERE scope_kind = 'book'
1199 AND scope_id = ?1
1200 AND source_language = ?2
1201 AND target_language = ?3
1202 AND status = 'auto_candidate'
1203 ORDER BY source_count DESC, source_text",
1204 )?;
1205 let rows = stmt.query_map(
1206 params![book_id, source_language, target_language],
1207 glossary_candidate_from_row,
1208 )?;
1209 rows.collect::<std::result::Result<Vec<_>, _>>()
1210 .map_err(StoreError::from)
1211 }
1212
1213 pub fn accept_glossary_candidate(&self, id: i64, target_text: Option<&str>) -> Result<bool> {
1214 let conn = self.conn.borrow();
1215 let Some((source_text, existing_target)) = conn
1216 .query_row(
1217 "SELECT source_text, target_text
1218 FROM glossary_terms
1219 WHERE id = ?1 AND status = 'auto_candidate'",
1220 params![id],
1221 |row| Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?)),
1222 )
1223 .optional()?
1224 else {
1225 return Ok(false);
1226 };
1227 let target = target_text
1228 .filter(|value| !value.trim().is_empty())
1229 .map(ToOwned::to_owned)
1230 .or_else(|| existing_target.filter(|value| !value.trim().is_empty()))
1231 .unwrap_or(source_text);
1232 let updated = conn.execute(
1233 "UPDATE glossary_terms
1234 SET target_text = ?1,
1235 status = 'accepted',
1236 updated_at = ?2
1237 WHERE id = ?3 AND status = 'auto_candidate'",
1238 params![target, timestamp_string(), id],
1239 )?;
1240 Ok(updated > 0)
1241 }
1242
1243 pub fn reject_glossary_candidate(&self, id: i64) -> Result<bool> {
1244 let conn = self.conn.borrow();
1245 let updated = conn.execute(
1246 "UPDATE glossary_terms
1247 SET status = 'rejected',
1248 updated_at = ?1
1249 WHERE id = ?2 AND status = 'auto_candidate'",
1250 params![timestamp_string(), id],
1251 )?;
1252 Ok(updated > 0)
1253 }
1254
1255 pub fn list_glossary_terms(&self, filter: GlossaryFilter<'_>) -> Result<Vec<GlossaryTerm>> {
1256 let conn = self.conn.borrow();
1257 let mut sql = String::from(
1258 "SELECT id, scope_kind, scope_id, source_text, COALESCE(target_text, ''), category, notes,
1259 case_sensitive, always_active, status, source_language, target_language,
1260 source_count
1261 FROM glossary_terms
1262 WHERE 1 = 1",
1263 );
1264 let mut values: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
1265 if let Some(scope_kind) = filter.scope_kind {
1266 sql.push_str(" AND scope_kind = ?");
1267 values.push(Box::new(scope_kind.as_str().to_string()));
1268 }
1269 if let Some(scope_id) = filter.scope_id {
1270 sql.push_str(" AND scope_id = ?");
1271 values.push(Box::new(scope_id.to_string()));
1272 }
1273 if let Some(source_language) = filter.source_language {
1274 sql.push_str(" AND source_language = ?");
1275 values.push(Box::new(source_language.to_string()));
1276 }
1277 if let Some(target_language) = filter.target_language {
1278 sql.push_str(" AND target_language = ?");
1279 values.push(Box::new(target_language.to_string()));
1280 }
1281 if filter.active_only {
1282 sql.push_str(" AND status IN ('user_seeded', 'accepted')");
1283 }
1284 sql.push_str(
1285 " ORDER BY source_language, target_language, scope_kind, scope_id, source_text",
1286 );
1287 let param_refs = values
1288 .iter()
1289 .map(|value| value.as_ref())
1290 .collect::<Vec<&dyn rusqlite::types::ToSql>>();
1291 let mut stmt = conn.prepare(&sql)?;
1292 let rows = stmt.query_map(param_refs.as_slice(), glossary_term_from_row)?;
1293 rows.collect::<std::result::Result<Vec<_>, _>>()
1294 .map_err(StoreError::from)
1295 }
1296
1297 pub fn load_active_glossary_terms(
1298 &self,
1299 source_language: &str,
1300 target_language: &str,
1301 book_id: Option<&str>,
1302 series_id: Option<&str>,
1303 ) -> Result<Vec<GlossaryTerm>> {
1304 let conn = self.conn.borrow();
1305 let mut stmt = conn.prepare(
1306 "SELECT id, scope_kind, scope_id, source_text, COALESCE(target_text, ''), category, notes,
1307 case_sensitive, always_active, status, source_language, target_language,
1308 source_count
1309 FROM glossary_terms
1310 WHERE source_language = ?1
1311 AND target_language = ?2
1312 AND status IN ('user_seeded', 'accepted')
1313 AND (
1314 scope_kind = 'global'
1315 OR (scope_kind = 'series' AND scope_id = ?3)
1316 OR (scope_kind = 'book' AND scope_id = ?4)
1317 )
1318 ORDER BY scope_kind, scope_id, source_text",
1319 )?;
1320 let rows = stmt.query_map(
1321 params![source_language, target_language, series_id, book_id],
1322 glossary_term_from_row,
1323 )?;
1324 rows.collect::<std::result::Result<Vec<_>, _>>()
1325 .map_err(StoreError::from)
1326 }
1327
1328 pub fn load_active_glossary_terms_for_target(
1329 &self,
1330 target_language: &str,
1331 book_id: Option<&str>,
1332 series_id: Option<&str>,
1333 ) -> Result<Vec<GlossaryTerm>> {
1334 let conn = self.conn.borrow();
1335 let mut stmt = conn.prepare(
1336 "SELECT id, scope_kind, scope_id, source_text, COALESCE(target_text, ''), category, notes,
1337 case_sensitive, always_active, status, source_language, target_language,
1338 source_count
1339 FROM glossary_terms
1340 WHERE target_language = ?1
1341 AND status IN ('user_seeded', 'accepted')
1342 AND (
1343 scope_kind = 'global'
1344 OR (scope_kind = 'series' AND scope_id = ?2)
1345 OR (scope_kind = 'book' AND scope_id = ?3)
1346 )
1347 ORDER BY scope_kind, scope_id, source_language, source_text",
1348 )?;
1349 let rows = stmt.query_map(
1350 params![target_language, series_id, book_id],
1351 glossary_term_from_row,
1352 )?;
1353 rows.collect::<std::result::Result<Vec<_>, _>>()
1354 .map_err(StoreError::from)
1355 }
1356
1357 pub fn remove_glossary_term(&self, id: i64) -> Result<usize> {
1358 let conn = self.conn.borrow();
1359 conn.execute("DELETE FROM glossary_terms WHERE id = ?1", params![id])
1360 .map_err(StoreError::from)
1361 }
1362
1363 pub fn clear_glossary_scope(
1364 &self,
1365 scope_kind: GlossaryScopeKind,
1366 scope_id: Option<&str>,
1367 ) -> Result<usize> {
1368 let conn = self.conn.borrow();
1369 let count = if scope_kind == GlossaryScopeKind::Global {
1370 conn.execute("DELETE FROM glossary_terms WHERE scope_kind = 'global'", [])?
1371 } else {
1372 conn.execute(
1373 "DELETE FROM glossary_terms WHERE scope_kind = ?1 AND scope_id = ?2",
1374 params![scope_kind.as_str(), scope_id],
1375 )?
1376 };
1377 Ok(count)
1378 }
1379
1380 pub fn upsert_style_sheet(&self, record: &NewStyleSheet<'_>) -> Result<i64> {
1383 let conn = self.conn.borrow();
1384 let now = timestamp_string();
1385 conn.execute(
1386 "INSERT INTO style_sheets
1387 (scope_kind, scope_id, target_language, content_toml, fingerprint, created_at, updated_at)
1388 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?6)
1389 ON CONFLICT(scope_kind, scope_id, target_language) DO UPDATE SET
1390 content_toml = excluded.content_toml,
1391 fingerprint = excluded.fingerprint,
1392 updated_at = excluded.updated_at",
1393 params![
1394 record.scope_kind.as_str(),
1395 record.scope_id,
1396 record.target_language,
1397 record.content_toml,
1398 record.fingerprint,
1399 now,
1400 ],
1401 )?;
1402 let id: i64 = conn.query_row(
1403 "SELECT id FROM style_sheets
1404 WHERE scope_kind = ?1 AND IFNULL(scope_id, '') = IFNULL(?2, '')
1405 AND target_language = ?3",
1406 params![
1407 record.scope_kind.as_str(),
1408 record.scope_id,
1409 record.target_language
1410 ],
1411 |row| row.get(0),
1412 )?;
1413 Ok(id)
1414 }
1415
1416 pub fn load_active_style_sheets(
1420 &self,
1421 target_language: &str,
1422 book_id: Option<&str>,
1423 series_id: Option<&str>,
1424 ) -> Result<Vec<StoredStyleSheet>> {
1425 let conn = self.conn.borrow();
1426 let mut stmt = conn.prepare(
1427 "SELECT id, scope_kind, scope_id, target_language, content_toml, fingerprint
1428 FROM style_sheets
1429 WHERE target_language = ?1
1430 AND ( (scope_kind = 'global')
1431 OR (scope_kind = 'series' AND scope_id = ?2)
1432 OR (scope_kind = 'book' AND scope_id = ?3) )",
1433 )?;
1434 let rows = stmt.query_map(params![target_language, series_id, book_id], |row| {
1435 Ok(StoredStyleSheet {
1436 id: row.get(0)?,
1437 scope_kind: parse_row_enum(&row.get::<_, String>(1)?, 1)?,
1438 scope_id: row.get(2)?,
1439 target_language: row.get(3)?,
1440 content_toml: row.get(4)?,
1441 fingerprint: row.get(5)?,
1442 })
1443 })?;
1444 rows.collect::<std::result::Result<Vec<_>, _>>()
1445 .map_err(StoreError::from)
1446 }
1447
1448 pub fn list_style_sheets(
1449 &self,
1450 target_language: Option<&str>,
1451 scope_kind: Option<GlossaryScopeKind>,
1452 scope_id: Option<&str>,
1453 ) -> Result<Vec<StoredStyleSheet>> {
1454 let conn = self.conn.borrow();
1455 let mut stmt = conn.prepare(
1456 "SELECT id, scope_kind, scope_id, target_language, content_toml, fingerprint
1457 FROM style_sheets
1458 WHERE (?1 IS NULL OR target_language = ?1)
1459 AND (?2 IS NULL OR scope_kind = ?2)
1460 AND (?3 IS NULL OR scope_id = ?3)
1461 ORDER BY scope_kind, scope_id",
1462 )?;
1463 let scope_text = scope_kind.map(|s| s.as_str().to_string());
1464 let rows = stmt.query_map(params![target_language, scope_text, scope_id], |row| {
1465 Ok(StoredStyleSheet {
1466 id: row.get(0)?,
1467 scope_kind: parse_row_enum(&row.get::<_, String>(1)?, 1)?,
1468 scope_id: row.get(2)?,
1469 target_language: row.get(3)?,
1470 content_toml: row.get(4)?,
1471 fingerprint: row.get(5)?,
1472 })
1473 })?;
1474 rows.collect::<std::result::Result<Vec<_>, _>>()
1475 .map_err(StoreError::from)
1476 }
1477
1478 pub fn clear_style_scope(
1479 &self,
1480 scope_kind: GlossaryScopeKind,
1481 scope_id: Option<&str>,
1482 ) -> Result<usize> {
1483 let conn = self.conn.borrow();
1484 let count = if scope_kind == GlossaryScopeKind::Global {
1485 conn.execute("DELETE FROM style_sheets WHERE scope_kind = 'global'", [])?
1486 } else {
1487 conn.execute(
1488 "DELETE FROM style_sheets WHERE scope_kind = ?1 AND scope_id = ?2",
1489 params![scope_kind.as_str(), scope_id],
1490 )?
1491 };
1492 Ok(count)
1493 }
1494
1495 pub fn upsert_entities(&self, entities: &[NewEntity<'_>]) -> Result<usize> {
1496 if entities.is_empty() {
1497 return Ok(0);
1498 }
1499 let conn = self.conn.borrow();
1500 let now = timestamp_string();
1501 let mut changed = 0usize;
1502 for entity in entities {
1503 conn.execute(
1504 "INSERT INTO entities
1505 (scope_kind, scope_id, source_name, target_name, gender_target,
1506 role, notes, source_language, target_language, created_at, updated_at)
1507 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?10)
1508 ON CONFLICT(scope_kind, scope_id, source_name, source_language, target_language)
1509 DO UPDATE SET
1510 target_name = excluded.target_name,
1511 gender_target = excluded.gender_target,
1512 role = excluded.role,
1513 notes = excluded.notes,
1514 updated_at = excluded.updated_at",
1515 params![
1516 entity.scope_kind.as_str(),
1517 entity.scope_id,
1518 entity.source_name,
1519 entity.target_name,
1520 entity.gender_target.map(|g| g.as_short()),
1521 entity.role,
1522 entity.notes,
1523 entity.source_language,
1524 entity.target_language,
1525 now,
1526 ],
1527 )?;
1528 changed += 1;
1529 }
1530 Ok(changed)
1531 }
1532
1533 pub fn load_active_entities(
1537 &self,
1538 source_language: &str,
1539 target_language: &str,
1540 book_id: Option<&str>,
1541 series_id: Option<&str>,
1542 ) -> Result<Vec<StoredEntity>> {
1543 let conn = self.conn.borrow();
1544 let mut stmt = conn.prepare(
1545 "SELECT id, scope_kind, scope_id, source_name, target_name, gender_target,
1546 role, notes, source_language, target_language
1547 FROM entities
1548 WHERE source_language = ?1
1549 AND target_language = ?2
1550 AND ( (scope_kind = 'global')
1551 OR (scope_kind = 'series' AND scope_id = ?3)
1552 OR (scope_kind = 'book' AND scope_id = ?4) )",
1553 )?;
1554 let rows = stmt.query_map(
1555 params![source_language, target_language, series_id, book_id],
1556 |row| {
1557 let gender: Option<String> = row.get(5)?;
1558 Ok(StoredEntity {
1559 id: row.get(0)?,
1560 scope_kind: parse_row_enum(&row.get::<_, String>(1)?, 1)?,
1561 scope_id: row.get(2)?,
1562 source_name: row.get(3)?,
1563 target_name: row.get(4)?,
1564 gender_target: gender.and_then(|g| parse_gender_short(&g)),
1565 role: row.get(6)?,
1566 notes: row.get(7)?,
1567 source_language: row.get(8)?,
1568 target_language: row.get(9)?,
1569 })
1570 },
1571 )?;
1572 rows.collect::<std::result::Result<Vec<_>, _>>()
1573 .map_err(StoreError::from)
1574 }
1575
1576 pub fn list_entities(
1577 &self,
1578 source_language: Option<&str>,
1579 target_language: Option<&str>,
1580 scope_kind: Option<GlossaryScopeKind>,
1581 scope_id: Option<&str>,
1582 ) -> Result<Vec<StoredEntity>> {
1583 let conn = self.conn.borrow();
1584 let mut stmt = conn.prepare(
1585 "SELECT id, scope_kind, scope_id, source_name, target_name, gender_target,
1586 role, notes, source_language, target_language
1587 FROM entities
1588 WHERE (?1 IS NULL OR source_language = ?1)
1589 AND (?2 IS NULL OR target_language = ?2)
1590 AND (?3 IS NULL OR scope_kind = ?3)
1591 AND (?4 IS NULL OR scope_id = ?4)
1592 ORDER BY scope_kind, scope_id, source_name",
1593 )?;
1594 let scope_text = scope_kind.map(|s| s.as_str().to_string());
1595 let rows = stmt.query_map(
1596 params![source_language, target_language, scope_text, scope_id],
1597 |row| {
1598 let gender: Option<String> = row.get(5)?;
1599 Ok(StoredEntity {
1600 id: row.get(0)?,
1601 scope_kind: parse_row_enum(&row.get::<_, String>(1)?, 1)?,
1602 scope_id: row.get(2)?,
1603 source_name: row.get(3)?,
1604 target_name: row.get(4)?,
1605 gender_target: gender.and_then(|g| parse_gender_short(&g)),
1606 role: row.get(6)?,
1607 notes: row.get(7)?,
1608 source_language: row.get(8)?,
1609 target_language: row.get(9)?,
1610 })
1611 },
1612 )?;
1613 rows.collect::<std::result::Result<Vec<_>, _>>()
1614 .map_err(StoreError::from)
1615 }
1616
1617 pub fn clear_entities_scope(
1618 &self,
1619 scope_kind: GlossaryScopeKind,
1620 scope_id: Option<&str>,
1621 ) -> Result<usize> {
1622 let conn = self.conn.borrow();
1623 let count = if scope_kind == GlossaryScopeKind::Global {
1624 conn.execute("DELETE FROM entities WHERE scope_kind = 'global'", [])?
1625 } else {
1626 conn.execute(
1627 "DELETE FROM entities WHERE scope_kind = ?1 AND scope_id = ?2",
1628 params![scope_kind.as_str(), scope_id],
1629 )?
1630 };
1631 Ok(count)
1632 }
1633
1634 pub fn pending_segment_ids(&self, job_id: &str) -> Result<Vec<String>> {
1635 let conn = self.conn.borrow();
1636 let mut stmt = conn.prepare(
1637 "SELECT id FROM segments
1638 WHERE job_id = ?1 AND status IN ('queued', 'retry_pending')
1639 ORDER BY ordinal",
1640 )?;
1641 let rows = stmt.query_map(params![job_id], |row| row.get::<_, String>(0))?;
1642 rows.collect::<std::result::Result<Vec<_>, _>>()
1643 .map_err(StoreError::from)
1644 }
1645
1646 pub fn segment_records(&self, job_id: &str) -> Result<Vec<SegmentRecord>> {
1647 let conn = self.conn.borrow();
1648 let mut stmt = conn.prepare(
1649 "SELECT id,
1650 status,
1651 attempts,
1652 error,
1653 COALESCE(tokens_input, input_tokens),
1654 tokens_input_cached,
1655 COALESCE(tokens_output, output_tokens),
1656 tokens_estimated
1657 FROM segments WHERE job_id = ?1 ORDER BY ordinal",
1658 )?;
1659 let rows = stmt.query_map(params![job_id], |row| {
1660 Ok(SegmentRecord {
1661 id: row.get(0)?,
1662 status: row.get(1)?,
1663 attempts: row.get::<_, i64>(2)? as usize,
1664 error: row.get(3)?,
1665 input_tokens: row.get::<_, Option<i64>>(4)?.map(|value| value as u64),
1666 input_cached_tokens: row.get::<_, Option<i64>>(5)?.map(|value| value as u64),
1667 output_tokens: row.get::<_, Option<i64>>(6)?.map(|value| value as u64),
1668 tokens_estimated: row.get::<_, i64>(7)? != 0,
1669 })
1670 })?;
1671 rows.collect::<std::result::Result<Vec<_>, _>>()
1672 .map_err(StoreError::from)
1673 }
1674
1675 pub fn load_block_translations(&self, job_id: &str) -> Result<Vec<StoredBlockTranslation>> {
1676 let conn = self.conn.borrow();
1677 let mut stmt = conn.prepare(
1678 "SELECT segment_id, block_id, translated_text
1679 FROM translation_blocks WHERE job_id = ?1 ORDER BY segment_id, block_id",
1680 )?;
1681 let rows = stmt.query_map(params![job_id], |row| {
1682 Ok(StoredBlockTranslation {
1683 segment_id: row.get(0)?,
1684 block_id: row.get(1)?,
1685 text: row.get(2)?,
1686 })
1687 })?;
1688 rows.collect::<std::result::Result<Vec<_>, _>>()
1689 .map_err(StoreError::from)
1690 }
1691
1692 pub fn load_terminal_segment_translations(
1693 &self,
1694 job_id: &str,
1695 ) -> Result<Vec<StoredSegmentTranslation>> {
1696 let conn = self.conn.borrow();
1697 let mut stmt = conn.prepare(
1698 "SELECT s.id, s.ordinal, s.status, s.error, t.translated_text
1699 FROM segments s
1700 JOIN translations t ON t.job_id = s.job_id AND t.segment_id = s.id
1701 WHERE s.job_id = ?1 AND s.status IN ('succeeded', 'skipped_cached', 'needs_review')
1702 ORDER BY s.ordinal",
1703 )?;
1704 let rows = stmt.query_map(params![job_id], |row| {
1705 Ok((
1706 row.get::<_, String>(0)?,
1707 row.get::<_, i64>(1)?,
1708 row.get::<_, String>(2)?,
1709 row.get::<_, Option<String>>(3)?,
1710 row.get::<_, String>(4)?,
1711 ))
1712 })?;
1713
1714 let mut records = Vec::new();
1715 for row in rows {
1716 let (segment_id, ordinal, status, error, translated_text) = row?;
1717 let mut block_stmt = conn.prepare(
1718 "SELECT block_id, translated_text
1719 FROM translation_blocks
1720 WHERE job_id = ?1 AND segment_id = ?2
1721 ORDER BY block_id",
1722 )?;
1723 let blocks = block_stmt
1724 .query_map(params![job_id, segment_id.as_str()], |row| {
1725 Ok(BlockTranslation {
1726 block_id: BlockId(row.get::<_, String>(0)?),
1727 text: row.get(1)?,
1728 })
1729 })?
1730 .collect::<std::result::Result<Vec<_>, _>>()?;
1731 records.push(StoredSegmentTranslation {
1732 segment_id,
1733 ordinal: ordinal as usize,
1734 status,
1735 error,
1736 translated_text,
1737 blocks,
1738 });
1739 }
1740
1741 Ok(records)
1742 }
1743
1744 pub fn resumable_segment_ids(&self, job_id: &str) -> Result<Vec<String>> {
1745 let conn = self.conn.borrow();
1746 let mut stmt = conn.prepare(
1747 "SELECT id FROM segments
1748 WHERE job_id = ?1 AND status IN ('queued', 'retry_pending', 'failed')
1749 ORDER BY ordinal",
1750 )?;
1751 let rows = stmt.query_map(params![job_id], |row| row.get::<_, String>(0))?;
1752 rows.collect::<std::result::Result<Vec<_>, _>>()
1753 .map_err(StoreError::from)
1754 }
1755
1756 #[allow(clippy::too_many_arguments)]
1757 pub fn find_cached_translation(
1758 &self,
1759 segment: &Segment,
1760 prompt_version: &str,
1761 provider: &str,
1762 model: &str,
1763 source_lang: Option<&str>,
1764 target_lang: &str,
1765 cache_namespace: &str,
1766 ) -> Result<Option<CachedTranslation>> {
1767 let conn = self.conn.borrow();
1768 let cached = conn
1769 .query_row(
1770 "SELECT t.job_id, t.segment_id, t.translated_text
1771 FROM translations t
1772 JOIN segments s ON s.job_id = t.job_id AND s.id = t.segment_id
1773 JOIN jobs j ON j.id = t.job_id
1774 WHERE s.source_hash = ?1
1775 AND s.prompt_version = ?2
1776 AND s.provider = ?3
1777 AND s.model = ?4
1778 AND ((?5 IS NULL AND j.source_lang IS NULL) OR j.source_lang = ?5)
1779 AND j.target_lang = ?6
1780 AND s.cache_namespace = ?7
1781 AND s.status IN ('succeeded', 'skipped_cached')
1782 ORDER BY CASE s.status WHEN 'succeeded' THEN 0 ELSE 1 END,
1783 CAST(t.created_at AS INTEGER) DESC,
1784 t.rowid DESC
1785 LIMIT 1",
1786 params![
1787 segment.checksum,
1788 prompt_version,
1789 provider,
1790 model,
1791 source_lang,
1792 target_lang,
1793 cache_namespace,
1794 ],
1795 |row| {
1796 Ok((
1797 row.get::<_, String>(0)?,
1798 row.get::<_, String>(1)?,
1799 row.get::<_, String>(2)?,
1800 ))
1801 },
1802 )
1803 .optional()?;
1804
1805 let Some((job_id, segment_id, translated_text)) = cached else {
1806 return Ok(None);
1807 };
1808
1809 let mut stmt = conn.prepare(
1810 "SELECT block_id, translated_text
1811 FROM translation_blocks
1812 WHERE job_id = ?1 AND segment_id = ?2
1813 ORDER BY block_id",
1814 )?;
1815 let rows = stmt.query_map(params![job_id, segment_id], |row| {
1816 Ok(BlockTranslation {
1817 block_id: BlockId(row.get::<_, String>(0)?),
1818 text: row.get(1)?,
1819 })
1820 })?;
1821 let blocks = rows.collect::<std::result::Result<Vec<_>, _>>()?;
1822
1823 let mut by_id = blocks
1824 .into_iter()
1825 .map(|block| (block.block_id.0.clone(), block))
1826 .collect::<HashMap<_, _>>();
1827
1828 let mut ordered = Vec::with_capacity(segment.block_ids.len());
1829 for id in &segment.block_ids {
1830 let Some(block) = by_id.remove(&id.0) else {
1831 return Ok(None);
1832 };
1833 ordered.push(block);
1834 }
1835 if !by_id.is_empty() {
1836 return Ok(None);
1837 }
1838
1839 Ok(Some(CachedTranslation {
1840 translated_text,
1841 blocks: ordered,
1842 }))
1843 }
1844
1845 pub fn find_cached_translations_batch(
1846 &self,
1847 segments: &[Segment],
1848 request: CacheLookupRequest<'_>,
1849 ) -> Result<HashMap<String, CachedTranslation>> {
1850 let mut results = HashMap::new();
1851 if segments.is_empty() {
1852 return Ok(results);
1853 }
1854
1855 const SQLITE_IN_CHUNK_SIZE: usize = 900;
1856
1857 for chunk in segments.chunks(SQLITE_IN_CHUNK_SIZE) {
1858 let hashes: Vec<&str> = chunk.iter().map(|s| s.checksum.as_str()).collect();
1859 let placeholders: Vec<String> =
1860 (0..hashes.len()).map(|i| format!("?{}", i + 1)).collect();
1861 let placeholders_sql = placeholders.join(", ");
1862
1863 let sql = format!(
1864 "SELECT t.job_id, t.segment_id, t.translated_text, s.source_hash
1865 FROM translations t
1866 JOIN segments s ON s.job_id = t.job_id AND s.id = t.segment_id
1867 JOIN jobs j ON j.id = t.job_id
1868 WHERE s.source_hash IN ({placeholders_sql})
1869 AND s.prompt_version = ?{}
1870 AND s.provider = ?{}
1871 AND s.model = ?{}
1872 AND ((?{} IS NULL AND j.source_lang IS NULL) OR j.source_lang = ?{})
1873 AND j.target_lang = ?{}
1874 AND s.cache_namespace = ?{}
1875 AND s.status IN ('succeeded', 'skipped_cached')
1876 ORDER BY CASE s.status WHEN 'succeeded' THEN 0 ELSE 1 END,
1877 CAST(t.created_at AS INTEGER) DESC,
1878 t.rowid DESC",
1879 hashes.len() + 1,
1880 hashes.len() + 2,
1881 hashes.len() + 3,
1882 hashes.len() + 4,
1883 hashes.len() + 5,
1884 hashes.len() + 6,
1885 hashes.len() + 7,
1886 );
1887
1888 let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
1889 for hash in &hashes {
1890 params.push(Box::new(hash.to_string()));
1891 }
1892 params.push(Box::new(request.prompt_version.to_string()));
1893 params.push(Box::new(request.provider.to_string()));
1894 params.push(Box::new(request.model.to_string()));
1895 params.push(Box::new(request.source_lang.map(|s| s.to_string())));
1896 params.push(Box::new(request.source_lang.map(|s| s.to_string())));
1897 params.push(Box::new(request.target_lang.to_string()));
1898 params.push(Box::new(request.cache_namespace.to_string()));
1899
1900 let conn = self.conn.borrow();
1901 let mut stmt = conn.prepare(&sql)?;
1902 let param_refs: Vec<&dyn rusqlite::types::ToSql> =
1903 params.iter().map(|p| p.as_ref()).collect();
1904
1905 let rows = stmt.query_map(param_refs.as_slice(), |row| {
1906 Ok((
1907 row.get::<_, String>(0)?,
1908 row.get::<_, String>(1)?,
1909 row.get::<_, String>(2)?,
1910 row.get::<_, String>(3)?,
1911 ))
1912 })?;
1913
1914 let mut hash_to_hit: HashMap<String, (String, String, String)> = HashMap::new();
1915 for row in rows {
1916 let (job_id, segment_id, translated_text, source_hash) = row?;
1917 hash_to_hit
1918 .entry(source_hash)
1919 .or_insert((job_id, segment_id, translated_text));
1920 }
1921
1922 for segment in chunk {
1923 if let Some((job_id, segment_id, translated_text)) =
1924 hash_to_hit.get(&segment.checksum)
1925 {
1926 let mut block_stmt = conn.prepare(
1927 "SELECT block_id, translated_text
1928 FROM translation_blocks
1929 WHERE job_id = ?1 AND segment_id = ?2
1930 ORDER BY block_id",
1931 )?;
1932 let block_rows = block_stmt.query_map(params![job_id, segment_id], |row| {
1933 Ok(BlockTranslation {
1934 block_id: BlockId(row.get::<_, String>(0)?),
1935 text: row.get(1)?,
1936 })
1937 })?;
1938 let blocks = block_rows.collect::<std::result::Result<Vec<_>, _>>()?;
1939
1940 let mut by_id = blocks
1941 .into_iter()
1942 .map(|block| (block.block_id.0.clone(), block))
1943 .collect::<HashMap<_, _>>();
1944
1945 let mut ordered = Vec::with_capacity(segment.block_ids.len());
1946 let mut valid = true;
1947 for id in &segment.block_ids {
1948 let Some(block) = by_id.remove(&id.0) else {
1949 valid = false;
1950 break;
1951 };
1952 ordered.push(block);
1953 }
1954 if !valid || !by_id.is_empty() {
1955 continue;
1956 }
1957
1958 results.insert(
1959 segment.id.0.clone(),
1960 CachedTranslation {
1961 translated_text: translated_text.clone(),
1962 blocks: ordered,
1963 },
1964 );
1965 }
1966 }
1967 }
1968
1969 Ok(results)
1970 }
1971
1972 fn migrate(&self) -> Result<()> {
1973 let conn = self.conn.borrow();
1974 if table_exists(&conn, "translations")?
1975 && !table_has_column(&conn, "translations", "job_id")?
1976 {
1977 let suffix = unix_timestamp_nanos();
1978 rename_table_if_exists(&conn, "qa_findings", suffix)?;
1979 rename_table_if_exists(&conn, "translation_blocks", suffix)?;
1980 rename_table_if_exists(&conn, "translations", suffix)?;
1981 rename_table_if_exists(&conn, "segments", suffix)?;
1982 rename_table_if_exists(&conn, "jobs", suffix)?;
1983 }
1984 conn.execute_batch(
1985 "
1986 PRAGMA foreign_keys = ON;
1987 CREATE TABLE IF NOT EXISTS _migrations (
1988 version INTEGER PRIMARY KEY,
1989 name TEXT NOT NULL,
1990 applied_at TEXT NOT NULL
1991 );
1992
1993 CREATE TABLE IF NOT EXISTS jobs (
1994 id TEXT PRIMARY KEY,
1995 input_path TEXT NOT NULL DEFAULT '',
1996 input_snapshot_path TEXT,
1997 input_sha256 TEXT,
1998 output_path TEXT NOT NULL DEFAULT '',
1999 input_hash TEXT NOT NULL,
2000 source_lang TEXT,
2001 target_lang TEXT NOT NULL,
2002 provider TEXT NOT NULL,
2003 model TEXT NOT NULL,
2004 base_url TEXT,
2005 api_key_env TEXT,
2006 status TEXT NOT NULL,
2007 config_json TEXT,
2008 events_path TEXT,
2009 report_json_path TEXT,
2010 report_markdown_path TEXT,
2011 book_id TEXT,
2012 series_id TEXT,
2013 created_at TEXT NOT NULL,
2014 updated_at TEXT NOT NULL
2015 );
2016
2017 CREATE TABLE IF NOT EXISTS segments (
2018 id TEXT NOT NULL,
2019 job_id TEXT NOT NULL,
2020 section_id TEXT NOT NULL,
2021 ordinal INTEGER NOT NULL,
2022 source_hash TEXT NOT NULL,
2023 prompt_version TEXT NOT NULL,
2024 provider TEXT NOT NULL,
2025 model TEXT NOT NULL,
2026 status TEXT NOT NULL,
2027 attempts INTEGER NOT NULL DEFAULT 0,
2028 input_tokens INTEGER,
2029 output_tokens INTEGER,
2030 tokens_input INTEGER,
2031 tokens_input_cached INTEGER,
2032 tokens_output INTEGER,
2033 tokens_estimated INTEGER NOT NULL DEFAULT 0,
2034 cost_estimate REAL,
2035 error TEXT,
2036 translated_hash TEXT,
2037 PRIMARY KEY (job_id, id),
2038 FOREIGN KEY(job_id) REFERENCES jobs(id)
2039 );
2040
2041 CREATE TABLE IF NOT EXISTS translations (
2042 segment_id TEXT NOT NULL,
2043 job_id TEXT NOT NULL,
2044 translated_text TEXT NOT NULL,
2045 provider TEXT NOT NULL,
2046 model TEXT NOT NULL,
2047 prompt_version TEXT NOT NULL,
2048 created_at TEXT NOT NULL,
2049 PRIMARY KEY (job_id, segment_id),
2050 FOREIGN KEY(job_id, segment_id) REFERENCES segments(job_id, id)
2051 );
2052
2053 CREATE TABLE IF NOT EXISTS translation_blocks (
2054 segment_id TEXT NOT NULL,
2055 job_id TEXT NOT NULL,
2056 block_id TEXT NOT NULL,
2057 translated_text TEXT NOT NULL,
2058 PRIMARY KEY (job_id, segment_id, block_id),
2059 FOREIGN KEY(job_id, segment_id) REFERENCES segments(job_id, id)
2060 );
2061
2062 CREATE TABLE IF NOT EXISTS qa_findings (
2063 id TEXT PRIMARY KEY,
2064 segment_id TEXT NOT NULL,
2065 job_id TEXT NOT NULL,
2066 severity TEXT NOT NULL,
2067 kind TEXT NOT NULL,
2068 message TEXT NOT NULL,
2069 FOREIGN KEY(job_id, segment_id) REFERENCES segments(job_id, id)
2070 );
2071
2072 CREATE TABLE IF NOT EXISTS segment_flags (
2073 id INTEGER PRIMARY KEY,
2074 job_id TEXT NOT NULL,
2075 segment_id TEXT NOT NULL,
2076 kind TEXT NOT NULL,
2077 note TEXT,
2078 suggested_source TEXT,
2079 suggested_target TEXT,
2080 ingested_at TEXT NOT NULL,
2081 consumed INTEGER NOT NULL DEFAULT 0,
2082 FOREIGN KEY(job_id) REFERENCES jobs(id) ON DELETE CASCADE
2083 );
2084
2085 CREATE TABLE IF NOT EXISTS glossary_terms (
2086 id INTEGER PRIMARY KEY,
2087 scope_kind TEXT NOT NULL CHECK(scope_kind IN ('global', 'series', 'book')),
2088 scope_id TEXT,
2089 source_text TEXT NOT NULL,
2090 target_text TEXT,
2091 category TEXT NOT NULL CHECK(category IN
2092 ('person', 'place', 'object', 'invented', 'style', 'phrase', 'other')),
2093 notes TEXT,
2094 case_sensitive INTEGER NOT NULL DEFAULT 0,
2095 always_active INTEGER NOT NULL DEFAULT 0,
2096 status TEXT NOT NULL CHECK(status IN
2097 ('user_seeded', 'auto_candidate', 'accepted', 'rejected'))
2098 DEFAULT 'user_seeded',
2099 source_language TEXT NOT NULL,
2100 target_language TEXT NOT NULL,
2101 source_count INTEGER DEFAULT 0,
2102 created_at TEXT NOT NULL,
2103 updated_at TEXT NOT NULL,
2104 UNIQUE(scope_kind, scope_id, source_text, source_language, target_language)
2105 );
2106
2107 CREATE TABLE IF NOT EXISTS style_sheets (
2108 id INTEGER PRIMARY KEY,
2109 scope_kind TEXT NOT NULL CHECK(scope_kind IN ('global', 'series', 'book')),
2110 scope_id TEXT,
2111 target_language TEXT NOT NULL,
2112 content_toml TEXT NOT NULL,
2113 fingerprint TEXT NOT NULL,
2114 created_at TEXT NOT NULL,
2115 updated_at TEXT NOT NULL,
2116 UNIQUE(scope_kind, scope_id, target_language)
2117 );
2118
2119 CREATE TABLE IF NOT EXISTS entities (
2120 id INTEGER PRIMARY KEY,
2121 scope_kind TEXT NOT NULL CHECK(scope_kind IN ('global', 'series', 'book')),
2122 scope_id TEXT,
2123 source_name TEXT NOT NULL,
2124 target_name TEXT NOT NULL,
2125 gender_target TEXT
2126 CHECK(gender_target IS NULL OR gender_target IN ('m', 'f', 'n')),
2127 role TEXT,
2128 notes TEXT,
2129 source_language TEXT NOT NULL,
2130 target_language TEXT NOT NULL,
2131 created_at TEXT NOT NULL,
2132 updated_at TEXT NOT NULL,
2133 UNIQUE(scope_kind, scope_id, source_name, source_language, target_language)
2134 );
2135 ",
2136 )?;
2137 ensure_glossary_target_nullable(&conn)?;
2138 ensure_column(&conn, "jobs", "input_path", "TEXT NOT NULL DEFAULT ''")?;
2139 ensure_column(&conn, "jobs", "input_snapshot_path", "TEXT")?;
2140 ensure_column(&conn, "jobs", "input_sha256", "TEXT")?;
2141 ensure_column(&conn, "jobs", "output_path", "TEXT NOT NULL DEFAULT ''")?;
2142 ensure_column(&conn, "jobs", "base_url", "TEXT")?;
2143 ensure_column(&conn, "jobs", "api_key_env", "TEXT")?;
2144 ensure_column(&conn, "jobs", "config_json", "TEXT")?;
2145 ensure_column(&conn, "jobs", "events_path", "TEXT")?;
2146 ensure_column(&conn, "jobs", "report_json_path", "TEXT")?;
2147 ensure_column(&conn, "jobs", "report_markdown_path", "TEXT")?;
2148 ensure_column(&conn, "jobs", "book_id", "TEXT")?;
2149 ensure_column(&conn, "jobs", "series_id", "TEXT")?;
2150 ensure_column(
2151 &conn,
2152 "segments",
2153 "cache_namespace",
2154 "TEXT NOT NULL DEFAULT ''",
2155 )?;
2156 ensure_column(&conn, "segments", "tokens_input", "INTEGER")?;
2157 ensure_column(&conn, "segments", "tokens_input_cached", "INTEGER")?;
2158 ensure_column(&conn, "segments", "tokens_output", "INTEGER")?;
2159 ensure_column(
2160 &conn,
2161 "segments",
2162 "tokens_estimated",
2163 "INTEGER NOT NULL DEFAULT 0",
2164 )?;
2165 conn.execute_batch(
2166 "CREATE INDEX IF NOT EXISTS idx_segments_cache_lookup
2167 ON segments(source_hash, cache_namespace, prompt_version, provider, model, status);
2168 CREATE INDEX IF NOT EXISTS idx_segment_flags_job
2169 ON segment_flags(job_id, consumed);
2170 CREATE INDEX IF NOT EXISTS idx_glossary_lookup
2171 ON glossary_terms(source_language, target_language, scope_kind, scope_id, status);
2172 CREATE INDEX IF NOT EXISTS idx_style_lookup
2173 ON style_sheets(target_language, scope_kind, scope_id);
2174 CREATE INDEX IF NOT EXISTS idx_entity_lookup
2175 ON entities(source_language, target_language, scope_kind, scope_id);",
2176 )?;
2177 record_migration(&conn, 1, "initial")?;
2178 record_migration(&conn, 2, "v1_0_1_input_snapshot")?;
2179 record_migration(&conn, 3, "v1_1_segment_flags")?;
2180 record_migration(&conn, 4, "v1_2_glossary_terms")?;
2181 record_migration(&conn, 5, "v1_2_1_nullable_glossary_candidate_targets")?;
2182 record_migration(&conn, 6, "v1_3_context_styles_entities")?;
2183 Ok(())
2184 }
2185
2186 fn touch_job(&self, job_id: &str, status: &str) -> Result<()> {
2187 let conn = self.conn.borrow();
2188 conn.execute(
2189 "UPDATE jobs SET status = ?1, updated_at = ?2 WHERE id = ?3",
2190 params![status, timestamp_string(), job_id],
2191 )?;
2192 Ok(())
2193 }
2194}
2195
2196fn table_exists(conn: &Connection, table: &str) -> rusqlite::Result<bool> {
2197 conn.query_row(
2198 "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
2199 params![table],
2200 |row| row.get::<_, bool>(0),
2201 )
2202}
2203
2204fn table_has_column(conn: &Connection, table: &str, column: &str) -> rusqlite::Result<bool> {
2205 let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
2206 let rows = stmt.query_map([], |row| row.get::<_, String>(1))?;
2207 for row in rows {
2208 if row? == column {
2209 return Ok(true);
2210 }
2211 }
2212 Ok(false)
2213}
2214
2215fn rename_table_if_exists(conn: &Connection, table: &str, suffix: u128) -> rusqlite::Result<()> {
2216 if table_exists(conn, table)? {
2217 conn.execute(
2218 &format!("ALTER TABLE {table} RENAME TO {table}_legacy_{suffix}"),
2219 [],
2220 )?;
2221 }
2222 Ok(())
2223}
2224
2225fn ensure_column(
2226 conn: &Connection,
2227 table: &str,
2228 column: &str,
2229 definition: &str,
2230) -> rusqlite::Result<()> {
2231 if !table_has_column(conn, table, column)? {
2232 conn.execute(
2233 &format!("ALTER TABLE {table} ADD COLUMN {column} {definition}"),
2234 [],
2235 )?;
2236 }
2237 Ok(())
2238}
2239
2240fn ensure_glossary_target_nullable(conn: &Connection) -> rusqlite::Result<()> {
2241 if !table_exists(conn, "glossary_terms")?
2242 || !table_column_is_not_null(conn, "glossary_terms", "target_text")?
2243 {
2244 return Ok(());
2245 }
2246
2247 let legacy_table = format!("glossary_terms_v1_2_0_{}", unix_timestamp_nanos());
2248 conn.execute_batch(&format!(
2249 "
2250 DROP INDEX IF EXISTS idx_glossary_lookup;
2251 ALTER TABLE glossary_terms RENAME TO {legacy_table};
2252 CREATE TABLE glossary_terms (
2253 id INTEGER PRIMARY KEY,
2254 scope_kind TEXT NOT NULL CHECK(scope_kind IN ('global', 'series', 'book')),
2255 scope_id TEXT,
2256 source_text TEXT NOT NULL,
2257 target_text TEXT,
2258 category TEXT NOT NULL CHECK(category IN
2259 ('person', 'place', 'object', 'invented', 'style', 'phrase', 'other')),
2260 notes TEXT,
2261 case_sensitive INTEGER NOT NULL DEFAULT 0,
2262 always_active INTEGER NOT NULL DEFAULT 0,
2263 status TEXT NOT NULL CHECK(status IN
2264 ('user_seeded', 'auto_candidate', 'accepted', 'rejected'))
2265 DEFAULT 'user_seeded',
2266 source_language TEXT NOT NULL,
2267 target_language TEXT NOT NULL,
2268 source_count INTEGER DEFAULT 0,
2269 created_at TEXT NOT NULL,
2270 updated_at TEXT NOT NULL,
2271 UNIQUE(scope_kind, scope_id, source_text, source_language, target_language)
2272 );
2273 INSERT INTO glossary_terms
2274 (id, scope_kind, scope_id, source_text, target_text, category, notes,
2275 case_sensitive, always_active, status, source_language, target_language,
2276 source_count, created_at, updated_at)
2277 SELECT id, scope_kind, scope_id, source_text, target_text, category, notes,
2278 case_sensitive, always_active, status, source_language, target_language,
2279 source_count, created_at, updated_at
2280 FROM {legacy_table};
2281 DROP TABLE {legacy_table};
2282 ",
2283 ))?;
2284 Ok(())
2285}
2286
2287fn table_column_is_not_null(
2288 conn: &Connection,
2289 table: &str,
2290 column: &str,
2291) -> rusqlite::Result<bool> {
2292 let mut stmt = conn.prepare(&format!("PRAGMA table_info({table})"))?;
2293 let mut rows = stmt.query([])?;
2294 while let Some(row) = rows.next()? {
2295 let name: String = row.get(1)?;
2296 if name == column {
2297 let not_null: i64 = row.get(3)?;
2298 return Ok(not_null != 0);
2299 }
2300 }
2301 Ok(false)
2302}
2303
2304fn record_migration(conn: &Connection, version: i64, name: &str) -> rusqlite::Result<()> {
2305 conn.execute(
2306 "INSERT OR IGNORE INTO _migrations (version, name, applied_at)
2307 VALUES (?1, ?2, ?3)",
2308 params![version, name, timestamp_string()],
2309 )?;
2310 Ok(())
2311}
2312
2313fn glossary_term_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<GlossaryTerm> {
2314 let scope_kind_text: String = row.get(1)?;
2315 let category_text: String = row.get(5)?;
2316 let status_text: String = row.get(9)?;
2317 Ok(GlossaryTerm {
2318 id: Some(row.get(0)?),
2319 scope_kind: parse_row_enum(&scope_kind_text, 1)?,
2320 scope_id: row.get(2)?,
2321 source_text: row.get(3)?,
2322 target_text: row.get(4)?,
2323 category: parse_row_enum(&category_text, 5)?,
2324 notes: row.get(6)?,
2325 case_sensitive: row.get::<_, i64>(7)? != 0,
2326 always_active: row.get::<_, i64>(8)? != 0,
2327 status: parse_row_enum(&status_text, 9)?,
2328 source_language: row.get(10)?,
2329 target_language: row.get(11)?,
2330 source_count: row.get::<_, Option<i64>>(12)?.unwrap_or(0).max(0) as usize,
2331 })
2332}
2333
2334fn glossary_candidate_from_row(
2335 row: &rusqlite::Row<'_>,
2336) -> rusqlite::Result<StoredGlossaryCandidate> {
2337 let category_text: String = row.get(3)?;
2338 let status_text: String = row.get(7)?;
2339 Ok(StoredGlossaryCandidate {
2340 id: row.get(0)?,
2341 source_text: row.get(1)?,
2342 target_text: row.get(2)?,
2343 category: parse_row_enum(&category_text, 3)?,
2344 notes: row.get(4)?,
2345 case_sensitive: row.get::<_, i64>(5)? != 0,
2346 always_active: row.get::<_, i64>(6)? != 0,
2347 status: parse_row_enum(&status_text, 7)?,
2348 source_language: row.get(8)?,
2349 target_language: row.get(9)?,
2350 source_count: row.get::<_, Option<i64>>(10)?.unwrap_or(0).max(0) as usize,
2351 })
2352}
2353
2354fn parse_gender_short(value: &str) -> Option<EntityGender> {
2355 match value {
2356 "m" => Some(EntityGender::Masculine),
2357 "f" => Some(EntityGender::Feminine),
2358 "n" => Some(EntityGender::Neuter),
2359 _ => None,
2360 }
2361}
2362
2363fn parse_row_enum<T>(value: &str, column: usize) -> rusqlite::Result<T>
2364where
2365 T: FromStr<Err = String>,
2366{
2367 value.parse::<T>().map_err(|err| {
2368 rusqlite::Error::FromSqlConversionFailure(column, Type::Text, Box::new(RowEnumError(err)))
2369 })
2370}
2371
2372#[derive(Debug)]
2373struct RowEnumError(String);
2374
2375impl std::fmt::Display for RowEnumError {
2376 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2377 f.write_str(&self.0)
2378 }
2379}
2380
2381impl std::error::Error for RowEnumError {}
2382
2383fn replace_block_translations(
2384 conn: &Connection,
2385 job_id: &str,
2386 segment_id: &str,
2387 blocks: &[BlockTranslation],
2388) -> rusqlite::Result<()> {
2389 conn.execute(
2390 "DELETE FROM translation_blocks WHERE job_id = ?1 AND segment_id = ?2",
2391 params![job_id, segment_id],
2392 )?;
2393 for block in blocks {
2394 conn.execute(
2395 "INSERT INTO translation_blocks (segment_id, job_id, block_id, translated_text)
2396 VALUES (?1, ?2, ?3, ?4)",
2397 params![
2398 segment_id,
2399 job_id,
2400 block.block_id.0.as_str(),
2401 block.text.as_str()
2402 ],
2403 )?;
2404 }
2405 Ok(())
2406}
2407
2408#[derive(Debug, Clone, Copy)]
2409pub enum RetryScope {
2410 Failed,
2411 NeedsReview,
2412 All,
2413}
2414
2415fn file_hash(path: &Path) -> CoreResult<String> {
2416 let bytes = fs::read(path)?;
2417 Ok(stable_hash_bytes(&bytes))
2418}
2419
2420fn stable_hash(text: &str) -> String {
2421 stable_hash_bytes(text.as_bytes())
2422}
2423
2424fn stable_hash_bytes(bytes: &[u8]) -> String {
2425 let digest = Sha256::digest(bytes);
2426 let mut output = String::with_capacity(digest.len() * 2);
2427 for byte in digest {
2428 use std::fmt::Write as _;
2429 write!(&mut output, "{byte:02x}").expect("writing to string should not fail");
2430 }
2431 output
2432}
2433
2434fn unix_timestamp() -> u64 {
2435 SystemTime::now()
2436 .duration_since(UNIX_EPOCH)
2437 .unwrap_or_default()
2438 .as_secs()
2439}
2440
2441fn unix_timestamp_nanos() -> u128 {
2442 SystemTime::now()
2443 .duration_since(UNIX_EPOCH)
2444 .unwrap_or_default()
2445 .as_nanos()
2446}
2447
2448fn timestamp_string() -> String {
2449 unix_timestamp().to_string()
2450}
2451
2452#[cfg(test)]
2453mod tests {
2454 use super::*;
2455 use bookforge_core::{
2456 ir::{BlockId, SectionId},
2457 segment::{
2458 Segment, SegmentBlock, SegmentConstraints, SegmentContext, SegmentId, SegmentMetadata,
2459 SegmentSource, SegmentTextRun,
2460 },
2461 };
2462
2463 #[test]
2464 fn store_reuses_connection_across_job_operations() {
2465 let db_path = temp_path("jobs.sqlite");
2466 let input_path = temp_path("input.epub");
2467 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
2468
2469 let store = JobStore::open(&db_path).expect("store should open");
2470 let job = store
2471 .create_job(CreateJob {
2472 input: &input_path,
2473 output: &temp_path("output.epub"),
2474 source_lang: Some("English"),
2475 target_lang: "Italian",
2476 provider: "mock",
2477 model: "mock-prefix",
2478 base_url: None,
2479 api_key_env: None,
2480 book_id: None,
2481 series_id: None,
2482 })
2483 .expect("job should be created");
2484 let segments = vec![segment("seg_a", 0), segment("seg_b", 1)];
2485 store
2486 .insert_segments(&job.id, &segments, "v1", "mock", "mock-prefix", "test_ns")
2487 .expect("segments should insert");
2488
2489 store
2490 .save_translation(SaveTranslation {
2491 job_id: &job.id,
2492 segment_id: "seg_a",
2493 translated_text: "Tradotto",
2494 blocks: &[BlockTranslation {
2495 block_id: BlockId("b_000000".to_string()),
2496 text: "Tradotto".to_string(),
2497 }],
2498 provider: "mock",
2499 model: "mock-prefix",
2500 prompt_version: "v1",
2501 input_tokens: Some(11),
2502 input_cached_tokens: Some(0),
2503 output_tokens: Some(7),
2504 tokens_estimated: false,
2505 })
2506 .expect("translation should save");
2507 store
2508 .mark_segment_failed(&job.id, "seg_b", "provider unavailable")
2509 .expect("segment should be marked failed");
2510
2511 let summary = store
2512 .summary(&job.id)
2513 .expect("summary should load")
2514 .expect("job should exist");
2515 assert_eq!(summary.total_segments, 2);
2516 assert_eq!(summary.succeeded, 1);
2517 assert_eq!(summary.failed, 1);
2518 assert_eq!(summary.input_tokens, 11);
2519 assert_eq!(summary.output_tokens, 7);
2520 let blocks = store
2521 .load_block_translations(&job.id)
2522 .expect("block translations should load");
2523 assert_eq!(blocks.len(), 1);
2524 assert_eq!(blocks[0].text, "Tradotto");
2525
2526 let _ = fs::remove_file(db_path);
2527 let _ = fs::remove_file(input_path);
2528 }
2529
2530 fn segment(id: &str, ordinal: usize) -> Segment {
2531 let block_id = BlockId(format!("b_{ordinal:06}"));
2532 Segment {
2533 id: SegmentId(id.to_string()),
2534 section_id: SectionId("sec_000000".to_string()),
2535 ordinal,
2536 block_ids: vec![block_id.clone()],
2537 source: SegmentSource {
2538 text: format!("Source {ordinal}"),
2539 blocks: vec![SegmentBlock {
2540 block_id,
2541 kind: "paragraph".to_string(),
2542 text: format!("Source {ordinal}"),
2543 text_runs: vec![SegmentTextRun {
2544 id: format!("r{ordinal}"),
2545 text: format!("Source {ordinal}"),
2546 }],
2547 protected_spans: Vec::new(),
2548 }],
2549 token_estimate: 2,
2550 },
2551 context: SegmentContext::default(),
2552 metadata: SegmentMetadata::default(),
2553 constraints: SegmentConstraints::default(),
2554 checksum: format!("checksum_{ordinal}"),
2555 }
2556 }
2557
2558 fn temp_path(name: &str) -> PathBuf {
2559 std::env::temp_dir().join(format!(
2560 "bookforge-store-test-{}-{}-{name}",
2561 std::process::id(),
2562 unix_timestamp_nanos()
2563 ))
2564 }
2565
2566 fn build_seeded_store_with_translation(
2567 db_path: &PathBuf,
2568 cache_namespace: &str,
2569 block_ids: &[&str],
2570 ) -> (JobStore, JobRecord, Segment) {
2571 let input_path = temp_path("input.epub");
2572 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
2573
2574 let store = JobStore::open(db_path).expect("store should open");
2575 let job = store
2576 .create_job(CreateJob {
2577 input: &input_path,
2578 output: &temp_path("output.epub"),
2579 source_lang: Some("English"),
2580 target_lang: "Italian",
2581 provider: "mock",
2582 model: "mock-prefix",
2583 base_url: None,
2584 api_key_env: None,
2585 book_id: None,
2586 series_id: None,
2587 })
2588 .expect("job should be created");
2589
2590 let mut seg = segment("seg_a", 0);
2591 let blocks: Vec<BlockTranslation> = block_ids
2592 .iter()
2593 .map(|id| BlockTranslation {
2594 block_id: BlockId(id.to_string()),
2595 text: format!("Tradotto {id}"),
2596 })
2597 .collect();
2598 seg.block_ids = block_ids.iter().map(|id| BlockId(id.to_string())).collect();
2599
2600 store
2601 .insert_segments(
2602 &job.id,
2603 std::slice::from_ref(&seg),
2604 "v1",
2605 "mock",
2606 "mock-prefix",
2607 cache_namespace,
2608 )
2609 .expect("segments should insert");
2610 store
2611 .save_translation(SaveTranslation {
2612 job_id: &job.id,
2613 segment_id: "seg_a",
2614 translated_text: "Tradotto",
2615 blocks: &blocks,
2616 provider: "mock",
2617 model: "mock-prefix",
2618 prompt_version: "v1",
2619 input_tokens: Some(11),
2620 input_cached_tokens: Some(0),
2621 output_tokens: Some(7),
2622 tokens_estimated: false,
2623 })
2624 .expect("translation should save");
2625
2626 let _ = fs::remove_file(input_path);
2627 (store, job, seg)
2628 }
2629
2630 #[test]
2631 fn job_config_snapshot_round_trips_through_store() {
2632 let db_path = temp_path("snapshot.sqlite");
2633 let input_path = temp_path("input.epub");
2634 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
2635 let store = JobStore::open(&db_path).expect("store should open");
2636 let job = store
2637 .create_job(CreateJob {
2638 input: &input_path,
2639 output: &temp_path("output.epub"),
2640 source_lang: Some("English"),
2641 target_lang: "Italian",
2642 provider: "openrouter",
2643 model: "model",
2644 base_url: Some("https://example.test/v1"),
2645 api_key_env: Some("OPENROUTER_API_KEY"),
2646 book_id: None,
2647 series_id: None,
2648 })
2649 .expect("job should be created");
2650 let settings = bookforge_core::TranslationProfile::Balanced.resolve();
2651 let snapshot = RunConfigSnapshot {
2652 input_path: input_path.clone(),
2653 input_snapshot_path: Some(temp_path("input-snapshot.epub")),
2654 input_sha256: Some("abc123".to_string()),
2655 output_path: temp_path("translated.epub"),
2656 events_path: Some(temp_path("events.jsonl")),
2657 report_json_path: Some(temp_path("report.json")),
2658 report_markdown_path: Some(temp_path("report.md")),
2659 source_language: Some("English".to_string()),
2660 target_language: "Italian".to_string(),
2661 provider: "openrouter".to_string(),
2662 model: "model".to_string(),
2663 base_url: Some("https://example.test/v1".to_string()),
2664 api_key_env: Some("OPENROUTER_API_KEY".to_string()),
2665 profile: settings.profile,
2666 provider_preset: None,
2667 prompt_version: "batch_v1".to_string(),
2668 cache_namespace: "cache_ns".to_string(),
2669 book_id: None,
2670 series_id: None,
2671 glossary_budget_tokens: 800,
2672 glossary_format: bookforge_core::GlossaryFormat::Json,
2673 prompt_extra: None,
2674 glossary_fingerprint: String::new(),
2675 glossary_terms: Vec::new(),
2676 context_window: 0,
2677 context_budget_tokens: 1200,
2678 context_scope: bookforge_core::config::ContextScope::Chapter,
2679 style_fingerprint: String::new(),
2680 style_rendered_block: String::new(),
2681 entities_fingerprint: String::new(),
2682 entities_rendered_block: String::new(),
2683 settings: bookforge_core::ResolvedRunSettingsSnapshot::from_settings(&settings),
2684 };
2685
2686 store
2687 .update_job_config_snapshot(&job.id, &snapshot)
2688 .expect("snapshot should persist");
2689 let loaded = store
2690 .load_job_config_snapshot(&job.id)
2691 .expect("snapshot should load")
2692 .expect("snapshot should exist");
2693 assert_eq!(loaded, snapshot);
2694
2695 let reloaded_job = store
2696 .get_job(&job.id)
2697 .expect("job should load")
2698 .expect("job should exist");
2699 assert_eq!(reloaded_job.events_path, snapshot.events_path);
2700 assert_eq!(reloaded_job.report_json_path, snapshot.report_json_path);
2701 assert_eq!(
2702 reloaded_job.report_markdown_path,
2703 snapshot.report_markdown_path
2704 );
2705
2706 let _ = fs::remove_file(db_path);
2707 let _ = fs::remove_file(input_path);
2708 }
2709
2710 #[test]
2711 fn job_config_snapshot_does_not_store_api_key_value() {
2712 let db_path = temp_path("snapshot_secret.sqlite");
2713 let input_path = temp_path("input.epub");
2714 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
2715 let api_key_env = "BOOKFORGE_TEST_API_KEY_VALUE_NOT_STORED";
2716 let api_key_value = "sk-live-secret-that-must-not-be-persisted";
2717 unsafe {
2720 std::env::set_var(api_key_env, api_key_value);
2721 }
2722
2723 let store = JobStore::open(&db_path).expect("store should open");
2724 let job = store
2725 .create_job(CreateJob {
2726 input: &input_path,
2727 output: &temp_path("output.epub"),
2728 source_lang: Some("English"),
2729 target_lang: "Italian",
2730 provider: "openrouter",
2731 model: "model",
2732 base_url: Some("https://example.test/v1"),
2733 api_key_env: Some(api_key_env),
2734 book_id: None,
2735 series_id: None,
2736 })
2737 .expect("job should be created");
2738 let settings = bookforge_core::TranslationProfile::Balanced.resolve();
2739 let snapshot = RunConfigSnapshot {
2740 input_path: input_path.clone(),
2741 input_snapshot_path: None,
2742 input_sha256: None,
2743 output_path: temp_path("translated.epub"),
2744 events_path: Some(temp_path("events.jsonl")),
2745 report_json_path: Some(temp_path("report.json")),
2746 report_markdown_path: Some(temp_path("report.md")),
2747 source_language: Some("English".to_string()),
2748 target_language: "Italian".to_string(),
2749 provider: "openrouter".to_string(),
2750 model: "model".to_string(),
2751 base_url: Some("https://example.test/v1".to_string()),
2752 api_key_env: Some(api_key_env.to_string()),
2753 profile: settings.profile,
2754 provider_preset: None,
2755 prompt_version: "batch_v1".to_string(),
2756 cache_namespace: "cache_ns".to_string(),
2757 book_id: None,
2758 series_id: None,
2759 glossary_budget_tokens: 800,
2760 glossary_format: bookforge_core::GlossaryFormat::Json,
2761 prompt_extra: None,
2762 glossary_fingerprint: String::new(),
2763 glossary_terms: Vec::new(),
2764 context_window: 0,
2765 context_budget_tokens: 1200,
2766 context_scope: bookforge_core::config::ContextScope::Chapter,
2767 style_fingerprint: String::new(),
2768 style_rendered_block: String::new(),
2769 entities_fingerprint: String::new(),
2770 entities_rendered_block: String::new(),
2771 settings: bookforge_core::ResolvedRunSettingsSnapshot::from_settings(&settings),
2772 };
2773
2774 store
2775 .update_job_config_snapshot(&job.id, &snapshot)
2776 .expect("snapshot should persist");
2777 let raw_json = {
2778 let conn = store.conn.borrow();
2779 conn.query_row(
2780 "SELECT config_json FROM jobs WHERE id = ?1",
2781 params![job.id],
2782 |row| row.get::<_, String>(0),
2783 )
2784 .expect("raw snapshot JSON should load")
2785 };
2786
2787 assert!(raw_json.contains(api_key_env));
2788 assert!(!raw_json.contains(api_key_value));
2789
2790 unsafe {
2791 std::env::remove_var(api_key_env);
2792 }
2793 let _ = fs::remove_file(db_path);
2794 let _ = fs::remove_file(input_path);
2795 }
2796
2797 #[test]
2798 fn terminal_loading_and_resumable_ids_preserve_lifecycle_boundaries() {
2799 let db_path = temp_path("terminal_resume.sqlite");
2800 let input_path = temp_path("input.epub");
2801 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
2802 let store = JobStore::open(&db_path).expect("store should open");
2803 let job = store
2804 .create_job(CreateJob {
2805 input: &input_path,
2806 output: &temp_path("output.epub"),
2807 source_lang: Some("English"),
2808 target_lang: "Italian",
2809 provider: "mock",
2810 model: "mock-prefix",
2811 base_url: None,
2812 api_key_env: None,
2813 book_id: None,
2814 series_id: None,
2815 })
2816 .expect("job should be created");
2817 let segments = vec![
2818 segment("seg_done", 0),
2819 segment("seg_cached", 1),
2820 segment("seg_review", 2),
2821 segment("seg_failed", 3),
2822 segment("seg_queued", 4),
2823 ];
2824 store
2825 .insert_segments(&job.id, &segments, "v1", "mock", "mock-prefix", "ns")
2826 .expect("segments should insert");
2827 store
2828 .save_translation(SaveTranslation {
2829 job_id: &job.id,
2830 segment_id: "seg_done",
2831 translated_text: "Done",
2832 blocks: &[BlockTranslation {
2833 block_id: BlockId("b_000000".to_string()),
2834 text: "Done".to_string(),
2835 }],
2836 provider: "mock",
2837 model: "mock-prefix",
2838 prompt_version: "v1",
2839 input_tokens: None,
2840 input_cached_tokens: None,
2841 output_tokens: None,
2842 tokens_estimated: false,
2843 })
2844 .expect("done should save");
2845 store
2846 .save_cached_translation(SaveCachedTranslation {
2847 job_id: &job.id,
2848 segment_id: "seg_cached",
2849 translated_text: "Cached",
2850 blocks: &[BlockTranslation {
2851 block_id: BlockId("b_000001".to_string()),
2852 text: "Cached".to_string(),
2853 }],
2854 provider: "mock",
2855 model: "mock-prefix",
2856 prompt_version: "v1",
2857 })
2858 .expect("cached should save");
2859 store
2860 .save_needs_review(SaveNeedsReview {
2861 job_id: &job.id,
2862 segment_id: "seg_review",
2863 preserved_text: "Review",
2864 blocks: &[BlockTranslation {
2865 block_id: BlockId("b_000002".to_string()),
2866 text: "Review".to_string(),
2867 }],
2868 provider: "mock",
2869 model: "mock-prefix",
2870 prompt_version: "v1",
2871 error: "needs eyes",
2872 input_tokens: None,
2873 input_cached_tokens: None,
2874 output_tokens: None,
2875 tokens_estimated: false,
2876 })
2877 .expect("review should save");
2878 store
2879 .mark_segment_failed(&job.id, "seg_failed", "failed")
2880 .expect("failed should mark");
2881
2882 let terminal = store
2883 .load_terminal_segment_translations(&job.id)
2884 .expect("terminal records should load");
2885 let ids = terminal
2886 .iter()
2887 .map(|record| record.segment_id.as_str())
2888 .collect::<Vec<_>>();
2889 assert_eq!(ids, vec!["seg_done", "seg_cached", "seg_review"]);
2890 assert_eq!(terminal[0].blocks[0].block_id.0, "b_000000");
2891 assert_eq!(terminal[2].status, "needs_review");
2892
2893 let resumable = store
2894 .resumable_segment_ids(&job.id)
2895 .expect("resumable ids should load");
2896 assert_eq!(resumable, vec!["seg_failed", "seg_queued"]);
2897
2898 let _ = fs::remove_file(db_path);
2899 let _ = fs::remove_file(input_path);
2900 }
2901
2902 #[test]
2903 fn mark_unfinished_segments_failed_preserves_terminal_segments() {
2904 let db_path = temp_path("unfinished_preserve.sqlite");
2905 let input_path = temp_path("input.epub");
2906 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
2907 let store = JobStore::open(&db_path).expect("store should open");
2908 let job = store
2909 .create_job(CreateJob {
2910 input: &input_path,
2911 output: &temp_path("output.epub"),
2912 source_lang: Some("English"),
2913 target_lang: "Italian",
2914 provider: "mock",
2915 model: "mock-prefix",
2916 base_url: None,
2917 api_key_env: None,
2918 book_id: None,
2919 series_id: None,
2920 })
2921 .expect("job should be created");
2922 let segments = vec![
2923 segment("seg_succeeded", 0),
2924 segment("seg_cached", 1),
2925 segment("seg_review", 2),
2926 segment("seg_queued", 3),
2927 segment("seg_retry", 4),
2928 ];
2929 store
2930 .insert_segments(&job.id, &segments, "v1", "mock", "mock-prefix", "test_ns")
2931 .expect("segments should insert");
2932 store
2933 .save_translation(SaveTranslation {
2934 job_id: &job.id,
2935 segment_id: "seg_succeeded",
2936 translated_text: "Done",
2937 blocks: &[BlockTranslation {
2938 block_id: BlockId("b_000000".to_string()),
2939 text: "Done".to_string(),
2940 }],
2941 provider: "mock",
2942 model: "mock-prefix",
2943 prompt_version: "v1",
2944 input_tokens: None,
2945 input_cached_tokens: None,
2946 output_tokens: None,
2947 tokens_estimated: false,
2948 })
2949 .expect("succeeded segment should save");
2950 store
2951 .save_cached_translation(SaveCachedTranslation {
2952 job_id: &job.id,
2953 segment_id: "seg_cached",
2954 translated_text: "Cached",
2955 blocks: &[BlockTranslation {
2956 block_id: BlockId("b_000001".to_string()),
2957 text: "Cached".to_string(),
2958 }],
2959 provider: "mock",
2960 model: "mock-prefix",
2961 prompt_version: "v1",
2962 })
2963 .expect("cached segment should save");
2964 store
2965 .save_needs_review(SaveNeedsReview {
2966 job_id: &job.id,
2967 segment_id: "seg_review",
2968 preserved_text: "Needs review",
2969 blocks: &[BlockTranslation {
2970 block_id: BlockId("b_000002".to_string()),
2971 text: "Needs review".to_string(),
2972 }],
2973 provider: "mock",
2974 model: "mock-prefix",
2975 prompt_version: "v1",
2976 error: "qa issue",
2977 input_tokens: None,
2978 input_cached_tokens: None,
2979 output_tokens: None,
2980 tokens_estimated: false,
2981 })
2982 .expect("needs-review segment should save");
2983 store
2984 .retry_segments(&job.id, RetryScope::Failed)
2985 .expect("retry with no failed segments should be harmless");
2986 {
2987 let conn = store.conn.borrow();
2988 conn.execute(
2989 "UPDATE segments SET status = 'retry_pending' WHERE job_id = ?1 AND id = 'seg_retry'",
2990 params![job.id],
2991 )
2992 .expect("test status update should work");
2993 }
2994
2995 let candidate_ids = segments
2996 .iter()
2997 .map(|segment| segment.id.0.clone())
2998 .collect::<Vec<_>>();
2999 let changed = store
3000 .mark_unfinished_segments_failed(&job.id, &candidate_ids, "run failed")
3001 .expect("unfinished segments should be marked failed");
3002 assert_eq!(changed, 2);
3003
3004 let records = store.segment_records(&job.id).expect("records should load");
3005 let statuses = records
3006 .into_iter()
3007 .map(|record| (record.id, record.status))
3008 .collect::<HashMap<_, _>>();
3009 assert_eq!(statuses["seg_succeeded"], "succeeded");
3010 assert_eq!(statuses["seg_cached"], "skipped_cached");
3011 assert_eq!(statuses["seg_review"], "needs_review");
3012 assert_eq!(statuses["seg_queued"], "failed");
3013 assert_eq!(statuses["seg_retry"], "failed");
3014
3015 let _ = fs::remove_file(db_path);
3016 let _ = fs::remove_file(input_path);
3017 }
3018
3019 #[test]
3020 fn mark_unfinished_segments_failed_marks_only_resumable_segments() {
3021 let db_path = temp_path("unfinished_resumable_only.sqlite");
3022 let input_path = temp_path("input.epub");
3023 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
3024 let store = JobStore::open(&db_path).expect("store should open");
3025 let job = store
3026 .create_job(CreateJob {
3027 input: &input_path,
3028 output: &temp_path("output.epub"),
3029 source_lang: Some("English"),
3030 target_lang: "Italian",
3031 provider: "mock",
3032 model: "mock-prefix",
3033 base_url: None,
3034 api_key_env: None,
3035 book_id: None,
3036 series_id: None,
3037 })
3038 .expect("job should be created");
3039 let segments = vec![
3040 segment("seg_succeeded", 0),
3041 segment("seg_cached", 1),
3042 segment("seg_review", 2),
3043 segment("seg_failed", 3),
3044 segment("seg_retry", 4),
3045 segment("seg_queued", 5),
3046 ];
3047 store
3048 .insert_segments(&job.id, &segments, "v1", "mock", "mock-prefix", "test_ns")
3049 .expect("segments should insert");
3050 {
3051 let conn = store.conn.borrow();
3052 for (id, status) in [
3053 ("seg_succeeded", "succeeded"),
3054 ("seg_cached", "skipped_cached"),
3055 ("seg_review", "needs_review"),
3056 ("seg_failed", "failed"),
3057 ("seg_retry", "retry_pending"),
3058 ("seg_queued", "queued"),
3059 ] {
3060 conn.execute(
3061 "UPDATE segments SET status = ?1 WHERE job_id = ?2 AND id = ?3",
3062 params![status, job.id, id],
3063 )
3064 .expect("status should update");
3065 }
3066 }
3067
3068 let candidate_ids = segments
3069 .iter()
3070 .map(|segment| segment.id.0.clone())
3071 .collect::<Vec<_>>();
3072 let changed = store
3073 .mark_unfinished_segments_failed(&job.id, &candidate_ids, "run failed")
3074 .expect("unfinished segments should be marked failed");
3075
3076 assert_eq!(changed, 3);
3077 let records = store.segment_records(&job.id).expect("records should load");
3078 let statuses = records
3079 .into_iter()
3080 .map(|record| (record.id, record.status))
3081 .collect::<HashMap<_, _>>();
3082 assert_eq!(statuses["seg_succeeded"], "succeeded");
3083 assert_eq!(statuses["seg_cached"], "skipped_cached");
3084 assert_eq!(statuses["seg_review"], "needs_review");
3085 assert_eq!(statuses["seg_failed"], "failed");
3086 assert_eq!(statuses["seg_retry"], "failed");
3087 assert_eq!(statuses["seg_queued"], "failed");
3088
3089 let _ = fs::remove_file(db_path);
3090 let _ = fs::remove_file(input_path);
3091 }
3092
3093 #[test]
3094 fn resumable_segment_ids_excludes_succeeded_cached_and_needs_review() {
3095 let db_path = temp_path("resumable_excludes.sqlite");
3096 let input_path = temp_path("input.epub");
3097 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
3098 let store = JobStore::open(&db_path).expect("store should open");
3099 let job = store
3100 .create_job(CreateJob {
3101 input: &input_path,
3102 output: &temp_path("output.epub"),
3103 source_lang: Some("English"),
3104 target_lang: "Italian",
3105 provider: "mock",
3106 model: "mock-prefix",
3107 base_url: None,
3108 api_key_env: None,
3109 book_id: None,
3110 series_id: None,
3111 })
3112 .expect("job should be created");
3113 let segments = vec![
3114 segment("seg_succeeded", 0),
3115 segment("seg_cached", 1),
3116 segment("seg_review", 2),
3117 ];
3118 store
3119 .insert_segments(&job.id, &segments, "v1", "mock", "mock-prefix", "test_ns")
3120 .expect("segments should insert");
3121 {
3122 let conn = store.conn.borrow();
3123 for (id, status) in [
3124 ("seg_succeeded", "succeeded"),
3125 ("seg_cached", "skipped_cached"),
3126 ("seg_review", "needs_review"),
3127 ] {
3128 conn.execute(
3129 "UPDATE segments SET status = ?1 WHERE job_id = ?2 AND id = ?3",
3130 params![status, job.id, id],
3131 )
3132 .expect("status should update");
3133 }
3134 }
3135
3136 let ids = store
3137 .resumable_segment_ids(&job.id)
3138 .expect("resumable ids should load");
3139
3140 assert!(ids.is_empty());
3141 let _ = fs::remove_file(db_path);
3142 let _ = fs::remove_file(input_path);
3143 }
3144
3145 #[test]
3146 fn resumable_segment_ids_includes_failed_retry_pending_and_pending() {
3147 let db_path = temp_path("resumable_includes.sqlite");
3148 let input_path = temp_path("input.epub");
3149 fs::write(&input_path, b"epub bytes").expect("input fixture should be writable");
3150 let store = JobStore::open(&db_path).expect("store should open");
3151 let job = store
3152 .create_job(CreateJob {
3153 input: &input_path,
3154 output: &temp_path("output.epub"),
3155 source_lang: Some("English"),
3156 target_lang: "Italian",
3157 provider: "mock",
3158 model: "mock-prefix",
3159 base_url: None,
3160 api_key_env: None,
3161 book_id: None,
3162 series_id: None,
3163 })
3164 .expect("job should be created");
3165 let segments = vec![
3166 segment("seg_failed", 0),
3167 segment("seg_retry", 1),
3168 segment("seg_queued", 2),
3169 ];
3170 store
3171 .insert_segments(&job.id, &segments, "v1", "mock", "mock-prefix", "test_ns")
3172 .expect("segments should insert");
3173 {
3174 let conn = store.conn.borrow();
3175 for (id, status) in [
3176 ("seg_failed", "failed"),
3177 ("seg_retry", "retry_pending"),
3178 ("seg_queued", "queued"),
3179 ] {
3180 conn.execute(
3181 "UPDATE segments SET status = ?1 WHERE job_id = ?2 AND id = ?3",
3182 params![status, job.id, id],
3183 )
3184 .expect("status should update");
3185 }
3186 }
3187
3188 let ids = store
3189 .resumable_segment_ids(&job.id)
3190 .expect("resumable ids should load");
3191
3192 assert_eq!(ids, vec!["seg_failed", "seg_retry", "seg_queued"]);
3193 let _ = fs::remove_file(db_path);
3194 let _ = fs::remove_file(input_path);
3195 }
3196
3197 #[test]
3198 fn cached_translation_requires_matching_cache_namespace() {
3199 let db_path = temp_path("ns_match.sqlite");
3200 let (store, _job, seg) =
3201 build_seeded_store_with_translation(&db_path, "ns_one", &["b_000000"]);
3202
3203 let hit = store
3204 .find_cached_translation(
3205 &seg,
3206 "v1",
3207 "mock",
3208 "mock-prefix",
3209 Some("English"),
3210 "Italian",
3211 "ns_one",
3212 )
3213 .expect("query ok");
3214 assert!(hit.is_some(), "matching namespace should hit");
3215
3216 let miss = store
3217 .find_cached_translation(
3218 &seg,
3219 "v1",
3220 "mock",
3221 "mock-prefix",
3222 Some("English"),
3223 "Italian",
3224 "ns_two",
3225 )
3226 .expect("query ok");
3227 assert!(miss.is_none(), "different namespace must not hit");
3228
3229 let _ = fs::remove_file(db_path);
3230 }
3231
3232 #[test]
3233 fn cached_translation_rejects_mismatched_block_ids() {
3234 let db_path = temp_path("blockid_match.sqlite");
3235 let (store, _job, mut seg) =
3236 build_seeded_store_with_translation(&db_path, "ns_x", &["b_000000"]);
3237
3238 seg.block_ids = vec![BlockId("b_999999".to_string())];
3240
3241 let miss = store
3242 .find_cached_translation(
3243 &seg,
3244 "v1",
3245 "mock",
3246 "mock-prefix",
3247 Some("English"),
3248 "Italian",
3249 "ns_x",
3250 )
3251 .expect("query ok");
3252 assert!(
3253 miss.is_none(),
3254 "mismatched block_ids must reject the cached row"
3255 );
3256
3257 let _ = fs::remove_file(db_path);
3258 }
3259
3260 #[test]
3261 fn cached_translation_prefers_repaired_succeeded_rows_over_cached_clones() {
3262 let db_path = temp_path("cache_prefers_repaired.sqlite");
3263 let (store, _stale_job, seg) =
3264 build_seeded_store_with_translation(&db_path, "cache_ns", &["b_000000"]);
3265 let cached_input = temp_path("cached-input.epub");
3266 let repaired_input = temp_path("repaired-input.epub");
3267 fs::write(&cached_input, b"cached input").expect("cached input should be writable");
3268 fs::write(&repaired_input, b"repaired input").expect("repaired input should be writable");
3269
3270 let cached_job = store
3271 .create_job(CreateJob {
3272 input: &cached_input,
3273 output: &temp_path("cached-output.epub"),
3274 source_lang: Some("English"),
3275 target_lang: "Italian",
3276 provider: "mock",
3277 model: "mock-prefix",
3278 base_url: None,
3279 api_key_env: None,
3280 book_id: None,
3281 series_id: None,
3282 })
3283 .expect("cached job should be created");
3284 store
3285 .insert_segments(
3286 &cached_job.id,
3287 std::slice::from_ref(&seg),
3288 "v1",
3289 "mock",
3290 "mock-prefix",
3291 "cache_ns",
3292 )
3293 .expect("cached job segment should insert");
3294 store
3295 .save_cached_translation(SaveCachedTranslation {
3296 job_id: &cached_job.id,
3297 segment_id: "seg_a",
3298 translated_text: "stale cached clone",
3299 blocks: &[BlockTranslation {
3300 block_id: BlockId("b_000000".to_string()),
3301 text: "stale cached clone".to_string(),
3302 }],
3303 provider: "mock",
3304 model: "mock-prefix",
3305 prompt_version: "v1",
3306 })
3307 .expect("cached clone should save");
3308
3309 let repaired_job = store
3310 .create_job(CreateJob {
3311 input: &repaired_input,
3312 output: &temp_path("repaired-output.epub"),
3313 source_lang: Some("English"),
3314 target_lang: "Italian",
3315 provider: "mock",
3316 model: "mock-prefix",
3317 base_url: None,
3318 api_key_env: None,
3319 book_id: None,
3320 series_id: None,
3321 })
3322 .expect("repaired job should be created");
3323 store
3324 .insert_segments(
3325 &repaired_job.id,
3326 std::slice::from_ref(&seg),
3327 "v1",
3328 "mock",
3329 "mock-prefix",
3330 "cache_ns",
3331 )
3332 .expect("repaired job segment should insert");
3333 store
3334 .save_translation(SaveTranslation {
3335 job_id: &repaired_job.id,
3336 segment_id: "seg_a",
3337 translated_text: "repaired translation",
3338 blocks: &[BlockTranslation {
3339 block_id: BlockId("b_000000".to_string()),
3340 text: "repaired translation".to_string(),
3341 }],
3342 provider: "mock",
3343 model: "mock-prefix",
3344 prompt_version: "v1",
3345 input_tokens: Some(12),
3346 input_cached_tokens: Some(0),
3347 output_tokens: Some(8),
3348 tokens_estimated: false,
3349 })
3350 .expect("repaired translation should save");
3351
3352 let request = CacheLookupRequest {
3353 prompt_version: "v1",
3354 provider: "mock",
3355 model: "mock-prefix",
3356 source_lang: Some("English"),
3357 target_lang: "Italian",
3358 cache_namespace: "cache_ns",
3359 };
3360 let single_hit = store
3361 .find_cached_translation(
3362 &seg,
3363 request.prompt_version,
3364 request.provider,
3365 request.model,
3366 request.source_lang,
3367 request.target_lang,
3368 request.cache_namespace,
3369 )
3370 .expect("single lookup should succeed")
3371 .expect("single lookup should hit");
3372 let batch_hit = store
3373 .find_cached_translations_batch(std::slice::from_ref(&seg), request)
3374 .expect("batch lookup should succeed")
3375 .remove(&seg.id.0)
3376 .expect("batch lookup should hit");
3377
3378 assert_eq!(single_hit.translated_text, "repaired translation");
3379 assert_eq!(single_hit.blocks[0].text, "repaired translation");
3380 assert_eq!(batch_hit.translated_text, "repaired translation");
3381 assert_eq!(batch_hit.blocks[0].text, "repaired translation");
3382
3383 let _ = fs::remove_file(db_path);
3384 let _ = fs::remove_file(cached_input);
3385 let _ = fs::remove_file(repaired_input);
3386 }
3387
3388 #[test]
3389 fn old_empty_cache_namespace_rows_do_not_match_new_runs() {
3390 let db_path = temp_path("legacy_ns.sqlite");
3391 let (store, _job, seg) = build_seeded_store_with_translation(&db_path, "", &["b_000000"]);
3393
3394 let miss = store
3395 .find_cached_translation(
3396 &seg,
3397 "v1",
3398 "mock",
3399 "mock-prefix",
3400 Some("English"),
3401 "Italian",
3402 "real_ns",
3403 )
3404 .expect("query ok");
3405 assert!(
3406 miss.is_none(),
3407 "legacy empty-namespace row must not satisfy a real namespace lookup"
3408 );
3409
3410 let _ = fs::remove_file(db_path);
3411 }
3412
3413 #[test]
3414 fn job_store_enables_wal_and_busy_timeout() {
3415 let db_path = temp_path("wal_busy.sqlite");
3416 let store = JobStore::open(&db_path).expect("store should open");
3417
3418 let conn = store.conn.borrow();
3419 let journal_mode: String = conn
3420 .pragma_query_value(None, "journal_mode", |row| row.get(0))
3421 .expect("pragma journal_mode should succeed");
3422 assert_eq!(
3423 journal_mode.to_lowercase(),
3424 "wal",
3425 "WAL journal mode must be enabled"
3426 );
3427
3428 let busy_timeout: i64 = conn
3429 .pragma_query_value(None, "busy_timeout", |row| row.get(0))
3430 .expect("pragma busy_timeout should succeed");
3431 assert!(
3432 busy_timeout >= 5000,
3433 "busy_timeout should be at least 5000ms, got {busy_timeout}"
3434 );
3435
3436 let wal_path = db_path.with_extension("sqlite-wal");
3437 let shm_path = db_path.with_extension("sqlite-shm");
3438 let _ = fs::remove_file(db_path);
3442 let _ = fs::remove_file(wal_path);
3443 let _ = fs::remove_file(shm_path);
3444 }
3445
3446 #[test]
3447 fn job_store_enables_foreign_keys_on_every_connection() {
3448 let db_path = temp_path("fk.sqlite");
3449 let store = JobStore::open(&db_path).expect("store should open");
3450
3451 let conn = store.conn.borrow();
3452 let fk_enabled: i64 = conn
3453 .pragma_query_value(None, "foreign_keys", |row| row.get(0))
3454 .expect("pragma foreign_keys should succeed");
3455 assert_eq!(
3456 fk_enabled, 1,
3457 "foreign_keys pragma must be ON on every connection"
3458 );
3459
3460 let _ = fs::remove_file(db_path);
3461 }
3462
3463 #[test]
3464 fn doctor_reports_wal_sidecars_as_normal_when_integrity_check_passes() {
3465 let db_path = temp_path("doctor.sqlite");
3466 let store = JobStore::open(&db_path).expect("store should open");
3467
3468 let input_path = temp_path("input_doctor.epub");
3470 fs::write(&input_path, b"epub bytes").expect("test epub");
3471 let _job = store
3472 .create_job(CreateJob {
3473 input: &input_path,
3474 output: &temp_path("out_doctor.epub"),
3475 source_lang: Some("English"),
3476 target_lang: "Italian",
3477 provider: "mock",
3478 model: "mock-prefix",
3479 base_url: None,
3480 api_key_env: None,
3481 book_id: None,
3482 series_id: None,
3483 })
3484 .expect("job created");
3485 drop(store);
3486
3487 let doctor = run_doctor(Some(db_path.clone())).expect("doctor should run");
3488 assert!(doctor.database_exists, "database should exist");
3489 assert_eq!(
3490 doctor.journal_mode.to_lowercase(),
3491 "wal",
3492 "journal mode should be wal"
3493 );
3494 assert_eq!(doctor.integrity_check, "ok", "integrity check should pass");
3495 assert!(
3496 doctor.wal_sidecars_normal,
3497 "wal sidecars should be reported as normal"
3498 );
3499
3500 if doctor.wal_present || doctor.shm_present {
3501 assert!(
3502 !doctor.note.is_empty(),
3503 "doctor must explain WAL sidecars when they are present"
3504 );
3505 }
3506
3507 let wal_path = db_path.with_extension("sqlite-wal");
3508 let shm_path = db_path.with_extension("sqlite-shm");
3509 let _ = fs::remove_file(&db_path);
3510 let _ = fs::remove_file(input_path);
3511 let _ = fs::remove_file(wal_path);
3512 let _ = fs::remove_file(shm_path);
3513 }
3514
3515 #[test]
3516 fn checkpoint_writer_and_reader_do_not_immediately_busy_fail() {
3517 let db_path = temp_path("concurrent.sqlite");
3518 let input_path = temp_path("input_conc.epub");
3519 fs::write(&input_path, b"epub bytes").expect("test epub");
3520
3521 let store_w = JobStore::open(&db_path).expect("store_w open");
3523 let job = store_w
3524 .create_job(CreateJob {
3525 input: &input_path,
3526 output: &temp_path("out_conc.epub"),
3527 source_lang: Some("English"),
3528 target_lang: "Italian",
3529 provider: "mock",
3530 model: "mock-prefix",
3531 base_url: None,
3532 api_key_env: None,
3533 book_id: None,
3534 series_id: None,
3535 })
3536 .expect("job created");
3537 store_w
3538 .insert_segments(
3539 &job.id,
3540 &[segment("seg_conc", 0)],
3541 "v1",
3542 "mock",
3543 "mock-prefix",
3544 "ns",
3545 )
3546 .expect("segments inserted");
3547
3548 let store_r = JobStore::open(&db_path).expect("store_r open");
3550 let summary = store_r
3551 .summary(&job.id)
3552 .expect("summary should load")
3553 .expect("job should exist");
3554 assert_eq!(summary.total_segments, 1);
3555
3556 let wal_path = db_path.with_extension("sqlite-wal");
3557 let shm_path = db_path.with_extension("sqlite-shm");
3558 let _ = fs::remove_file(&db_path);
3559 let _ = fs::remove_file(input_path);
3560 let _ = fs::remove_file(wal_path);
3561 let _ = fs::remove_file(shm_path);
3562 }
3563
3564 #[test]
3565 fn migrate_creates_glossary_terms_table() {
3566 let db_path = temp_path("glossary_migrate.sqlite");
3567 let store = JobStore::open(&db_path).expect("store opens");
3568 let conn = store.conn.borrow();
3569 let table: String = conn
3570 .query_row(
3571 "SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'glossary_terms'",
3572 [],
3573 |row| row.get(0),
3574 )
3575 .expect("glossary_terms table exists");
3576 assert_eq!(table, "glossary_terms");
3577 let index: String = conn
3578 .query_row(
3579 "SELECT name FROM sqlite_master WHERE type = 'index' AND name = 'idx_glossary_lookup'",
3580 [],
3581 |row| row.get(0),
3582 )
3583 .expect("idx_glossary_lookup exists");
3584 assert_eq!(index, "idx_glossary_lookup");
3585 let version: i64 = conn
3586 .query_row(
3587 "SELECT version FROM _migrations WHERE name = 'v1_2_glossary_terms'",
3588 [],
3589 |row| row.get(0),
3590 )
3591 .expect("v1_2 migration recorded");
3592 assert_eq!(version, 4);
3593 drop(conn);
3594 let _ = fs::remove_file(&db_path);
3595 }
3596
3597 #[test]
3598 fn migrate_is_idempotent_v1_2() {
3599 let db_path = temp_path("glossary_idem.sqlite");
3600 {
3602 let _store = JobStore::open(&db_path).expect("first open");
3603 }
3604 {
3605 let _store = JobStore::open(&db_path).expect("second open");
3606 }
3607 let _ = fs::remove_file(&db_path);
3608 }
3609
3610 #[test]
3611 fn migrate_rebuilds_glossary_terms_with_nullable_target_text() {
3612 let db_path = temp_path("glossary_nullable_target.sqlite");
3613 {
3614 let conn = Connection::open(&db_path).expect("legacy db opens");
3615 conn.execute_batch(
3616 "
3617 CREATE TABLE _migrations (
3618 version INTEGER PRIMARY KEY,
3619 name TEXT NOT NULL,
3620 applied_at TEXT NOT NULL
3621 );
3622 CREATE TABLE glossary_terms (
3623 id INTEGER PRIMARY KEY,
3624 scope_kind TEXT NOT NULL CHECK(scope_kind IN ('global', 'series', 'book')),
3625 scope_id TEXT,
3626 source_text TEXT NOT NULL,
3627 target_text TEXT NOT NULL,
3628 category TEXT NOT NULL CHECK(category IN
3629 ('person', 'place', 'object', 'invented', 'style', 'phrase', 'other')),
3630 notes TEXT,
3631 case_sensitive INTEGER NOT NULL DEFAULT 0,
3632 always_active INTEGER NOT NULL DEFAULT 0,
3633 status TEXT NOT NULL CHECK(status IN
3634 ('user_seeded', 'auto_candidate', 'accepted', 'rejected'))
3635 DEFAULT 'user_seeded',
3636 source_language TEXT NOT NULL,
3637 target_language TEXT NOT NULL,
3638 source_count INTEGER DEFAULT 0,
3639 created_at TEXT NOT NULL,
3640 updated_at TEXT NOT NULL,
3641 UNIQUE(scope_kind, scope_id, source_text, source_language, target_language)
3642 );
3643 CREATE INDEX idx_glossary_lookup
3644 ON glossary_terms(source_language, target_language, scope_kind, scope_id, status);
3645 INSERT INTO glossary_terms
3646 (id, scope_kind, scope_id, source_text, target_text, category, notes,
3647 case_sensitive, always_active, status, source_language, target_language,
3648 source_count, created_at, updated_at)
3649 VALUES
3650 (42, 'book', 'ivan', 'Ivan Ilych', 'Ivan Il''ich', 'person', 'legacy',
3651 1, 0, 'user_seeded', 'English', 'Italian', 9, 'created', 'updated');
3652 ",
3653 )
3654 .expect("legacy schema should initialize");
3655 }
3656
3657 let store = JobStore::open(&db_path).expect("store opens and migrates");
3658 let conn = store.conn.borrow();
3659 assert!(
3660 !table_column_is_not_null(&conn, "glossary_terms", "target_text")
3661 .expect("table info should load")
3662 );
3663 let row = conn
3664 .query_row(
3665 "SELECT id, target_text, created_at, updated_at FROM glossary_terms WHERE id = 42",
3666 [],
3667 |row| {
3668 Ok((
3669 row.get::<_, i64>(0)?,
3670 row.get::<_, String>(1)?,
3671 row.get::<_, String>(2)?,
3672 row.get::<_, String>(3)?,
3673 ))
3674 },
3675 )
3676 .expect("legacy glossary row should survive");
3677 assert_eq!(row.0, 42);
3678 assert_eq!(row.1, "Ivan Il'ich");
3679 assert_eq!(row.2, "created");
3680 assert_eq!(row.3, "updated");
3681 let version: i64 = conn
3682 .query_row(
3683 "SELECT version FROM _migrations WHERE name = 'v1_2_1_nullable_glossary_candidate_targets'",
3684 [],
3685 |row| row.get(0),
3686 )
3687 .expect("v1.2.1 migration recorded");
3688 assert_eq!(version, 5);
3689 let index: String = conn
3690 .query_row(
3691 "SELECT name FROM sqlite_master WHERE type = 'index' AND name = 'idx_glossary_lookup'",
3692 [],
3693 |row| row.get(0),
3694 )
3695 .expect("idx_glossary_lookup should be recreated");
3696 assert_eq!(index, "idx_glossary_lookup");
3697 let duplicate = conn.execute(
3698 "INSERT INTO glossary_terms
3699 (scope_kind, scope_id, source_text, target_text, category, notes,
3700 case_sensitive, always_active, status, source_language, target_language,
3701 source_count, created_at, updated_at)
3702 VALUES
3703 ('book', 'ivan', 'Ivan Ilych', 'duplicate', 'person', NULL,
3704 1, 0, 'user_seeded', 'English', 'Italian', 1, 'now', 'now')",
3705 [],
3706 );
3707 assert!(
3708 duplicate.is_err(),
3709 "unique constraint should survive the rebuild"
3710 );
3711 drop(conn);
3712 let _ = fs::remove_file(&db_path);
3713 }
3714
3715 #[test]
3716 fn glossary_candidate_upsert_updates_auto_and_skips_rejected() {
3717 let db_path = temp_path("glossary_candidates.sqlite");
3718 let store = JobStore::open(&db_path).expect("store opens");
3719 let first = store
3720 .upsert_glossary_candidates(
3721 "ivan",
3722 "English",
3723 "Italian",
3724 &[NewGlossaryCandidate {
3725 source_text: "Ivan Ilych",
3726 category: GlossaryCategory::Other,
3727 source_count: 4,
3728 }],
3729 )
3730 .expect("candidate inserts");
3731 assert_eq!(first.inserted, 1);
3732
3733 let candidates = store
3734 .list_glossary_candidates("ivan", "English", "Italian")
3735 .expect("candidates should list");
3736 assert_eq!(candidates.len(), 1);
3737 assert_eq!(candidates[0].target_text, None);
3738 assert_eq!(candidates[0].source_count, 4);
3739
3740 let second = store
3741 .upsert_glossary_candidates(
3742 "ivan",
3743 "English",
3744 "Italian",
3745 &[NewGlossaryCandidate {
3746 source_text: "Ivan Ilych",
3747 category: GlossaryCategory::Other,
3748 source_count: 7,
3749 }],
3750 )
3751 .expect("candidate updates");
3752 assert_eq!(second.updated, 1);
3753 assert_eq!(
3754 store
3755 .list_glossary_candidates("ivan", "English", "Italian")
3756 .expect("candidates should list")[0]
3757 .source_count,
3758 7
3759 );
3760
3761 assert!(
3762 store
3763 .reject_glossary_candidate(candidates[0].id)
3764 .expect("candidate rejects")
3765 );
3766 let third = store
3767 .upsert_glossary_candidates(
3768 "ivan",
3769 "English",
3770 "Italian",
3771 &[NewGlossaryCandidate {
3772 source_text: "Ivan Ilych",
3773 category: GlossaryCategory::Other,
3774 source_count: 9,
3775 }],
3776 )
3777 .expect("rejected candidate is skipped");
3778 assert_eq!(third.skipped, 1);
3779 assert!(
3780 store
3781 .list_glossary_candidates("ivan", "English", "Italian")
3782 .expect("rejected candidates are not pending")
3783 .is_empty()
3784 );
3785
3786 let all = store
3787 .list_glossary_terms(GlossaryFilter {
3788 scope_kind: Some(GlossaryScopeKind::Book),
3789 scope_id: Some("ivan"),
3790 source_language: Some("English"),
3791 target_language: Some("Italian"),
3792 active_only: false,
3793 })
3794 .expect("terms should list");
3795 assert_eq!(all.len(), 1);
3796 assert_eq!(all[0].status, GlossaryStatus::Rejected);
3797 assert_eq!(all[0].source_count, 7);
3798
3799 let seeded = glossary_term(
3800 bookforge_core::GlossaryScopeKind::Book,
3801 Some("ivan"),
3802 "Aragorn",
3803 "Aragorn",
3804 );
3805 let mut accepted = glossary_term(
3806 bookforge_core::GlossaryScopeKind::Book,
3807 Some("ivan"),
3808 "Mount Doom",
3809 "Monte Fato",
3810 );
3811 accepted.status = GlossaryStatus::Accepted;
3812 store
3813 .upsert_glossary_terms(&[seeded, accepted])
3814 .expect("active terms should insert");
3815 let fourth = store
3816 .upsert_glossary_candidates(
3817 "ivan",
3818 "English",
3819 "Italian",
3820 &[
3821 NewGlossaryCandidate {
3822 source_text: "Aragorn",
3823 category: GlossaryCategory::Other,
3824 source_count: 12,
3825 },
3826 NewGlossaryCandidate {
3827 source_text: "Mount Doom",
3828 category: GlossaryCategory::Other,
3829 source_count: 11,
3830 },
3831 ],
3832 )
3833 .expect("active terms are skipped");
3834 assert_eq!(fourth.skipped, 2);
3835
3836 let _ = fs::remove_file(&db_path);
3837 }
3838
3839 #[test]
3840 fn glossary_terms_upsert_list_and_active_lookup() {
3841 let db_path = temp_path("glossary_terms.sqlite");
3842 let store = JobStore::open(&db_path).expect("store opens");
3843 let mut global = glossary_term(
3844 bookforge_core::GlossaryScopeKind::Global,
3845 None,
3846 "Aragorn",
3847 "Aragorn",
3848 );
3849 let book = glossary_term(
3850 bookforge_core::GlossaryScopeKind::Book,
3851 Some("fellowship"),
3852 "Aragorn",
3853 "Granpasso",
3854 );
3855
3856 assert_eq!(
3857 store
3858 .upsert_glossary_terms(&[global.clone(), book.clone()])
3859 .expect("terms upsert"),
3860 2
3861 );
3862 global.target_text = "Aragorn II".to_string();
3863 store
3864 .upsert_glossary_terms(&[global.clone()])
3865 .expect("global term updates instead of duplicating");
3866
3867 let all = store
3868 .list_glossary_terms(GlossaryFilter::default())
3869 .expect("terms list");
3870 assert_eq!(all.len(), 2);
3871
3872 let active = store
3873 .load_active_glossary_terms("English", "Italian", Some("fellowship"), Some("lotr"))
3874 .expect("active terms");
3875 assert_eq!(active.len(), 2);
3876 assert!(active.iter().any(|term| term.target_text == "Granpasso"));
3877 let active_by_target = store
3878 .load_active_glossary_terms_for_target("Italian", Some("fellowship"), Some("lotr"))
3879 .expect("target-only active terms");
3880 assert_eq!(active_by_target.len(), 2);
3881 assert!(
3882 active_by_target
3883 .iter()
3884 .any(|term| term.source_language == "English" && term.target_text == "Granpasso")
3885 );
3886
3887 let removed = store
3888 .clear_glossary_scope(bookforge_core::GlossaryScopeKind::Global, None)
3889 .expect("global clear");
3890 assert_eq!(removed, 1);
3891
3892 let _ = fs::remove_file(&db_path);
3893 }
3894
3895 #[test]
3896 fn create_job_persists_book_and_series_ids() {
3897 let db_path = temp_path("glossary_jobids.sqlite");
3898 let input_path = temp_path("input_jobids.epub");
3899 fs::write(&input_path, b"epub bytes").expect("input fixture");
3900 let store = JobStore::open(&db_path).expect("store opens");
3901 let job = store
3902 .create_job(CreateJob {
3903 input: &input_path,
3904 output: &temp_path("out_jobids.epub"),
3905 source_lang: Some("English"),
3906 target_lang: "Italian",
3907 provider: "mock",
3908 model: "mock",
3909 base_url: None,
3910 api_key_env: None,
3911 book_id: Some("fellowship"),
3912 series_id: Some("lord-of-the-rings"),
3913 })
3914 .expect("job created");
3915 assert_eq!(job.book_id.as_deref(), Some("fellowship"));
3916 assert_eq!(job.series_id.as_deref(), Some("lord-of-the-rings"));
3917 let loaded = store
3918 .get_job(&job.id)
3919 .expect("get_job ok")
3920 .expect("job present");
3921 assert_eq!(loaded.book_id.as_deref(), Some("fellowship"));
3922 assert_eq!(loaded.series_id.as_deref(), Some("lord-of-the-rings"));
3923 let _ = fs::remove_file(&db_path);
3924 let _ = fs::remove_file(input_path);
3925 }
3926
3927 fn glossary_term(
3928 scope_kind: bookforge_core::GlossaryScopeKind,
3929 scope_id: Option<&str>,
3930 source: &str,
3931 target: &str,
3932 ) -> bookforge_core::GlossaryTerm {
3933 bookforge_core::GlossaryTerm {
3934 id: None,
3935 scope_kind,
3936 scope_id: scope_id.map(ToOwned::to_owned),
3937 source_text: source.to_string(),
3938 target_text: target.to_string(),
3939 category: bookforge_core::GlossaryCategory::Person,
3940 notes: None,
3941 case_sensitive: true,
3942 always_active: false,
3943 status: bookforge_core::GlossaryStatus::UserSeeded,
3944 source_language: "English".to_string(),
3945 target_language: "Italian".to_string(),
3946 source_count: 0,
3947 }
3948 }
3949}