subx_cli/core/translation/request.rs
1//! Translation request/response data structures.
2//!
3//! These types describe the inputs and outputs of the translation engine
4//! independently of any particular AI provider. They are intentionally small
5//! and serializable so they can be reused by CLI helpers, integration tests,
6//! and structured logging.
7
8use std::collections::BTreeMap;
9
10use serde::{Deserialize, Serialize};
11
/// Mapping from a source-language term to the target-language term that
/// should be used for it.
///
/// Backed by a [`BTreeMap`], so entries are always visited in sorted key
/// order; that determinism keeps translation prompts stable across runs.
pub type TerminologyMap = BTreeMap<String, String>;
17
18/// A single terminology entry returned by the AI extraction pass.
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
20pub struct TerminologyEntry {
21 /// Source-language term as it appears in subtitle text.
22 pub source: String,
23 /// Translated term to use consistently in the target language.
24 pub target: String,
25}
26
27/// User-provided glossary entry, parsed from a UTF-8 glossary file.
28///
29/// Glossary entries take precedence over AI-generated terminology.
30#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
31pub struct GlossaryEntry {
32 /// Source-language term.
33 pub source: String,
34 /// Authoritative target-language translation.
35 pub target: String,
36}
37
38/// One cue in a translation request, identified by its UUIDv7 cue ID.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
40pub struct TranslationCue {
41 /// Stable UUIDv7 cue ID assigned in subtitle order.
42 pub id: String,
43 /// Visible cue text passed to the AI provider.
44 pub text: String,
45}
46
47/// Input parameters for translating a single subtitle file.
48#[derive(Debug, Clone)]
49pub struct TranslationRequest {
50 /// Required target language identifier (e.g. `zh-TW`, `ja`).
51 pub target_language: String,
52 /// Optional source language hint.
53 pub source_language: Option<String>,
54 /// Optional UTF-8 glossary text to include in prompts as authoritative
55 /// guidance.
56 pub glossary_text: Option<String>,
57 /// Optional inline domain/tone guidance.
58 pub context: Option<String>,
59 /// Pre-parsed user glossary entries (already authoritative).
60 pub glossary_entries: Vec<GlossaryEntry>,
61}
62
63/// A batch of cues sent in a single AI translation request.
64#[derive(Debug, Clone)]
65pub struct TranslationBatch {
66 /// Cues in this batch, in subtitle order.
67 pub cues: Vec<TranslationCue>,
68}
69
70/// Outcome metadata for a successful translation pass.
71#[derive(Debug, Clone, Default)]
72pub struct TranslationOutcome {
73 /// Effective terminology map used for the translation prompt
74 /// (glossary entries already merged on top of generated terms).
75 pub effective_terminology: TerminologyMap,
76 /// Number of cues that were translated.
77 pub translated_cue_count: usize,
78 /// Number of AI translation batches issued.
79 pub batch_count: usize,
80}
81
82/// Result returned by the engine after translating a single subtitle file.
83#[derive(Debug, Clone)]
84pub struct TranslationResult {
85 /// Translated subtitle in the format-agnostic representation. Use
86 /// [`crate::core::formats::manager::FormatManager`] to serialize it back
87 /// to a format-specific text representation.
88 pub subtitle: crate::core::formats::Subtitle,
89 /// Outcome metadata (terminology map, batch counts, etc.).
90 pub outcome: TranslationOutcome,
91}
92
93/// Merge a generated terminology map with user glossary entries.
94///
95/// User glossary entries override generated entries with the same source
96/// term, matching the policy in the OpenSpec change.
97pub fn merge_terminology(generated: TerminologyMap, glossary: &[GlossaryEntry]) -> TerminologyMap {
98 let mut merged = generated;
99 for entry in glossary {
100 let source = entry.source.trim().to_string();
101 let target = entry.target.trim().to_string();
102 if source.is_empty() || target.is_empty() {
103 continue;
104 }
105 merged.insert(source, target);
106 }
107 merged
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a [`TerminologyMap`] from borrowed `(source, target)` pairs.
    fn terminology(pairs: &[(&str, &str)]) -> TerminologyMap {
        pairs
            .iter()
            .map(|&(source, target)| (source.to_owned(), target.to_owned()))
            .collect()
    }

    /// Build a [`GlossaryEntry`] from borrowed strings.
    fn glossary_entry(source: &str, target: &str) -> GlossaryEntry {
        GlossaryEntry {
            source: source.to_owned(),
            target: target.to_owned(),
        }
    }

    /// A glossary entry replaces the generated entry for the same source
    /// term and leaves unrelated generated entries alone.
    #[test]
    fn glossary_overrides_generated_terms() {
        let generated = terminology(&[("Alice", "愛麗絲"), ("Bob", "鮑伯")]);
        let glossary = [glossary_entry("Alice", "艾莉絲")];

        let merged = merge_terminology(generated, &glossary);

        assert_eq!(merged.get("Alice").map(String::as_str), Some("艾莉絲"));
        assert_eq!(merged.get("Bob").map(String::as_str), Some("鮑伯"));
    }

    /// A glossary entry whose source is only whitespace adds nothing.
    #[test]
    fn empty_glossary_entries_are_ignored() {
        let generated = terminology(&[("Alice", "愛麗絲")]);
        let glossary = [glossary_entry(" ", "x")];

        let merged = merge_terminology(generated, &glossary);

        assert_eq!(merged.len(), 1);
    }
}
140}