Skip to main content

subx_cli/core/translation/
request.rs

1//! Translation request/response data structures.
2//!
3//! These types describe the inputs and outputs of the translation engine
4//! independently of any particular AI provider. They are intentionally small
5//! and serializable so they can be reused by CLI helpers, integration tests,
6//! and structured logging.
7
8use std::collections::BTreeMap;
9
10use serde::{Deserialize, Serialize};
11
/// The effective terminology in force for a translation: each key is a
/// source-language term and its value is the target-language term to use.
///
/// Backed by a [`BTreeMap`] so that entries are always visited in a
/// deterministic order, which keeps translation prompts stable across runs.
pub type TerminologyMap = BTreeMap<String, String>;
17
18/// A single terminology entry returned by the AI extraction pass.
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
20pub struct TerminologyEntry {
21    /// Source-language term as it appears in subtitle text.
22    pub source: String,
23    /// Translated term to use consistently in the target language.
24    pub target: String,
25}
26
27/// User-provided glossary entry, parsed from a UTF-8 glossary file.
28///
29/// Glossary entries take precedence over AI-generated terminology.
30#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
31pub struct GlossaryEntry {
32    /// Source-language term.
33    pub source: String,
34    /// Authoritative target-language translation.
35    pub target: String,
36}
37
38/// One cue in a translation request, identified by its UUIDv7 cue ID.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
40pub struct TranslationCue {
41    /// Stable UUIDv7 cue ID assigned in subtitle order.
42    pub id: String,
43    /// Visible cue text passed to the AI provider.
44    pub text: String,
45}
46
47/// Input parameters for translating a single subtitle file.
48#[derive(Debug, Clone)]
49pub struct TranslationRequest {
50    /// Required target language identifier (e.g. `zh-TW`, `ja`).
51    pub target_language: String,
52    /// Optional source language hint.
53    pub source_language: Option<String>,
54    /// Optional UTF-8 glossary text to include in prompts as authoritative
55    /// guidance.
56    pub glossary_text: Option<String>,
57    /// Optional inline domain/tone guidance.
58    pub context: Option<String>,
59    /// Pre-parsed user glossary entries (already authoritative).
60    pub glossary_entries: Vec<GlossaryEntry>,
61}
62
63/// A batch of cues sent in a single AI translation request.
64#[derive(Debug, Clone)]
65pub struct TranslationBatch {
66    /// Cues in this batch, in subtitle order.
67    pub cues: Vec<TranslationCue>,
68}
69
70/// Outcome metadata for a successful translation pass.
71#[derive(Debug, Clone, Default)]
72pub struct TranslationOutcome {
73    /// Effective terminology map used for the translation prompt
74    /// (glossary entries already merged on top of generated terms).
75    pub effective_terminology: TerminologyMap,
76    /// Number of cues that were translated.
77    pub translated_cue_count: usize,
78    /// Number of AI translation batches issued.
79    pub batch_count: usize,
80}
81
82/// Result returned by the engine after translating a single subtitle file.
83#[derive(Debug, Clone)]
84pub struct TranslationResult {
85    /// Translated subtitle in the format-agnostic representation. Use
86    /// [`crate::core::formats::manager::FormatManager`] to serialize it back
87    /// to a format-specific text representation.
88    pub subtitle: crate::core::formats::Subtitle,
89    /// Outcome metadata (terminology map, batch counts, etc.).
90    pub outcome: TranslationOutcome,
91}
92
93/// Merge a generated terminology map with user glossary entries.
94///
95/// User glossary entries override generated entries with the same source
96/// term, matching the policy in the OpenSpec change.
97pub fn merge_terminology(generated: TerminologyMap, glossary: &[GlossaryEntry]) -> TerminologyMap {
98    let mut merged = generated;
99    for entry in glossary {
100        let source = entry.source.trim().to_string();
101        let target = entry.target.trim().to_string();
102        if source.is_empty() || target.is_empty() {
103            continue;
104        }
105        merged.insert(source, target);
106    }
107    merged
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a terminology map from borrowed `(source, target)` pairs.
    fn terminology(pairs: &[(&str, &str)]) -> TerminologyMap {
        pairs
            .iter()
            .map(|&(source, target)| (source.to_string(), target.to_string()))
            .collect()
    }

    /// Build a glossary entry from borrowed strings.
    fn glossary_entry(source: &str, target: &str) -> GlossaryEntry {
        GlossaryEntry {
            source: source.to_string(),
            target: target.to_string(),
        }
    }

    /// A glossary entry replaces the generated translation for the same
    /// source term, while unrelated generated terms are left as they were.
    #[test]
    fn glossary_overrides_generated_terms() {
        let generated = terminology(&[("Alice", "愛麗絲"), ("Bob", "鮑伯")]);
        let glossary = [glossary_entry("Alice", "艾莉絲")];

        let merged = merge_terminology(generated, &glossary);

        assert_eq!(merged["Alice"], "艾莉絲");
        assert_eq!(merged["Bob"], "鮑伯");
    }

    /// A glossary entry whose source is only whitespace adds nothing to the
    /// merged map.
    #[test]
    fn empty_glossary_entries_are_ignored() {
        let generated = terminology(&[("Alice", "愛麗絲")]);
        let glossary = [glossary_entry("  ", "x")];

        let merged = merge_terminology(generated, &glossary);

        assert_eq!(merged.len(), 1);
    }
}