use std::collections::BTreeMap;
use crate::domain::ThinkingMode;
use crate::error::TranslatorError;
use crate::providers::{TranslatedItem, TranslationBatch};
/// Builds the chat prompts for one translation batch.
///
/// Returns `(system_prompt, user_prompt)`:
///
/// * the system prompt holds the fixed output-format rules, an optional
///   sentence selected by `thinking_mode`, and any caller-supplied
///   `batch.system_prompt` under an "Additional instructions" heading;
/// * the user prompt names the language pair (falling back to
///   `auto-detect` when no source language is set), restates the rules, and
///   lists every cue as `<number>: <escaped text>` with numbering starting at 1.
pub fn build_messages(batch: &TranslationBatch, thinking_mode: ThinkingMode) -> (String, String) {
    // Sentence appended to the base instructions; `Auto` contributes nothing.
    let thinking_clause = match thinking_mode {
        ThinkingMode::Off => {
            " Do not expose reasoning, chain-of-thought, or analysis. Return only the final numbered translations."
        }
        ThinkingMode::On => {
            " Thinking mode is enabled when the provider supports it, but the visible output must still contain only the final numbered translations."
        }
        ThinkingMode::Auto => "",
    };

    let mut system_prompt = String::from(
        "You translate subtitle cues. Reply with exactly one line per input in the format `1: translated text`. Do not return JSON, code fences, explanations, or extra lines. Keep the same order and count as the input. Preserve subtitle markup such as ASS override tags, VTT tags, HTML tags, and backslash escapes. Represent real line breaks inside one cue as the two characters \\n.",
    );
    system_prompt.push_str(thinking_clause);
    if let Some(extra) = batch.system_prompt.as_deref() {
        system_prompt.push_str("\n\nAdditional instructions:\n");
        system_prompt.push_str(extra);
    }

    let source_language = batch.source_language.as_deref().unwrap_or("auto-detect");
    let mut user_prompt = format!(
        "Translate the following subtitle cues from {source_language} to {}.\n\nRules:\n- Output exactly one numbered line per input in the form `<number>: <translation>`\n- Keep numbering unchanged\n- Preserve subtitle tags and backslash escapes\n- Use the literal sequence \\n for line breaks inside one cue\n- Do not merge, split, or omit cues\n\nInput:\n",
        batch.target_language
    );
    // One prompt line per cue; the text is escaped so that every cue stays on
    // a single line.
    user_prompt.extend(batch.items.iter().enumerate().map(|(position, item)| {
        format!("{}: {}\n", position + 1, encode_prompt_text(&item.text))
    }));

    (system_prompt, user_prompt)
}
pub fn parse_numbered_response(
batch: &TranslationBatch,
content: &str,
) -> Result<Vec<TranslatedItem>, TranslatorError> {
let mut parsed = BTreeMap::new();
for line in content.lines() {
let trimmed = line.trim();
if trimmed.is_empty() || trimmed.starts_with("```") {
continue;
}
let Some((index, value)) = parse_numbered_line(trimmed) else {
continue;
};
if index == 0 || index > batch.items.len() {
return Err(TranslatorError::ProviderProtocol(format!(
"provider returned an out-of-range cue number: {index}"
)));
}
if parsed.insert(index, decode_model_text(value)).is_some() {
return Err(TranslatorError::ProviderProtocol(format!(
"provider returned duplicate translations for cue number {index}"
)));
}
}
if parsed.len() != batch.items.len() {
return Err(TranslatorError::ProviderProtocol(format!(
"provider returned {} numbered translations for {} requested cues",
parsed.len(),
batch.items.len()
)));
}
let mut translated = Vec::with_capacity(batch.items.len());
for (index, item) in batch.items.iter().enumerate() {
let number = index + 1;
let text = parsed.remove(&number).ok_or_else(|| {
TranslatorError::ProviderProtocol(format!(
"provider did not return a translation for cue number {number}"
))
})?;
translated.push(TranslatedItem {
id: item.id.clone(),
text,
});
}
Ok(translated)
}
/// Splits a reply line of the form `<number>: <text>` into the cue number and
/// the text that follows the separator.
///
/// The number must be a run of ASCII digits at the very start of `line`,
/// immediately followed by a colon. Both the ASCII colon `:` and the fullwidth
/// colon `：` (U+FF1A) are accepted, because models translating into CJK
/// languages frequently swap the separator for its fullwidth form. Whitespace
/// after the separator is removed from the returned text.
///
/// Returns `None` when the line does not start with digits, when the digits do
/// not fit in a `usize`, or when no colon follows them.
fn parse_numbered_line(line: &str) -> Option<(usize, &str)> {
    // ASCII digits are one byte each, so the byte offset of the first
    // non-digit character is also the length of the leading number.
    let digits_end = line
        .find(|character: char| !character.is_ascii_digit())
        .unwrap_or(line.len());
    if digits_end == 0 {
        return None;
    }

    let (digits, remainder) = line.split_at(digits_end);
    let number = digits.parse::<usize>().ok()?;
    // U+FF1A is the fullwidth colon.
    let text = remainder.strip_prefix([':', '\u{FF1A}'])?;
    Some((number, text.trim_start()))
}
/// Escapes cue text so that it occupies exactly one line of the prompt.
///
/// Every backslash is doubled and every line feed becomes the two characters
/// `\n`; all other characters are copied through unchanged.
fn encode_prompt_text(text: &str) -> String {
    text.chars()
        .fold(String::with_capacity(text.len()), |mut escaped, symbol| {
            match symbol {
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                other => escaped.push(other),
            }
            escaped
        })
}
/// Reverses the prompt escaping on text returned by the model.
///
/// `\n` becomes a line feed and `\\` becomes a single backslash. A backslash
/// followed by any other character is kept verbatim together with that
/// character, and a lone backslash at the end of the input is kept as well.
fn decode_model_text(text: &str) -> String {
    let mut output = String::with_capacity(text.len());
    // True while the previous character was a backslash still waiting for the
    // character that decides how it is interpreted.
    let mut escape_pending = false;

    for symbol in text.chars() {
        if escape_pending {
            escape_pending = false;
            match symbol {
                'n' => output.push('\n'),
                '\\' => output.push('\\'),
                other => {
                    output.push('\\');
                    output.push(other);
                }
            }
        } else if symbol == '\\' {
            escape_pending = true;
        } else {
            output.push(symbol);
        }
    }

    // The input ended right after a backslash: emit it unchanged.
    if escape_pending {
        output.push('\\');
    }
    output
}
#[cfg(test)]
mod tests {
    use super::{build_messages, parse_numbered_response};
    use crate::domain::ThinkingMode;
    use crate::providers::{TranslationBatch, TranslationBatchItem};

    /// Builds one batch item from borrowed strings.
    fn cue(id: &str, text: &str) -> TranslationBatchItem {
        TranslationBatchItem {
            id: id.to_owned(),
            text: text.to_owned(),
        }
    }

    /// Two-cue English-to-Portuguese batch; the second cue contains a real
    /// line break and a backslash tag so escaping is exercised.
    fn sample_batch() -> TranslationBatch {
        TranslationBatch {
            source_language: Some("English".to_owned()),
            target_language: "Portuguese".to_owned(),
            system_prompt: None,
            items: vec![
                cue("cue-1", "Hello there"),
                cue("cue-2", "First line\nSecond line with \\N tag"),
            ],
        }
    }

    #[test]
    fn builds_compact_numbered_prompt_without_json_payload() {
        let batch = sample_batch();
        let (system, user) = build_messages(&batch, ThinkingMode::Off);

        // System prompt carries the format rules and the thinking-off clause.
        assert!(system.contains("Reply with exactly one line per input"));
        assert!(system.contains("Do not expose reasoning"));

        // Cues are numbered from 1 and escaped onto a single line each.
        assert!(user.contains("1: Hello there"));
        assert!(user.contains("2: First line\\nSecond line with \\\\N tag"));

        // No JSON envelope is present in the user prompt.
        assert!(!user.contains("\"task\""));
        assert!(!user.contains("{\"translations\""));
    }

    #[test]
    fn parses_numbered_response_and_restores_escaped_newlines() {
        let batch = sample_batch();
        let reply = "1: Olá\n2: Primeira linha\\nSegunda linha com \\\\N tag";

        let items = parse_numbered_response(&batch, reply).expect("numbered response should parse");

        assert_eq!(items[0].id, "cue-1");
        assert_eq!(items[0].text, "Olá");
        assert_eq!(items[1].id, "cue-2");
        assert_eq!(items[1].text, "Primeira linha\nSegunda linha com \\N tag");
    }

    #[test]
    fn ignores_fences_and_requires_all_numbers() {
        let batch = sample_batch();
        let reply = "```\n1: Olá\n```";

        let message = parse_numbered_response(&batch, reply)
            .expect_err("missing numbered line should fail")
            .to_string();

        assert!(message.contains("provider returned 1 numbered translations for 2 requested cues"));
    }
}