use super::{StreamResult, chat_stream};
use crate::config::LlmProviderConfig;
use crate::monitor::LogStatus;
/// Prompt handed to `chat_stream` by `translate_and_summarize` on every attempt,
/// ahead of the per-article prompt.
///
/// It instructs the model to emit the translated title, the translated content
/// and a one-sentence summary, each wrapped in its own pair of marker lines.
/// The marker strings listed here are exactly the ones `parse_llm_output`
/// matches, so the two must be kept in sync.
const SYSTEM_PROMPT: &str = r#"Translate the article to the target language. Then write a one-sentence summary — just what the article is about, no filler.
Output format (each tag on its own line):
|||TITLE|||
<translated title>
|||END_TITLE|||
|||CONTENT|||
<translated content>
|||END_CONTENT|||
|||SUMMARY|||
<summary, one sentence, under 30 words>
|||END_SUMMARY|||
Keep HTML tags and code blocks intact."#;
/// Pieces of an LLM reply recovered by `parse_llm_output`.
///
/// Each field stays `None` when the reply contains no lines for that section.
/// Stored text is the section's lines joined with `\n` and trimmed at both ends.
#[derive(Debug, Default)]
pub struct ParsedArticle {
/// Text collected from the `|||TITLE|||` section.
pub title: Option<String>,
/// Text collected from the `|||CONTENT|||` section.
pub content: Option<String>,
/// Text collected from the `|||SUMMARY|||` section.
pub summary: Option<String>,
}
/// Section of the tagged reply that `parse_llm_output` is currently collecting
/// lines for; `flush_buffer` uses it to pick the `ParsedArticle` field to fill.
enum Section {
/// Entered on a `|||TITLE|||` marker line.
Title,
/// Entered on a `|||CONTENT|||` marker line.
Content,
/// Entered on a `|||SUMMARY|||` marker line.
Summary,
}
/// Translates an article and summarizes it with a single streamed LLM call,
/// retrying under the control of `retry_ctx`.
///
/// The user prompt is built from `target_lang`, `title` and `content`; when
/// `config.prompt_append` is set and non-empty it is added on a new line at the
/// end. Each attempt:
///
/// 1. calls `retry_ctx.prepare_retry()` and reads the current log id,
/// 2. streams the completion via `chat_stream`, pushing every streamed chunk
///    into the monitor log entry for this feed/log id,
/// 3. parses the full reply with `parse_llm_output`.
///
/// On success the usage is reported through `retry_ctx.mark_success` and the
/// raw stream result is returned together with the parsed article.
///
/// # Errors
///
/// A failed stream or an unparsable reply is recorded with
/// `retry_ctx.record_failure`. Once `retry_ctx.should_retry()` returns `false`
/// the error obtained from `retry_ctx.take_last_error()` is returned; otherwise
/// `retry_ctx.wait()` is awaited and another attempt is made.
///
/// # Panics
///
/// Panics if `retry_ctx.current_log_id()` is `None` after `prepare_retry()`, or
/// if `take_last_error()` is `None` right after a failure was recorded.
pub async fn translate_and_summarize(
    config: &LlmProviderConfig,
    title: &str,
    content: &str,
    target_lang: &str,
    retry_ctx: &mut super::retry::RetryContext,
) -> Result<(StreamResult, ParsedArticle), crate::error::AppError> {
    let base_prompt = format!(
        "Target language: {}\n\nTitle: {}\n\nContent:\n{}",
        target_lang, title, content
    );
    let suffix = config.prompt_append.clone().unwrap_or_default();
    let user_prompt = if !suffix.is_empty() {
        format!("{}\n{}", base_prompt, suffix)
    } else {
        base_prompt
    };

    loop {
        retry_ctx.prepare_retry().await;

        // Owned handles for the streaming callback, which must not borrow `retry_ctx`.
        let attempt_log_id = retry_ctx.current_log_id().unwrap().to_string();
        let shared_monitor = retry_ctx.monitor.clone();
        let attempt_feed = retry_ctx.feed_name.clone();

        // Called for every streamed chunk: the chunk is appended to the log entry
        // from a freshly spawned task so the callback itself never awaits.
        // NOTE(review): nothing visible here orders these tasks relative to each
        // other or to `mark_success` / `record_failure` below — confirm that
        // late or reordered updates are acceptable.
        let on_chunk = move |chunk: &str| {
            let task_monitor = shared_monitor.clone();
            let task_feed = attempt_feed.clone();
            let task_log_id = attempt_log_id.clone();
            let piece = chunk.to_string();
            tokio::task::spawn(async move {
                task_monitor
                    .write()
                    .await
                    .update_log(&task_feed, &task_log_id, |log| {
                        log.streamed_text.push_str(&piece);
                        log.status = LogStatus::Streaming {
                            tokens: log.streamed_text.clone(),
                        };
                    });
            });
        };

        // Success returns from inside the match; both failure kinds are recorded
        // in their own arm and then share the retry decision that follows.
        match chat_stream(config, SYSTEM_PROMPT, &user_prompt, on_chunk).await {
            Ok(stream) => match parse_llm_output(&stream.text) {
                Ok(article) => {
                    retry_ctx.mark_success(&stream.usage).await;
                    return Ok((stream, article));
                }
                Err(parse_err) => {
                    retry_ctx.record_failure(parse_err).await;
                }
            },
            Err(stream_err) => {
                retry_ctx.record_failure(stream_err).await;
            }
        }

        if !retry_ctx.should_retry() {
            return Err(retry_ctx.take_last_error().unwrap());
        }
        retry_ctx.wait().await;
    }
}
fn parse_llm_output(text: &str) -> Result<ParsedArticle, crate::error::AppError> {
let mut result = ParsedArticle::default();
let mut current_section: Option<Section> = None;
let mut buffer = Vec::new();
for line in text.lines() {
match line.trim() {
"|||TITLE|||" => {
flush_buffer(&mut buffer, &mut current_section, &mut result);
current_section = Some(Section::Title);
}
"|||END_TITLE|||" => {
flush_buffer(&mut buffer, &mut current_section, &mut result);
current_section = None;
}
"|||CONTENT|||" => {
flush_buffer(&mut buffer, &mut current_section, &mut result);
current_section = Some(Section::Content);
}
"|||END_CONTENT|||" => {
flush_buffer(&mut buffer, &mut current_section, &mut result);
current_section = None;
}
"|||SUMMARY|||" => {
flush_buffer(&mut buffer, &mut current_section, &mut result);
current_section = Some(Section::Summary);
}
"|||END_SUMMARY|||" => {
flush_buffer(&mut buffer, &mut current_section, &mut result);
current_section = None;
}
_ => {
if current_section.is_some() {
buffer.push(line.to_string());
}
}
}
}
flush_buffer(&mut buffer, &mut current_section, &mut result);
if result.title.is_none() && result.content.is_none() {
return Err(crate::error::AppError::Llm(
"Failed to parse LLM output: no title or content found".into(),
));
}
Ok(result)
}
/// Moves the lines collected in `buffer` into the field of `result` selected by
/// `section`, then empties `buffer`.
///
/// The lines are joined with `\n` and trimmed at both ends. Nothing is stored
/// when `buffer` is empty, when no section is open, or when the joined text is
/// empty after trimming. The last rule keeps a section that contains only blank
/// lines from being recorded as `Some("")`, which would otherwise count as a
/// successfully parsed title or content and could overwrite a value recovered
/// earlier.
///
/// `section` is only read; it is taken by `&mut` to match the caller's binding.
fn flush_buffer(
    buffer: &mut Vec<String>,
    section: &mut Option<Section>,
    result: &mut ParsedArticle,
) {
    if buffer.is_empty() {
        return;
    }

    let joined = buffer.join("\n");
    buffer.clear();

    let text = joined.trim();
    if text.is_empty() {
        // Whitespace-only section: treat it as absent rather than as Some("").
        return;
    }

    let slot = match section {
        Some(Section::Title) => &mut result.title,
        Some(Section::Content) => &mut result.content,
        Some(Section::Summary) => &mut result.summary,
        None => return,
    };
    *slot = Some(text.to_string());
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a reply from explicit lines joined with `\n`.
    ///
    /// Fixtures are spelled out line by line instead of as multi-line string
    /// literals so that blank lines and leading/trailing padding are part of
    /// the test data itself and cannot be lost to source formatting.
    fn reply(lines: &[&str]) -> String {
        lines.join("\n")
    }

    /// All three sections are recovered; a blank line inside content is kept.
    #[test]
    fn parse_all_sections() {
        let input = reply(&[
            "|||TITLE|||",
            "Translated Title",
            "|||END_TITLE|||",
            "|||CONTENT|||",
            "First paragraph.",
            "",
            "Second paragraph.",
            "|||END_CONTENT|||",
            "|||SUMMARY|||",
            "A short summary.",
            "|||END_SUMMARY|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.title.as_deref(), Some("Translated Title"));
        assert_eq!(
            result.content.as_deref(),
            Some("First paragraph.\n\nSecond paragraph.")
        );
        assert_eq!(result.summary.as_deref(), Some("A short summary."));
    }

    /// A reply without a summary section still parses; summary stays `None`.
    #[test]
    fn parse_title_and_content_only() {
        let input = reply(&[
            "|||TITLE|||",
            "Just a Title",
            "|||END_TITLE|||",
            "|||CONTENT|||",
            "Just content.",
            "|||END_CONTENT|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.title.as_deref(), Some("Just a Title"));
        assert_eq!(result.content.as_deref(), Some("Just content."));
        assert!(result.summary.is_none());
    }

    /// A summary alone is not enough: title or content is required.
    #[test]
    fn missing_title_and_content_is_error() {
        let input = reply(&[
            "|||SUMMARY|||",
            "A summary only.",
            "|||END_SUMMARY|||",
        ]);
        assert!(parse_llm_output(&input).is_err());
    }

    #[test]
    fn empty_input_is_error() {
        assert!(parse_llm_output("").is_err());
    }

    /// Consecutive content lines are joined with single newlines.
    #[test]
    fn multiline_content() {
        let input = reply(&[
            "|||TITLE|||",
            "T",
            "|||END_TITLE|||",
            "|||CONTENT|||",
            "Line 1",
            "Line 2",
            "Line 3",
            "|||END_CONTENT|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.content.as_deref(), Some("Line 1\nLine 2\nLine 3"));
    }

    /// Padding around marker lines and around section text is stripped.
    #[test]
    fn whitespace_is_trimmed() {
        let input = reply(&[
            "  |||TITLE|||  ",
            "   Padded Title   ",
            "\t|||END_TITLE|||",
            "|||CONTENT|||   ",
            "   Padded content.   ",
            "  |||END_CONTENT|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.title.as_deref(), Some("Padded Title"));
        assert_eq!(result.content.as_deref(), Some("Padded content."));
    }

    /// Text before the first marker does not leak into any section.
    #[test]
    fn leading_junk_before_tags_is_ignored() {
        let input = reply(&[
            "Here is the translation:",
            "|||TITLE|||",
            "Real Title",
            "|||END_TITLE|||",
            "|||CONTENT|||",
            "Real content.",
            "|||END_CONTENT|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.title.as_deref(), Some("Real Title"));
        assert_eq!(result.content.as_deref(), Some("Real content."));
    }

    #[test]
    fn html_tags_preserved_in_content() {
        let input = reply(&[
            "|||TITLE|||",
            "T",
            "|||END_TITLE|||",
            "|||CONTENT|||",
            "<p>Hello <b>world</b></p>",
            "|||END_CONTENT|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.content.as_deref(), Some("<p>Hello <b>world</b></p>"));
    }

    /// Sections are recognised by their markers, not by their position.
    #[test]
    fn out_of_order_sections() {
        let input = reply(&[
            "|||SUMMARY|||",
            "Sum.",
            "|||END_SUMMARY|||",
            "|||CONTENT|||",
            "Body.",
            "|||END_CONTENT|||",
            "|||TITLE|||",
            "Title.",
            "|||END_TITLE|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert_eq!(result.title.as_deref(), Some("Title."));
        assert_eq!(result.content.as_deref(), Some("Body."));
        assert_eq!(result.summary.as_deref(), Some("Sum."));
    }

    /// Content without a title is accepted; title stays `None`.
    #[test]
    fn content_only_is_valid() {
        let input = reply(&[
            "|||CONTENT|||",
            "Just content, no title tag.",
            "|||END_CONTENT|||",
        ]);
        let result = parse_llm_output(&input).unwrap();
        assert!(result.title.is_none());
        assert_eq!(
            result.content.as_deref(),
            Some("Just content, no title tag.")
        );
    }
}