shinkai-translator 0.1.3

CLI tool for translating video subtitles with LLMs through OpenAI-compatible APIs, with native PGS OCR
pub mod ass;
pub mod ass_classifier;
pub mod srt;
pub mod vtt;

use std::path::Path;

use crate::domain::{
    AssClassificationPolicy, SubtitleClassificationReport, SubtitleDocument, SubtitleFormat,
};
use crate::error::TranslatorError;

/// Parses subtitle text into a [`SubtitleDocument`].
///
/// The format is resolved in priority order:
/// 1. the explicit `format_hint`, when given;
/// 2. detection from `source_name` (via `SubtitleFormat::detect_from_path`);
/// 3. detection from the text itself (via `SubtitleFormat::detect_from_content`).
///
/// # Errors
///
/// Returns [`TranslatorError::UnsupportedFormat`] when none of the three steps
/// yields a format. The payload is the extension of `source_name`, or the
/// literal `"unknown"` when there is no path, no extension, or the extension
/// is not valid UTF-8. Errors from the format-specific parser are passed
/// through unchanged.
pub fn parse_subtitle(
    source: &str,
    format_hint: Option<SubtitleFormat>,
    source_name: Option<&Path>,
) -> Result<SubtitleDocument, TranslatorError> {
    // Each fallback is lazy: later detectors only run if earlier ones gave nothing.
    let resolved = format_hint
        .or_else(|| source_name.and_then(SubtitleFormat::detect_from_path))
        .or_else(|| SubtitleFormat::detect_from_content(source));

    let format = match resolved {
        Some(format) => format,
        None => {
            // Report the offending extension so the caller can see what was rejected.
            let extension = source_name
                .and_then(|path| path.extension())
                .and_then(|ext| ext.to_str())
                .unwrap_or("unknown");
            return Err(TranslatorError::UnsupportedFormat(extension.to_owned()));
        }
    };

    match format {
        SubtitleFormat::Ass => ass::parse(source),
        SubtitleFormat::Vtt => vtt::parse(source),
        SubtitleFormat::Srt => srt::parse(source),
    }
}

/// Serializes `document` back to text using the renderer that matches the
/// document's own format.
///
/// # Errors
///
/// Passes through whatever error the format-specific renderer returns.
pub fn render_subtitle(document: &SubtitleDocument) -> Result<String, TranslatorError> {
    let format = document.format();
    match format {
        SubtitleFormat::Ass => ass::render(document),
        SubtitleFormat::Vtt => vtt::render(document),
        SubtitleFormat::Srt => srt::render(document),
    }
}

/// Runs ASS classification on `document` when it is an ASS document.
///
/// For ASS input, returns the document produced by
/// `ass_classifier::classify_document` together with its report. For every
/// other format, returns a clone of `document` and `None`.
pub fn classify_document(
    document: &SubtitleDocument,
    policy: &AssClassificationPolicy,
) -> (SubtitleDocument, Option<SubtitleClassificationReport>) {
    // Only ASS documents are classified; anything else passes through untouched.
    if !matches!(document.format(), SubtitleFormat::Ass) {
        return (document.clone(), None);
    }

    let (classified, report) = ass_classifier::classify_document(document, policy);
    (classified, Some(report))
}

/// Returns `source` with every leading U+FEFF (byte-order mark) removed and
/// all line endings converted to `\n`.
///
/// Both `\r\n` pairs and lone `\r` characters become a single `\n`.
/// U+FEFF characters that are not at the very start are left in place.
pub(crate) fn normalize_newlines(source: &str) -> String {
    let body = source.trim_start_matches('\u{feff}');
    let mut normalized = String::with_capacity(body.len());
    let mut chars = body.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A CR immediately followed by LF is one line ending: swallow the LF.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }

    normalized
}

/// Splits `source` into blocks of consecutive non-blank lines.
///
/// A line counts as blank when it is empty after trimming whitespace. Runs of
/// blank lines act as a single separator and never produce empty blocks. The
/// lines of each block are joined with `\n`; leading and trailing whitespace
/// inside non-blank lines is kept as-is.
pub(crate) fn split_blocks(source: &str) -> Vec<String> {
    let lines: Vec<&str> = source.lines().collect();

    lines
        .split(|line| line.trim().is_empty())
        // Adjacent blank lines (or blanks at either end) yield empty groups.
        .filter(|group| !group.is_empty())
        .map(|group| group.join("\n"))
        .collect()
}

/// Splits a `start --> end [settings]` timing line into its parts.
///
/// Returns `(start, end, settings)` where:
/// - `start` is the trimmed text before the first `-->`;
/// - `end` is the first whitespace-delimited token after the arrow;
/// - `settings` is whatever follows `end`, trimmed, or `None` when nothing
///   but whitespace follows.
///
/// The timestamps themselves are not validated here; they are returned as
/// raw strings.
///
/// # Errors
///
/// Returns [`TranslatorError::Parse`] when `line` contains no `-->`.
pub(crate) fn parse_arrow_timing_line(
    line: &str,
) -> Result<(String, String, Option<String>), TranslatorError> {
    let (start, tail) = match line.split_once("-->") {
        Some(parts) => parts,
        None => {
            return Err(TranslatorError::Parse(format!(
                "expected timing line with '-->': {line}"
            )));
        }
    };

    // After trimming, `tail` starts with the end timestamp (if there is one).
    let tail = tail.trim();
    let (end, settings) = match tail.find(char::is_whitespace) {
        Some(boundary) => {
            let (end, extra) = tail.split_at(boundary);
            // `extra` still begins with the separating whitespace; trim removes it.
            let extra = extra.trim();
            (end, (!extra.is_empty()).then(|| extra.to_owned()))
        }
        None => (tail, None),
    };

    Ok((start.trim().to_owned(), end.to_owned(), settings))
}

/// Ensures `rendered` ends with exactly what it had plus, if missing, a final
/// `\n`.
///
/// Text that already ends in `\n` is returned unchanged; otherwise a single
/// `\n` is appended (so an empty string becomes `"\n"`).
pub(crate) fn push_terminal_newline(mut rendered: String) -> String {
    // `\n` is a single ASCII byte, so inspecting the last byte is sufficient.
    let has_newline = rendered.as_bytes().last() == Some(&b'\n');
    if !has_newline {
        rendered += "\n";
    }
    rendered
}