use std::fs;
use std::path::{Path, PathBuf};
use chrono::{Datelike, Utc};
use clap::{Parser, Subcommand};
use crate::converters::*;
use crate::processors::*;
use crate::subripfile::SubRipFile;
// Top-level command-line interface, parsed with clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros
// turn `///` doc comments into help text, so adding them would change the
// program's `--help` output.
#[derive(Parser)]
#[command(name = "captionrs")]
#[command(about = "CaptionRS - Advanced Subtitle Converter and Processor")]
// The version string is taken from the crate manifest at compile time.
#[command(version = env!("CARGO_PKG_VERSION"))]
pub struct Cli {
// `-d` / `--debug`: `run` uses this to pick "debug" instead of "info" as the
// default log filter.
#[arg(short = 'd', long = "debug", help = "Enable debug level logs")]
pub debug: bool,
// The subcommand to execute (see `Commands`).
#[command(subcommand)]
pub command: Commands,
}
// Subcommands accepted by the CLI.
//
// NOTE: plain `//` comments are used on purpose; clap's derive macros would
// turn `///` doc comments into help text and change the `--help` output.
#[derive(Subcommand)]
pub enum Commands {
// Prints the version banner and repository URL.
Version,
// Detects the format of `file`, converts it to SubRip and saves the result.
Convert {
// Input subtitle file; its raw bytes are used for format detection.
file: PathBuf,
// Optional output path. `run_sync` falls back to `file` and always replaces
// the extension with `srt`.
#[arg(short = 'o', long = "out")]
output: Option<PathBuf>,
// Optional language, forwarded to the post-processing fixer.
#[arg(short = 'l', long = "language")]
language: Option<String>,
// Encoding name handed to `SubRipFile::save`.
#[arg(short = 'e', long = "encoding", default_value = "utf-8")]
encoding: String,
// Skips the `CommonIssuesFixer` pass after conversion.
#[arg(short = 'n', long = "no-post-processing")]
no_post_processing: bool,
// When set, the fixer is configured with `remove_gaps = false`.
#[arg(short = 'g', long = "keep-short-gaps")]
keep_short_gaps: bool,
},
// Runs one of the `ProcessorCommands` on an existing subtitle file.
Process {
// Input subtitle file handed to the selected processor.
file: PathBuf,
// Optional output path. When omitted, `run_sync` derives one from `file`
// by appending a processor-specific suffix to the file stem.
#[arg(short = 'o', long = "out")]
output: Option<PathBuf>,
// Optional language, forwarded to the processors.
#[arg(short = 'l', long = "language")]
language: Option<String>,
// Encoding name handed to `SubRipFile::save`.
#[arg(short = 'e', long = "encoding", default_value = "utf-8")]
encoding: String,
// Only consulted by `strip-sdh`: skips the follow-up `CommonIssuesFixer` pass.
#[arg(short = 'n', long = "no-post-processing")]
no_post_processing: bool,
// When set, the fixer is configured with `remove_gaps = false`.
#[arg(short = 'g', long = "keep-short-gaps")]
keep_short_gaps: bool,
// Which processor to run.
#[command(subcommand)]
processor: ProcessorCommands,
},
}
// Processors selectable under the `process` subcommand.
//
// NOTE: plain `//` comments are used on purpose; clap's derive macros would
// turn `///` doc comments into help text and change the `--help` output.
#[derive(Subcommand)]
pub enum ProcessorCommands {
// Runs `CommonIssuesFixer` on the file; default output suffix is `_mend`.
Mend,
// Runs `SDHStripper` on the file; default output suffix is `_sdh_stripped`.
StripSdh,
}
/// Prints the version banner (version, copyright years, authors) followed by
/// the repository URL, all taken from the crate manifest at compile time.
fn print_version() {
    // First year of the copyright range.
    let copyright_start = 2025;
    let current_year = Utc::now().year();
    // Only print a range once the clock has moved past the start year. A
    // system clock set before the start year must not yield a backwards
    // range such as "2025-2024".
    let copyright_display = if current_year > copyright_start {
        format!("{copyright_start}-{current_year}")
    } else {
        copyright_start.to_string()
    };
    println!(
        "CaptionRS version {} Copyright (c) {} {}",
        env!("CARGO_PKG_VERSION"),
        copyright_display,
        env!("CARGO_PKG_AUTHORS")
    );
    println!("{}", env!("CARGO_PKG_REPOSITORY"));
}
/// Returns `path` with `suffix` inserted between the file stem and the
/// extension, e.g. `dir/movie.srt` + `_mend` -> `dir/movie_mend.srt`.
///
/// The name is assembled as an `OsString`, so stems and extensions that are
/// not valid UTF-8 are preserved instead of being silently dropped. A missing
/// or empty extension produces no trailing dot.
fn append_stem_suffix(path: &Path, suffix: &str) -> PathBuf {
    let mut file_name: std::ffi::OsString = path
        .file_stem()
        .map(|stem| stem.to_os_string())
        .unwrap_or_default();
    file_name.push(suffix);
    // `extension()` yields `Some("")` for names like `movie.`; treat that the
    // same as having no extension.
    if let Some(extension) = path.extension().filter(|extension| !extension.is_empty()) {
        file_name.push(".");
        file_name.push(extension);
    }
    path.with_file_name(file_name)
}
/// Builds the default output path for a `process` run by appending a
/// processor-specific suffix to the stem of `file`.
fn default_process_output_path(file: &Path, processor: &ProcessorCommands) -> PathBuf {
    let suffix = match processor {
        ProcessorCommands::Mend => "_mend",
        ProcessorCommands::StripSdh => "_sdh_stripped",
    };
    append_stem_suffix(file, suffix)
}
/// Entry point: parses the command line, initialises logging and executes the
/// selected command.
///
/// `--debug` selects `"debug"` as the default log filter handed to
/// `env_logger`; otherwise `"info"` is used.
///
/// # Errors
///
/// Returns whatever error `run_sync` reports.
pub fn run() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();
    let default_filter = if cli.debug { "debug" } else { "info" };
    let log_env = env_logger::Env::default().default_filter_or(default_filter);
    env_logger::Builder::from_env(log_env).init();
    run_sync(cli)
}
/// Executes the parsed command.
///
/// * `version` prints the version banner.
/// * `convert` reads the input, detects its format, converts it to SubRip,
///   optionally runs `CommonIssuesFixer`, and saves the result with an `srt`
///   extension.
/// * `process` runs the selected processor and saves the result only when
///   something actually changed.
///
/// # Errors
///
/// Propagates errors from reading the input, converting, processing and
/// saving. An unrecognized input format is reported on stdout and is *not*
/// treated as an error.
fn run_sync(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
    match &cli.command {
        Commands::Version => {
            print_version();
        }
        Commands::Convert {
            file,
            output,
            language,
            encoding,
            no_post_processing,
            keep_short_gaps,
        } => {
            let data = fs::read(file)?;
            let converter = match detect_format(&data) {
                Ok(converter) => converter,
                Err(_) => {
                    // Unknown input: tell the user and exit without an error.
                    println!("Subtitle format was unrecognized...");
                    return Ok(());
                }
            };
            println!("Subtitle format: {}", converter.display_name());
            let mut srt = converter.from_bytes(&data)?;
            println!("Converted subtitle to SubRip (SRT)");
            if !no_post_processing {
                let mut fixer = CommonIssuesFixer::new();
                fixer.remove_gaps = !keep_short_gaps;
                let (processed_srt, changed) = fixer.from_srt(srt, language.as_deref())?;
                srt = processed_srt;
                println!("{}", repair_status_message(changed));
            }
            // The output always gets an `srt` extension, whether the path was
            // given explicitly or derived from the input file.
            let output_path = output.as_ref().unwrap_or(file).with_extension("srt");
            srt.save(&output_path, Some(encoding.as_str()), None)?;
            println!("Saved to: {}", output_path.display());
        }
        Commands::Process {
            file,
            output,
            language,
            encoding,
            no_post_processing,
            keep_short_gaps,
            processor,
        } => {
            let (processed_srt, changed) = match processor {
                ProcessorCommands::Mend => {
                    let mut fixer = CommonIssuesFixer::new();
                    fixer.remove_gaps = !keep_short_gaps;
                    let (processed_srt, changed) = fixer.from_file(file, language.as_deref())?;
                    println!("{}", repair_status_message(changed));
                    (processed_srt, changed)
                }
                ProcessorCommands::StripSdh => {
                    let stripper = SDHStripper::new();
                    let (mut processed_srt, mut changed) =
                        stripper.from_file(file, language.as_deref())?;
                    println!("{}", sdh_status_message(changed));
                    if !no_post_processing {
                        let mut fixer = CommonIssuesFixer::new();
                        fixer.remove_gaps = !keep_short_gaps;
                        let (fixed_srt, repaired) =
                            fixer.from_srt(processed_srt, language.as_deref())?;
                        processed_srt = fixed_srt;
                        // Report the fixer's own result, not the SDH stripper's.
                        println!("{}", stripped_repair_status_message(repaired));
                        // A repair alone is also a change worth saving.
                        changed |= repaired;
                    }
                    (processed_srt, changed)
                }
            };
            let output_path = output
                .clone()
                .unwrap_or_else(|| default_process_output_path(file, processor));
            // Nothing is written when the processors left the subtitle untouched.
            if changed {
                processed_srt.save(&output_path, Some(encoding.as_str()), None)?;
                println!("Saved to: {}", output_path.display());
            }
        }
    }
    Ok(())
}
/// A converter chosen by `detect_format`, wrapping one concrete converter per
/// supported input format so callers can handle them through a single type.
// The variant names mirror the wrapped converter type names (`ISMTConverter`,
// `WVTTConverter`, ...), hence the all-caps acronyms that clippy would flag.
#[allow(clippy::upper_case_acronyms)]
enum ConverterType {
/// Reported to the user as "ISMT (DFXP in MP4)".
ISMT(ISMTConverter),
/// Reported to the user as "WVTT (WebVTT in MP4)".
WVTT(WVTTConverter),
/// Reported to the user as "SAMI".
SAMI(SAMIConverter),
/// Reported to the user as "DFXP/TTML/TTML2".
SMPTE(SMPTEConverter),
/// Reported to the user as "WebVTT".
WebVTT(WebVTTConverter),
/// Reported to the user as "JSON (Bilibili)".
BilibiliJSON(BilibiliJSONConverter),
}
impl ConverterType {
fn display_name(&self) -> &'static str {
match self {
ConverterType::ISMT(_) => "ISMT (DFXP in MP4)",
ConverterType::WVTT(_) => "WVTT (WebVTT in MP4)",
ConverterType::SAMI(_) => "SAMI",
ConverterType::SMPTE(_) => "DFXP/TTML/TTML2",
ConverterType::WebVTT(_) => "WebVTT",
ConverterType::BilibiliJSON(_) => "JSON (Bilibili)",
}
}
}
impl BaseConverter for ConverterType {
    /// Forwards parsing of `stream` to whichever concrete converter this
    /// value wraps and returns its result unchanged.
    fn parse<R: std::io::Read>(
        &self,
        stream: R,
    ) -> Result<SubRipFile, crate::subripfile::SubtitleError> {
        match self {
            Self::ISMT(inner) => inner.parse(stream),
            Self::WVTT(inner) => inner.parse(stream),
            Self::SAMI(inner) => inner.parse(stream),
            Self::SMPTE(inner) => inner.parse(stream),
            Self::WebVTT(inner) => inner.parse(stream),
            Self::BilibiliJSON(inner) => inner.parse(stream),
        }
    }
}
/// Returns `true` when `needle` occurs as a contiguous subsequence of
/// `haystack`.
///
/// An empty `needle` is considered to be contained in every haystack
/// (matching `str::contains("")`); without this guard `slice::windows` would
/// panic on a window size of zero.
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
    if needle.is_empty() {
        return true;
    }
    haystack
        .windows(needle.len())
        .any(|window| window == needle)
}
/// Status line printed after the common-issues fixer has run; `changed`
/// tells whether the fixer modified the subtitle.
fn repair_status_message(changed: bool) -> &'static str {
    match changed {
        true => "Processed subtitle and repaired some issues!",
        false => "Processed subtitle but no issues were found...",
    }
}
/// Status line printed after the common-issues fixer has run on an
/// SDH-stripped subtitle; `changed` selects the "repaired" or "nothing found"
/// wording.
fn stripped_repair_status_message(changed: bool) -> &'static str {
    match changed {
        true => "Processed stripped subtitle and repaired some issues!",
        false => "Processed stripped subtitle but no issues were found...",
    }
}
/// Status line printed after the SDH stripper has run; `changed` tells
/// whether any SDH descriptions were removed.
fn sdh_status_message(changed: bool) -> &'static str {
    match changed {
        true => "Processed subtitle and removed SDH!",
        false => "Processed subtitle but no SDH descriptions were found...",
    }
}
/// Picks a converter for `data` by probing the raw bytes for format markers.
///
/// Probes run in a fixed order and the first match wins:
/// 1. `mdat` + `moof` present: `</tt>` selects ISMT, `vttc` selects WVTT,
///    anything else is an error.
/// 2. `<SAMI>` selects SAMI.
/// 3. `</tt>` or `</tt:tt>` selects SMPTE.
/// 4. `WEBVTT` selects WebVTT.
/// 5. A leading `{` together with the `"Stroke"` and `"background_color"`
///    keys selects Bilibili JSON.
///
/// # Errors
///
/// Returns an error when no probe matches, or when the MP4 markers are
/// present but neither payload marker is found.
fn detect_format(data: &[u8]) -> Result<ConverterType, Box<dyn std::error::Error>> {
    // Inputs carrying both MP4 box markers are resolved entirely here and
    // never fall through to the plain-text probes below.
    if contains_bytes(data, b"mdat") && contains_bytes(data, b"moof") {
        if contains_bytes(data, b"</tt>") {
            return Ok(ConverterType::ISMT(ISMTConverter::new()));
        }
        if contains_bytes(data, b"vttc") {
            return Ok(ConverterType::WVTT(WVTTConverter::new()));
        }
        return Err("Unknown MP4 subtitle format".into());
    }

    if contains_bytes(data, b"<SAMI>") {
        return Ok(ConverterType::SAMI(SAMIConverter::new()));
    }

    if contains_bytes(data, b"</tt>") || contains_bytes(data, b"</tt:tt>") {
        return Ok(ConverterType::SMPTE(SMPTEConverter::new()));
    }

    if contains_bytes(data, b"WEBVTT") {
        return Ok(ConverterType::WebVTT(WebVTTConverter::new()));
    }

    let looks_like_bilibili_json = data.starts_with(b"{")
        && contains_bytes(data, b"\"Stroke\"")
        && contains_bytes(data, b"\"background_color\"");
    if looks_like_bilibili_json {
        return Ok(ConverterType::BilibiliJSON(BilibiliJSONConverter::new()));
    }

    Err("Unknown subtitle format".into())
}