#![allow(unknown_lints)]
#[macro_use]
extern crate error_chain;
extern crate aligner;
extern crate clap;
extern crate pbr;
extern crate subparse;
// Package metadata read from the build environment at compile time via
// `option_env!`. Each constant is `None` when the corresponding variable was
// not set during the build; `run` substitutes a placeholder string in that case.
const PKG_VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
const PKG_NAME: Option<&'static str> = option_env!("CARGO_PKG_NAME");
const PKG_DESCRIPTION: Option<&'static str> = option_env!("CARGO_PKG_DESCRIPTION");
use aligner::{ProgressHandler, TimeDelta as AlgTimeDelta, TimePoint as AlgTimePoint, TimeSpan as AlgTimeSpan, align};
use clap::{App, Arg};
use pbr::ProgressBar;
use std::fs::File;
use std::io::{Read, Write};
use std::cmp::{max, min};
use std::str::FromStr;
mod binary;
pub use binary::errors;
pub use binary::errors::*;
pub use binary::errors::ErrorKind::*;
use subparse::{SubtitleEntry, SubtitleFormat, get_subtitle_format_by_ending_err, parse_file_from_string};
use subparse::timetypes::*;
/// Progress reporter handed to the alignment algorithm.
///
/// Starts out empty (`Default` yields `progress_bar: None`); the bar itself is
/// only created once the `ProgressHandler::init` implementation is called.
#[derive(Default)]
struct ProgressInfo {
// Bar writing to standard output; `None` until `init` has run.
progress_bar: Option<ProgressBar<std::io::Stdout>>,
}
impl ProgressHandler for ProgressInfo {
fn init(&mut self, steps: i64) {
self.progress_bar = Some(ProgressBar::new(steps as u64));
}
fn inc(&mut self) {
self.progress_bar.as_mut().unwrap().inc();
}
fn finish(&mut self) {
self.progress_bar.as_mut().unwrap().finish_println("\n");
}
}
fn read_file_to_string(path: &str) -> Result<String> {
let mut file = File::open(path).map_err(|e| Error::from(Io(e))).chain_err(|| FileOperation(path.to_string()))?;
let mut s = String::new();
file.read_to_string(&mut s).map_err(|e| Error::from(Io(e))).chain_err(|| FileOperation(path.to_string()))?;
Ok(s)
}
fn write_data_to_file(path: &str, d: Vec<u8>) -> Result<()> {
let mut file = File::create(path).map_err(|e| Error::from(Io(e))).chain_err(|| FileOperation(path.to_string()))?;
file.write_all(&d)
.map_err(|e| Error::from(Io(e)))
.chain_err(|| FileOperation(path.to_string()))?;
Ok(())
}
/// Converts a subtitle time point into the aligner's time unit by dividing its
/// millisecond value by `interval` (integer division).
///
/// # Panics
///
/// Panics if `interval` is not strictly positive.
fn timing_to_alg_timepoint(t: TimePoint, interval: i64) -> AlgTimePoint {
    assert!(interval > 0);
    let scaled_msecs = t.msecs() / interval;
    AlgTimePoint::from(scaled_msecs)
}
/// Converts an aligner delta back into a subtitle `TimeDelta` by multiplying
/// its integer value by `interval` and interpreting the product as
/// milliseconds.
///
/// # Panics
///
/// Panics if `interval` is not strictly positive.
fn alg_delta_to_delta(t: AlgTimeDelta, interval: i64) -> TimeDelta {
    assert!(interval > 0);
    let alg_units: i64 = t.into();
    let msecs = alg_units * interval;
    TimeDelta::from_msecs(msecs)
}
/// Converts every subtitle time span in `v` into an aligner time span,
/// scaling both endpoints with `timing_to_alg_timepoint`.
///
/// The output has the same length and order as `v`.
fn timings_to_alg_timespans(v: &[TimeSpan], interval: i64) -> Vec<AlgTimeSpan> {
    let mut converted = Vec::with_capacity(v.len());
    for span in v {
        let alg_start = timing_to_alg_timepoint(span.start, interval);
        let alg_end = timing_to_alg_timepoint(span.end, interval);
        converted.push(AlgTimeSpan::new_safe(alg_start, alg_end));
    }
    converted
}
/// Converts every aligner delta in `v` into a subtitle `TimeDelta` using
/// `alg_delta_to_delta`, preserving order.
fn alg_deltas_to_timing_deltas(v: &[AlgTimeDelta], interval: i64) -> Vec<TimeDelta> {
    let mut converted = Vec::with_capacity(v.len());
    for alg_delta in v {
        converted.push(alg_delta_to_delta(alg_delta.clone(), interval));
    }
    converted
}
/// Groups time spans into runs that share the same delta.
///
/// The pairs are first sorted (stably) by the earlier of each span's two
/// endpoints. Walking the sorted list, a span joins the most recent group when
/// its delta equals that group's delta; otherwise it opens a new group. Equal
/// deltas separated by a different delta therefore end up in separate groups.
fn get_subtitle_delta_groups(mut v: Vec<(AlgTimeDelta, TimeSpan)>) -> Vec<(AlgTimeDelta, Vec<TimeSpan>)> {
    v.sort_by_key(|pair| min((pair.1).start, (pair.1).end));

    let mut groups: Vec<(AlgTimeDelta, Vec<TimeSpan>)> = Vec::new();
    for (delta, span) in v {
        // Try to append to the trailing group; remember whether that worked so
        // the push below happens after the mutable borrow has ended.
        let joined_last_group = match groups.last_mut() {
            Some(last_group) => {
                if delta == last_group.0 {
                    last_group.1.push(span);
                    true
                } else {
                    false
                }
            }
            None => false,
        };

        if !joined_last_group {
            groups.push((delta, vec![span]));
        }
    }
    groups
}
/// Normalizes every time span so that its start is not after its end,
/// swapping the two endpoints where necessary.
fn corrected_timings(v: Vec<TimeSpan>) -> Vec<TimeSpan> {
    let mut normalized = Vec::with_capacity(v.len());
    for span in v {
        let (first, second) = (span.start, span.end);
        let ordered = if first <= second {
            TimeSpan::new(first, second)
        } else {
            TimeSpan::new(second, first)
        };
        normalized.push(ordered);
    }
    normalized
}
/// Limits deltas so that no shifted span starts before time zero.
///
/// Deltas and time spans are paired up by position (stopping at the shorter of
/// the two). Whenever `delta + start` would be negative, the delta is replaced
/// by the one that moves `start` exactly to zero; otherwise it is kept as is.
fn get_truncated_deltas(timespans: &[TimeSpan], deltas: Vec<TimeDelta>) -> Vec<TimeDelta> {
    let mut truncated = Vec::with_capacity(deltas.len());
    for (delta, span) in deltas.into_iter().zip(timespans.iter().cloned()) {
        let shifted_start = delta + span.start;
        let effective_delta = if shifted_start.is_negative() {
            TimePoint::from_msecs(0) - span.start
        } else {
            delta
        };
        truncated.push(effective_delta);
    }
    truncated
}
/// Prints a warning line to standard output, prefixed with `WW: `.
///
/// Accepts anything convertible into a `Cow<str>`, e.g. `&str` or `String`.
fn pwarning<'a, T: Into<std::borrow::Cow<'a, str>>>(s: T) {
    let message: std::borrow::Cow<'a, str> = s.into();
    println!("WW: {}", message);
}
/// Prints an informational line to standard output, prefixed with `II: `.
///
/// Accepts anything convertible into a `Cow<str>`, e.g. `&str` or `String`.
fn pinfo<'a, T: Into<std::borrow::Cow<'a, str>>>(s: T) {
    let message: std::borrow::Cow<'a, str> = s.into();
    println!("II: {}", message);
}
/// Prints an error line to standard output, prefixed with `EE: `.
///
/// Accepts anything convertible into a `Cow<str>`, e.g. `&str` or `String`.
fn perror<'a, T: Into<std::borrow::Cow<'a, str>>>(s: T) {
    let message: std::borrow::Cow<'a, str> = s.into();
    println!("EE: {}", message);
}
fn run() -> Result<()> {
let matches = App::new(PKG_NAME.unwrap_or("unkown (not compiled with cargo)"))
.version(PKG_VERSION.unwrap_or("unknown (not compiled with cargo)"))
.about(PKG_DESCRIPTION.unwrap_or("unknown (not compiled with cargo)"))
.arg(Arg::with_name("reference-sub-file")
.help("Path to the reference subtitle file")
.required(true))
.arg(Arg::with_name("incorrect-sub-file")
.help("Path to the incorrect subtitle file")
.required(true))
.arg(Arg::with_name("output-file-path")
.help("Path to corrected subtitle file")
.required(true))
.arg(Arg::with_name("split-penalty")
.short("p")
.long("split-penalty")
.value_name("floating point number from 0 to 100")
.help("Determines how eager the algorithm is to avoid splitting of the subtitles. 100 means that all lines will be shifted by the same offset, while 0 will produce MANY segments with different offsets. Values from 0.1 to 20 are the most useful.")
.default_value("4"))
.arg(Arg::with_name("interval")
.short("i")
.long("interval")
.value_name("integer in milliseconds")
.help("The smallest recognized time interval, smaller numbers make alignment more accurate, greater numbers make it faster.")
.default_value("1"))
.arg(Arg::with_name("allow-negative-timestamps")
.short("n")
.long("allow-negative-timestamps")
.help("Negative timestamps can lead to problems with the output file, so by default 0 will be written instead. This option allows you to disable this behavior."))
.after_help("This program works with .srt, .ass/.ssa and .idx files. The corrected file will have the same format as the incorrect file.")
.get_matches();
let incorrect_file_path = matches.value_of("incorrect-sub-file").unwrap();
let reference_file_path = matches.value_of("reference-sub-file").unwrap();
let output_file_path = matches.value_of("output-file-path").unwrap();
let interval_str: &str = matches.value_of("interval").unwrap();
let interval: i64 = FromStr::from_str(interval_str).chain_err(|| ArgumentParseError("interval", interval_str.to_string()))?;
if interval < 1 {
return Err(Error::from(ExpectedPositiveNumber(interval))).chain_err(|| Error::from(InvalidArgument("interval")));
}
let split_penalty_str: &str = matches.value_of("split-penalty").unwrap();
let split_penalty: f64 = FromStr::from_str(split_penalty_str).chain_err(|| ArgumentParseError("split-penalty", split_penalty_str.to_string()))?;
if split_penalty < 0.0 || split_penalty > 100.0 {
return Err(Error::from(ValueNotInRange(split_penalty, 0.0, 100.0))).chain_err(|| Error::from(InvalidArgument("split-penalty")));
}
let allow_negative_timestamps = matches.is_present("allow-negative-timestamps");
let reference_sub_string = read_file_to_string(reference_file_path)?;
let incorrect_sub_string = read_file_to_string(incorrect_file_path)?;
let incorrect_file_format = get_subtitle_format_by_ending_err(incorrect_file_path)
.chain_err(|| ErrorKind::FileOperation(incorrect_file_path.to_string()))?;
let reference_file_format = get_subtitle_format_by_ending_err(reference_file_path)
.chain_err(|| ErrorKind::FileOperation(reference_file_path.to_string()))?;
let output_file_format = get_subtitle_format_by_ending_err(output_file_path)
.chain_err(|| ErrorKind::FileOperation(output_file_path.to_string()))?;
if incorrect_file_format != output_file_format {
return Err(DifferentOutputFormat(incorrect_file_path.to_string(),
output_file_path.to_string())
.into());
}
let timed_reference_file = parse_file_from_string(reference_file_format, reference_sub_string.clone())
.chain_err(|| FileOperation(reference_file_path.to_string()))?;
let timed_incorrect_file = parse_file_from_string(incorrect_file_format, incorrect_sub_string.clone())
.chain_err(|| FileOperation(incorrect_file_path.to_string()))?;
let timings_reference = corrected_timings(timed_reference_file.get_subtitle_entries()?.into_iter().map(|subentry| subentry.timespan).collect());
let timings_incorrect = corrected_timings(timed_incorrect_file.get_subtitle_entries()?.into_iter().map(|subentry| subentry.timespan).collect());
let alg_reference_timespans = timings_to_alg_timespans(&timings_reference, interval);
let alg_incorrect_timespans = timings_to_alg_timespans(&timings_incorrect, interval);
let alg_deltas = align(alg_incorrect_timespans.clone(),
alg_reference_timespans,
split_penalty / 100.0,
Some(Box::new(ProgressInfo::default())));
let mut deltas = alg_deltas_to_timing_deltas(&alg_deltas, interval);
let shift_groups: Vec<(AlgTimeDelta, Vec<TimeSpan>)> = get_subtitle_delta_groups(alg_deltas.iter()
.cloned()
.zip(timings_incorrect.iter().cloned())
.collect());
for (shift_group_delta, shift_group_lines) in shift_groups {
let min_max_opt = shift_group_lines.iter().fold(None, |last_opt, subline| {
let new_min = subline.start;
let new_max = subline.end;
if let Some((last_min, last_max)) = last_opt { Some((min(last_min, new_min), max(last_max, new_max))) } else { Some((new_min, new_max)) }
});
let (min, max) = match min_max_opt {
Some(v) => v,
None => unreachable!(),
};
pinfo(format!("shifted block of {} subtitles with length {} by {}",
shift_group_lines.len(),
max - min,
alg_delta_to_delta(shift_group_delta, interval)));
}
if timings_reference.is_empty() {
println!("");
pwarning("reference file has no subtitle lines");
}
if timings_incorrect.is_empty() {
println!("");
pwarning("file with incorrect subtitles has no lines");
}
let writing_negative_timespans = deltas.iter().zip(timings_incorrect.iter()).any(|(&delta, ×pan)| (delta + timespan.start).is_negative());
if writing_negative_timespans {
println!("");
pwarning("some subtitles now have negative timings, which can cause invalid subtitle files");
if allow_negative_timestamps {
pwarning("negative timestamps will be written to file, because you passed '-n' or '--allow-negative-timestamps'");
} else {
pwarning("negative subtitles will therefore be set to zero by default; pass '-n' or '--allow-negative-timestamps' to disable this behavior");
deltas = get_truncated_deltas(&timings_incorrect, deltas);
}
}
if output_file_format == SubtitleFormat::VobSubIdx {
println!("");
pwarning("writing to an '.idx' file can lead to unexpected results due to restrictions of this format");
}
let shifted_timespans: Vec<SubtitleEntry> = timings_incorrect.iter()
.zip(deltas.iter())
.map(|(×pan, &delta)| SubtitleEntry::from(timespan + delta))
.collect();
let mut correct_file = timed_incorrect_file.clone();
correct_file.update_subtitle_entries(&shifted_timespans)?;
write_data_to_file(output_file_path, correct_file.to_data()?)?;
Ok(())
}
fn main() {
match run() {
Ok(_) => std::process::exit(0),
Err(e) => {
perror(format!("error: {}", e));
for e in e.iter().skip(1) {
perror(format!("caused by: {}", e));
}
if let Some(backtrace) = e.backtrace() {
perror(format!("backtrace: {:?}", backtrace));
} else {
perror("note: run program with `env RUST_BACKTRACE=1` for a backtrace");
}
std::process::exit(1);
}
}
}