// aligner 0.1.4
//
// Automatically corrects subtitle timings given a second, correctly timed subtitle file.
// This file is part of the Rust library and binary `aligner`.
// Copyright (C) 2017 kaegi
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

#![allow(unknown_lints)] // for clippy

extern crate error_chain;
extern crate aligner;
extern crate clap;
extern crate pbr;
extern crate subparse;

const PKG_VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
const PKG_NAME: Option<&'static str> = option_env!("CARGO_PKG_NAME");
const PKG_DESCRIPTION: Option<&'static str> = option_env!("CARGO_PKG_DESCRIPTION");

// Alg* stands for algorithm (the internal aligner algorithm types)
use aligner::{ProgressHandler, TimeDelta as AlgTimeDelta, TimePoint as AlgTimePoint, TimeSpan as AlgTimeSpan, align};
use clap::{App, Arg};
use pbr::ProgressBar;
use std::fs::File;
use std::io::{Read, Write};
use std::cmp::{max, min};
use std::str::FromStr;

mod binary;

pub use binary::errors;
pub use binary::errors::*;
pub use binary::errors::ErrorKind::*;

// subparse
use subparse::{SubtitleEntry, SubtitleFormat, get_subtitle_format_by_ending_err, parse_file_from_string};
use subparse::timetypes::*;

struct ProgressInfo {
    progress_bar: Option<ProgressBar<std::io::Stdout>>,

/// Forwards the progress callbacks of the aligner to the `pbr` progress bar stored in `ProgressInfo`.
impl ProgressHandler for ProgressInfo {
    /// Creates the progress bar with `steps` total steps.
    // NOTE(review): `steps as u64` wraps around for a negative `steps` -- confirm the aligner never passes one.
    fn init(&mut self, steps: i64) {
        self.progress_bar = Some(ProgressBar::new(steps as u64));
    // NOTE(review): the bodies of `inc` and `finish` are not visible in this view; presumably they advance and
    // finalize the bar created in `init` -- verify against the complete file.
    fn inc(&mut self) {
    fn finish(&mut self) {

fn read_file_to_string(path: &str) -> Result<String> {
    let mut file = File::open(path).map_err(|e| Error::from(Io(e))).chain_err(|| FileOperation(path.to_string()))?;
    let mut s = String::new();
    file.read_to_string(&mut s).map_err(|e| Error::from(Io(e))).chain_err(|| FileOperation(path.to_string()))?;

fn write_data_to_file(path: &str, d: Vec<u8>) -> Result<()> {
    let mut file = File::create(path).map_err(|e| Error::from(Io(e))).chain_err(|| FileOperation(path.to_string()))?;
        .map_err(|e| Error::from(Io(e)))
        .chain_err(|| FileOperation(path.to_string()))?;

fn timing_to_alg_timepoint(t: TimePoint, interval: i64) -> AlgTimePoint {
    assert!(interval > 0);
    AlgTimePoint::from(t.msecs() / interval)

fn alg_delta_to_delta(t: AlgTimeDelta, interval: i64) -> TimeDelta {
    assert!(interval > 0);
    let time_int: i64 = t.into();
    TimeDelta::from_msecs(time_int * interval)

fn timings_to_alg_timespans(v: &[TimeSpan], interval: i64) -> Vec<AlgTimeSpan> {
     .map(|timespan| {
         AlgTimeSpan::new_safe(timing_to_alg_timepoint(timespan.start, interval),
                               timing_to_alg_timepoint(timespan.end, interval))
fn alg_deltas_to_timing_deltas(v: &[AlgTimeDelta], interval: i64) -> Vec<TimeDelta> {
    v.iter().cloned().map(|x| alg_delta_to_delta(x, interval)).collect()

/// Groups consecutive timespans with the same delta together.
fn get_subtitle_delta_groups(mut v: Vec<(AlgTimeDelta, TimeSpan)>) -> Vec<(AlgTimeDelta, Vec<TimeSpan>)> {
    v.sort_by_key(|t| min((t.1).start, (t.1).end));

    let mut result: Vec<(AlgTimeDelta, Vec<TimeSpan>)> = Vec::new();

    for (delta, original_timespan) in v {
        let mut new_block = false;

        if let Some(last_tuple_ref) = result.last_mut() {
            if delta == last_tuple_ref.0 {
            } else {
                new_block = true;
        } else {
            new_block = true;

        if new_block {
            result.push((delta, vec![original_timespan]));


/// Will return an array where the start time of an subtitle is always less than the end time (will switch incorrect ones).
fn corrected_timings(v: Vec<TimeSpan>) -> Vec<TimeSpan> {
     .map(|timespan| {
         TimeSpan::new(min(timespan.start, timespan.end),
                       max(timespan.start, timespan.end))

/// Every delta that where `start + delta`, is negative will be adjusted so that `start + delta` is zero. This avoids
/// invalid files for formats that don't support negative timestamps.
fn get_truncated_deltas(timespans: &[TimeSpan], deltas: Vec<TimeDelta>) -> Vec<TimeDelta> {
          .map(|(delta, timespan)| if (delta + timespan.start).is_negative() { TimePoint::from_msecs(0) - timespan.start } else { delta })

/// Prints a warning: writes `s` to stdout as one line prefixed with `WW: `.
///
/// Accepts anything that converts into a `Cow<str>`, e.g. `&str` and `String`.
fn pwarning<'a, T: Into<std::borrow::Cow<'a, str>>>(s: T) {
    println!("WW: {}", s.into());
}

/// Prints an informational message: writes `s` to stdout as one line prefixed with `II: `.
///
/// Accepts anything that converts into a `Cow<str>`, e.g. `&str` and `String`.
fn pinfo<'a, T: Into<std::borrow::Cow<'a, str>>>(s: T) {
    println!("II: {}", s.into());
}

/// Prints an error message: writes `s` as one line prefixed with `EE: `.
///
/// Note that the message goes to stdout (not stderr), like the output of the other print helpers.
/// Accepts anything that converts into a `Cow<str>`, e.g. `&str` and `String`.
fn perror<'a, T: Into<std::borrow::Cow<'a, str>>>(s: T) {
    println!("EE: {}", s.into());
}

/// Parses the command line, aligns the timings of the incorrect subtitle file to the reference subtitle file and
/// writes the corrected subtitles to the output path.
///
/// Returns an error for invalid arguments, for files that cannot be read or parsed and when the output file has
/// a different format than the incorrect file.
fn run() -> Result<()> {
    // NOTE(review): "unkown" in the fallback program name below is misspelled; it is a runtime string.
    // NOTE(review): the `Arg` definitions that the `.help(..)`/`.value_name(..)` calls belong to and the final
    // `.get_matches()` are not visible in this view.
    let matches = App::new(PKG_NAME.unwrap_or("unkown (not compiled with cargo)"))
        .version(PKG_VERSION.unwrap_or("unknown (not compiled with cargo)"))
        .about(PKG_DESCRIPTION.unwrap_or("unknown (not compiled with cargo)"))
            .help("Path to the reference subtitle file")
            .help("Path to the incorrect subtitle file")
            .help("Path to corrected subtitle file")
            .value_name("floating point number from 0 to 100")
            .help("Determines how eager the algorithm is to avoid splitting of the subtitles. 100 means that all lines will be shifted by the same offset, while 0 will produce MANY segments with different offsets. Values from 0.1 to 20 are the most useful.")
            .value_name("integer in milliseconds")
            .help("The smallest recognized time interval, smaller numbers make alignment more accurate, greater numbers make it faster.")
            .help("Negative timestamps can lead to problems with the output file, so by default 0 will be written instead. This option allows you to disable this behavior."))
        .after_help("This program works with .srt, .ass/.ssa and .idx files. The corrected file will have the same format as the incorrect file.")

    // NOTE(review): these `unwrap`s panic when a value is absent -- presumably the arguments are declared as
    // required (or have defaults) in the argument definitions; verify.
    let incorrect_file_path = matches.value_of("incorrect-sub-file").unwrap();
    let reference_file_path = matches.value_of("reference-sub-file").unwrap();
    let output_file_path = matches.value_of("output-file-path").unwrap();

    // length of one algorithm time unit in milliseconds; values below 1 are rejected
    let interval_str: &str = matches.value_of("interval").unwrap();
    let interval: i64 = FromStr::from_str(interval_str).chain_err(|| ArgumentParseError("interval", interval_str.to_string()))?;
    if interval < 1 {
        return Err(Error::from(ExpectedPositiveNumber(interval))).chain_err(|| Error::from(InvalidArgument("interval")));

    // value from 0 to 100 given by the user; it is divided by 100 before it is handed to `align`
    let split_penalty_str: &str = matches.value_of("split-penalty").unwrap();
    let split_penalty: f64 = FromStr::from_str(split_penalty_str).chain_err(|| ArgumentParseError("split-penalty", split_penalty_str.to_string()))?;
    if split_penalty < 0.0 || split_penalty > 100.0 {
        return Err(Error::from(ValueNotInRange(split_penalty, 0.0, 100.0))).chain_err(|| Error::from(InvalidArgument("split-penalty")));

    let allow_negative_timestamps = matches.is_present("allow-negative-timestamps");

    let reference_sub_string = read_file_to_string(reference_file_path)?;
    let incorrect_sub_string = read_file_to_string(incorrect_file_path)?;

    // the subtitle formats are determined from the paths (judging by the helper's name: from the file endings)
    let incorrect_file_format = get_subtitle_format_by_ending_err(incorrect_file_path)
        .chain_err(|| ErrorKind::FileOperation(incorrect_file_path.to_string()))?;
    let reference_file_format = get_subtitle_format_by_ending_err(reference_file_path)
        .chain_err(|| ErrorKind::FileOperation(reference_file_path.to_string()))?;
    let output_file_format = get_subtitle_format_by_ending_err(output_file_path)
        .chain_err(|| ErrorKind::FileOperation(output_file_path.to_string()))?;

    // this program internally stores the files in a non-destructive way (so
    // formatting is preserved) but has no ability to convert between formats
    if incorrect_file_format != output_file_format {
        // NOTE(review): the remaining arguments of `DifferentOutputFormat` are not visible in this view.
        return Err(DifferentOutputFormat(incorrect_file_path.to_string(),

    // NOTE(review): both strings are cloned although no later use of them is visible here -- confirm the clones
    // are needed.
    let timed_reference_file = parse_file_from_string(reference_file_format, reference_sub_string.clone())
        .chain_err(|| FileOperation(reference_file_path.to_string()))?;
    let timed_incorrect_file = parse_file_from_string(incorrect_file_format, incorrect_sub_string.clone())
        .chain_err(|| FileOperation(incorrect_file_path.to_string()))?;

    // only the timespans of the entries are needed; `corrected_timings` swaps start and end where they are reversed
    let timings_reference = corrected_timings(timed_reference_file.get_subtitle_entries()?.into_iter().map(|subentry| subentry.timespan).collect());
    let timings_incorrect = corrected_timings(timed_incorrect_file.get_subtitle_entries()?.into_iter().map(|subentry| subentry.timespan).collect());

    // convert the millisecond timings into the time unit of the algorithm (`interval` milliseconds per unit)
    let alg_reference_timespans = timings_to_alg_timespans(&timings_reference, interval);
    let alg_incorrect_timespans = timings_to_alg_timespans(&timings_incorrect, interval);

    // NOTE(review): the remaining arguments of `align` are not visible in this view.
    let alg_deltas = align(alg_incorrect_timespans.clone(),
                           split_penalty / 100.0,
    let mut deltas = alg_deltas_to_timing_deltas(&alg_deltas, interval);

    // groups of consecutive original subtitle lines that were shifted by the same delta
    // NOTE(review): the rest of the iterator chain passed to `get_subtitle_delta_groups` is not visible in this view.
    let shift_groups: Vec<(AlgTimeDelta, Vec<TimeSpan>)> = get_subtitle_delta_groups(alg_deltas.iter()

    for (shift_group_delta, shift_group_lines) in shift_groups {
        // computes the first and last timestamp for all lines with that delta
        // -> that way we can provide the user with information like
        //     "100 subtitles with 10min length"
        let min_max_opt = shift_group_lines.iter().fold(None, |last_opt, subline| {
            let new_min = subline.start;
            let new_max = subline.end;
            if let Some((last_min, last_max)) = last_opt { Some((min(last_min, new_min), max(last_max, new_max))) } else { Some((new_min, new_max)) }

        // `None` would mean a group without lines; `get_subtitle_delta_groups` creates every group with one line
        let (min, max) = match min_max_opt {
            Some(v) => v,
            None => unreachable!(),

        // NOTE(review): the format string has three placeholders but only two arguments are visible in this view.
        pinfo(format!("shifted block of {} subtitles with length {} by {}",
                      max - min,
                      alg_delta_to_delta(shift_group_delta, interval)));

    if timings_reference.is_empty() {
        pwarning("reference file has no subtitle lines");
    if timings_incorrect.is_empty() {
        pwarning("file with incorrect subtitles has no lines");

    // true if at least one shifted subtitle would start before time zero
    let writing_negative_timespans = deltas.iter().zip(timings_incorrect.iter()).any(|(&delta, &timespan)| (delta + timespan.start).is_negative());
    if writing_negative_timespans {
        pwarning("some subtitles now have negative timings, which can cause invalid subtitle files");
        if allow_negative_timestamps {
            pwarning("negative timestamps will be written to file, because you passed '-n' or '--allow-negative-timestamps'");
        } else {
            pwarning("negative subtitles will therefore be set to zero by default; pass '-n' or '--allow-negative-timestamps' to disable this behavior");
            deltas = get_truncated_deltas(&timings_incorrect, deltas);

    // .idx only has start timepoints (the subtitle is shown until the next subtitle starts) - so retiming with gaps might
    // produce errors
    if output_file_format == SubtitleFormat::VobSubIdx {
        pwarning("writing to an '.idx' file can lead to unexpected results due to restrictions of this format");

    // incorrect file -> correct file
    // NOTE(review): the closure takes `(timespan, delta)` pairs, but the step that pairs the timings with `deltas`
    // and the final `collect` are not visible in this view.
    let shifted_timespans: Vec<SubtitleEntry> = timings_incorrect.iter()
                                                                 .map(|(&timespan, &delta)| SubtitleEntry::from(timespan + delta))

    // write corrected files
    // NOTE(review): the step that applies `shifted_timespans` to `correct_file` and the final `Ok(())` are not
    // visible in this view.
    let mut correct_file = timed_incorrect_file.clone();
    write_data_to_file(output_file_path, correct_file.to_data()?)?;


/// Entry point: executes `run` and, if it fails, prints the error together with its causes.
fn main() {
    match run() {
        Ok(_) => std::process::exit(0),
        Err(e) => {
            perror(format!("error: {}", e));

            // print the causes of the error; the first entry of the chain is skipped
            for e in e.iter().skip(1) {
                perror(format!("caused by: {}", e));

            // The backtrace is not always generated. Try running with `RUST_BACKTRACE=1`.
            if let Some(backtrace) = e.backtrace() {
                perror(format!("backtrace: {:?}", backtrace));
            } else {
                perror("note: run program with `env RUST_BACKTRACE=1` for a backtrace");
            // NOTE(review): no exit with a non-zero status is visible on this error path in this view -- confirm
            // that a failure is reported through the exit code.