use self::errors::ErrorKind::*;
use self::errors::*;
use crate::{SubtitleEntry, SubtitleFileInterface};
use crate::errors::Result as SubtitleParserResult;
use crate::formats::common::*;
use combine::char::char;
use combine::combinator::{eof, many, parser as p, satisfy, sep_by};
use combine::primitives::Parser;
use itertools::Itertools;
use std::borrow::Cow;
use std::collections::HashSet;
use failure::ResultExt;
use crate::timetypes::{TimePoint, TimeSpan};
use std::collections::LinkedList;
#[allow(missing_docs)]
pub mod errors {
pub type Result<T> = std::result::Result<T, Error>;
define_error!(Error, ErrorKind);
#[derive(PartialEq, Debug, Fail)]
pub enum ErrorKind {
#[fail(display = "expected subtittle line, found `{}`", line)]
ExpectedSubtitleLine { line: String },
#[fail(display = "parse error at line `{}`", line_num)]
ErrorAtLine { line_num: usize },
}
}
/// A single MicroDVD formatting tag, i.e. the text found between `{` and `}`
/// in front of a subtitle text.
///
/// The tag is stored with its first character lowercased, so two tags that
/// differ only in the case of that character compare (and hash) as equal.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
enum MdvdFormatting {
    /// A tag whose content is kept as an uninterpreted string.
    Unknown(String),
}

impl From<String> for MdvdFormatting {
    /// Wraps the raw tag text, normalizing its first character to lowercase.
    fn from(f: String) -> MdvdFormatting {
        MdvdFormatting::Unknown(Self::lowercase_first_char(&f))
    }
}

impl MdvdFormatting {
    /// Returns `true` if the first character of `f` is uppercase.
    ///
    /// An empty string yields `false`.
    fn is_container_line_formatting(f: &str) -> bool {
        f.chars().next().map_or(false, char::is_uppercase)
    }

    /// Returns `s` with its first character converted to lowercase.
    ///
    /// The remainder of the string is copied unchanged; an empty input gives
    /// an empty output.
    fn lowercase_first_char(s: &str) -> String {
        let mut rest = s.chars();
        match rest.next() {
            None => String::new(),
            // A single character may map to several characters, hence the
            // iterator chain instead of a plain `char` replacement.
            Some(first) => first.to_lowercase().chain(rest).collect(),
        }
    }

    /// Returns `s` with its first character converted to uppercase.
    ///
    /// The remainder of the string is copied unchanged; an empty input gives
    /// an empty output.
    fn uppercase_first_char(s: &str) -> String {
        let mut rest = s.chars();
        match rest.next() {
            None => String::new(),
            Some(first) => first.to_uppercase().chain(rest).collect(),
        }
    }

    /// Returns the stored tag text exactly as it is kept internally.
    fn to_formatting_string_intern(&self) -> String {
        match *self {
            MdvdFormatting::Unknown(ref s) => s.clone(),
        }
    }

    /// Returns the tag text ready to be written between braces.
    ///
    /// With `multiline == true` the first character is uppercased, otherwise
    /// it is lowercased.
    fn to_formatting_string(&self, multiline: bool) -> String {
        let s = self.to_formatting_string_intern();
        if multiline {
            Self::uppercase_first_char(&s)
        } else {
            Self::lowercase_first_char(&s)
        }
    }
}
/// A parsed MicroDVD subtitle file.
#[derive(Debug, Clone)]
pub struct MdvdFile {
// Frames per second; used to convert between frame numbers and time.
fps: f64,
// All parsed subtitle lines, in the order they were produced by the parser.
v: Vec<MdvdLine>,
}
/// One subtitle text together with its frame range and formatting tags.
#[derive(Debug, Clone)]
struct MdvdLine {
// Frame number at which the text starts.
start_frame: i64,
// Frame number at which the text ends.
end_frame: i64,
// Formatting tags attached to this text.
formatting: Vec<MdvdFormatting>,
// The subtitle text itself.
text: String,
}
impl MdvdLine {
    /// Builds a `SubtitleEntry` from this line, converting the frame range to
    /// milliseconds with the given frame rate.
    ///
    /// The millisecond values are truncated by the cast to `i64`; the text is
    /// copied into the entry.
    fn to_subtitle_entry(&self, fps: f64) -> SubtitleEntry {
        let frame_to_msecs = |frame: i64| (frame as f64 * 1000.0 / fps) as i64;
        let start = TimePoint::from_msecs(frame_to_msecs(self.start_frame));
        let end = TimePoint::from_msecs(frame_to_msecs(self.end_frame));
        SubtitleEntry {
            timespan: TimeSpan::new(start, end),
            line: Some(self.text.clone()),
        }
    }
}
impl MdvdFile {
    /// Parses MicroDVD subtitle text.
    ///
    /// `fps` is stored in the returned file and used for every later
    /// conversion between frame numbers and time.
    ///
    /// # Errors
    ///
    /// Any failure of the internal parser is wrapped in
    /// `crate::ErrorKind::ParsingError`.
    pub fn parse(s: &str, fps: f64) -> SubtitleParserResult<MdvdFile> {
        let parsed = Self::parse_file(s, fps).with_context(|_| crate::ErrorKind::ParsingError)?;
        Ok(parsed)
    }
}
impl MdvdFile {
    /// Parses the whole input, one subtitle line per text line.
    ///
    /// The input is first passed through `split_bom` and the second element of
    /// its result is what gets parsed. Blank (empty or whitespace-only) lines
    /// carry no subtitle and are skipped instead of aborting the parse. The
    /// line index passed on for error reporting is the zero-based index of the
    /// line in the input, counting skipped lines as well.
    fn parse_file(i: &str, fps: f64) -> Result<MdvdFile> {
        let (_, s) = split_bom(i);

        let mut result: Vec<MdvdLine> = Vec::new();
        for (line_num, line) in s.lines().enumerate() {
            if line.trim().is_empty() {
                continue;
            }
            result.extend(Self::parse_line(line_num, line)?);
        }

        Ok(MdvdFile { fps, v: result })
    }

    /// Parses a single line of the form
    /// `{start}{end}{fmt}...text|{fmt}...text|...`.
    ///
    /// The two leading brace groups hold the start and end frame. What follows
    /// is split at `|`; every part may begin with any number of `{...}`
    /// formatting groups followed by its text. Each part becomes one
    /// `MdvdLine`.
    ///
    /// # Errors
    ///
    /// If the line does not match, an `ExpectedSubtitleLine` error carrying the
    /// line is created and wrapped in `ErrorAtLine { line_num }`.
    fn parse_line(line_num: usize, line: &str) -> Result<Vec<MdvdLine>> {
        // One `{...}` group; yields the text between the braces.
        let sub_info = (char('{'), many(satisfy(|c| c != '}')), char('}'))
            .map(|(_, info, _): (_, String, _)| info)
            .expected("MicroDVD info");
        // Formatting groups followed by everything up to the next `|`.
        let single_line = (many(sub_info), many(satisfy(|c| c != '|')));
        (
            char('{'),
            p(number_i64),
            char('}'),
            char('{'),
            p(number_i64),
            char('}'),
            sep_by(single_line, char('|')),
            eof(),
        )
            .map(|(_, start_frame, _, _, end_frame, _, fmt_strs_and_lines, ())| (start_frame, end_frame, fmt_strs_and_lines))
            .map(|(start_frame, end_frame, fmt_strs_and_lines): (i64, i64, Vec<(Vec<String>, String)>)| {
                Self::construct_mdvd_lines(start_frame, end_frame, fmt_strs_and_lines)
            })
            .parse(line)
            .map(|x| x.0)
            .map_err(|_| Error::from(ExpectedSubtitleLine { line: line.to_string() }))
            .with_context(|_| ErrorAtLine { line_num })
            .map_err(Error::from)
    }

    /// Turns the raw `(formatting strings, text)` parts of one input line into
    /// `MdvdLine`s sharing the same frame range.
    ///
    /// Container formatting (tags starting with an uppercase character) found
    /// on any part is attached to every resulting line, placed before that
    /// line's own formatting.
    fn construct_mdvd_lines(start_frame: i64, end_frame: i64, fmt_strs_and_lines: Vec<(Vec<String>, String)>) -> Vec<MdvdLine> {
        let mut cline_fmts: Vec<MdvdFormatting> = Vec::new();

        // This pass has to finish before any line is built: container
        // formatting that appears on a later part applies to earlier parts too.
        let fmts_and_lines: Vec<(Vec<MdvdFormatting>, String)> = fmt_strs_and_lines
            .into_iter()
            .map(|(fmts, text)| (Self::string_to_formatting(&mut cline_fmts, fmts), text))
            .collect();

        fmts_and_lines
            .into_iter()
            .map(|(sline_fmts, text)| MdvdLine {
                start_frame,
                end_frame,
                text,
                formatting: cline_fmts.iter().cloned().chain(sline_fmts).collect(),
            })
            .collect()
    }

    /// Converts formatting strings into `MdvdFormatting` values.
    ///
    /// Strings classified by `MdvdFormatting::is_container_line_formatting` are
    /// appended to `multiline_formatting`; all others are returned. The
    /// relative order within each of the two groups is preserved.
    fn string_to_formatting(multiline_formatting: &mut Vec<MdvdFormatting>, fmts: Vec<String>) -> Vec<MdvdFormatting> {
        let (cline_fmts_str, sline_fmts_str): (Vec<String>, Vec<String>) = fmts
            .into_iter()
            .partition(|fmt_str| MdvdFormatting::is_container_line_formatting(fmt_str));

        multiline_formatting.extend(cline_fmts_str.into_iter().map(MdvdFormatting::from));
        sline_fmts_str.into_iter().map(MdvdFormatting::from).collect()
    }
}
impl SubtitleFileInterface for MdvdFile {
    /// Returns one `SubtitleEntry` per stored line, in storage order, with the
    /// frame ranges converted to time using the file's frame rate.
    fn get_subtitle_entries(&self) -> SubtitleParserResult<Vec<SubtitleEntry>> {
        Ok(self.v.iter().map(|line| line.to_subtitle_entry(self.fps)).collect())
    }

    /// Overwrites the timing (and, where given, the text) of every stored line
    /// with the entry at the same position in `new_subtitle_entries`.
    ///
    /// Times are converted to the nearest frame. Rounding rather than
    /// truncating matters: the frame-to-millisecond conversion done when
    /// reading entries truncates, so truncating again on the way back would
    /// move an unchanged entry one frame earlier.
    ///
    /// An entry whose `line` is `None` leaves the stored text untouched.
    ///
    /// # Panics
    ///
    /// Panics if `new_subtitle_entries` does not have exactly as many entries
    /// as the file has lines.
    fn update_subtitle_entries(&mut self, new_subtitle_entries: &[SubtitleEntry]) -> SubtitleParserResult<()> {
        assert_eq!(new_subtitle_entries.len(), self.v.len());

        let fps = self.fps;
        for (line, entry) in self.v.iter_mut().zip(new_subtitle_entries) {
            line.start_frame = (entry.timespan.start.secs_f64() * fps).round() as i64;
            line.end_frame = (entry.timespan.end.secs_f64() * fps).round() as i64;
            if let Some(ref text) = entry.line {
                line.text = text.clone();
            }
        }

        Ok(())
    }

    /// Serializes the file back to MicroDVD text.
    ///
    /// Lines are sorted by `(start_frame, end_frame)` (stable, so lines with
    /// an equal range keep their order) and lines with an equal range are
    /// written as one output line, their texts separated by `|`. Formatting
    /// shared by all lines of such a group is written once, directly after the
    /// frame range, with an uppercase first character; the remaining
    /// formatting is written in front of the text it belongs to with a
    /// lowercase first character. A group consisting of a single line has no
    /// shared formatting.
    ///
    /// Tags are written in the order of their first occurrence and each tag at
    /// most once per position, so the output is deterministic. Output lines
    /// are separated by `\n`, without a trailing newline.
    fn to_data(&self) -> SubtitleParserResult<Vec<u8>> {
        // Appends `{content}` to the output.
        fn push_braced(out: &mut LinkedList<Cow<'static, str>>, content: String) {
            out.push_back("{".into());
            out.push_back(content.into());
            out.push_back("}".into());
        }

        let mut sorted_lines: Vec<&MdvdLine> = self.v.iter().collect();
        sorted_lines.sort_by_key(|line| (line.start_frame, line.end_frame));

        let mut result: LinkedList<Cow<'static, str>> = LinkedList::new();
        let groups = sorted_lines.into_iter().group_by(|line| (line.start_frame, line.end_frame));

        for (gi, ((start_frame, end_frame), group_iter)) in (&groups).into_iter().enumerate() {
            if gi != 0 {
                result.push_back("\n".into());
            }

            let group: Vec<&MdvdLine> = group_iter.collect();

            // Formatting present on every line of the group; only computed
            // when the group really has more than one line.
            let common_formatting: HashSet<&MdvdFormatting> = match group.split_first() {
                Some((first, rest)) if !rest.is_empty() => {
                    let mut common: HashSet<&MdvdFormatting> = first.formatting.iter().collect();
                    for line in rest {
                        let present: HashSet<&MdvdFormatting> = line.formatting.iter().collect();
                        common.retain(|formatting| present.contains(formatting));
                    }
                    common
                }
                _ => HashSet::new(),
            };

            push_braced(&mut result, start_frame.to_string());
            push_braced(&mut result, end_frame.to_string());

            // Every shared tag is on the first line by definition, so walking
            // that line's tags yields all of them in a stable order.
            if let Some(first) = group.first() {
                let mut emitted: HashSet<&MdvdFormatting> = HashSet::new();
                for formatting in &first.formatting {
                    if common_formatting.contains(formatting) && emitted.insert(formatting) {
                        push_braced(&mut result, formatting.to_formatting_string(true));
                    }
                }
            }

            for (i, line) in group.iter().enumerate() {
                if i != 0 {
                    result.push_back("|".into());
                }

                let mut emitted: HashSet<&MdvdFormatting> = HashSet::new();
                for formatting in &line.formatting {
                    if !common_formatting.contains(formatting) && emitted.insert(formatting) {
                        push_braced(&mut result, formatting.to_formatting_string(false));
                    }
                }

                result.push_back(line.text.clone().into());
            }
        }

        Ok(result.into_iter().collect::<String>().into_bytes())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use SubtitleFileInterface;

    /// Parses `s` at 25 fps and serializes it again.
    fn mdvd_reconstruct(s: &str) -> String {
        let bytes = MdvdFile::parse(s, 25.0).unwrap().to_data().unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Checks that `input` serializes to `expected` and that `expected` is a
    /// fixed point of parse + serialize.
    fn test_mdvd(input: &str, expected: &str) {
        assert_eq!(mdvd_reconstruct(input), expected);
        assert_eq!(mdvd_reconstruct(expected), expected);
    }

    #[test]
    fn mdvd_test_reconstruction() {
        // (input, expected output)
        let cases: [(&str, &str); 9] = [
            ("{0}{25}Hello!", "{0}{25}Hello!"),
            ("{0}{25}{y:i}Hello!", "{0}{25}{y:i}Hello!"),
            ("{0}{25}{Y:i}Hello!", "{0}{25}{y:i}Hello!"),
            ("{0}{25}{Y:i}\n", "{0}{25}{y:i}"),
            ("{0}{25}{y:i}Text1|{y:i}Text2", "{0}{25}{Y:i}Text1|Text2"),
            ("{0}{25}{y:i}Text1\n{0}{25}{y:i}Text2", "{0}{25}{Y:i}Text1|Text2"),
            ("{0}{25}{y:i}{y:b}Text1\n{0}{25}{y:i}Text2", "{0}{25}{Y:i}{y:b}Text1|Text2"),
            ("{0}{25}{y:i}{y:b}Text1\n{0}{25}{y:i}Text2", "{0}{25}{Y:i}{y:b}Text1|Text2"),
            ("{0}{25}{y:i}Text1\n{0}{26}{y:i}Text2", "{0}{25}{y:i}Text1\n{0}{26}{y:i}Text2"),
        ];

        for &(input, expected) in cases.iter() {
            test_mdvd(input, expected);
        }
    }
}