1use self::errors::ErrorKind::*;
6use self::errors::*;
7use crate::{SubtitleEntry, SubtitleFileInterface};
8
9use crate::errors::Result as SubtitleParserResult;
10use crate::formats::common::*;
11use combine::char::char;
12use combine::combinator::{eof, many, parser as p, satisfy, sep_by};
13use combine::primitives::Parser;
14
15use itertools::Itertools;
16use std::borrow::Cow;
17use std::collections::HashSet;
18
19use failure::ResultExt;
20
21use crate::timetypes::{TimePoint, TimeSpan};
22use std::collections::LinkedList;
23
24#[allow(missing_docs)]
26pub mod errors {
27 pub type Result<T> = std::result::Result<T, Error>;
28
29 define_error!(Error, ErrorKind);
30
31 #[derive(PartialEq, Debug, Fail)]
32 pub enum ErrorKind {
33 #[fail(display = "expected subtittle line, found `{}`", line)]
34 ExpectedSubtitleLine { line: String },
35 #[fail(display = "parse error at line `{}`", line_num)]
36 ErrorAtLine { line_num: usize },
37 }
38}
39
/// A single MicroDVD formatting tag (the text found between `{` and `}`).
///
/// The case of the tag's first character is not part of the stored value: it is normalized to
/// lowercase on construction and chosen again when the tag is written out
/// (see `to_formatting_string`).
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
enum MdvdFormatting {
    /// A tag that is not interpreted any further; the payload is the tag text.
    Unknown(String),
}

impl From<String> for MdvdFormatting {
    /// Wraps the tag text `f`, lowercasing its first character so that the container-level and
    /// line-level spelling of the same tag compare equal.
    fn from(f: String) -> MdvdFormatting {
        MdvdFormatting::Unknown(Self::lowercase_first_char(&f))
    }
}

impl MdvdFormatting {
    /// Returns `true` when the tag text starts with an uppercase character, which marks a tag
    /// that applies to every sub-line of a `|`-separated subtitle line.
    ///
    /// An empty string yields `false`.
    fn is_container_line_formatting(f: &str) -> bool {
        f.chars().next().map_or(false, char::is_uppercase)
    }

    /// Returns `s` with its first character converted to lowercase; the rest is left untouched.
    fn lowercase_first_char(s: &str) -> String {
        let mut chars = s.chars();
        match chars.next() {
            None => String::new(),
            // A case conversion may expand to several characters, hence the iterator chain.
            Some(first) => first.to_lowercase().chain(chars).collect(),
        }
    }

    /// Returns `s` with its first character converted to uppercase; the rest is left untouched.
    fn uppercase_first_char(s: &str) -> String {
        let mut chars = s.chars();
        match chars.next() {
            None => String::new(),
            // A case conversion may expand to several characters (e.g. `ß` becomes `SS`).
            Some(first) => first.to_uppercase().chain(chars).collect(),
        }
    }

    /// Returns the stored tag text exactly as it is kept internally.
    fn to_formatting_string_intern(&self) -> String {
        match self {
            MdvdFormatting::Unknown(text) => text.clone(),
        }
    }

    /// Renders the tag text for output.
    ///
    /// With `multiline == true` the first character is uppercased (container-level tag),
    /// otherwise it is lowercased (tag for a single sub-line).
    fn to_formatting_string(&self, multiline: bool) -> String {
        let text = self.to_formatting_string_intern();
        if multiline {
            Self::uppercase_first_char(&text)
        } else {
            Self::lowercase_first_char(&text)
        }
    }
}
95
96#[derive(Debug, Clone)]
97pub struct MdvdFile {
99 fps: f64,
102
103 v: Vec<MdvdLine>,
105}
106
107#[derive(Debug, Clone)]
109struct MdvdLine {
110 start_frame: i64,
112
113 end_frame: i64,
115
116 formatting: Vec<MdvdFormatting>,
118
119 text: String,
121}
122
123impl MdvdLine {
124 fn to_subtitle_entry(&self, fps: f64) -> SubtitleEntry {
125 SubtitleEntry {
126 timespan: TimeSpan::new(
127 TimePoint::from_msecs((self.start_frame as f64 * 1000.0 / fps) as i64),
128 TimePoint::from_msecs((self.end_frame as f64 * 1000.0 / fps) as i64),
129 ),
130 line: Some(self.text.clone()),
131 }
132 }
133}
134
135impl MdvdFile {
136 pub fn parse(s: &str, fps: f64) -> SubtitleParserResult<MdvdFile> {
138 Ok(Self::parse_file(s, fps).with_context(|_| crate::ErrorKind::ParsingError)?)
139 }
140}
141
142impl MdvdFile {
144 fn parse_file(i: &str, fps: f64) -> Result<MdvdFile> {
145 let mut result: Vec<MdvdLine> = Vec::new();
146
147 let (_, s) = split_bom(i);
149
150 for (line_num, line) in s.lines().enumerate() {
151 let mut lines: Vec<MdvdLine> = Self::parse_line(line_num, line)?;
154 result.append(&mut lines);
155 }
156
157 Ok(MdvdFile { fps: fps, v: result })
158 }
159
160 fn parse_line(line_num: usize, line: &str) -> Result<Vec<MdvdLine>> {
162 let sub_info = (char('{'), many(satisfy(|c| c != '}')), char('}'))
164 .map(|(_, info, _): (_, String, _)| info)
165 .expected("MicroDVD info");
166
167 let single_line = (many(sub_info), many(satisfy(|c| c != '|')));
170
171 (
173 char('{'),
174 p(number_i64),
175 char('}'),
176 char('{'),
177 p(number_i64),
178 char('}'),
179 sep_by(single_line, char('|')),
180 eof(),
181 )
182 .map(|(_, start_frame, _, _, end_frame, _, fmt_strs_and_lines, ())| (start_frame, end_frame, fmt_strs_and_lines))
183 .map(|(start_frame, end_frame, fmt_strs_and_lines): (i64, i64, Vec<(Vec<String>, String)>)| {
184 Self::construct_mdvd_lines(start_frame, end_frame, fmt_strs_and_lines)
185 })
186 .parse(line)
187 .map(|x| x.0)
188 .map_err(|_| Error::from(ExpectedSubtitleLine { line: line.to_string() }))
189 .with_context(|_| ErrorAtLine { line_num })
190 .map_err(Error::from)
191 }
192
193 fn construct_mdvd_lines(start_frame: i64, end_frame: i64, fmt_strs_and_lines: Vec<(Vec<String>, String)>) -> Vec<MdvdLine> {
199 let mut cline_fmts: Vec<MdvdFormatting> = Vec::new();
201
202 let fmts_and_lines = fmt_strs_and_lines
204 .into_iter()
205 .map(|(fmts, text)| (Self::string_to_formatting(&mut cline_fmts, fmts), text))
206 .collect::<Vec<_>>();
207
208 fmts_and_lines
212 .into_iter()
213 .map(|(sline_fmts, text)| MdvdLine {
214 start_frame: start_frame,
215 end_frame: end_frame,
216 text: text,
217 formatting: cline_fmts.clone().into_iter().chain(sline_fmts.into_iter()).collect(),
218 })
219 .collect()
220 }
221
222 fn string_to_formatting(multiline_formatting: &mut Vec<MdvdFormatting>, fmts: Vec<String>) -> Vec<MdvdFormatting> {
226 let (cline_fmts_str, sline_fmts_str): (Vec<_>, Vec<_>) = fmts
228 .into_iter()
229 .partition(|fmt_str| MdvdFormatting::is_container_line_formatting(fmt_str));
230
231 multiline_formatting.extend(&mut cline_fmts_str.into_iter().map(MdvdFormatting::from));
232 sline_fmts_str.into_iter().map(MdvdFormatting::from).collect()
233 }
234}
235
236impl SubtitleFileInterface for MdvdFile {
237 fn get_subtitle_entries(&self) -> SubtitleParserResult<Vec<SubtitleEntry>> {
238 Ok(self.v.iter().map(|line| line.to_subtitle_entry(self.fps)).collect())
239 }
240
241 fn update_subtitle_entries(&mut self, new_subtitle_entries: &[SubtitleEntry]) -> SubtitleParserResult<()> {
242 assert_eq!(new_subtitle_entries.len(), self.v.len());
243
244 let mut iter = new_subtitle_entries.iter().peekable();
245 for line in &mut self.v {
246 let peeked = iter.next().unwrap();
247
248 line.start_frame = (peeked.timespan.start.secs_f64() * self.fps) as i64;
249 line.end_frame = (peeked.timespan.end.secs_f64() * self.fps) as i64;
250
251 if let Some(ref text) = peeked.line {
252 line.text = text.clone();
253 }
254 }
255
256 Ok(())
257 }
258
259 fn to_data(&self) -> SubtitleParserResult<Vec<u8>> {
260 let mut sorted_list = self.v.clone();
261 sorted_list.sort_by_key(|line| (line.start_frame, line.end_frame));
262
263 let mut result: LinkedList<Cow<'static, str>> = LinkedList::new();
264
265 for (gi, group_iter) in sorted_list
266 .into_iter()
267 .group_by(|line| (line.start_frame, line.end_frame))
268 .into_iter()
269 .enumerate()
270 {
271 if gi != 0 {
272 result.push_back("\n".into());
273 }
274
275 let group: Vec<MdvdLine> = group_iter.1.collect();
276 let group_len = group.len();
277
278 let (start_frame, end_frame) = group_iter.0;
279 let (formattings, texts): (Vec<HashSet<MdvdFormatting>>, Vec<String>) =
280 group.into_iter().map(|line| (line.formatting.into_iter().collect(), line.text)).unzip();
281
282 let common_formatting = if group_len == 1 {
287 HashSet::new()
289 } else {
290 formattings
291 .iter()
292 .fold(None, |acc, set| match acc {
293 None => Some(set.clone()),
294 Some(acc_set) => Some(acc_set.intersection(set).cloned().collect()),
295 })
296 .unwrap()
297 };
298
299 let individual_formattings = formattings
300 .into_iter()
301 .map(|formatting| formatting.difference(&common_formatting).cloned().collect())
302 .collect::<Vec<HashSet<MdvdFormatting>>>();
303
304 result.push_back("{".into());
305 result.push_back(start_frame.to_string().into());
306 result.push_back("}".into());
307
308 result.push_back("{".into());
309 result.push_back(end_frame.to_string().into());
310 result.push_back("}".into());
311
312 for formatting in &common_formatting {
313 result.push_back("{".into());
314 result.push_back(formatting.to_formatting_string(true).into());
315 result.push_back("}".into());
316 }
317
318 for (i, (individual_formatting, text)) in individual_formattings.into_iter().zip(texts.into_iter()).enumerate() {
319 if i != 0 {
320 result.push_back("|".into());
321 }
322
323 for formatting in individual_formatting {
324 result.push_back("{".into());
325 result.push_back(formatting.to_formatting_string(false).into());
326 result.push_back("}".into());
327 }
328
329 result.push_back(text.into());
330 }
331
332 }
334
335 Ok(result.into_iter().map(|cow| cow.to_string()).collect::<String>().into_bytes())
336 }
337}
338
#[cfg(test)]
mod tests {
    // The glob import also brings `SubtitleFileInterface` (needed for `to_data`) into scope.
    use super::*;

    /// Parses `s` as MicroDVD at 25 fps and serializes it again.
    fn mdvd_reconstruct(s: &str) -> String {
        let file = MdvdFile::parse(s, 25.0).unwrap();
        let data = file.to_data().unwrap();
        String::from_utf8(data).unwrap()
    }

    /// Checks that `input` is rewritten to `expected`, and that `expected` is a fixed point.
    fn test_mdvd(input: &str, expected: &str) {
        // Parsing and re-serializing normalizes the input.
        assert_eq!(mdvd_reconstruct(input), expected);

        // Normalized output must survive another round trip unchanged.
        assert_eq!(mdvd_reconstruct(expected), expected);
    }

    #[test]
    fn mdvd_test_reconstruction() {
        // Single texts: tags stay line-level (lowercase), whatever case the input used.
        test_mdvd("{0}{25}Hello!", "{0}{25}Hello!");
        test_mdvd("{0}{25}{y:i}Hello!", "{0}{25}{y:i}Hello!");
        test_mdvd("{0}{25}{Y:i}Hello!", "{0}{25}{y:i}Hello!");
        test_mdvd("{0}{25}{Y:i}\n", "{0}{25}{y:i}");

        // Texts sharing a frame range are merged; tags common to all become container-level.
        test_mdvd("{0}{25}{y:i}Text1|{y:i}Text2", "{0}{25}{Y:i}Text1|Text2");
        test_mdvd("{0}{25}{y:i}Text1\n{0}{25}{y:i}Text2", "{0}{25}{Y:i}Text1|Text2");
        test_mdvd("{0}{25}{y:i}{y:b}Text1\n{0}{25}{y:i}Text2", "{0}{25}{Y:i}{y:b}Text1|Text2");

        // Different frame ranges are never merged.
        test_mdvd("{0}{25}{y:i}Text1\n{0}{26}{y:i}Text2", "{0}{25}{y:i}Text1\n{0}{26}{y:i}Text2");
    }

    #[test]
    fn mdvd_test_malformed_line_is_rejected() {
        // No frame range at all.
        assert!(MdvdFile::parse("Hello!", 25.0).is_err());
        // Only one of the two frame numbers.
        assert!(MdvdFile::parse("{0}Hello!", 25.0).is_err());
    }
}