subparse/formats/
ssa.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5use crate::{SubtitleEntry, SubtitleFileInterface};
6
7use crate::errors::Result as SubtitleParserResult;
8use crate::formats::common::*;
9use combine::char::*;
10use combine::combinator::*;
11use combine::primitives::Parser;
12
13use crate::timetypes::{TimePoint, TimeSpan};
14use failure::ResultExt;
15use std::iter::once;
16
17type Result<T> = std::result::Result<T, Error>;
18
19use self::errors::Error;
20use self::errors::ErrorKind::*;
21
/// Errors specific to the '.ssa' format.
///
/// The `Error`/`ErrorKind` pair is wired together by the crate's `define_error!` macro.
#[allow(missing_docs)]
pub mod errors {

    define_error!(Error, ErrorKind);

    /// `.ssa`-parser-specific errors
    ///
    /// `line_num` fields carry the line index as counted by the parser that raised the error.
    #[derive(PartialEq, Debug, Fail)]
    pub enum ErrorKind {
        /// No line starting with `Format:` was found inside an `[Events]` section.
        #[fail(display = ".ssa/.ass file did not have a line beginning with `Format: ` in a `[Events]` section")]
        SsaFieldsInfoNotFound,

        /// The required field `f` does not appear in the field info line.
        #[fail(display = "the '{}' field is missing in the field info in line {}", f, line_num)]
        SsaMissingField { line_num: usize, f: &'static str },

        /// The field `f` appears more than once in the field info line.
        #[fail(display = "the '{}' field is twice in the field info in line {}", f, line_num)]
        SsaDuplicateField { line_num: usize, f: &'static str },

        /// The `Text` field exists in the field info line but is not the last field.
        #[fail(display = "the field info in line {} has to have `Text` as its last field", line_num)]
        SsaTextFieldNotLast { line_num: usize },

        /// A dialog line does not have the number of fields announced by the field info line.
        #[fail(display = "the dialog at line {} has incorrect number of fields", line_num)]
        SsaIncorrectNumberOfFields { line_num: usize },

        /// A timepoint could not be parsed; `string` is interpolated as the timepoint in the message.
        #[fail(display = "the timepoint `{}` in line {} has wrong format", string, line_num)]
        SsaWrongTimepointFormat { line_num: usize, string: String },

        /// The parser combinator rejected a `Dialogue:` line; `msg` holds the parser's message.
        #[fail(display = "parsing the line `{}` failed because of `{}`", line_num, msg)]
        SsaDialogLineParseError { line_num: usize, msg: String },

        /// Generic line parse failure; renders exactly like `SsaDialogLineParseError`.
        // NOTE(review): no construction site is visible in this file - confirm it is still needed.
        #[fail(display = "parsing the line `{}` failed because of `{}`", line_num, msg)]
        SsaLineParseError { line_num: usize, msg: String },
    }
}
56/*error_chain! {
57    errors {
58        SsaFieldsInfoNotFound {
59            description(".ssa/.ass file did not have a line beginning with `Format: ` in a `[Events]` section")
60        }
61        SsaMissingField(line_num: usize, f: &'static str) {
62            display("the '{}' field is missing in the field info in line {}", f, line_num)
63        }
64        SsaDuplicateField(line_num: usize, f: &'static str) {
65            display("the '{}' field is twice in the field info in line {}", f, line_num)
66        }
67        SsaTextFieldNotLast(line_num: usize) {
68            display("the field info in line {} has to have `Text` as its last field", line_num)
69        }
70        SsaIncorrectNumberOfFields(line_num: usize) {
71            display("the dialog at line {} has incorrect number of fields", line_num)
72        }
73        SsaWrongTimepointFormat(line_num: usize, string: String) {
74            display("the timepoint `{}` in line {} has wrong format", string, line_num)
75        }
76        SsaDialogLineParseError(line_num: usize, msg: String) {
77            display("parsing the line `{}` failed because of `{}`", line_num, msg)
78        }
79        SsaLineParseError(line_num: usize, msg: String) {
80            display("parsing the line `{}` failed because of `{}`", line_num, msg)
81        }
82    }
83}*/
84
85// ////////////////////////////////////////////////////////////////////////////////////////////////
86// SSA field info
87
/// Column layout of dialog lines, as announced by the `Format:` line.
///
/// All indices are zero-based positions within the comma-separated field list.
struct SsaFieldsInfo {
    /// Position of the `Start` field.
    start_field_idx: usize,
    /// Position of the `End` field.
    end_field_idx: usize,
    /// Position of the `Text` field (the constructor only accepts it as the last field).
    text_field_idx: usize,
    /// Total number of comma-separated fields in the format line.
    num_fields: usize,
}
94
95impl SsaFieldsInfo {
96    /// Parses a format line like "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text".
97    fn new_from_fields_info_line(line_num: usize, s: String) -> Result<SsaFieldsInfo> {
98        assert!(s.starts_with("Format:"));
99        let field_info = &s["Format:".len()..];
100        let mut start_field_idx: Option<usize> = None;
101        let mut end_field_idx: Option<usize> = None;
102        let mut text_field_idx: Option<usize> = None;
103
104        // filter "Start" and "End" and "Text"
105        let split_iter = field_info.split(',');
106        let num_fields = split_iter.clone().count();
107        for (i, field_name) in split_iter.enumerate() {
108            let trimmed = field_name.trim();
109            if trimmed == "Start" {
110                if start_field_idx.is_some() {
111                    return Err(SsaDuplicateField { line_num, f: "Start" })?;
112                }
113                start_field_idx = Some(i);
114            } else if trimmed == "End" {
115                if end_field_idx.is_some() {
116                    return Err(SsaDuplicateField { line_num, f: "End" })?;
117                }
118                end_field_idx = Some(i);
119            } else if trimmed == "Text" {
120                if text_field_idx.is_some() {
121                    return Err(SsaDuplicateField { line_num, f: "Text" })?;
122                }
123                text_field_idx = Some(i);
124            }
125        }
126
127        let text_field_idx2 = text_field_idx.ok_or_else(|| Error::from(SsaMissingField { line_num, f: "Text" }))?;
128        if text_field_idx2 != num_fields - 1 {
129            return Err(SsaTextFieldNotLast { line_num })?;
130        }
131
132        Ok(SsaFieldsInfo {
133            start_field_idx: start_field_idx.ok_or_else(|| Error::from(SsaMissingField { line_num, f: "Start" }))?,
134            end_field_idx: end_field_idx.ok_or_else(|| Error::from(SsaMissingField { line_num, f: "End" }))?,
135            text_field_idx: text_field_idx2,
136            num_fields: num_fields,
137        })
138    }
139}
140
141// ////////////////////////////////////////////////////////////////////////////////////////////////
142// SSA parser
143
144impl SsaFile {
145    /// Parse a `.ssa` subtitle string to `SsaFile`.
146    pub fn parse(s: &str) -> SubtitleParserResult<SsaFile> {
147        Ok(Self::parse_inner(s.to_string()).with_context(|_| crate::ErrorKind::ParsingError)?)
148    }
149}
150
151/// Implement parser helper functions.
152impl SsaFile {
153    /// Parses a whole `.ssa` file from string.
154    fn parse_inner(i: String) -> Result<SsaFile> {
155        let mut file_parts = Vec::new();
156        let (bom, s) = split_bom(&i);
157        file_parts.push(SsaFilePart::Filler(bom.to_string()));
158
159        // first we need to find and parse the format line, which then dictates how to parse the file
160        let (line_num, field_info_line) = Self::get_format_info(s)?;
161        let fields_info = SsaFieldsInfo::new_from_fields_info_line(line_num, field_info_line)?;
162
163        // parse the dialog lines with the given format
164        file_parts.append(&mut Self::parse_dialog_lines(&fields_info, s)?);
165        Ok(SsaFile::new(file_parts))
166    }
167
168    /// Searches and parses a format line like "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text".
169    fn get_format_info(s: &str) -> Result<(usize, String)> {
170        let mut section_opt = None;
171        for (line_num, line) in s.lines().enumerate() {
172            // parse section headers like `[Events]`
173            let trimmed_line = line.trim();
174            if trimmed_line.starts_with('[') && trimmed_line.ends_with(']') {
175                section_opt = Some(&trimmed_line[1..trimmed_line.len() - 1]);
176            }
177
178            // most sections have a format line, but we only want the one for the subtitle events
179            if section_opt != Some("Events") {
180                continue;
181            }
182            if !line.trim().starts_with("Format:") {
183                continue;
184            }
185            return Ok((line_num, line.to_string()));
186        }
187
188        Err(SsaFieldsInfoNotFound.into())
189    }
190
191    /// Filters file for lines like this and parses them:
192    ///
193    /// ```text
194    /// "Dialogue: 1,0:22:43.52,0:22:46.22,ED-Romaji,,0,0,0,,{\fad(150,150)\blur0.5\bord1}some text"
195    /// ```
196    fn parse_dialog_lines(fields_info: &SsaFieldsInfo, s: &str) -> Result<Vec<SsaFilePart>> {
197        let mut result = Vec::new();
198        let mut section_opt: Option<String> = None;
199
200        for (line_num, (line, newl)) in get_lines_non_destructive(s).into_iter().enumerate() {
201            let trimmed_line = line.trim().to_string();
202
203            // parse section headers like `[Events]`
204            if trimmed_line.starts_with('[') && trimmed_line.ends_with(']') {
205                section_opt = Some(trimmed_line[1..trimmed_line.len() - 1].to_string());
206                result.push(SsaFilePart::Filler(line));
207                result.push(SsaFilePart::Filler("\n".to_string()));
208                continue;
209            }
210
211            if section_opt.is_none() || section_opt.iter().any(|s| s != "Events") || !trimmed_line.starts_with("Dialogue:") {
212                result.push(SsaFilePart::Filler(line));
213                result.push(SsaFilePart::Filler("\n".to_string()));
214                continue;
215            }
216
217            result.append(&mut Self::parse_dialog_line(line_num, line.as_str(), fields_info)?);
218            result.push(SsaFilePart::Filler(newl));
219        }
220
221        Ok(result)
222    }
223
224    /// Parse lines like:
225    ///
226    /// ```text
227    /// "Dialogue: 1,0:22:43.52,0:22:46.22,ED-Romaji,,0,0,0,,{\fad(150,150)\blur0.5\bord1}some text"
228    /// ```
229    fn parse_dialog_line(line_num: usize, line: &str, fields_info: &SsaFieldsInfo) -> Result<Vec<SsaFilePart>> {
230        let parts_res = (
231            many(ws()),
232            string("Dialogue:"),
233            many(ws()),
234            count(fields_info.num_fields - 1, (many(none_of(once(','))), token(','))),
235            many(r#try(any())),
236        )
237            .map(
238                |(ws1, dl, ws2, v, text): (String, &str, String, Vec<(String, char)>, String)| -> Result<Vec<SsaFilePart>> {
239                    let mut result: Vec<SsaFilePart> = Vec::new();
240                    result.push(SsaFilePart::Filler(ws1));
241                    result.push(SsaFilePart::Filler(dl.to_string()));
242                    result.push(SsaFilePart::Filler(ws2.to_string()));
243                    result.append(&mut Self::parse_fields(line_num, fields_info, v)?);
244                    result.push(SsaFilePart::Text(text));
245                    Ok(result)
246                },
247            )
248            .parse(line);
249
250        match parts_res {
251            // Ok() means that parsing succeded, but the "map" function might created an SSA error
252            Ok((parts, _)) => Ok(parts?),
253            Err(e) => Err(SsaDialogLineParseError {
254                line_num,
255                msg: parse_error_to_string(e),
256            }
257            .into()),
258        }
259    }
260
261    /// Parses an array of fields with the "fields info".
262    ///
263    /// The fields (comma seperated information) as an array like
264    // `vec!["1", "0:22:43.52", "0:22:46.22", "ED-Romaji", "", "0", "0", "0", "", "{\fad(150,150)\blur0.5\bord1}some text"]`.
265    fn parse_fields(line_num: usize, fields_info: &SsaFieldsInfo, v: Vec<(String, char)>) -> Result<Vec<SsaFilePart>> {
266        let extract_file_parts_closure = |(i, (field, sep_char)): (_, (String, char))| -> Result<Vec<SsaFilePart>> {
267            let (begin, field, end) = trim_non_destructive(&field);
268
269            let part = if i == fields_info.start_field_idx {
270                SsaFilePart::TimespanStart(Self::parse_timepoint(line_num, &field)?)
271            } else if i == fields_info.end_field_idx {
272                SsaFilePart::TimespanEnd(Self::parse_timepoint(line_num, &field)?)
273            } else if i == fields_info.text_field_idx {
274                SsaFilePart::Text(field.to_string())
275            } else {
276                SsaFilePart::Filler(field.to_string())
277            };
278
279            Ok(vec![
280                SsaFilePart::Filler(begin),
281                part,
282                SsaFilePart::Filler(end),
283                SsaFilePart::Filler(sep_char.to_string()),
284            ])
285        };
286
287        let result = v
288            .into_iter()
289            .enumerate()
290            .map(extract_file_parts_closure)
291            .collect::<Result<Vec<Vec<SsaFilePart>>>>()?
292            .into_iter()
293            .flat_map(|part| part)
294            .collect();
295        Ok(result)
296    }
297
298    /// Something like "0:19:41.99"
299    fn parse_timepoint(line_num: usize, s: &str) -> Result<TimePoint> {
300        let parse_res = (
301            parser(number_i64),
302            token(':'),
303            parser(number_i64),
304            token(':'),
305            parser(number_i64),
306            or(token('.'), token(':')),
307            parser(number_i64),
308            eof(),
309        )
310            .map(|(h, _, mm, _, ss, _, ms, _)| TimePoint::from_components(h, mm, ss, ms * 10))
311            .parse(s);
312        match parse_res {
313            Ok(res) => Ok(res.0),
314            Err(e) => Err(SsaWrongTimepointFormat {
315                line_num,
316                string: parse_error_to_string(e),
317            }
318            .into()),
319        }
320    }
321}
322
323// ////////////////////////////////////////////////////////////////////////////////////////////////
324// SSA file parts
325
/// One piece of a parsed `.ssa` file.
///
/// A file is stored as a sequence of these parts; serializing them in order yields the file
/// content again.
#[derive(Debug, Clone)]
enum SsaFilePart {
    /// Spaces, field information, comments, unimportant fields, ...
    ///
    /// Written back verbatim on serialization.
    Filler(String),

    /// Timespan start of a dialogue line
    TimespanStart(TimePoint),

    /// Timespan end of a dialogue line
    TimespanEnd(TimePoint),

    /// Dialog lines
    ///
    /// The text of a dialogue line; written back verbatim on serialization.
    Text(String),
}
340
341// ////////////////////////////////////////////////////////////////////////////////////////////////
342// SSA file
343
/// Represents a reconstructable `.ssa`/`.ass` file.
///
/// All unimportant information (for this project) is saved into `SsaFilePart::Filler(...)`, so
/// a timespan-altered file still has the same fields etc.
#[derive(Debug, Clone)]
pub struct SsaFile {
    /// The file content as an ordered list of parts.
    v: Vec<SsaFilePart>,
}
352
353impl SsaFile {
354    fn new(v: Vec<SsaFilePart>) -> SsaFile {
355        // cleans up multiple fillers after another
356        let new_file_parts = dedup_string_parts(v, |part: &mut SsaFilePart| match *part {
357            SsaFilePart::Filler(ref mut text) => Some(text),
358            _ => None,
359        });
360
361        SsaFile { v: new_file_parts }
362    }
363
    /// This function filters out all start times and end times, and returns them ordered
    /// (="(start, end, dialog)") so they can be easily read or written to.
    ///
    /// The parts are walked in file order: start and end timepoints are buffered until the
    /// next text part is reached, which then completes one `(start, end, text)` triple.
    ///
    /// # Panics
    ///
    /// Panics if the parts violate the layout the parser is supposed to guarantee: two start
    /// (or two end) timepoints without a text part in between, a text part without a
    /// preceding start or end timepoint, or a timepoint left over after the last text part.
    ///
    /// TODO: implement a single version that takes both `&mut` and `&` (dependent on HKT).
    fn get_subtitle_entries_mut<'a>(&'a mut self) -> Vec<(&'a mut TimePoint, &'a mut TimePoint, &'a mut String)> {
        // timepoints seen since the last text part, waiting to be paired with the next text
        let mut startpoint_buffer: Option<&'a mut TimePoint> = None;
        let mut endpoint_buffer: Option<&'a mut TimePoint> = None;

        // the extra block satisfies the borrow checker: the closure borrowing the buffers
        // has to be gone before the buffers are inspected again below
        let timings: Vec<_> = {
            let filter_map_closure = |part: &'a mut SsaFilePart| -> Option<(&'a mut TimePoint, &'a mut TimePoint, &'a mut String)> {
                use self::SsaFilePart::*;
                match *part {
                    TimespanStart(ref mut start) => {
                        assert_eq!(startpoint_buffer, None); // parser should have ensured that no two consecutive SSA start times exist
                        startpoint_buffer = Some(start);
                        None
                    }
                    TimespanEnd(ref mut end) => {
                        assert_eq!(endpoint_buffer, None); // parser should have ensured that no two consecutive SSA end times exist
                        endpoint_buffer = Some(end);
                        None
                    }
                    Text(ref mut text) => {
                        // take the buffered timepoints, which also resets the buffers for the next line
                        let snatched_startpoint_buffer = startpoint_buffer.take();
                        let snatched_endpoint_buffer = endpoint_buffer.take();

                        let start = snatched_startpoint_buffer.expect("SSA parser should have ensured that every line has a startpoint");
                        let end = snatched_endpoint_buffer.expect("SSA parser should have ensured that every line has a endpoint");

                        Some((start, end, text))
                    }
                    // fillers carry no subtitle information
                    Filler(_) => None,
                }
            };

            self.v.iter_mut().filter_map(filter_map_closure).collect()
        };

        // every timespan should now consist of a beginning and an end (this should be ensured by parser)
        assert_eq!(startpoint_buffer, None);
        assert_eq!(endpoint_buffer, None);

        timings
    }
410}
411
412impl SubtitleFileInterface for SsaFile {
413    fn get_subtitle_entries(&self) -> SubtitleParserResult<Vec<SubtitleEntry>> {
414        // it's unfortunate we have to clone the file before using
415        // `get_subtitle_entries_mut()`, but otherwise we'd have to copy the`
416        // `get_subtitle_entries_mut()` and create a non-mut-reference version
417        // of it (much code duplication); I think a `clone` in this
418        // not-time-critical code is acceptable, and after HKT become
419        // available, this can be solved much nicer.
420        let mut new_file = self.clone();
421        let timings = new_file
422            .get_subtitle_entries_mut()
423            .into_iter()
424            .map(|(&mut start, &mut end, text)| SubtitleEntry::new(TimeSpan::new(start, end), text.clone()))
425            .collect();
426
427        Ok(timings)
428    }
429
430    fn update_subtitle_entries(&mut self, new_subtitle_entries: &[SubtitleEntry]) -> SubtitleParserResult<()> {
431        let subtitle_entries = self.get_subtitle_entries_mut();
432        assert_eq!(subtitle_entries.len(), new_subtitle_entries.len()); // required by specification of this function
433
434        for ((start_ref, end_ref, text_ref), new_entry_ref) in subtitle_entries.into_iter().zip(new_subtitle_entries) {
435            *start_ref = new_entry_ref.timespan.start;
436            *end_ref = new_entry_ref.timespan.end;
437            if let Some(ref text) = new_entry_ref.line {
438                *text_ref = text.clone();
439            }
440        }
441
442        Ok(())
443    }
444
445    fn to_data(&self) -> SubtitleParserResult<Vec<u8>> {
446        // timing to string like "0:00:22.21"
447        let fn_timing_to_string = |t: TimePoint| {
448            let p = if t.msecs() < 0 { -t } else { t };
449            format!(
450                "{}{}:{:02}:{:02}.{:02}",
451                if t.msecs() < 0 { "-" } else { "" },
452                p.hours(),
453                p.mins_comp(),
454                p.secs_comp(),
455                p.csecs_comp()
456            )
457        };
458
459        let fn_file_part_to_string = |part: &SsaFilePart| {
460            use self::SsaFilePart::*;
461            match *part {
462                Filler(ref t) | Text(ref t) => t.clone(),
463                TimespanStart(start) => fn_timing_to_string(start),
464                TimespanEnd(end) => fn_timing_to_string(end),
465            }
466        };
467
468        let result: String = self.v.iter().map(fn_file_part_to_string).collect();
469
470        Ok(result.into_bytes())
471    }
472}