1use crate::{SubtitleEntry, SubtitleFileInterface};
6
7use crate::errors::Result as SubtitleParserResult;
8use crate::formats::common::*;
9use combine::char::*;
10use combine::combinator::*;
11use combine::primitives::Parser;
12
13use crate::timetypes::{TimePoint, TimeSpan};
14use failure::ResultExt;
15use std::iter::once;
16
17type Result<T> = std::result::Result<T, Error>;
18
19use self::errors::Error;
20use self::errors::ErrorKind::*;
21
/// Error types reported by the `.ssa`/`.ass` parser in this file.
#[allow(missing_docs)]
pub mod errors {

    // Provides the `Error` type that this file imports as `self::errors::Error`
    // and builds from `ErrorKind` values (the macro itself is defined elsewhere
    // in the crate).
    define_error!(Error, ErrorKind);

    /// The individual failure reasons of the `.ssa`/`.ass` parser.
    ///
    /// Every `line_num` is the index the parser obtained from `enumerate()` over
    /// the lines of the input, i.e. the first line has number `0`.
    #[derive(PartialEq, Debug, Fail)]
    pub enum ErrorKind {
        /// No `Format:` line was found inside an `[Events]` section.
        #[fail(display = ".ssa/.ass file did not have a line beginning with `Format: ` in a `[Events]` section")]
        SsaFieldsInfoNotFound,

        /// The `Format:` line does not list the required field `f`.
        #[fail(display = "the '{}' field is missing in the field info in line {}", f, line_num)]
        SsaMissingField { line_num: usize, f: &'static str },

        /// The `Format:` line lists the field `f` more than once.
        #[fail(display = "the '{}' field is twice in the field info in line {}", f, line_num)]
        SsaDuplicateField { line_num: usize, f: &'static str },

        /// The `Format:` line contains `Text`, but not as its last field.
        #[fail(display = "the field info in line {} has to have `Text` as its last field", line_num)]
        SsaTextFieldNotLast { line_num: usize },

        /// A dialog line does not have the expected number of fields.
        #[fail(display = "the dialog at line {} has incorrect number of fields", line_num)]
        SsaIncorrectNumberOfFields { line_num: usize },

        /// A timepoint field of a dialog line could not be parsed.
        #[fail(display = "the timepoint `{}` in line {} has wrong format", string, line_num)]
        SsaWrongTimepointFormat { line_num: usize, string: String },

        /// A `Dialogue:` line could not be parsed; `msg` carries the parser's message.
        #[fail(display = "parsing the line `{}` failed because of `{}`", line_num, msg)]
        SsaDialogLineParseError { line_num: usize, msg: String },

        /// A line could not be parsed; `msg` carries the parser's message.
        #[fail(display = "parsing the line `{}` failed because of `{}`", line_num, msg)]
        SsaLineParseError { line_num: usize, msg: String },
    }
}
/// Layout of the comma-separated fields of a dialog line, as announced by the
/// `Format:` line of the file.
///
/// All indices are zero-based positions in the comma-separated field list.
struct SsaFieldsInfo {
    /// Position of the `Start` field.
    start_field_idx: usize,
    /// Position of the `End` field.
    end_field_idx: usize,
    /// Position of the `Text` field; the constructor only accepts it as the last field.
    text_field_idx: usize,
    /// Total number of fields listed in the `Format:` line.
    num_fields: usize,
}
94
95impl SsaFieldsInfo {
96 fn new_from_fields_info_line(line_num: usize, s: String) -> Result<SsaFieldsInfo> {
98 assert!(s.starts_with("Format:"));
99 let field_info = &s["Format:".len()..];
100 let mut start_field_idx: Option<usize> = None;
101 let mut end_field_idx: Option<usize> = None;
102 let mut text_field_idx: Option<usize> = None;
103
104 let split_iter = field_info.split(',');
106 let num_fields = split_iter.clone().count();
107 for (i, field_name) in split_iter.enumerate() {
108 let trimmed = field_name.trim();
109 if trimmed == "Start" {
110 if start_field_idx.is_some() {
111 return Err(SsaDuplicateField { line_num, f: "Start" })?;
112 }
113 start_field_idx = Some(i);
114 } else if trimmed == "End" {
115 if end_field_idx.is_some() {
116 return Err(SsaDuplicateField { line_num, f: "End" })?;
117 }
118 end_field_idx = Some(i);
119 } else if trimmed == "Text" {
120 if text_field_idx.is_some() {
121 return Err(SsaDuplicateField { line_num, f: "Text" })?;
122 }
123 text_field_idx = Some(i);
124 }
125 }
126
127 let text_field_idx2 = text_field_idx.ok_or_else(|| Error::from(SsaMissingField { line_num, f: "Text" }))?;
128 if text_field_idx2 != num_fields - 1 {
129 return Err(SsaTextFieldNotLast { line_num })?;
130 }
131
132 Ok(SsaFieldsInfo {
133 start_field_idx: start_field_idx.ok_or_else(|| Error::from(SsaMissingField { line_num, f: "Start" }))?,
134 end_field_idx: end_field_idx.ok_or_else(|| Error::from(SsaMissingField { line_num, f: "End" }))?,
135 text_field_idx: text_field_idx2,
136 num_fields: num_fields,
137 })
138 }
139}
140
141impl SsaFile {
145 pub fn parse(s: &str) -> SubtitleParserResult<SsaFile> {
147 Ok(Self::parse_inner(s.to_string()).with_context(|_| crate::ErrorKind::ParsingError)?)
148 }
149}
150
151impl SsaFile {
153 fn parse_inner(i: String) -> Result<SsaFile> {
155 let mut file_parts = Vec::new();
156 let (bom, s) = split_bom(&i);
157 file_parts.push(SsaFilePart::Filler(bom.to_string()));
158
159 let (line_num, field_info_line) = Self::get_format_info(s)?;
161 let fields_info = SsaFieldsInfo::new_from_fields_info_line(line_num, field_info_line)?;
162
163 file_parts.append(&mut Self::parse_dialog_lines(&fields_info, s)?);
165 Ok(SsaFile::new(file_parts))
166 }
167
168 fn get_format_info(s: &str) -> Result<(usize, String)> {
170 let mut section_opt = None;
171 for (line_num, line) in s.lines().enumerate() {
172 let trimmed_line = line.trim();
174 if trimmed_line.starts_with('[') && trimmed_line.ends_with(']') {
175 section_opt = Some(&trimmed_line[1..trimmed_line.len() - 1]);
176 }
177
178 if section_opt != Some("Events") {
180 continue;
181 }
182 if !line.trim().starts_with("Format:") {
183 continue;
184 }
185 return Ok((line_num, line.to_string()));
186 }
187
188 Err(SsaFieldsInfoNotFound.into())
189 }
190
191 fn parse_dialog_lines(fields_info: &SsaFieldsInfo, s: &str) -> Result<Vec<SsaFilePart>> {
197 let mut result = Vec::new();
198 let mut section_opt: Option<String> = None;
199
200 for (line_num, (line, newl)) in get_lines_non_destructive(s).into_iter().enumerate() {
201 let trimmed_line = line.trim().to_string();
202
203 if trimmed_line.starts_with('[') && trimmed_line.ends_with(']') {
205 section_opt = Some(trimmed_line[1..trimmed_line.len() - 1].to_string());
206 result.push(SsaFilePart::Filler(line));
207 result.push(SsaFilePart::Filler("\n".to_string()));
208 continue;
209 }
210
211 if section_opt.is_none() || section_opt.iter().any(|s| s != "Events") || !trimmed_line.starts_with("Dialogue:") {
212 result.push(SsaFilePart::Filler(line));
213 result.push(SsaFilePart::Filler("\n".to_string()));
214 continue;
215 }
216
217 result.append(&mut Self::parse_dialog_line(line_num, line.as_str(), fields_info)?);
218 result.push(SsaFilePart::Filler(newl));
219 }
220
221 Ok(result)
222 }
223
224 fn parse_dialog_line(line_num: usize, line: &str, fields_info: &SsaFieldsInfo) -> Result<Vec<SsaFilePart>> {
230 let parts_res = (
231 many(ws()),
232 string("Dialogue:"),
233 many(ws()),
234 count(fields_info.num_fields - 1, (many(none_of(once(','))), token(','))),
235 many(r#try(any())),
236 )
237 .map(
238 |(ws1, dl, ws2, v, text): (String, &str, String, Vec<(String, char)>, String)| -> Result<Vec<SsaFilePart>> {
239 let mut result: Vec<SsaFilePart> = Vec::new();
240 result.push(SsaFilePart::Filler(ws1));
241 result.push(SsaFilePart::Filler(dl.to_string()));
242 result.push(SsaFilePart::Filler(ws2.to_string()));
243 result.append(&mut Self::parse_fields(line_num, fields_info, v)?);
244 result.push(SsaFilePart::Text(text));
245 Ok(result)
246 },
247 )
248 .parse(line);
249
250 match parts_res {
251 Ok((parts, _)) => Ok(parts?),
253 Err(e) => Err(SsaDialogLineParseError {
254 line_num,
255 msg: parse_error_to_string(e),
256 }
257 .into()),
258 }
259 }
260
261 fn parse_fields(line_num: usize, fields_info: &SsaFieldsInfo, v: Vec<(String, char)>) -> Result<Vec<SsaFilePart>> {
266 let extract_file_parts_closure = |(i, (field, sep_char)): (_, (String, char))| -> Result<Vec<SsaFilePart>> {
267 let (begin, field, end) = trim_non_destructive(&field);
268
269 let part = if i == fields_info.start_field_idx {
270 SsaFilePart::TimespanStart(Self::parse_timepoint(line_num, &field)?)
271 } else if i == fields_info.end_field_idx {
272 SsaFilePart::TimespanEnd(Self::parse_timepoint(line_num, &field)?)
273 } else if i == fields_info.text_field_idx {
274 SsaFilePart::Text(field.to_string())
275 } else {
276 SsaFilePart::Filler(field.to_string())
277 };
278
279 Ok(vec![
280 SsaFilePart::Filler(begin),
281 part,
282 SsaFilePart::Filler(end),
283 SsaFilePart::Filler(sep_char.to_string()),
284 ])
285 };
286
287 let result = v
288 .into_iter()
289 .enumerate()
290 .map(extract_file_parts_closure)
291 .collect::<Result<Vec<Vec<SsaFilePart>>>>()?
292 .into_iter()
293 .flat_map(|part| part)
294 .collect();
295 Ok(result)
296 }
297
298 fn parse_timepoint(line_num: usize, s: &str) -> Result<TimePoint> {
300 let parse_res = (
301 parser(number_i64),
302 token(':'),
303 parser(number_i64),
304 token(':'),
305 parser(number_i64),
306 or(token('.'), token(':')),
307 parser(number_i64),
308 eof(),
309 )
310 .map(|(h, _, mm, _, ss, _, ms, _)| TimePoint::from_components(h, mm, ss, ms * 10))
311 .parse(s);
312 match parse_res {
313 Ok(res) => Ok(res.0),
314 Err(e) => Err(SsaWrongTimepointFormat {
315 line_num,
316 string: parse_error_to_string(e),
317 }
318 .into()),
319 }
320 }
321}
322
/// One piece of a parsed `.ssa`/`.ass` file.
///
/// Serializing all parts of a file in order yields the file content again.
#[derive(Debug, Clone)]
enum SsaFilePart {
    /// Text that is written back unchanged and is not exposed as subtitle data.
    Filler(String),

    /// Start time of a dialog line.
    TimespanStart(TimePoint),

    /// End time of a dialog line.
    TimespanEnd(TimePoint),

    /// Text of a dialog line.
    Text(String),
}
340
/// A parsed `.ssa`/`.ass` subtitle file.
#[derive(Debug, Clone)]
pub struct SsaFile {
    /// All parts of the file, in file order.
    v: Vec<SsaFilePart>,
}
352
353impl SsaFile {
354 fn new(v: Vec<SsaFilePart>) -> SsaFile {
355 let new_file_parts = dedup_string_parts(v, |part: &mut SsaFilePart| match *part {
357 SsaFilePart::Filler(ref mut text) => Some(text),
358 _ => None,
359 });
360
361 SsaFile { v: new_file_parts }
362 }
363
364 fn get_subtitle_entries_mut<'a>(&'a mut self) -> Vec<(&'a mut TimePoint, &'a mut TimePoint, &'a mut String)> {
369 let mut startpoint_buffer: Option<&'a mut TimePoint> = None;
370 let mut endpoint_buffer: Option<&'a mut TimePoint> = None;
371
372 let timings: Vec<_> = {
374 let filter_map_closure = |part: &'a mut SsaFilePart| -> Option<(&'a mut TimePoint, &'a mut TimePoint, &'a mut String)> {
375 use self::SsaFilePart::*;
376 match *part {
377 TimespanStart(ref mut start) => {
378 assert_eq!(startpoint_buffer, None); startpoint_buffer = Some(start);
380 None
381 }
382 TimespanEnd(ref mut end) => {
383 assert_eq!(endpoint_buffer, None); endpoint_buffer = Some(end);
385 None
386 }
387 Text(ref mut text) => {
388 let snatched_startpoint_buffer = startpoint_buffer.take();
390 let snatched_endpoint_buffer = endpoint_buffer.take();
391
392 let start = snatched_startpoint_buffer.expect("SSA parser should have ensured that every line has a startpoint");
393 let end = snatched_endpoint_buffer.expect("SSA parser should have ensured that every line has a endpoint");
394
395 Some((start, end, text))
396 }
397 Filler(_) => None,
398 }
399 };
400
401 self.v.iter_mut().filter_map(filter_map_closure).collect()
402 };
403
404 assert_eq!(startpoint_buffer, None);
406 assert_eq!(endpoint_buffer, None);
407
408 timings
409 }
410}
411
412impl SubtitleFileInterface for SsaFile {
413 fn get_subtitle_entries(&self) -> SubtitleParserResult<Vec<SubtitleEntry>> {
414 let mut new_file = self.clone();
421 let timings = new_file
422 .get_subtitle_entries_mut()
423 .into_iter()
424 .map(|(&mut start, &mut end, text)| SubtitleEntry::new(TimeSpan::new(start, end), text.clone()))
425 .collect();
426
427 Ok(timings)
428 }
429
430 fn update_subtitle_entries(&mut self, new_subtitle_entries: &[SubtitleEntry]) -> SubtitleParserResult<()> {
431 let subtitle_entries = self.get_subtitle_entries_mut();
432 assert_eq!(subtitle_entries.len(), new_subtitle_entries.len()); for ((start_ref, end_ref, text_ref), new_entry_ref) in subtitle_entries.into_iter().zip(new_subtitle_entries) {
435 *start_ref = new_entry_ref.timespan.start;
436 *end_ref = new_entry_ref.timespan.end;
437 if let Some(ref text) = new_entry_ref.line {
438 *text_ref = text.clone();
439 }
440 }
441
442 Ok(())
443 }
444
445 fn to_data(&self) -> SubtitleParserResult<Vec<u8>> {
446 let fn_timing_to_string = |t: TimePoint| {
448 let p = if t.msecs() < 0 { -t } else { t };
449 format!(
450 "{}{}:{:02}:{:02}.{:02}",
451 if t.msecs() < 0 { "-" } else { "" },
452 p.hours(),
453 p.mins_comp(),
454 p.secs_comp(),
455 p.csecs_comp()
456 )
457 };
458
459 let fn_file_part_to_string = |part: &SsaFilePart| {
460 use self::SsaFilePart::*;
461 match *part {
462 Filler(ref t) | Text(ref t) => t.clone(),
463 TimespanStart(start) => fn_timing_to_string(start),
464 TimespanEnd(end) => fn_timing_to_string(end),
465 }
466 };
467
468 let result: String = self.v.iter().map(fn_file_part_to_string).collect();
469
470 Ok(result.into_bytes())
471 }
472}