subparse/formats/
mod.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5pub mod common;
6pub mod idx;
7pub mod microdvd;
8pub mod srt;
9pub mod ssa;
10pub mod vobsub;
11
12use crate::errors::*;
13use crate::SubtitleEntry;
14use crate::SubtitleFileInterface;
15use encoding_rs::Encoding;
16use std::ffi::OsStr;
17use chardet::{charset2encoding, detect};
18
/// All formats which are supported by this library.
///
/// Note that the `.sub` extension is shared by two different formats (binary
/// `VobSub` and textual `MicroDVD`), so it is represented by two variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SubtitleFormat {
    /// `SubRip` subtitles (`.srt` file).
    SubRip,

    /// `SubStation Alpha` subtitles (`.ssa`/`.ass` file).
    SubStationAlpha,

    /// `VobSub` index (`.idx` file).
    VobSubIdx,

    /// `VobSub` binary data (`.sub` file).
    VobSubSub,

    /// `MicroDVD` text subtitles (`.sub` file).
    MicroDVD,
}
37
38#[derive(Clone, Debug)]
39/// Unified wrapper around the all individual subtitle file types.
40pub enum SubtitleFile {
41    /// .srt file
42    SubRipFile(srt::SrtFile),
43
44    /// .ssa/.ass file
45    SubStationAlpha(ssa::SsaFile),
46
47    /// .idx file
48    VobSubIdxFile(idx::IdxFile),
49
50    /// .sub file (`VobSub`/binary)
51    VobSubSubFile(vobsub::VobFile),
52
53    /// .sub file (`MicroDVD`/text)
54    MicroDVDFile(microdvd::MdvdFile),
55}
56
57impl SubtitleFile {
58    /// The subtitle entries can be changed by calling `update_subtitle_entries()`.
59    pub fn get_subtitle_entries(&self) -> Result<Vec<SubtitleEntry>> {
60        match self {
61            SubtitleFile::SubRipFile(f) => f.get_subtitle_entries(),
62            SubtitleFile::SubStationAlpha(f) => f.get_subtitle_entries(),
63            SubtitleFile::VobSubIdxFile(f) => f.get_subtitle_entries(),
64            SubtitleFile::VobSubSubFile(f) => f.get_subtitle_entries(),
65            SubtitleFile::MicroDVDFile(f) => f.get_subtitle_entries(),
66        }
67    }
68
69    /// Set the entries from the subtitle entries from the `get_subtitle_entries()`.
70    ///
71    /// The length of the given input slice should always match the length of the vector length from
72    /// `get_subtitle_entries()`. This function can not delete/create new entries, but preserves
73    /// everything else in the file (formatting, authors, ...).
74    ///
75    /// If the input entry has `entry.line == None`, the line will not be overwritten.
76    ///
77    /// Be aware that .idx files cannot save time_spans_ (a subtitle will be shown between two
78    /// consecutive timepoints/there are no separate starts and ends) - so the timepoint will be set
79    /// to the start of the corresponding input-timespan.
80    pub fn update_subtitle_entries(&mut self, i: &[SubtitleEntry]) -> Result<()> {
81        match self {
82            SubtitleFile::SubRipFile(f) => f.update_subtitle_entries(i),
83            SubtitleFile::SubStationAlpha(f) => f.update_subtitle_entries(i),
84            SubtitleFile::VobSubIdxFile(f) => f.update_subtitle_entries(i),
85            SubtitleFile::VobSubSubFile(f) => f.update_subtitle_entries(i),
86            SubtitleFile::MicroDVDFile(f) => f.update_subtitle_entries(i),
87        }
88    }
89
90    /// Returns a byte-stream in the respective format (.ssa, .srt, etc.) with the
91    /// (probably) altered information.
92    pub fn to_data(&self) -> Result<Vec<u8>> {
93        match self {
94            SubtitleFile::SubRipFile(f) => f.to_data(),
95            SubtitleFile::SubStationAlpha(f) => f.to_data(),
96            SubtitleFile::VobSubIdxFile(f) => f.to_data(),
97            SubtitleFile::VobSubSubFile(f) => f.to_data(),
98            SubtitleFile::MicroDVDFile(f) => f.to_data(),
99        }
100    }
101}
102
103impl From<srt::SrtFile> for SubtitleFile {
104    fn from(f: srt::SrtFile) -> SubtitleFile {
105        SubtitleFile::SubRipFile(f)
106    }
107}
108
109impl From<ssa::SsaFile> for SubtitleFile {
110    fn from(f: ssa::SsaFile) -> SubtitleFile {
111        SubtitleFile::SubStationAlpha(f)
112    }
113}
114
115impl From<idx::IdxFile> for SubtitleFile {
116    fn from(f: idx::IdxFile) -> SubtitleFile {
117        SubtitleFile::VobSubIdxFile(f)
118    }
119}
120
121impl From<vobsub::VobFile> for SubtitleFile {
122    fn from(f: vobsub::VobFile) -> SubtitleFile {
123        SubtitleFile::VobSubSubFile(f)
124    }
125}
126
127impl From<microdvd::MdvdFile> for SubtitleFile {
128    fn from(f: microdvd::MdvdFile) -> SubtitleFile {
129        SubtitleFile::MicroDVDFile(f)
130    }
131}
132
133impl SubtitleFormat {
134    /// Get a descriptive string for the format like `".srt (SubRip)"`.
135    pub fn get_name(&self) -> &'static str {
136        match *self {
137            SubtitleFormat::SubRip => ".srt (SubRip)",
138            SubtitleFormat::SubStationAlpha => ".ssa (SubStation Alpha)",
139            SubtitleFormat::VobSubIdx => ".idx (VobSub)",
140            SubtitleFormat::VobSubSub => ".sub (VobSub)",
141            SubtitleFormat::MicroDVD => ".sub (MicroDVD)",
142        }
143    }
144}
145
#[test]
fn test_subtitle_format_by_extension() {
    // This shows how the input parameter can be created from scratch.
    assert_eq!(get_subtitle_format_by_extension(Some(OsStr::new("srt"))), Some(SubtitleFormat::SubRip));

    // Both SubStation Alpha extensions map to the same format.
    assert_eq!(get_subtitle_format_by_extension(Some(OsStr::new("ssa"))), Some(SubtitleFormat::SubStationAlpha));
    assert_eq!(get_subtitle_format_by_extension(Some(OsStr::new("ass"))), Some(SubtitleFormat::SubStationAlpha));
    assert_eq!(get_subtitle_format_by_extension(Some(OsStr::new("idx"))), Some(SubtitleFormat::VobSubIdx));

    // `.sub` is ambiguous (VobSub or MicroDVD) and cannot be resolved by extension alone.
    assert_eq!(get_subtitle_format_by_extension(Some(OsStr::new("sub"))), None);

    // Unknown or missing extensions are not recognized.
    assert_eq!(get_subtitle_format_by_extension(Some(OsStr::new("txt"))), None);
    assert_eq!(get_subtitle_format_by_extension(None), None);
}
151
152/// Returns the subtitle format by the file extension.
153///
154/// Calling the function with the full file path or simply a `get_subtitle_format_by_extension(Some(OsStr::new("srt")))`
155/// both work. Returns `None` if subtitle format could not be recognized.
156///
157/// Because the `.sub` file extension is ambiguous (both `MicroDVD` and `VobSub` use that extension) the
158/// function will return `None` in that case. Instead, use the content-aware `get_subtitle_format`
159/// to handle this case correctly.
160///
161/// `Option` is used to simplify handling with `PathBuf::extension()`.
162pub fn get_subtitle_format_by_extension(extension: Option<&OsStr>) -> Option<SubtitleFormat> {
163    let _ext_opt: Option<&OsStr> = extension.into();
164
165    if _ext_opt == Some(OsStr::new("srt")) {
166        Some(SubtitleFormat::SubRip)
167    } else if _ext_opt == Some(OsStr::new("ssa")) || _ext_opt == Some(OsStr::new("ass")) {
168        Some(SubtitleFormat::SubStationAlpha)
169    } else if _ext_opt == Some(OsStr::new("idx")) {
170        Some(SubtitleFormat::VobSubIdx)
171    } else {
172        None
173    }
174}
175
176/// Returns true if the file extension is valid for the given subtitle format.
177///
178/// `Option` is used to simplify handling with `PathBuf::extension()`.
179pub fn is_valid_extension_for_subtitle_format(extension: Option<&OsStr>, format: SubtitleFormat) -> bool {
180    match format {
181        SubtitleFormat::SubRip => extension == Some(OsStr::new("srt")),
182        SubtitleFormat::SubStationAlpha => extension == Some(OsStr::new("ssa")) || extension == Some(OsStr::new("ass")),
183        SubtitleFormat::VobSubIdx => extension == Some(OsStr::new("idx")),
184        SubtitleFormat::VobSubSub => extension == Some(OsStr::new("sub")),
185        SubtitleFormat::MicroDVD => extension == Some(OsStr::new("sub")),
186    }
187}
188
189/// Returns the subtitle format by the file extension.
190///
191/// Works exactly like `get_subtitle_format_by_extension`, but instead of `None` a `UnknownFileFormat`
192/// will be returned (for simpler error handling).
193///
194/// `Option` is used to simplify handling with `PathBuf::extension()`.
195pub fn get_subtitle_format_by_extension_err(extension: Option<&OsStr>) -> Result<SubtitleFormat> {
196    get_subtitle_format_by_extension(extension).ok_or_else(|| ErrorKind::UnknownFileFormat.into())
197}
198
199/// Returns the subtitle format by the file extension and provided content.
200///
201/// Calling the function with the full file path or simply a `get_subtitle_format(".sub", content)`
202/// both work. Returns `None` if subtitle format could not be recognized.
203///
204/// It works exactly the same as `get_subtitle_format_by_extension` (see documentation), but also handles the  `.sub` cases
205/// correctly by using the provided content of the file as secondary info.
206///
207/// `Option` is used to simplify handling with `PathBuf::extension()`.
208pub fn get_subtitle_format(extension: Option<&OsStr>, content: &[u8]) -> Option<SubtitleFormat> {
209    if extension == Some(OsStr::new("sub")) {
210        // test for VobSub .sub magic number
211        if content.iter().take(4).cloned().eq([0x00, 0x00, 0x01, 0xba].iter().cloned()) {
212            Some(SubtitleFormat::VobSubSub)
213        } else {
214            Some(SubtitleFormat::MicroDVD)
215        }
216    } else {
217        get_subtitle_format_by_extension(extension)
218    }
219}
220
221/// Returns the subtitle format by the file extension and provided content.
222///
223/// Works exactly like `get_subtitle_format`, but instead of `None` a `UnknownFileFormat`
224/// will be returned (for simpler error handling).
225pub fn get_subtitle_format_err(extension: Option<&OsStr>, content: &[u8]) -> Result<SubtitleFormat> {
226    get_subtitle_format(extension, content).ok_or_else(|| ErrorKind::UnknownFileFormat.into())
227}
228
229/// Parse text subtitles, invoking the right parser given by `format`.
230///
231/// Returns an `Err(ErrorKind::TextFormatOnly)` if attempted on a binary file format.
232///
233/// # Mandatory format specific options
234///
235/// See `parse_bytes`.
236pub fn parse_str(format: SubtitleFormat, content: &str, fps: f64) -> Result<SubtitleFile> {
237    match format {
238        SubtitleFormat::SubRip => Ok(srt::SrtFile::parse(content)?.into()),
239        SubtitleFormat::SubStationAlpha => Ok(ssa::SsaFile::parse(content)?.into()),
240        SubtitleFormat::VobSubIdx => Ok(idx::IdxFile::parse(content)?.into()),
241        SubtitleFormat::VobSubSub => Err(ErrorKind::TextFormatOnly.into()),
242        SubtitleFormat::MicroDVD => Ok(microdvd::MdvdFile::parse(content, fps)?.into()),
243    }
244}
245
246/// Helper function for text subtitles for byte-to-text decoding (use `None` for automatic detection).
247fn decode_bytes_to_string(content: &[u8], encoding: Option<&'static Encoding>) -> Result<String> {
248    let det_encoding = match encoding {
249        Some(encoding) => encoding,
250        None => {
251            let (charset, _, _) = detect(content);
252            let encoding_name = charset2encoding(&charset);
253            Encoding::for_label_no_replacement(encoding_name.as_bytes()).ok_or(ErrorKind::EncodingDetectionError)?
254        }
255    };
256
257    let (decoded, _, replaced) = det_encoding.decode(content);
258    if replaced {
259        Err(Error::from(ErrorKind::DecodingError))
260    } else {
261        Ok(decoded.into_owned())
262    }
263}
264
265/// Parse all subtitle formats, invoking the right parser given by `format`.
266///
267/// # Mandatory format specific options
268///
269/// Some subtitle formats require additional parameters to work as expected. If you want to parse
270/// a specific format that has no additional parameters, you can use the `parse` function of
271/// the respective `***File` struct.
272///
273/// `encoding`: to parse a text-based subtitle format, a character encoding is needed (use `None` for auto-detection by `chardet`)
274///
275/// `fps`: this parameter is used for `MicroDVD` `.sub` files. These files do not store timestamps in
276/// seconds/minutes/... but in frame numbers. So the timing `0 to 30` means "show subtitle for one second"
277/// for a 30fps video, and "show subtitle for half second" for 60fps videos. The parameter specifies how
278/// frame numbers are converted into timestamps.
279pub fn parse_bytes(format: SubtitleFormat, content: &[u8], encoding: Option<&'static Encoding>, fps: f64) -> Result<SubtitleFile> {
280    match format {
281        SubtitleFormat::SubRip => Ok(srt::SrtFile::parse(&decode_bytes_to_string(content, encoding)?)?.into()),
282        SubtitleFormat::SubStationAlpha => Ok(ssa::SsaFile::parse(&decode_bytes_to_string(content, encoding)?)?.into()),
283        SubtitleFormat::VobSubIdx => Ok(idx::IdxFile::parse(&decode_bytes_to_string(content, encoding)?)?.into()),
284        SubtitleFormat::VobSubSub => Ok(vobsub::VobFile::parse(content)?.into()),
285        SubtitleFormat::MicroDVD => Ok(microdvd::MdvdFile::parse(&decode_bytes_to_string(content, encoding)?, fps)?.into()),
286    }
287}