subx_cli/core/formats/srt/mod.rs
1//! SubRip (`.srt`) subtitle format support.
2//!
3//! This module is split across:
4//!
5//! - `parser` — pure parsing (block splitting, validation, hardening).
6//! - `serializer` — pure serialization to canonical SRT bytes.
7//! - `time` — timestamp parsing/formatting helpers.
8//! - `tests` — co-located unit tests.
9//!
10//! The public entry point remains `SrtFormat` which implements
11//! [`crate::core::formats::SubtitleFormat`].
12
13use crate::Result;
14use crate::core::formats::{Subtitle, SubtitleFormat};
15use regex::Regex;
16
17mod parser;
18mod serializer;
19mod time;
20
21#[cfg(test)]
22mod tests;
23
/// SubRip (`.srt`) format parsing and serialization.
///
/// This is a field-less unit struct: it carries no state of its own. All
/// format behavior is exposed through its [`SubtitleFormat`] implementation
/// in this module.
pub struct SrtFormat;
26
27impl SubtitleFormat for SrtFormat {
28 /// Parse SRT content into a [`Subtitle`].
29 ///
30 /// # Malformed-input dispositions
31 ///
32 /// Per the `subtitle-parser-hardening` spec matrix, this parser handles
33 /// malformed inputs as follows:
34 ///
35 /// - **Empty input** → returns [`crate::error::SubXError::SubtitleFormat`].
36 /// - **UTF-8 BOM prefix** → consumed as a defensive layer at the start of
37 /// `parse` (additive to the encoding-layer
38 /// `formats::encoding::converter::skip_bom`); BOM-only content reduces
39 /// to empty input and is rejected.
40 /// - **Per-cue body exceeding `MAX_CUE_BYTES` (1 MiB)** → returns
41 /// [`crate::error::SubXError::SubtitleFormat`]. The cap is enforced on
42 /// raw pre-normalization bytes, so `\r\n` padding cannot bypass the
43 /// limit by shrinking under line-ending normalization.
44 /// - **CRLF (`\r\n`), bare-CR (`\r`), or mixed line endings** → input is
45 /// normalized to LF before block splitting; CRLF and mixed inputs
46 /// parse to the same entry count and text content as their LF
47 /// counterpart.
48 /// - **Block with non-numeric sequence index** → skip-and-continue with
49 /// `debug!` log.
50 /// - **Block with negative timestamp on the timing line** →
51 /// skip-and-continue with `debug!` log.
52 /// - **Block whose timing line does not match the SRT regex** →
53 /// silently skipped (existing tolerated behavior).
54 /// - **Out-of-order cues by start time** → preserved verbatim in
55 /// `entries`; no implicit sort.
56 fn parse(&self, content: &str) -> Result<Subtitle> {
57 parser::parse(content)
58 }
59
60 fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
61 serializer::serialize(subtitle)
62 }
63
64 fn detect(&self, content: &str) -> bool {
65 let time_pattern =
66 Regex::new(r"\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}").unwrap();
67 time_pattern.is_match(content)
68 }
69
70 fn format_name(&self) -> &'static str {
71 "SRT"
72 }
73
74 fn file_extensions(&self) -> &'static [&'static str] {
75 &["srt"]
76 }
77}