Skip to main content

subx_cli/core/formats/srt/
mod.rs

//! SubRip (`.srt`) subtitle format support.
//!
//! This module is split across the following submodules:
//!
//! - `parser`     — pure parsing (block splitting, validation, hardening).
//! - `serializer` — pure serialization to canonical SRT bytes.
//! - `time`       — timestamp parsing/formatting helpers.
//! - `tests`      — co-located unit tests.
//!
//! The public entry point remains [`SrtFormat`], which implements
//! [`crate::core::formats::SubtitleFormat`].
12
13use crate::Result;
14use crate::core::formats::{Subtitle, SubtitleFormat};
15use regex::Regex;
16
17mod parser;
18mod serializer;
19mod time;
20
21#[cfg(test)]
22mod tests;
23
/// SubRip (`.srt`) format parsing and serialization.
///
/// This is a field-less marker type: it carries no state, so it is cheap to
/// copy and can be default-constructed. All behavior lives in its trait
/// implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct SrtFormat;
26
27impl SubtitleFormat for SrtFormat {
28    /// Parse SRT content into a [`Subtitle`].
29    ///
30    /// # Malformed-input dispositions
31    ///
32    /// Per the `subtitle-parser-hardening` spec matrix, this parser handles
33    /// malformed inputs as follows:
34    ///
35    /// - **Empty input** → returns [`crate::error::SubXError::SubtitleFormat`].
36    /// - **UTF-8 BOM prefix** → consumed as a defensive layer at the start of
37    ///   `parse` (additive to the encoding-layer
38    ///   `formats::encoding::converter::skip_bom`); BOM-only content reduces
39    ///   to empty input and is rejected.
40    /// - **Per-cue body exceeding `MAX_CUE_BYTES` (1 MiB)** → returns
41    ///   [`crate::error::SubXError::SubtitleFormat`]. The cap is enforced on
42    ///   raw pre-normalization bytes, so `\r\n` padding cannot bypass the
43    ///   limit by shrinking under line-ending normalization.
44    /// - **CRLF (`\r\n`), bare-CR (`\r`), or mixed line endings** → input is
45    ///   normalized to LF before block splitting; CRLF and mixed inputs
46    ///   parse to the same entry count and text content as their LF
47    ///   counterpart.
48    /// - **Block with non-numeric sequence index** → skip-and-continue with
49    ///   `debug!` log.
50    /// - **Block with negative timestamp on the timing line** →
51    ///   skip-and-continue with `debug!` log.
52    /// - **Block whose timing line does not match the SRT regex** →
53    ///   silently skipped (existing tolerated behavior).
54    /// - **Out-of-order cues by start time** → preserved verbatim in
55    ///   `entries`; no implicit sort.
56    fn parse(&self, content: &str) -> Result<Subtitle> {
57        parser::parse(content)
58    }
59
60    fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
61        serializer::serialize(subtitle)
62    }
63
64    fn detect(&self, content: &str) -> bool {
65        let time_pattern =
66            Regex::new(r"\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}").unwrap();
67        time_pattern.is_match(content)
68    }
69
70    fn format_name(&self) -> &'static str {
71        "SRT"
72    }
73
74    fn file_extensions(&self) -> &'static [&'static str] {
75        &["srt"]
76    }
77}