subx_cli/core/formats/vtt/mod.rs
1//! Web Video Text Tracks (WebVTT) subtitle format implementation.
2//!
3//! This module exposes the [`VttFormat`] adapter that implements the
4//! [`SubtitleFormat`] trait. The actual logic is split across:
5//!
6//! - `parser`: parsing logic and the malformed-input disposition matrix
7//! - `serializer`: serialization logic
8//! - `time`: WebVTT timestamp parsing and formatting helpers
9//!
10//! # Examples
11//!
12//! ```rust
13//! use subx_cli::core::formats::{SubtitleFormat, vtt::VttFormat};
14//! let vtt = VttFormat;
15//! let content = "WEBVTT\n\n00:00:01.000 --> 00:00:03.000\nHello";
16//! let subtitle = vtt.parse(content).unwrap();
17//! ```
18
19use crate::Result;
20use crate::core::formats::{Subtitle, SubtitleFormat};
21
22mod parser;
23mod serializer;
24mod time;
25
26#[cfg(test)]
27mod tests;
28
/// WebVTT subtitle format adapter.
///
/// `VttFormat` is a field-less unit struct, so it carries no state of its
/// own; it exists to provide parsing, serialization, and detection for
/// WebVTT files through its trait implementation. See the `parser` module
/// documentation for the malformed-input disposition matrix observed by
/// [`VttFormat::parse`].
///
/// The common traits are derived so the adapter can be debug-printed,
/// copied freely, and default-constructed like any other plain value type.
#[derive(Debug, Clone, Copy, Default)]
pub struct VttFormat;
36
37impl SubtitleFormat for VttFormat {
38 /// Parse WebVTT content.
39 ///
40 /// # Malformed-input dispositions
41 ///
42 /// - Empty input → returns [`crate::error::SubXError::SubtitleFormat`].
43 /// - Missing `WEBVTT` signature → returns
44 /// [`crate::error::SubXError::SubtitleFormat`].
45 /// - Leading UTF-8 BOM with valid content → BOM is consumed and the
46 /// file is parsed normally. This parser-level strip is *additive*
47 /// to [`crate::core::formats::encoding::converter`]'s file-layer
48 /// strip; both layers coexist by design.
49 /// - Leading UTF-8 BOM with otherwise invalid content → returns
50 /// [`crate::error::SubXError::SubtitleFormat`] (the post-BOM
51 /// `WEBVTT` check fails).
52 /// - Out-of-order cues → preserved in original file order; no
53 /// implicit sort is performed.
54 /// - Cue marker line with a negative timestamp → that block is
55 /// skipped, a `debug!` log records the skip, and parsing
56 /// continues.
57 /// - Cue body exceeding the per-cue cap (1 MiB) → returns
58 /// [`crate::error::SubXError::SubtitleFormat`]. The cap is enforced
59 /// on raw pre-normalization bytes, so `\r\n` padding cannot bypass
60 /// the limit by shrinking under line-ending normalization.
61 /// - CRLF (`\r\n`), bare-CR (`\r`), or mixed line endings → input is
62 /// normalized to LF after the `WEBVTT` header check and before
63 /// block splitting; CRLF and mixed inputs parse to the same entry
64 /// count and text content as their LF counterpart.
65 /// - Final cue not followed by a trailing blank line → still
66 /// recognized; the file parses successfully.
67 fn parse(&self, content: &str) -> Result<Subtitle> {
68 parser::parse(content)
69 }
70
71 fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
72 serializer::serialize(subtitle)
73 }
74
75 fn detect(&self, content: &str) -> bool {
76 parser::detect(content)
77 }
78
79 fn format_name(&self) -> &'static str {
80 "VTT"
81 }
82
83 fn file_extensions(&self) -> &'static [&'static str] {
84 &["vtt"]
85 }
86}