Skip to main content

subx_cli/core/formats/vtt/
mod.rs

1//! Web Video Text Tracks (WebVTT) subtitle format implementation.
2//!
3//! This module exposes the [`VttFormat`] adapter that implements the
4//! [`SubtitleFormat`] trait. The actual logic is split across:
5//!
6//! - `parser`: parsing logic and the malformed-input disposition matrix
7//! - `serializer`: serialization logic
8//! - `time`: WebVTT timestamp parsing and formatting helpers
9//!
10//! # Examples
11//!
12//! ```rust
13//! use subx_cli::core::formats::{SubtitleFormat, vtt::VttFormat};
14//! let vtt = VttFormat;
15//! let content = "WEBVTT\n\n00:00:01.000 --> 00:00:03.000\nHello";
16//! let subtitle = vtt.parse(content).unwrap();
17//! ```
18
19use crate::Result;
20use crate::core::formats::{Subtitle, SubtitleFormat};
21
22mod parser;
23mod serializer;
24mod time;
25
26#[cfg(test)]
27mod tests;
28
/// Subtitle format implementation for WebVTT.
///
/// `VttFormat` is a field-less unit struct whose [`SubtitleFormat`]
/// implementation provides parsing, serialization, and detection for
/// WebVTT files. See the `parser` module documentation for the
/// malformed-input disposition matrix observed by
/// [`VttFormat::parse`].
///
/// The common traits are derived so the adapter can be debug-printed,
/// copied freely, and obtained through `Default` in generic code.
#[derive(Debug, Clone, Copy, Default)]
pub struct VttFormat;
36
37impl SubtitleFormat for VttFormat {
38    /// Parse WebVTT content.
39    ///
40    /// # Malformed-input dispositions
41    ///
42    /// - Empty input → returns [`crate::error::SubXError::SubtitleFormat`].
43    /// - Missing `WEBVTT` signature → returns
44    ///   [`crate::error::SubXError::SubtitleFormat`].
45    /// - Leading UTF-8 BOM with valid content → BOM is consumed and the
46    ///   file is parsed normally. This parser-level strip is *additive*
47    ///   to [`crate::core::formats::encoding::converter`]'s file-layer
48    ///   strip; both layers coexist by design.
49    /// - Leading UTF-8 BOM with otherwise invalid content → returns
50    ///   [`crate::error::SubXError::SubtitleFormat`] (the post-BOM
51    ///   `WEBVTT` check fails).
52    /// - Out-of-order cues → preserved in original file order; no
53    ///   implicit sort is performed.
54    /// - Cue marker line with a negative timestamp → that block is
55    ///   skipped, a `debug!` log records the skip, and parsing
56    ///   continues.
57    /// - Cue body exceeding the per-cue cap (1 MiB) → returns
58    ///   [`crate::error::SubXError::SubtitleFormat`]. The cap is enforced
59    ///   on raw pre-normalization bytes, so `\r\n` padding cannot bypass
60    ///   the limit by shrinking under line-ending normalization.
61    /// - CRLF (`\r\n`), bare-CR (`\r`), or mixed line endings → input is
62    ///   normalized to LF after the `WEBVTT` header check and before
63    ///   block splitting; CRLF and mixed inputs parse to the same entry
64    ///   count and text content as their LF counterpart.
65    /// - Final cue not followed by a trailing blank line → still
66    ///   recognized; the file parses successfully.
67    fn parse(&self, content: &str) -> Result<Subtitle> {
68        parser::parse(content)
69    }
70
71    fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
72        serializer::serialize(subtitle)
73    }
74
75    fn detect(&self, content: &str) -> bool {
76        parser::detect(content)
77    }
78
79    fn format_name(&self) -> &'static str {
80        "VTT"
81    }
82
83    fn file_extensions(&self) -> &'static [&'static str] {
84        &["vtt"]
85    }
86}