subx_cli/core/formats/
vtt.rs

1//! Web Video Text Tracks (WebVTT) subtitle format implementation.
2//!
3//! This module provides parsing, serialization, and detection capabilities
4//! for the WebVTT subtitle format, including timestamp parsing and formatting.
5//!
6//! # Examples
7//!
8//! ```rust
9//! use subx_cli::core::formats::{SubtitleFormat, vtt::VttFormat};
10//! let vtt = VttFormat;
11//! let content = "WEBVTT\n\n00:00:01.000 --> 00:00:03.000\nHello";
12//! let subtitle = vtt.parse(content).unwrap();
13//! ```
14
15use crate::Result;
16use crate::core::formats::{
17    Subtitle, SubtitleEntry, SubtitleFormat, SubtitleFormatType, SubtitleMetadata,
18};
19use crate::error::SubXError;
20use regex::Regex;
21use std::time::Duration;
22
/// Subtitle format implementation for WebVTT.
///
/// `VttFormat` is a stateless unit struct; all behavior lives in its
/// [`SubtitleFormat`] implementation (parsing, serialization, and detection
/// of WebVTT content).
///
/// The common traits are derived so the handle can be debug-printed, copied
/// freely, and built through `Default` like any other zero-sized marker type.
#[derive(Debug, Clone, Copy, Default)]
pub struct VttFormat;
28
29impl SubtitleFormat for VttFormat {
30    fn parse(&self, content: &str) -> Result<Subtitle> {
31        let time_re = Regex::new(
32            r"(?m)^(\d{2}):(\d{2}):(\d{2})\.(\d{3}) --> (\d{2}):(\d{2}):(\d{2})\.(\d{3})",
33        )
34        .map_err(|e: regex::Error| SubXError::subtitle_format(self.format_name(), e.to_string()))?;
35        let mut entries = Vec::new();
36        for block in content.split("\n\n") {
37            let block = block.trim();
38            if block.is_empty()
39                || block.starts_with("WEBVTT")
40                || block.starts_with("NOTE")
41                || block.starts_with("STYLE")
42            {
43                continue;
44            }
45            let lines: Vec<&str> = block.lines().collect();
46            let mut idx = 0;
47            if !time_re.is_match(lines[0]) {
48                idx = 1;
49                if idx >= lines.len() {
50                    continue;
51                }
52            }
53            if let Some(caps) = time_re.captures(lines[idx]) {
54                let start = parse_vtt_time(&caps, 1)?;
55                let end = parse_vtt_time(&caps, 5)?;
56                let text = lines[(idx + 1)..].join("\n");
57                entries.push(SubtitleEntry {
58                    index: entries.len() + 1,
59                    start_time: start,
60                    end_time: end,
61                    text,
62                    styling: None,
63                });
64            }
65        }
66        Ok(Subtitle {
67            entries,
68            metadata: SubtitleMetadata {
69                title: None,
70                language: None,
71                encoding: "utf-8".to_string(),
72                frame_rate: None,
73                original_format: SubtitleFormatType::Vtt,
74            },
75            format: SubtitleFormatType::Vtt,
76        })
77    }
78
79    fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
80        let mut output = String::new();
81        output.push_str("WEBVTT\n\n");
82        for entry in &subtitle.entries {
83            output.push_str(&format!("{}\n", entry.index));
84            output.push_str(&format_vtt_time_range(entry.start_time, entry.end_time));
85            output.push_str(&format!("{}\n\n", entry.text));
86        }
87        Ok(output)
88    }
89
90    fn detect(&self, content: &str) -> bool {
91        content.trim_start().starts_with("WEBVTT")
92    }
93
94    fn format_name(&self) -> &'static str {
95        "VTT"
96    }
97
98    fn file_extensions(&self) -> &'static [&'static str] {
99        &["vtt"]
100    }
101}
102
103fn parse_vtt_time(caps: &regex::Captures, start: usize) -> Result<Duration> {
104    let hours: u64 = caps[start]
105        .parse()
106        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
107    let minutes: u64 = caps[start + 1]
108        .parse()
109        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
110    let seconds: u64 = caps[start + 2]
111        .parse()
112        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
113    let millis: u64 = caps[start + 3]
114        .parse()
115        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
116    Ok(Duration::from_millis(
117        hours * 3600 * 1000 + minutes * 60 * 1000 + seconds * 1000 + millis,
118    ))
119}
120
/// Formats a duration as `HH:MM:SS.mmm`.
///
/// Sub-millisecond precision is truncated. Hours are zero-padded to at least
/// two digits and grow wider for durations of 100 hours or more.
fn format_vtt_time(duration: Duration) -> String {
    let whole_secs = duration.as_secs();
    let millis = duration.subsec_millis();

    let hours = whole_secs / 3600;
    let minutes = (whole_secs % 3600) / 60;
    let seconds = whole_secs % 60;

    format!("{:02}:{:02}:{:02}.{:03}", hours, minutes, seconds, millis)
}
129
130fn format_vtt_time_range(start: Duration, end: Duration) -> String {
131    format!("{} --> {}\n", format_vtt_time(start), format_vtt_time(end))
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Header followed by one numbered cue carrying two lines of text.
    const SAMPLE: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.500\nHello\nWorld\n";

    /// Builds an unstyled cue spanning whole seconds.
    fn cue(index: usize, start_secs: u64, end_secs: u64, text: &str) -> SubtitleEntry {
        SubtitleEntry {
            index,
            start_time: Duration::from_secs(start_secs),
            end_time: Duration::from_secs(end_secs),
            text: text.into(),
            styling: None,
        }
    }

    /// Wraps the given cues in a VTT subtitle with default metadata.
    fn vtt_subtitle(entries: Vec<SubtitleEntry>) -> Subtitle {
        Subtitle {
            entries,
            metadata: SubtitleMetadata {
                title: None,
                language: None,
                encoding: "utf-8".to_string(),
                frame_rate: None,
                original_format: SubtitleFormatType::Vtt,
            },
            format: SubtitleFormatType::Vtt,
        }
    }

    #[test]
    fn test_parse_and_serialize() {
        let format = VttFormat;
        let parsed = format.parse(SAMPLE).expect("parse failed");
        assert_eq!(parsed.entries.len(), 1);

        let rendered = format.serialize(&parsed).expect("serialize failed");
        assert!(rendered.contains("00:00:01.000 --> 00:00:03.500"));
    }

    #[test]
    fn test_detect_and_skip_headers() {
        let format = VttFormat;
        // Content that opens with the WEBVTT signature is recognized.
        assert!(format.detect("WEBVTT\nContent"));
        // A bare timing line without the signature is not.
        assert!(!format.detect("00:00:00.000 --> 00:00:01.000"));
    }

    #[test]
    fn test_parse_with_note_and_style() {
        let input = "WEBVTT\n\nNOTE this is note\nSTYLE body {color:red}\n\n1\n00:00:02.000 --> 00:00:03.000\nTest\n";
        let parsed = VttFormat
            .parse(input)
            .expect("parse with NOTE/STYLE failed");
        assert_eq!(parsed.entries.len(), 1);
        assert_eq!(parsed.entries[0].text, "Test");
    }

    #[test]
    fn test_serialize_multiple_entries() {
        let subtitle = vtt_subtitle(vec![cue(1, 1, 2, "A"), cue(2, 3, 4, "B")]);
        let rendered = VttFormat
            .serialize(&subtitle)
            .expect("serialize multiple failed");
        assert!(rendered.contains("WEBVTT"));
        assert!(rendered.contains("1\n"));
        assert!(rendered.contains("2\n"));
    }
}