subx_cli/core/formats/
vtt.rs1use crate::Result;
2use crate::core::formats::{
3 Subtitle, SubtitleEntry, SubtitleFormat, SubtitleFormatType, SubtitleMetadata,
4};
5use crate::error::SubXError;
6use regex::Regex;
7use std::time::Duration;
8
/// WebVTT (`.vtt`) subtitle format handler.
///
/// This is a stateless unit type; all behaviour lives in its
/// `SubtitleFormat` implementation. The derives make it printable in
/// diagnostics and trivially constructible/copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct VttFormat;
11
12impl SubtitleFormat for VttFormat {
13 fn parse(&self, content: &str) -> Result<Subtitle> {
14 let time_re = Regex::new(
15 r"(?m)^(\d{2}):(\d{2}):(\d{2})\.(\d{3}) --> (\d{2}):(\d{2}):(\d{2})\.(\d{3})",
16 )
17 .map_err(|e: regex::Error| SubXError::subtitle_format(self.format_name(), e.to_string()))?;
18 let mut entries = Vec::new();
19 for block in content.split("\n\n") {
20 let block = block.trim();
21 if block.is_empty()
22 || block.starts_with("WEBVTT")
23 || block.starts_with("NOTE")
24 || block.starts_with("STYLE")
25 {
26 continue;
27 }
28 let lines: Vec<&str> = block.lines().collect();
29 let mut idx = 0;
30 if !time_re.is_match(lines[0]) {
31 idx = 1;
32 if idx >= lines.len() {
33 continue;
34 }
35 }
36 if let Some(caps) = time_re.captures(lines[idx]) {
37 let start = parse_vtt_time(&caps, 1)?;
38 let end = parse_vtt_time(&caps, 5)?;
39 let text = lines[(idx + 1)..].join("\n");
40 entries.push(SubtitleEntry {
41 index: entries.len() + 1,
42 start_time: start,
43 end_time: end,
44 text,
45 styling: None,
46 });
47 }
48 }
49 Ok(Subtitle {
50 entries,
51 metadata: SubtitleMetadata {
52 title: None,
53 language: None,
54 encoding: "utf-8".to_string(),
55 frame_rate: None,
56 original_format: SubtitleFormatType::Vtt,
57 },
58 format: SubtitleFormatType::Vtt,
59 })
60 }
61
62 fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
63 let mut output = String::new();
64 output.push_str("WEBVTT\n\n");
65 for entry in &subtitle.entries {
66 output.push_str(&format!("{}\n", entry.index));
67 output.push_str(&format_vtt_time_range(entry.start_time, entry.end_time));
68 output.push_str(&format!("{}\n\n", entry.text));
69 }
70 Ok(output)
71 }
72
73 fn detect(&self, content: &str) -> bool {
74 content.trim_start().starts_with("WEBVTT")
75 }
76
77 fn format_name(&self) -> &'static str {
78 "VTT"
79 }
80
81 fn file_extensions(&self) -> &'static [&'static str] {
82 &["vtt"]
83 }
84}
85
86fn parse_vtt_time(caps: ®ex::Captures, start: usize) -> Result<Duration> {
87 let hours: u64 = caps[start]
88 .parse()
89 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
90 let minutes: u64 = caps[start + 1]
91 .parse()
92 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
93 let seconds: u64 = caps[start + 2]
94 .parse()
95 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
96 let millis: u64 = caps[start + 3]
97 .parse()
98 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
99 Ok(Duration::from_millis(
100 hours * 3600 * 1000 + minutes * 60 * 1000 + seconds * 1000 + millis,
101 ))
102}
103
/// Formats `duration` as a WebVTT timestamp, `hh:mm:ss.ttt`.
///
/// Hours are zero-padded to at least two digits and grow beyond that when
/// needed; anything finer than a millisecond is truncated.
fn format_vtt_time(duration: Duration) -> String {
    let whole_secs = duration.as_secs();
    let millis = duration.subsec_millis();
    let (hours, minutes, seconds) = (whole_secs / 3600, whole_secs / 60 % 60, whole_secs % 60);
    format!("{:02}:{:02}:{:02}.{:03}", hours, minutes, seconds, millis)
}
112
113fn format_vtt_time_range(start: Duration, end: Duration) -> String {
114 format!("{} --> {}\n", format_vtt_time(start), format_vtt_time(end))
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal file: header plus one two-line cue with a numeric identifier.
    const SAMPLE: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.500\nHello\nWorld\n";

    /// Builds an unstyled cue whose endpoints are whole seconds.
    fn cue(index: usize, start_secs: u64, end_secs: u64, text: &str) -> SubtitleEntry {
        SubtitleEntry {
            index,
            start_time: Duration::from_secs(start_secs),
            end_time: Duration::from_secs(end_secs),
            text: text.into(),
            styling: None,
        }
    }

    /// Wraps `entries` in a VTT-flavoured `Subtitle` with default metadata.
    fn vtt_subtitle(entries: Vec<SubtitleEntry>) -> Subtitle {
        Subtitle {
            entries,
            metadata: SubtitleMetadata {
                title: None,
                language: None,
                encoding: "utf-8".to_string(),
                frame_rate: None,
                original_format: SubtitleFormatType::Vtt,
            },
            format: SubtitleFormatType::Vtt,
        }
    }

    /// A parsed cue keeps its timing when written back out.
    #[test]
    fn test_parse_and_serialize() {
        let parsed = VttFormat.parse(SAMPLE).expect("parse failed");
        assert_eq!(parsed.entries.len(), 1);

        let rendered = VttFormat.serialize(&parsed).expect("serialize failed");
        assert!(rendered.contains("00:00:01.000 --> 00:00:03.500"));
    }

    /// Detection keys on the `WEBVTT` signature, not on timing lines.
    #[test]
    fn test_detect_and_skip_headers() {
        let format = VttFormat;
        assert!(format.detect("WEBVTT\nContent"));
        assert!(!format.detect("00:00:00.000 --> 00:00:01.000"));
    }

    /// A NOTE/STYLE block between the header and the cue is skipped.
    #[test]
    fn test_parse_with_note_and_style() {
        let content = "WEBVTT\n\nNOTE this is note\nSTYLE body {color:red}\n\n1\n00:00:02.000 --> 00:00:03.000\nTest\n";
        let parsed = VttFormat
            .parse(content)
            .expect("parse with NOTE/STYLE failed");
        assert_eq!(parsed.entries.len(), 1);
        assert_eq!(parsed.entries[0].text, "Test");
    }

    /// Serialising several cues emits the header and every cue index.
    #[test]
    fn test_serialize_multiple_entries() {
        let subtitle = vtt_subtitle(vec![cue(1, 1, 2, "A"), cue(2, 3, 4, "B")]);
        let rendered = VttFormat
            .serialize(&subtitle)
            .expect("serialize multiple failed");
        assert!(rendered.contains("WEBVTT"));
        assert!(rendered.contains("1\n"));
        assert!(rendered.contains("2\n"));
    }
}