// subx_cli/core/formats/vtt.rs
use crate::Result;
use crate::core::formats::{
    Subtitle, SubtitleEntry, SubtitleFormat, SubtitleFormatType, SubtitleMetadata,
};
use crate::error::SubXError;
use regex::Regex;
use std::time::Duration;

/// WebVTT (`.vtt`) subtitle format handler.
///
/// A stateless unit struct; parsing, serialization and detection are provided
/// by its [`SubtitleFormat`] implementation.
pub struct VttFormat;
29impl SubtitleFormat for VttFormat {
30 fn parse(&self, content: &str) -> Result<Subtitle> {
31 let time_re = Regex::new(
32 r"(?m)^(\d{2}):(\d{2}):(\d{2})\.(\d{3}) --> (\d{2}):(\d{2}):(\d{2})\.(\d{3})",
33 )
34 .map_err(|e: regex::Error| SubXError::subtitle_format(self.format_name(), e.to_string()))?;
35 let mut entries = Vec::new();
36 for block in content.split("\n\n") {
37 let block = block.trim();
38 if block.is_empty()
39 || block.starts_with("WEBVTT")
40 || block.starts_with("NOTE")
41 || block.starts_with("STYLE")
42 {
43 continue;
44 }
45 let lines: Vec<&str> = block.lines().collect();
46 let mut idx = 0;
47 if !time_re.is_match(lines[0]) {
48 idx = 1;
49 if idx >= lines.len() {
50 continue;
51 }
52 }
53 if let Some(caps) = time_re.captures(lines[idx]) {
54 let start = parse_vtt_time(&caps, 1)?;
55 let end = parse_vtt_time(&caps, 5)?;
56 let text = lines[(idx + 1)..].join("\n");
57 entries.push(SubtitleEntry {
58 index: entries.len() + 1,
59 start_time: start,
60 end_time: end,
61 text,
62 styling: None,
63 });
64 }
65 }
66 Ok(Subtitle {
67 entries,
68 metadata: SubtitleMetadata {
69 title: None,
70 language: None,
71 encoding: "utf-8".to_string(),
72 frame_rate: None,
73 original_format: SubtitleFormatType::Vtt,
74 },
75 format: SubtitleFormatType::Vtt,
76 })
77 }
78
79 fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
80 let mut output = String::new();
81 output.push_str("WEBVTT\n\n");
82 for entry in &subtitle.entries {
83 output.push_str(&format!("{}\n", entry.index));
84 output.push_str(&format_vtt_time_range(entry.start_time, entry.end_time));
85 output.push_str(&format!("{}\n\n", entry.text));
86 }
87 Ok(output)
88 }
89
90 fn detect(&self, content: &str) -> bool {
91 content.trim_start().starts_with("WEBVTT")
92 }
93
94 fn format_name(&self) -> &'static str {
95 "VTT"
96 }
97
98 fn file_extensions(&self) -> &'static [&'static str] {
99 &["vtt"]
100 }
101}
103fn parse_vtt_time(caps: ®ex::Captures, start: usize) -> Result<Duration> {
104 let hours: u64 = caps[start]
105 .parse()
106 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
107 let minutes: u64 = caps[start + 1]
108 .parse()
109 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
110 let seconds: u64 = caps[start + 2]
111 .parse()
112 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
113 let millis: u64 = caps[start + 3]
114 .parse()
115 .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
116 Ok(Duration::from_millis(
117 hours * 3600 * 1000 + minutes * 60 * 1000 + seconds * 1000 + millis,
118 ))
119}
/// Formats `duration` as a WebVTT timestamp, `HH:MM:SS.mmm`.
///
/// Anything finer than a millisecond is truncated. Hours are zero-padded to
/// two digits and simply grow wider for durations of 100 hours or more.
fn format_vtt_time(duration: Duration) -> String {
    let whole_secs = duration.as_secs();
    let hours = whole_secs / 3600;
    let minutes = (whole_secs / 60) % 60;
    let seconds = whole_secs % 60;
    let millis = duration.subsec_millis();
    format!("{:02}:{:02}:{:02}.{:03}", hours, minutes, seconds, millis)
}
130fn format_vtt_time_range(start: Duration, end: Duration) -> String {
131 format!("{} --> {}\n", format_vtt_time(start), format_vtt_time(end))
132}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal document: header plus one two-line cue with an identifier.
    const SAMPLE: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.500\nHello\nWorld\n";

    #[test]
    fn test_parse_and_serialize() {
        let fmt = VttFormat;
        let subtitle = fmt.parse(SAMPLE).expect("parse failed");
        assert_eq!(subtitle.entries.len(), 1);

        // Pin the parsed fields, not just the entry count.
        let entry = &subtitle.entries[0];
        assert_eq!(entry.index, 1);
        assert_eq!(entry.start_time, Duration::from_millis(1000));
        assert_eq!(entry.end_time, Duration::from_millis(3500));
        assert_eq!(entry.text, "Hello\nWorld");

        let out = fmt.serialize(&subtitle).expect("serialize failed");
        assert!(out.contains("00:00:01.000 --> 00:00:03.500"));
    }

    #[test]
    fn test_detect_and_skip_headers() {
        let fmt = VttFormat;
        assert!(fmt.detect("WEBVTT\nContent"));
        assert!(!fmt.detect("00:00:00.000 --> 00:00:01.000"));
    }

    #[test]
    fn test_parse_with_note_and_style() {
        let content = "WEBVTT\n\nNOTE this is note\nSTYLE body {color:red}\n\n1\n00:00:02.000 --> 00:00:03.000\nTest\n";
        let fmt = VttFormat;
        let subtitle = fmt.parse(content).expect("parse with NOTE/STYLE failed");
        assert_eq!(subtitle.entries.len(), 1);
        assert_eq!(subtitle.entries[0].text, "Test");
    }

    #[test]
    fn test_serialize_multiple_entries() {
        let subtitle = Subtitle {
            entries: vec![
                SubtitleEntry {
                    index: 1,
                    start_time: Duration::from_secs(1),
                    end_time: Duration::from_secs(2),
                    text: "A".into(),
                    styling: None,
                },
                SubtitleEntry {
                    index: 2,
                    start_time: Duration::from_secs(3),
                    end_time: Duration::from_secs(4),
                    text: "B".into(),
                    styling: None,
                },
            ],
            metadata: SubtitleMetadata {
                title: None,
                language: None,
                encoding: "utf-8".to_string(),
                frame_rate: None,
                original_format: SubtitleFormatType::Vtt,
            },
            format: SubtitleFormatType::Vtt,
        };

        let fmt = VttFormat;
        let out = fmt.serialize(&subtitle).expect("serialize multiple failed");
        // Exact comparison: substring checks such as `contains("1\n")` would
        // also pass on malformed output.
        assert_eq!(
            out,
            "WEBVTT\n\n1\n00:00:01.000 --> 00:00:02.000\nA\n\n2\n00:00:03.000 --> 00:00:04.000\nB\n\n"
        );
    }
}