subx_cli/core/formats/
vtt.rs

1use crate::Result;
2use crate::core::formats::{
3    Subtitle, SubtitleEntry, SubtitleFormat, SubtitleFormatType, SubtitleMetadata,
4};
5use crate::error::SubXError;
6use regex::Regex;
7use std::time::Duration;
8
/// WebVTT (`.vtt`) subtitle format handler.
///
/// A stateless unit type: all behaviour lives in its [`SubtitleFormat`]
/// implementation, which parses, serializes and detects WebVTT content.
#[derive(Debug, Clone, Copy, Default)]
pub struct VttFormat;
11
12impl SubtitleFormat for VttFormat {
13    fn parse(&self, content: &str) -> Result<Subtitle> {
14        let time_re = Regex::new(
15            r"(?m)^(\d{2}):(\d{2}):(\d{2})\.(\d{3}) --> (\d{2}):(\d{2}):(\d{2})\.(\d{3})",
16        )
17        .map_err(|e: regex::Error| SubXError::subtitle_format(self.format_name(), e.to_string()))?;
18        let mut entries = Vec::new();
19        for block in content.split("\n\n") {
20            let block = block.trim();
21            if block.is_empty()
22                || block.starts_with("WEBVTT")
23                || block.starts_with("NOTE")
24                || block.starts_with("STYLE")
25            {
26                continue;
27            }
28            let lines: Vec<&str> = block.lines().collect();
29            let mut idx = 0;
30            if !time_re.is_match(lines[0]) {
31                idx = 1;
32                if idx >= lines.len() {
33                    continue;
34                }
35            }
36            if let Some(caps) = time_re.captures(lines[idx]) {
37                let start = parse_vtt_time(&caps, 1)?;
38                let end = parse_vtt_time(&caps, 5)?;
39                let text = lines[(idx + 1)..].join("\n");
40                entries.push(SubtitleEntry {
41                    index: entries.len() + 1,
42                    start_time: start,
43                    end_time: end,
44                    text,
45                    styling: None,
46                });
47            }
48        }
49        Ok(Subtitle {
50            entries,
51            metadata: SubtitleMetadata {
52                title: None,
53                language: None,
54                encoding: "utf-8".to_string(),
55                frame_rate: None,
56                original_format: SubtitleFormatType::Vtt,
57            },
58            format: SubtitleFormatType::Vtt,
59        })
60    }
61
62    fn serialize(&self, subtitle: &Subtitle) -> Result<String> {
63        let mut output = String::new();
64        output.push_str("WEBVTT\n\n");
65        for entry in &subtitle.entries {
66            output.push_str(&format!("{}\n", entry.index));
67            output.push_str(&format_vtt_time_range(entry.start_time, entry.end_time));
68            output.push_str(&format!("{}\n\n", entry.text));
69        }
70        Ok(output)
71    }
72
73    fn detect(&self, content: &str) -> bool {
74        content.trim_start().starts_with("WEBVTT")
75    }
76
77    fn format_name(&self) -> &'static str {
78        "VTT"
79    }
80
81    fn file_extensions(&self) -> &'static [&'static str] {
82        &["vtt"]
83    }
84}
85
86fn parse_vtt_time(caps: &regex::Captures, start: usize) -> Result<Duration> {
87    let hours: u64 = caps[start]
88        .parse()
89        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
90    let minutes: u64 = caps[start + 1]
91        .parse()
92        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
93    let seconds: u64 = caps[start + 2]
94        .parse()
95        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
96    let millis: u64 = caps[start + 3]
97        .parse()
98        .map_err(|e: std::num::ParseIntError| SubXError::subtitle_format("VTT", e.to_string()))?;
99    Ok(Duration::from_millis(
100        hours * 3600 * 1000 + minutes * 60 * 1000 + seconds * 1000 + millis,
101    ))
102}
103
/// Renders a duration as a WebVTT timestamp of the form `HH:MM:SS.mmm`.
///
/// Anything below one millisecond is truncated. The hours field is
/// zero-padded to two digits and simply grows wider from 100 hours upwards.
fn format_vtt_time(duration: Duration) -> String {
    let whole_secs = duration.as_secs();
    let hours = whole_secs / 3600;
    let minutes = (whole_secs / 60) % 60;
    let seconds = whole_secs % 60;
    format!(
        "{:02}:{:02}:{:02}.{:03}",
        hours,
        minutes,
        seconds,
        duration.subsec_millis()
    )
}
112
113fn format_vtt_time_range(start: Duration, end: Duration) -> String {
114    format!("{} --> {}\n", format_vtt_time(start), format_vtt_time(end))
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal document: header plus one identified cue with two text lines.
    const SAMPLE: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.500\nHello\nWorld\n";

    /// Parsing the sample yields exactly one fully populated entry, and
    /// serializing it reproduces the document with a trailing blank line.
    #[test]
    fn test_parse_and_serialize() {
        let fmt = VttFormat;
        let subtitle = fmt.parse(SAMPLE).expect("parse failed");
        assert_eq!(subtitle.entries.len(), 1);

        let entry = &subtitle.entries[0];
        assert_eq!(entry.index, 1);
        assert_eq!(entry.start_time, Duration::from_millis(1_000));
        assert_eq!(entry.end_time, Duration::from_millis(3_500));
        assert_eq!(entry.text, "Hello\nWorld");

        let out = fmt.serialize(&subtitle).expect("serialize failed");
        assert_eq!(
            out,
            "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.500\nHello\nWorld\n\n"
        );
    }

    /// Detection depends only on the `WEBVTT` signature at the start.
    #[test]
    fn test_detect_and_skip_headers() {
        let fmt = VttFormat;
        // With the WEBVTT header.
        assert!(fmt.detect("WEBVTT\nContent"));
        // Leading whitespace before the header is tolerated.
        assert!(fmt.detect("\n  WEBVTT\n"));
        // Without the header.
        assert!(!fmt.detect("00:00:00.000 --> 00:00:01.000"));
        assert!(!fmt.detect(""));
    }

    /// NOTE/STYLE blocks are skipped and do not affect cue numbering.
    #[test]
    fn test_parse_with_note_and_style() {
        let content = "WEBVTT\n\nNOTE this is note\nSTYLE body {color:red}\n\n1\n00:00:02.000 --> 00:00:03.000\nTest\n";
        let fmt = VttFormat;
        let subtitle = fmt.parse(content).expect("parse with NOTE/STYLE failed");
        assert_eq!(subtitle.entries.len(), 1);

        let entry = &subtitle.entries[0];
        assert_eq!(entry.index, 1);
        assert_eq!(entry.start_time, Duration::from_secs(2));
        assert_eq!(entry.end_time, Duration::from_secs(3));
        assert_eq!(entry.text, "Test");
    }

    /// Every entry is written as identifier, timing line, text, blank line.
    #[test]
    fn test_serialize_multiple_entries() {
        let mut subtitle = Subtitle {
            entries: Vec::new(),
            metadata: SubtitleMetadata {
                title: None,
                language: None,
                encoding: "utf-8".to_string(),
                frame_rate: None,
                original_format: SubtitleFormatType::Vtt,
            },
            format: SubtitleFormatType::Vtt,
        };
        subtitle.entries.push(SubtitleEntry {
            index: 1,
            start_time: Duration::from_secs(1),
            end_time: Duration::from_secs(2),
            text: "A".into(),
            styling: None,
        });
        subtitle.entries.push(SubtitleEntry {
            index: 2,
            start_time: Duration::from_secs(3),
            end_time: Duration::from_secs(4),
            text: "B".into(),
            styling: None,
        });
        let fmt = VttFormat;
        let out = fmt.serialize(&subtitle).expect("serialize multiple failed");
        assert_eq!(
            out,
            "WEBVTT\n\n\
             1\n00:00:01.000 --> 00:00:02.000\nA\n\n\
             2\n00:00:03.000 --> 00:00:04.000\nB\n\n"
        );
    }
}
183}