// subx_cli/core/formats/manager.rs
use crate::core::formats::{Subtitle, SubtitleFormat};
use log::{info, warn};
3
4pub struct FormatManager {
6 formats: Vec<Box<dyn SubtitleFormat>>,
7}
8
9impl Default for FormatManager {
10 fn default() -> Self {
11 Self::new()
12 }
13}
14
15impl FormatManager {
16 pub fn new() -> Self {
18 Self {
19 formats: vec![
20 Box::new(crate::core::formats::ass::AssFormat),
21 Box::new(crate::core::formats::vtt::VttFormat),
22 Box::new(crate::core::formats::srt::SrtFormat),
23 Box::new(crate::core::formats::sub::SubFormat),
24 ],
25 }
26 }
27
28 pub fn parse_auto(&self, content: &str) -> crate::Result<Subtitle> {
30 for fmt in &self.formats {
31 if fmt.detect(content) {
32 return fmt.parse(content);
33 }
34 }
35 Err(crate::error::SubXError::subtitle_format(
36 "Unknown",
37 "未知的字幕格式",
38 ))
39 }
40
41 pub fn get_format(&self, name: &str) -> Option<&dyn SubtitleFormat> {
43 let lname = name.to_lowercase();
44 self.formats
45 .iter()
46 .find(|f| f.format_name().to_lowercase() == lname)
47 .map(|f| f.as_ref())
48 }
49
50 pub fn get_format_by_extension(&self, ext: &str) -> Option<&dyn SubtitleFormat> {
52 let ext_lc = ext.to_lowercase();
53 self.formats
54 .iter()
55 .find(|f| f.file_extensions().contains(&ext_lc.as_str()))
56 .map(|f| f.as_ref())
57 }
58
59 pub fn read_subtitle_with_encoding_detection(&self, file_path: &str) -> crate::Result<String> {
61 let detector = crate::core::formats::encoding::EncodingDetector::new()?;
62 let info = detector.detect_file_encoding(file_path)?;
63 let converter = crate::core::formats::encoding::EncodingConverter::new();
64 let result = converter.convert_file_to_utf8(file_path, &info)?;
65 let validation = converter.validate_conversion(&result);
66 if !validation.is_valid {
67 warn!("Encoding conversion warnings: {:?}", validation.warnings);
68 }
69 info!(
70 "Detected encoding: {:?} (confidence: {:.2})",
71 info.charset, info.confidence
72 );
73 Ok(result.converted_text)
74 }
75
76 pub fn get_encoding_info(
78 &self,
79 file_path: &str,
80 ) -> crate::Result<crate::core::formats::encoding::EncodingInfo> {
81 let detector = crate::core::formats::encoding::EncodingDetector::new()?;
82 detector.detect_file_encoding(file_path)
83 }
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::formats::SubtitleFormatType;
    use std::time::Duration;

    // Minimal single-cue fixtures used for format detection.
    const SAMPLE_SRT: &str = "1\n00:00:00,000 --> 00:00:01,000\nOne\n";
    const SAMPLE_VTT: &str = "WEBVTT\n\n1\n00:00:00.000 --> 00:00:01.000\nOne\n";

    // WebVTT document with three single-line cues.
    const SAMPLE_WEBVTT_THREE_LINES: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.000\n第一句字幕內容\n\n2\n00:00:04.000 --> 00:00:06.000\n第二句字幕內容\n\n3\n00:00:07.000 --> 00:00:09.000\n第三句字幕內容\n";

    // WebVTT document with a NOTE block, a STYLE block and a multi-line cue.
    const COMPLEX_WEBVTT: &str = "WEBVTT\n\nNOTE 這是註解,應該被忽略\n\nSTYLE\n::cue {\n background-color: black;\n color: white;\n}\n\n1\n00:00:01.000 --> 00:00:03.500\n第一句字幕內容\n包含多行文字\n\n2\n00:00:04.200 --> 00:00:07.800\n第二句字幕內容\n\n3\n00:00:08.000 --> 00:00:10.000\n第三句字幕內容\n";

    /// Handlers can be found both by format name and by file extension.
    #[test]
    fn test_get_format_by_name_and_extension() {
        let manager = FormatManager::new();

        let by_name = manager.get_format("srt").expect("get_format srt");
        assert_eq!(by_name.format_name(), "SRT");

        let by_extension = manager
            .get_format_by_extension("vtt")
            .expect("get_format_by_extension vtt");
        assert_eq!(by_extension.format_name(), "VTT");
    }

    /// Auto-detection picks the right handler and rejects unknown content.
    #[test]
    fn test_parse_auto_supported_and_error() {
        let manager = FormatManager::new();

        let from_srt = manager.parse_auto(SAMPLE_SRT).expect("parse_auto srt");
        assert_eq!(from_srt.format, SubtitleFormatType::Srt);

        let from_vtt = manager.parse_auto(SAMPLE_VTT).expect("parse_auto vtt");
        assert_eq!(from_vtt.format, SubtitleFormatType::Vtt);

        assert!(manager.parse_auto("no format").is_err());
    }

    /// A plain three-cue WebVTT document is detected and parsed cue by cue.
    #[test]
    fn test_webvtt_parse_auto_first_subtitle_content() {
        let manager = FormatManager::new();
        let parsed = manager
            .parse_auto(SAMPLE_WEBVTT_THREE_LINES)
            .expect("Failed to parse WEBVTT format using parse_auto");

        assert_eq!(
            parsed.format,
            SubtitleFormatType::Vtt,
            "Auto detection should identify as WEBVTT format"
        );
        assert_eq!(
            parsed.entries.len(),
            3,
            "Should parse exactly 3 subtitle entries"
        );

        // The first cue is checked field by field.
        let head = &parsed.entries[0];
        assert_eq!(
            head.text, "第一句字幕內容",
            "First subtitle content should be correctly parsed"
        );
        assert_eq!(head.index, 1, "First subtitle should have index 1");
        assert_eq!(
            head.start_time,
            Duration::from_secs(1),
            "First subtitle start time should be 1 second"
        );
        assert_eq!(
            head.end_time,
            Duration::from_secs(3),
            "First subtitle end time should be 3 seconds"
        );

        // The remaining cues only need their text verified.
        assert_eq!(parsed.entries[1].text, "第二句字幕內容");
        assert_eq!(parsed.entries[2].text, "第三句字幕內容");
    }

    /// NOTE and STYLE blocks do not produce cues; multi-line cue text is kept.
    #[test]
    fn test_webvtt_parse_auto_with_complex_content() {
        let manager = FormatManager::new();
        let parsed = manager
            .parse_auto(COMPLEX_WEBVTT)
            .expect("Failed to parse complex WEBVTT");

        assert_eq!(parsed.format, SubtitleFormatType::Vtt);
        assert_eq!(parsed.entries.len(), 3);

        let head = &parsed.entries[0];
        assert_eq!(head.text, "第一句字幕內容\n包含多行文字");
        assert_eq!(head.start_time, Duration::from_millis(1000));
        assert_eq!(head.end_time, Duration::from_millis(3500));
    }
}