// subx_cli/core/formats/manager.rs

1use crate::core::formats::{Subtitle, SubtitleFormat};
2use log::{info, warn};
3
4/// 格式管理器:自動檢測與選擇適當的解析器
5pub struct FormatManager {
6    formats: Vec<Box<dyn SubtitleFormat>>,
7}
8
9impl Default for FormatManager {
10    fn default() -> Self {
11        Self::new()
12    }
13}
14
15impl FormatManager {
16    /// 建立管理器並註冊所有格式
17    pub fn new() -> Self {
18        Self {
19            formats: vec![
20                Box::new(crate::core::formats::ass::AssFormat),
21                Box::new(crate::core::formats::vtt::VttFormat),
22                Box::new(crate::core::formats::srt::SrtFormat),
23                Box::new(crate::core::formats::sub::SubFormat),
24            ],
25        }
26    }
27
28    /// 自動檢測格式並解析
29    pub fn parse_auto(&self, content: &str) -> crate::Result<Subtitle> {
30        for fmt in &self.formats {
31            if fmt.detect(content) {
32                return fmt.parse(content);
33            }
34        }
35        Err(crate::error::SubXError::subtitle_format(
36            "Unknown",
37            "未知的字幕格式",
38        ))
39    }
40
41    /// 根據格式名稱取得解析器
42    pub fn get_format(&self, name: &str) -> Option<&dyn SubtitleFormat> {
43        let lname = name.to_lowercase();
44        self.formats
45            .iter()
46            .find(|f| f.format_name().to_lowercase() == lname)
47            .map(|f| f.as_ref())
48    }
49
50    /// 根據副檔名取得解析器
51    pub fn get_format_by_extension(&self, ext: &str) -> Option<&dyn SubtitleFormat> {
52        let ext_lc = ext.to_lowercase();
53        self.formats
54            .iter()
55            .find(|f| f.file_extensions().contains(&ext_lc.as_str()))
56            .map(|f| f.as_ref())
57    }
58
59    /// 讀取字幕並自動檢測並轉換編碼為 UTF-8
60    pub fn read_subtitle_with_encoding_detection(&self, file_path: &str) -> crate::Result<String> {
61        let detector = crate::core::formats::encoding::EncodingDetector::new()?;
62        let info = detector.detect_file_encoding(file_path)?;
63        let converter = crate::core::formats::encoding::EncodingConverter::new();
64        let result = converter.convert_file_to_utf8(file_path, &info)?;
65        let validation = converter.validate_conversion(&result);
66        if !validation.is_valid {
67            warn!("Encoding conversion warnings: {:?}", validation.warnings);
68        }
69        info!(
70            "Detected encoding: {:?} (confidence: {:.2})",
71            info.charset, info.confidence
72        );
73        Ok(result.converted_text)
74    }
75
76    /// 取得檔案的編碼信息
77    pub fn get_encoding_info(
78        &self,
79        file_path: &str,
80    ) -> crate::Result<crate::core::formats::encoding::EncodingInfo> {
81        let detector = crate::core::formats::encoding::EncodingDetector::new()?;
82        detector.detect_file_encoding(file_path)
83    }
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::formats::SubtitleFormatType;
    use std::time::Duration;

    // Minimal single-cue samples for SRT and WebVTT.
    const SAMPLE_SRT: &str = "1\n00:00:00,000 --> 00:00:01,000\nOne\n";
    const SAMPLE_VTT: &str = "WEBVTT\n\n1\n00:00:00.000 --> 00:00:01.000\nOne\n";
    // WebVTT sample with three numbered cues.
    const SAMPLE_WEBVTT_THREE_LINES: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.000\n第一句字幕內容\n\n2\n00:00:04.000 --> 00:00:06.000\n第二句字幕內容\n\n3\n00:00:07.000 --> 00:00:09.000\n第三句字幕內容\n";

    // WebVTT sample with a NOTE block, a STYLE block and a multi-line first cue.
    const COMPLEX_WEBVTT: &str = "WEBVTT\n\nNOTE 這是註解,應該被忽略\n\nSTYLE\n::cue {\n  background-color: black;\n  color: white;\n}\n\n1\n00:00:01.000 --> 00:00:03.500\n第一句字幕內容\n包含多行文字\n\n2\n00:00:04.200 --> 00:00:07.800\n第二句字幕內容\n\n3\n00:00:08.000 --> 00:00:10.000\n第三句字幕內容\n";

    /// Formats can be looked up both by name and by file extension.
    #[test]
    fn test_get_format_by_name_and_extension() {
        let manager = FormatManager::new();

        let by_name = manager.get_format("srt").expect("get_format srt");
        assert_eq!(by_name.format_name(), "SRT");

        let by_extension = manager
            .get_format_by_extension("vtt")
            .expect("get_format_by_extension vtt");
        assert_eq!(by_extension.format_name(), "VTT");
    }

    /// `parse_auto` recognises SRT and VTT input and rejects unknown content.
    #[test]
    fn test_parse_auto_supported_and_error() {
        let manager = FormatManager::new();

        let parsed_srt = manager.parse_auto(SAMPLE_SRT).expect("parse_auto srt");
        assert_eq!(parsed_srt.format, SubtitleFormatType::Srt);

        let parsed_vtt = manager.parse_auto(SAMPLE_VTT).expect("parse_auto vtt");
        assert_eq!(parsed_vtt.format, SubtitleFormatType::Vtt);

        let unknown = manager.parse_auto("no format");
        assert!(unknown.is_err());
    }

    /// A three-cue WebVTT document is auto-detected and parsed cue by cue.
    #[test]
    fn test_webvtt_parse_auto_first_subtitle_content() {
        let manager = FormatManager::new();

        let parsed = manager
            .parse_auto(SAMPLE_WEBVTT_THREE_LINES)
            .expect("Failed to parse WEBVTT format using parse_auto");

        // Auto detection must pick the WebVTT format.
        assert_eq!(
            parsed.format,
            SubtitleFormatType::Vtt,
            "Auto detection should identify as WEBVTT format"
        );

        // Exactly three cues are expected.
        assert_eq!(
            parsed.entries.len(),
            3,
            "Should parse exactly 3 subtitle entries"
        );

        // Check the first cue's text, index and timing.
        let first_entry = &parsed.entries[0];
        assert_eq!(
            first_entry.text, "第一句字幕內容",
            "First subtitle content should be correctly parsed"
        );
        assert_eq!(first_entry.index, 1, "First subtitle should have index 1");
        assert_eq!(
            first_entry.start_time,
            Duration::from_secs(1),
            "First subtitle start time should be 1 second"
        );
        assert_eq!(
            first_entry.end_time,
            Duration::from_secs(3),
            "First subtitle end time should be 3 seconds"
        );

        // Check the text of the remaining cues.
        assert_eq!(parsed.entries[1].text, "第二句字幕內容");
        assert_eq!(parsed.entries[2].text, "第三句字幕內容");
    }

    /// NOTE and STYLE blocks do not produce cues; multi-line cue text is kept.
    #[test]
    fn test_webvtt_parse_auto_with_complex_content() {
        let manager = FormatManager::new();
        let parsed = manager
            .parse_auto(COMPLEX_WEBVTT)
            .expect("Failed to parse complex WEBVTT");

        // Detected as WebVTT with three cues (NOTE and STYLE are ignored).
        assert_eq!(parsed.format, SubtitleFormatType::Vtt);
        assert_eq!(parsed.entries.len(), 3);

        // The first cue keeps both text lines and has the expected timing.
        let first_entry = &parsed.entries[0];
        assert_eq!(first_entry.text, "第一句字幕內容\n包含多行文字");
        assert_eq!(first_entry.start_time, Duration::from_secs(1));
        assert_eq!(first_entry.end_time, Duration::from_millis(3500));
    }
}