subx_cli/core/formats/
manager.rs

//! Subtitle format manager that detects and dispatches to the appropriate parser.
//!
//! This module provides the `FormatManager`, which automatically detects
//! subtitle formats and selects the correct parser for loading and saving.
//!
//! # Examples
//!
//! ```rust,no_run
//! use subx_cli::core::formats::manager::FormatManager;
//! let manager = FormatManager::new();
//! let content = "1\n00:00:01,000 --> 00:00:02,000\nHello world\n";
//! let subtitle = manager.parse_auto(content).unwrap();
//! ```
14
15use crate::core::formats::{Subtitle, SubtitleFormat};
16use log::{info, warn};
17
18/// Manager for subtitle format detection and parser dispatch.
19///
20/// The `FormatManager` handles format inference based on file contents
21/// or extensions and routes parsing and serialization requests accordingly.
22pub struct FormatManager {
23    formats: Vec<Box<dyn SubtitleFormat>>,
24}
25
26impl Default for FormatManager {
27    fn default() -> Self {
28        Self::new()
29    }
30}
31
32impl FormatManager {
33    /// Create manager and register all formats
34    pub fn new() -> Self {
35        Self {
36            formats: vec![
37                Box::new(crate::core::formats::ass::AssFormat),
38                Box::new(crate::core::formats::vtt::VttFormat),
39                Box::new(crate::core::formats::srt::SrtFormat),
40                Box::new(crate::core::formats::sub::SubFormat),
41            ],
42        }
43    }
44
45    /// Auto-detect format and parse
46    pub fn parse_auto(&self, content: &str) -> crate::Result<Subtitle> {
47        for fmt in &self.formats {
48            if fmt.detect(content) {
49                return fmt.parse(content);
50            }
51        }
52        Err(crate::error::SubXError::subtitle_format(
53            "Unknown",
54            "Unknown subtitle format",
55        ))
56    }
57
58    /// Get parser by format name
59    pub fn get_format(&self, name: &str) -> Option<&dyn SubtitleFormat> {
60        let lname = name.to_lowercase();
61        self.formats
62            .iter()
63            .find(|f| f.format_name().to_lowercase() == lname)
64            .map(|f| f.as_ref())
65    }
66
67    /// Get parser by file extension
68    pub fn get_format_by_extension(&self, ext: &str) -> Option<&dyn SubtitleFormat> {
69        let ext_lc = ext.to_lowercase();
70        self.formats
71            .iter()
72            .find(|f| f.file_extensions().contains(&ext_lc.as_str()))
73            .map(|f| f.as_ref())
74    }
75
76    /// Read subtitle and auto-detect encoding, convert to UTF-8
77    pub fn read_subtitle_with_encoding_detection(&self, file_path: &str) -> crate::Result<String> {
78        let detector = crate::core::formats::encoding::EncodingDetector::with_defaults();
79        let info = detector.detect_file_encoding(file_path)?;
80        let converter = crate::core::formats::encoding::EncodingConverter::new();
81        let result = converter.convert_file_to_utf8(file_path, &info)?;
82        let validation = converter.validate_conversion(&result);
83        if !validation.is_valid {
84            warn!("Encoding conversion warnings: {:?}", validation.warnings);
85        }
86        info!(
87            "Detected encoding: {:?} (confidence: {:.2})",
88            info.charset, info.confidence
89        );
90        Ok(result.converted_text)
91    }
92
93    /// Get file encoding information
94    pub fn get_encoding_info(
95        &self,
96        file_path: &str,
97    ) -> crate::Result<crate::core::formats::encoding::EncodingInfo> {
98        let detector = crate::core::formats::encoding::EncodingDetector::with_defaults();
99        detector.detect_file_encoding(file_path)
100    }
101
102    /// Load subtitle from file with encoding detection and parsing
103    pub fn load_subtitle(&self, file_path: &std::path::Path) -> crate::Result<Subtitle> {
104        let content =
105            self.read_subtitle_with_encoding_detection(file_path.to_str().ok_or_else(|| {
106                crate::error::SubXError::subtitle_format("", "Invalid file path encoding")
107            })?)?;
108        self.parse_auto(&content)
109    }
110
111    /// Save subtitle to file in the same format as extension
112    pub fn save_subtitle(
113        &self,
114        subtitle: &Subtitle,
115        file_path: &std::path::Path,
116    ) -> crate::Result<()> {
117        let ext = file_path.extension().and_then(|s| s.to_str()).unwrap_or("");
118        let fmt = self.get_format_by_extension(ext).ok_or_else(|| {
119            crate::error::SubXError::subtitle_format(ext, "Unsupported subtitle format for saving")
120        })?;
121        let out = fmt.serialize(subtitle)?;
122        std::fs::write(file_path, out)?;
123        Ok(())
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::formats::SubtitleFormatType;
    use std::time::Duration;

    /// Minimal single-cue SRT document.
    const SAMPLE_SRT: &str = "1\n00:00:00,000 --> 00:00:01,000\nOne\n";
    /// Minimal single-cue WebVTT document.
    const SAMPLE_VTT: &str = "WEBVTT\n\n1\n00:00:00.000 --> 00:00:01.000\nOne\n";
    // The fixtures below intentionally contain Chinese text; it is part of the
    // test data and must be kept exactly as written.
    /// WebVTT document with three single-line cues.
    const SAMPLE_WEBVTT_THREE_LINES: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:03.000\n第一句字幕內容\n\n2\n00:00:04.000 --> 00:00:06.000\n第二句字幕內容\n\n3\n00:00:07.000 --> 00:00:09.000\n第三句字幕內容\n";
    /// WebVTT document with a NOTE block, a STYLE block and a multi-line cue.
    const COMPLEX_WEBVTT: &str = "WEBVTT\n\nNOTE 這是註解,應該被忽略\n\nSTYLE\n::cue {\n  background-color: black;\n  color: white;\n}\n\n1\n00:00:01.000 --> 00:00:03.500\n第一句字幕內容\n包含多行文字\n\n2\n00:00:04.200 --> 00:00:07.800\n第二句字幕內容\n\n3\n00:00:08.000 --> 00:00:10.000\n第三句字幕內容\n";

    /// Formats can be looked up both by name and by file extension.
    #[test]
    fn test_get_format_by_name_and_extension() {
        let manager = FormatManager::new();

        let by_name = manager.get_format("srt").expect("get_format srt");
        assert_eq!(by_name.format_name(), "SRT");

        let by_extension = manager
            .get_format_by_extension("vtt")
            .expect("get_format_by_extension vtt");
        assert_eq!(by_extension.format_name(), "VTT");
    }

    /// Auto-detection picks the right format and rejects unknown content.
    #[test]
    fn test_parse_auto_supported_and_error() {
        let manager = FormatManager::new();

        let from_srt = manager.parse_auto(SAMPLE_SRT).expect("parse_auto srt");
        assert_eq!(from_srt.format, SubtitleFormatType::Srt);

        let from_vtt = manager.parse_auto(SAMPLE_VTT).expect("parse_auto vtt");
        assert_eq!(from_vtt.format, SubtitleFormatType::Vtt);

        let unknown = manager.parse_auto("no format");
        assert!(unknown.is_err());
    }

    /// A three-cue WebVTT document is detected and parsed cue by cue.
    #[test]
    fn test_webvtt_parse_auto_first_subtitle_content() {
        let manager = FormatManager::new();
        let parsed = manager
            .parse_auto(SAMPLE_WEBVTT_THREE_LINES)
            .expect("Failed to parse WEBVTT format using parse_auto");

        // The content must be recognised as WebVTT.
        assert_eq!(
            parsed.format,
            SubtitleFormatType::Vtt,
            "Auto detection should identify as WEBVTT format"
        );

        // All three cues must be present.
        assert_eq!(
            parsed.entries.len(),
            3,
            "Should parse exactly 3 subtitle entries"
        );

        // Text, index and timing of the first cue.
        let head = &parsed.entries[0];
        assert_eq!(
            head.text, "第一句字幕內容",
            "First subtitle content should be correctly parsed"
        );
        assert_eq!(head.index, 1, "First subtitle should have index 1");
        assert_eq!(
            head.start_time,
            Duration::from_secs(1),
            "First subtitle start time should be 1 second"
        );
        assert_eq!(
            head.end_time,
            Duration::from_secs(3),
            "First subtitle end time should be 3 seconds"
        );

        // Text of the remaining cues.
        assert_eq!(parsed.entries[1].text, "第二句字幕內容");
        assert_eq!(parsed.entries[2].text, "第三句字幕內容");
    }

    /// NOTE and STYLE blocks are skipped and multi-line cue text is kept.
    #[test]
    fn test_webvtt_parse_auto_with_complex_content() {
        let manager = FormatManager::new();
        let parsed = manager
            .parse_auto(COMPLEX_WEBVTT)
            .expect("Failed to parse complex WEBVTT");

        // Only the three real cues count; NOTE and STYLE are not entries.
        assert_eq!(parsed.format, SubtitleFormatType::Vtt);
        assert_eq!(parsed.entries.len(), 3);

        // The first cue keeps both text lines and its exact timing.
        let head = &parsed.entries[0];
        assert_eq!(head.text, "第一句字幕內容\n包含多行文字");
        assert_eq!(head.start_time, Duration::from_secs(1));
        assert_eq!(head.end_time, Duration::from_millis(3500));
    }
}