Skip to main content

subx_cli/core/formats/
converter.rs

//! Subtitle format conversion engine.
//!
//! This module provides the `FormatConverter`, which performs
//! format conversions between different subtitle formats,
//! supporting concurrent processing and task coordination.
//!
//! # Examples
//!
//! ```rust,ignore
//! use subx_cli::core::formats::converter::{ConversionConfig, FormatConverter};
//!
//! // Build a converter from an explicit configuration.
//! let converter = FormatConverter::new(ConversionConfig {
//!     preserve_styling: false,
//!     target_encoding: "UTF-8".to_string(),
//!     keep_original: true,
//!     validate_output: true,
//! });
//! ```
14
15use futures::future::join_all;
16use std::path::Path;
17use std::sync::Arc;
18use tokio::sync::Semaphore;
19
20use crate::Result;
21use crate::core::formats::Subtitle;
22use crate::core::formats::manager::FormatManager;
23
24/// Subtitle format converter for handling conversion tasks.
25///
26/// The `FormatConverter` coordinates conversion requests across
27/// multiple subtitle formats, managing concurrency and task scheduling.
28pub struct FormatConverter {
29    format_manager: FormatManager,
30    pub(crate) config: ConversionConfig,
31}
32
33impl Clone for FormatConverter {
34    fn clone(&self) -> Self {
35        FormatConverter::new(self.config.clone())
36    }
37}
38
/// Options controlling how a [`FormatConverter`] performs a conversion.
///
/// The type is cheap to clone and supports equality comparison so that
/// callers and tests can check which configuration a converter was built with.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConversionConfig {
    /// Whether to preserve styling information during conversion.
    pub preserve_styling: bool,
    /// Target character encoding for the output file (for example `"UTF-8"`).
    pub target_encoding: String,
    /// Whether to keep the original file after conversion.
    pub keep_original: bool,
    /// Whether to validate the output after conversion.
    pub validate_output: bool,
}
51
/// Outcome of a single subtitle format conversion.
///
/// Records whether the conversion succeeded, the formats involved, how many
/// entries were present before and after, and any issues that were reported.
/// The type can be cloned and compared for equality, which makes it easy to
/// store, aggregate, and assert on batch results.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConversionResult {
    /// Whether the conversion completed successfully.
    pub success: bool,
    /// Input subtitle format (e.g., "srt", "ass").
    pub input_format: String,
    /// Output subtitle format (e.g., "srt", "ass").
    pub output_format: String,
    /// Number of subtitle entries in the original file.
    pub original_entries: usize,
    /// Number of subtitle entries successfully converted.
    pub converted_entries: usize,
    /// Non-fatal warnings encountered during conversion.
    pub warnings: Vec<String>,
    /// Errors encountered during conversion.
    pub errors: Vec<String>,
}
73
74impl FormatConverter {
75    /// Create new converter
76    pub fn new(config: ConversionConfig) -> Self {
77        Self {
78            format_manager: FormatManager::new(),
79            config,
80        }
81    }
82
83    /// Convert single file
84    pub async fn convert_file(
85        &self,
86        input_path: &Path,
87        output_path: &Path,
88        target_format: &str,
89    ) -> crate::Result<ConversionResult> {
90        // 1. Read and parse input file
91        let input_content = self.read_file_with_encoding(input_path).await?;
92        let input_subtitle = self.format_manager.parse_auto(&input_content)?;
93
94        // 2. Execute format conversion
95        let converted_subtitle = self.transform_subtitle(input_subtitle.clone(), target_format)?;
96
97        // 3. Serialize to target format
98        let target_formatter = self
99            .format_manager
100            .get_format(target_format)
101            .ok_or_else(|| {
102                crate::error::SubXError::subtitle_format(
103                    format!("Unsupported target format: {}", target_format),
104                    "",
105                )
106            })?;
107
108        let output_content = target_formatter.serialize(&converted_subtitle)?;
109
110        // 4. Write file
111        self.write_file_with_encoding(output_path, &output_content)
112            .await?;
113
114        // 5. Validate conversion result
115        let result = if self.config.validate_output {
116            self.validate_conversion(&input_subtitle, &converted_subtitle)
117                .await?
118        } else {
119            ConversionResult {
120                success: true,
121                input_format: input_subtitle.format.to_string(),
122                output_format: target_format.to_string(),
123                original_entries: input_subtitle.entries.len(),
124                converted_entries: converted_subtitle.entries.len(),
125                warnings: Vec::new(),
126                errors: Vec::new(),
127            }
128        };
129        Ok(result)
130    }
131
132    /// Batch convert files
133    pub async fn convert_batch(
134        &self,
135        input_dir: &Path,
136        target_format: &str,
137        recursive: bool,
138    ) -> crate::Result<Vec<ConversionResult>> {
139        let subtitle_files = self.discover_subtitle_files(input_dir, recursive).await?;
140        let semaphore = Arc::new(Semaphore::new(4));
141
142        let tasks = subtitle_files.into_iter().map(|file_path| {
143            let sem = semaphore.clone();
144            let converter = self.clone();
145            let format = target_format.to_string();
146            async move {
147                let _permit = sem.acquire().await.unwrap();
148                let output_path = file_path.with_extension(&format);
149                converter
150                    .convert_file(&file_path, &output_path, &format)
151                    .await
152            }
153        });
154
155        let results = join_all(tasks).await;
156        results.into_iter().collect::<Result<Vec<_>>>()
157    }
158    /// Discover subtitle files in directory
159    async fn discover_subtitle_files(
160        &self,
161        input_dir: &Path,
162        recursive: bool,
163    ) -> crate::Result<Vec<std::path::PathBuf>> {
164        let discovery = crate::core::matcher::discovery::FileDiscovery::new();
165        let media_files = discovery.scan_directory(input_dir, recursive)?;
166        let paths = media_files
167            .into_iter()
168            .filter(|f| {
169                matches!(
170                    f.file_type,
171                    crate::core::matcher::discovery::MediaFileType::Subtitle
172                )
173            })
174            .map(|f| f.path) // Use path field, behavior unchanged
175            .collect();
176        Ok(paths)
177    }
178
179    /// Read file and convert to UTF-8 string
180    async fn read_file_with_encoding(&self, path: &Path) -> crate::Result<String> {
181        crate::core::fs_util::check_file_size(path, 52_428_800, "Subtitle")?;
182        let bytes = tokio::fs::read(path).await?;
183        // Auto-detect encoding and convert to UTF-8
184        let detector = crate::core::formats::encoding::EncodingDetector::with_defaults();
185        let info = detector.detect_encoding(&bytes)?;
186        let converter = crate::core::formats::encoding::EncodingConverter::new();
187        let conversion = converter.convert_to_utf8(&bytes, &info.charset)?;
188        Ok(conversion.converted_text)
189    }
190
191    /// Write file (temporarily using UTF-8 encoding)
192    async fn write_file_with_encoding(&self, path: &Path, content: &str) -> crate::Result<()> {
193        tokio::fs::write(path, content).await?;
194        Ok(())
195    }
196
197    /// Simple conversion quality validation
198    async fn validate_conversion(
199        &self,
200        original: &Subtitle,
201        converted: &Subtitle,
202    ) -> crate::Result<ConversionResult> {
203        let success = original.entries.len() == converted.entries.len();
204        let errors = if success {
205            Vec::new()
206        } else {
207            vec![format!(
208                "Entry count mismatch: {} -> {}",
209                original.entries.len(),
210                converted.entries.len()
211            )]
212        };
213        Ok(ConversionResult {
214            success,
215            input_format: original.format.to_string(),
216            output_format: converted.format.to_string(),
217            original_entries: original.entries.len(),
218            converted_entries: converted.entries.len(),
219            warnings: Vec::new(),
220            errors,
221        })
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::TempDir;

    // ── Fixtures ────────────────────────────────────────────────────────────

    /// Two-cue SubRip fixture.
    const SAMPLE_SRT: &str = "1\n00:00:01,000 --> 00:00:02,500\nHello world\n\n2\n00:00:03,000 --> 00:00:04,000\nSecond line\n\n";
    /// Two-cue WebVTT fixture.
    const SAMPLE_VTT: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:02.500\nHello world\n\n2\n00:00:03.000 --> 00:00:04.000\nSecond line\n\n";
    /// Two-dialogue ASS fixture.
    const SAMPLE_ASS: &str = "[Script Info]\nScriptType: v4.00+\n\n[V4+ Styles]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n[Events]\nFormat: Layer,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text\nDialogue: 0,0:00:01.00,0:00:02.50,Default,,0000,0000,0000,,Hello world\nDialogue: 0,0:00:03.00,0:00:04.00,Default,,0000,0000,0000,,Second line\n";

    // ── Helpers ─────────────────────────────────────────────────────────────

    /// Configuration with every option switched off and UTF-8 output.
    fn default_config() -> ConversionConfig {
        ConversionConfig {
            preserve_styling: false,
            target_encoding: "UTF-8".to_string(),
            keep_original: false,
            validate_output: false,
        }
    }

    /// Same as [`default_config`] but with output validation enabled.
    fn validating_config() -> ConversionConfig {
        let mut cfg = default_config();
        cfg.validate_output = true;
        cfg
    }

    /// Writes `content` to `name` inside `dir` and returns the file's path.
    fn write_temp_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
        let target = dir.path().join(name);
        std::fs::write(&target, content).expect("write temp file");
        target
    }

    /// Writes `content` as `input_name` in `dir`, converts it to
    /// `target_format` at `output_name`, and returns the output path together
    /// with whatever `convert_file` returned.
    async fn convert_fixture(
        dir: &TempDir,
        config: ConversionConfig,
        input_name: &str,
        content: &str,
        output_name: &str,
        target_format: &str,
    ) -> (PathBuf, crate::Result<ConversionResult>) {
        let input = write_temp_file(dir, input_name, content);
        let output = dir.path().join(output_name);
        let converter = FormatConverter::new(config);
        let outcome = converter
            .convert_file(&input, &output, target_format)
            .await;
        (output, outcome)
    }

    /// Asserts that `result` succeeded and reports the given format pair.
    fn assert_formats(result: &ConversionResult, input_format: &str, output_format: &str) {
        assert!(result.success);
        assert_eq!(result.input_format, input_format);
        assert_eq!(result.output_format, output_format);
    }

    /// Creates `top.srt` in `root` and `sub/nested.srt` beneath it.
    fn seed_nested_tree(root: &TempDir) {
        let nested = root.path().join("sub");
        std::fs::create_dir(&nested).unwrap();
        write_temp_file(root, "top.srt", SAMPLE_SRT);
        std::fs::write(nested.join("nested.srt"), SAMPLE_SRT).unwrap();
    }

    // ── ConversionConfig, ConversionResult, Clone ───────────────────────────

    #[test]
    fn conversion_config_clone_and_debug() {
        let original = default_config();
        let copy = original.clone();
        assert_eq!(copy.target_encoding, "UTF-8");
        assert!(!copy.preserve_styling);
        assert!(!copy.keep_original);
        assert!(!copy.validate_output);
        let rendered = format!("{:?}", original);
        assert!(rendered.contains("ConversionConfig"));
    }

    #[test]
    fn conversion_result_debug() {
        let sample = ConversionResult {
            success: true,
            input_format: "srt".to_string(),
            output_format: "ass".to_string(),
            original_entries: 2,
            converted_entries: 2,
            warnings: vec![],
            errors: vec![],
        };
        let rendered = format!("{:?}", sample);
        assert!(rendered.contains("ConversionResult"));
    }

    #[test]
    fn format_converter_clone() {
        let converter = FormatConverter::new(default_config());
        let copy = converter.clone();
        assert!(!copy.config.preserve_styling);
    }

    // ── convert_file: format pairs ──────────────────────────────────────────

    #[tokio::test]
    async fn convert_file_srt_to_ass() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.srt", SAMPLE_SRT, "test.ass", "ass")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "srt", "ass");
        assert_eq!(result.original_entries, 2);
        assert_eq!(result.converted_entries, 2);
        assert!(result.warnings.is_empty());
        assert!(result.errors.is_empty());
        assert!(output.exists());
    }

    #[tokio::test]
    async fn convert_file_ass_to_srt() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.ass", SAMPLE_ASS, "test.srt", "srt")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "ass", "srt");
        assert_eq!(result.original_entries, 2);
        assert_eq!(result.converted_entries, 2);
        assert!(output.exists());
    }

    #[tokio::test]
    async fn convert_file_srt_to_vtt() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.srt", SAMPLE_SRT, "test.vtt", "vtt")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "srt", "vtt");
        assert_eq!(result.converted_entries, 2);
        assert!(output.exists());
        let written = std::fs::read_to_string(&output).unwrap();
        assert!(written.contains("WEBVTT"));
    }

    #[tokio::test]
    async fn convert_file_vtt_to_srt() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.vtt", SAMPLE_VTT, "test.srt", "srt")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "vtt", "srt");
        assert_eq!(result.converted_entries, 2);
        assert!(output.exists());
        let written = std::fs::read_to_string(&output).unwrap();
        assert!(written.contains("-->"));
    }

    #[tokio::test]
    async fn convert_file_ass_to_vtt() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.ass", SAMPLE_ASS, "test.vtt", "vtt")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "ass", "vtt");
        assert_eq!(result.converted_entries, 2);
        assert!(output.exists());
    }

    #[tokio::test]
    async fn convert_file_vtt_to_ass() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.vtt", SAMPLE_VTT, "test.ass", "ass")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "vtt", "ass");
        assert_eq!(result.converted_entries, 2);
        assert!(output.exists());
    }

    // ── convert_file: validate_output = true ────────────────────────────────

    #[tokio::test]
    async fn convert_file_with_validate_output_success() {
        let dir = TempDir::new().unwrap();
        let (_output, outcome) = convert_fixture(
            &dir,
            validating_config(),
            "test.srt",
            SAMPLE_SRT,
            "test.ass",
            "ass",
        )
        .await;
        let result = outcome.unwrap();

        assert!(result.success);
        assert!(result.errors.is_empty());
        assert_eq!(result.original_entries, result.converted_entries);
    }

    #[tokio::test]
    async fn convert_file_vtt_to_srt_with_validation() {
        let dir = TempDir::new().unwrap();
        let (_output, outcome) = convert_fixture(
            &dir,
            validating_config(),
            "test.vtt",
            SAMPLE_VTT,
            "test.srt",
            "srt",
        )
        .await;
        let result = outcome.unwrap();

        assert!(result.success);
        assert_eq!(result.original_entries, result.converted_entries);
    }

    // ── convert_file: error paths ───────────────────────────────────────────

    #[tokio::test]
    async fn convert_file_missing_input_returns_error() {
        let dir = TempDir::new().unwrap();
        // The input is deliberately never created.
        let missing = dir.path().join("nonexistent.srt");
        let output = dir.path().join("out.ass");

        let converter = FormatConverter::new(default_config());
        let outcome = converter.convert_file(&missing, &output, "ass").await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn convert_file_unsupported_target_format_returns_error() {
        let dir = TempDir::new().unwrap();
        let (_output, outcome) =
            convert_fixture(&dir, default_config(), "test.srt", SAMPLE_SRT, "test.xyz", "xyz")
                .await;

        assert!(outcome.is_err());
        let msg = format!("{}", outcome.unwrap_err());
        assert!(
            msg.to_lowercase().contains("unsupported")
                || msg.contains("xyz")
                || msg.contains("format")
        );
    }

    #[tokio::test]
    async fn convert_file_unrecognized_format_returns_error() {
        let dir = TempDir::new().unwrap();
        let (_output, outcome) = convert_fixture(
            &dir,
            default_config(),
            "test.srt",
            "this is not a subtitle file",
            "out.ass",
            "ass",
        )
        .await;
        assert!(outcome.is_err());
    }

    // ── convert_file: output content and same-format conversion ─────────────

    #[tokio::test]
    async fn convert_file_output_is_valid_ass() {
        let dir = TempDir::new().unwrap();
        let (output, outcome) =
            convert_fixture(&dir, default_config(), "test.srt", SAMPLE_SRT, "test.ass", "ass")
                .await;
        outcome.unwrap();

        let written = std::fs::read_to_string(&output).unwrap();
        assert!(written.contains("[Events]"));
        assert!(written.contains("Dialogue:"));
    }

    #[tokio::test]
    async fn convert_file_srt_to_srt_same_format() {
        let dir = TempDir::new().unwrap();
        let (_output, outcome) =
            convert_fixture(&dir, default_config(), "test.srt", SAMPLE_SRT, "out.srt", "srt")
                .await;
        let result = outcome.unwrap();

        assert_formats(&result, "srt", "srt");
        assert_eq!(result.original_entries, 2);
    }

    // ── convert_batch ───────────────────────────────────────────────────────

    #[tokio::test]
    async fn convert_batch_empty_directory_returns_empty_vec() {
        let dir = TempDir::new().unwrap();

        let converter = FormatConverter::new(default_config());
        let results = converter
            .convert_batch(dir.path(), "ass", false)
            .await
            .unwrap();
        assert!(results.is_empty());
    }

    #[tokio::test]
    async fn convert_batch_converts_srt_files_to_ass() {
        let dir = TempDir::new().unwrap();
        for name in ["a.srt", "b.srt"] {
            write_temp_file(&dir, name, SAMPLE_SRT);
        }

        let converter = FormatConverter::new(default_config());
        let results = converter
            .convert_batch(dir.path(), "ass", false)
            .await
            .unwrap();

        assert_eq!(results.len(), 2);
        for item in &results {
            assert!(item.success);
            assert_eq!(item.output_format, "ass");
        }
    }

    #[tokio::test]
    async fn convert_batch_recursive_discovers_nested_files() {
        let flat_root = TempDir::new().unwrap();
        seed_nested_tree(&flat_root);

        let converter = FormatConverter::new(default_config());

        // Non-recursive: only the top-level file is converted.
        let flat = converter
            .convert_batch(flat_root.path(), "vtt", false)
            .await
            .unwrap();
        assert_eq!(flat.len(), 1);

        // Recursive: a fresh tree is used so the first run's outputs do not
        // interfere, and the nested file is picked up as well.
        let deep_root = TempDir::new().unwrap();
        seed_nested_tree(&deep_root);
        let recursive = converter
            .convert_batch(deep_root.path(), "vtt", true)
            .await
            .unwrap();
        assert_eq!(recursive.len(), 2);
    }

    #[tokio::test]
    async fn convert_batch_converts_vtt_files_to_srt() {
        let dir = TempDir::new().unwrap();
        write_temp_file(&dir, "a.vtt", SAMPLE_VTT);

        let converter = FormatConverter::new(default_config());
        let results = converter
            .convert_batch(dir.path(), "srt", false)
            .await
            .unwrap();

        assert_eq!(results.len(), 1);
        assert!(results[0].success);
        assert_eq!(results[0].output_format, "srt");
    }
}