1use futures::future::join_all;
16use std::path::Path;
17use std::sync::Arc;
18use tokio::sync::Semaphore;
19
20use crate::Result;
21use crate::core::formats::Subtitle;
22use crate::core::formats::manager::FormatManager;
23
24pub struct FormatConverter {
29 format_manager: FormatManager,
30 pub(crate) config: ConversionConfig,
31}
32
33impl Clone for FormatConverter {
34 fn clone(&self) -> Self {
35 FormatConverter::new(self.config.clone())
36 }
37}
38
/// Options that control how a subtitle conversion is performed.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so that
/// configurations can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConversionConfig {
    /// Styling-preservation switch.
    ///
    /// NOTE(review): not read by the file-level conversion code next to this
    /// definition; presumably consumed by the per-format transforms — confirm.
    pub preserve_styling: bool,
    /// Name of the desired output encoding (e.g. `"UTF-8"`).
    ///
    /// NOTE(review): the converter's write path currently emits the
    /// serialized text as-is and does not consult this value — confirm
    /// whether re-encoding is expected to happen elsewhere.
    pub target_encoding: String,
    /// Keep-the-source-file switch.
    ///
    /// NOTE(review): not read by the conversion code next to this
    /// definition; presumably honoured by the caller — confirm.
    pub keep_original: bool,
    /// When `true`, a finished conversion is checked by comparing the entry
    /// counts of the parsed input and the converted subtitle.
    pub validate_output: bool,
}
51
/// Report describing the outcome of converting a single subtitle file.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so reports
/// can be stored, duplicated and compared by callers and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConversionResult {
    /// `true` when the conversion is considered successful. With validation
    /// enabled this reflects whether the entry counts matched.
    pub success: bool,
    /// Format name of the parsed input subtitle.
    pub input_format: String,
    /// Format name of the produced output.
    pub output_format: String,
    /// Number of entries in the parsed input.
    pub original_entries: usize,
    /// Number of entries in the converted subtitle.
    pub converted_entries: usize,
    /// Non-fatal issues noticed during conversion.
    pub warnings: Vec<String>,
    /// Problems found by validation (e.g. an entry count mismatch).
    pub errors: Vec<String>,
}
73
74impl FormatConverter {
75 pub fn new(config: ConversionConfig) -> Self {
77 Self {
78 format_manager: FormatManager::new(),
79 config,
80 }
81 }
82
83 pub async fn convert_file(
85 &self,
86 input_path: &Path,
87 output_path: &Path,
88 target_format: &str,
89 ) -> crate::Result<ConversionResult> {
90 let input_content = self.read_file_with_encoding(input_path).await?;
92 let input_subtitle = self.format_manager.parse_auto(&input_content)?;
93
94 let converted_subtitle = self.transform_subtitle(input_subtitle.clone(), target_format)?;
96
97 let target_formatter = self
99 .format_manager
100 .get_format(target_format)
101 .ok_or_else(|| {
102 crate::error::SubXError::subtitle_format(
103 format!("Unsupported target format: {}", target_format),
104 "",
105 )
106 })?;
107
108 let output_content = target_formatter.serialize(&converted_subtitle)?;
109
110 self.write_file_with_encoding(output_path, &output_content)
112 .await?;
113
114 let result = if self.config.validate_output {
116 self.validate_conversion(&input_subtitle, &converted_subtitle)
117 .await?
118 } else {
119 ConversionResult {
120 success: true,
121 input_format: input_subtitle.format.to_string(),
122 output_format: target_format.to_string(),
123 original_entries: input_subtitle.entries.len(),
124 converted_entries: converted_subtitle.entries.len(),
125 warnings: Vec::new(),
126 errors: Vec::new(),
127 }
128 };
129 Ok(result)
130 }
131
132 pub async fn convert_batch(
134 &self,
135 input_dir: &Path,
136 target_format: &str,
137 recursive: bool,
138 ) -> crate::Result<Vec<ConversionResult>> {
139 let subtitle_files = self.discover_subtitle_files(input_dir, recursive).await?;
140 let semaphore = Arc::new(Semaphore::new(4));
141
142 let tasks = subtitle_files.into_iter().map(|file_path| {
143 let sem = semaphore.clone();
144 let converter = self.clone();
145 let format = target_format.to_string();
146 async move {
147 let _permit = sem.acquire().await.unwrap();
148 let output_path = file_path.with_extension(&format);
149 converter
150 .convert_file(&file_path, &output_path, &format)
151 .await
152 }
153 });
154
155 let results = join_all(tasks).await;
156 results.into_iter().collect::<Result<Vec<_>>>()
157 }
158 async fn discover_subtitle_files(
160 &self,
161 input_dir: &Path,
162 recursive: bool,
163 ) -> crate::Result<Vec<std::path::PathBuf>> {
164 let discovery = crate::core::matcher::discovery::FileDiscovery::new();
165 let media_files = discovery.scan_directory(input_dir, recursive)?;
166 let paths = media_files
167 .into_iter()
168 .filter(|f| {
169 matches!(
170 f.file_type,
171 crate::core::matcher::discovery::MediaFileType::Subtitle
172 )
173 })
174 .map(|f| f.path) .collect();
176 Ok(paths)
177 }
178
179 async fn read_file_with_encoding(&self, path: &Path) -> crate::Result<String> {
181 crate::core::fs_util::check_file_size(path, 52_428_800, "Subtitle")?;
182 let bytes = tokio::fs::read(path).await?;
183 let detector = crate::core::formats::encoding::EncodingDetector::with_defaults();
185 let info = detector.detect_encoding(&bytes)?;
186 let converter = crate::core::formats::encoding::EncodingConverter::new();
187 let conversion = converter.convert_to_utf8(&bytes, &info.charset)?;
188 Ok(conversion.converted_text)
189 }
190
191 async fn write_file_with_encoding(&self, path: &Path, content: &str) -> crate::Result<()> {
193 tokio::fs::write(path, content).await?;
194 Ok(())
195 }
196
197 async fn validate_conversion(
199 &self,
200 original: &Subtitle,
201 converted: &Subtitle,
202 ) -> crate::Result<ConversionResult> {
203 let success = original.entries.len() == converted.entries.len();
204 let errors = if success {
205 Vec::new()
206 } else {
207 vec![format!(
208 "Entry count mismatch: {} -> {}",
209 original.entries.len(),
210 converted.entries.len()
211 )]
212 };
213 Ok(ConversionResult {
214 success,
215 input_format: original.format.to_string(),
216 output_format: converted.format.to_string(),
217 original_entries: original.entries.len(),
218 converted_entries: converted.entries.len(),
219 warnings: Vec::new(),
220 errors,
221 })
222 }
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    const SAMPLE_SRT: &str = "1\n00:00:01,000 --> 00:00:02,500\nHello world\n\n2\n00:00:03,000 --> 00:00:04,000\nSecond line\n\n";
    const SAMPLE_VTT: &str = "WEBVTT\n\n1\n00:00:01.000 --> 00:00:02.500\nHello world\n\n2\n00:00:03.000 --> 00:00:04.000\nSecond line\n\n";
    const SAMPLE_ASS: &str = "[Script Info]\nScriptType: v4.00+\n\n[V4+ Styles]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n\n[Events]\nFormat: Layer,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text\nDialogue: 0,0:00:01.00,0:00:02.50,Default,,0000,0000,0000,,Hello world\nDialogue: 0,0:00:03.00,0:00:04.00,Default,,0000,0000,0000,,Second line\n";

    /// Configuration with every optional behaviour switched off.
    fn default_config() -> ConversionConfig {
        ConversionConfig {
            validate_output: false,
            keep_original: false,
            preserve_styling: false,
            target_encoding: String::from("UTF-8"),
        }
    }

    /// Same as `default_config`, but with output validation enabled.
    fn validating_config() -> ConversionConfig {
        let mut config = default_config();
        config.validate_output = true;
        config
    }

    /// Writes `content` to `name` inside `dir` and returns the full path.
    fn write_temp_file(dir: &TempDir, name: &str, content: &str) -> std::path::PathBuf {
        let target = dir.path().join(name);
        std::fs::write(&target, content).expect("write temp file");
        target
    }

    /// Outcome of one `convert_file` call made inside a scratch directory.
    struct Conversion {
        /// Held so the directory and its files outlive the assertions.
        _scratch: TempDir,
        /// Path the converter was asked to write to.
        output: std::path::PathBuf,
        /// Whatever `convert_file` returned.
        outcome: crate::Result<ConversionResult>,
    }

    /// Writes `content` as `input_name` into a new scratch directory and
    /// converts it to `target`, writing to `output_name` in the same directory.
    async fn convert_sample(
        config: ConversionConfig,
        input_name: &str,
        content: &str,
        output_name: &str,
        target: &str,
    ) -> Conversion {
        let scratch = TempDir::new().unwrap();
        let input = write_temp_file(&scratch, input_name, content);
        let output = scratch.path().join(output_name);
        let converter = FormatConverter::new(config);
        let outcome = converter.convert_file(&input, &output, target).await;
        Conversion {
            _scratch: scratch,
            output,
            outcome,
        }
    }

    /// Creates a scratch directory holding `top.srt` and `sub/nested.srt`.
    fn nested_fixture() -> TempDir {
        let root = TempDir::new().unwrap();
        let nested_dir = root.path().join("sub");
        std::fs::create_dir(&nested_dir).unwrap();
        write_temp_file(&root, "top.srt", SAMPLE_SRT);
        std::fs::write(nested_dir.join("nested.srt"), SAMPLE_SRT).unwrap();
        root
    }

    #[test]
    fn conversion_config_clone_and_debug() {
        let original = default_config();
        let copy = original.clone();
        assert_eq!(copy.target_encoding, "UTF-8");
        assert!(!copy.preserve_styling);
        assert!(!copy.keep_original);
        assert!(!copy.validate_output);
        let rendered = format!("{:?}", original);
        assert!(rendered.contains("ConversionConfig"));
    }

    #[test]
    fn conversion_result_debug() {
        let report = ConversionResult {
            success: true,
            input_format: "srt".to_string(),
            output_format: "ass".to_string(),
            original_entries: 2,
            converted_entries: 2,
            warnings: vec![],
            errors: vec![],
        };
        let rendered = format!("{:?}", report);
        assert!(rendered.contains("ConversionResult"));
    }

    #[test]
    fn format_converter_clone() {
        let copy = FormatConverter::new(default_config()).clone();
        assert!(!copy.config.preserve_styling);
    }

    #[tokio::test]
    async fn convert_file_srt_to_ass() {
        let run = convert_sample(default_config(), "test.srt", SAMPLE_SRT, "test.ass", "ass").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "srt");
        assert_eq!(report.output_format, "ass");
        assert_eq!(report.original_entries, 2);
        assert_eq!(report.converted_entries, 2);
        assert!(report.warnings.is_empty());
        assert!(report.errors.is_empty());
        assert!(run.output.exists());
    }

    #[tokio::test]
    async fn convert_file_ass_to_srt() {
        let run = convert_sample(default_config(), "test.ass", SAMPLE_ASS, "test.srt", "srt").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "ass");
        assert_eq!(report.output_format, "srt");
        assert_eq!(report.original_entries, 2);
        assert_eq!(report.converted_entries, 2);
        assert!(run.output.exists());
    }

    #[tokio::test]
    async fn convert_file_srt_to_vtt() {
        let run = convert_sample(default_config(), "test.srt", SAMPLE_SRT, "test.vtt", "vtt").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "srt");
        assert_eq!(report.output_format, "vtt");
        assert_eq!(report.converted_entries, 2);
        assert!(run.output.exists());
        let written = std::fs::read_to_string(&run.output).unwrap();
        assert!(written.contains("WEBVTT"));
    }

    #[tokio::test]
    async fn convert_file_vtt_to_srt() {
        let run = convert_sample(default_config(), "test.vtt", SAMPLE_VTT, "test.srt", "srt").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "vtt");
        assert_eq!(report.output_format, "srt");
        assert_eq!(report.converted_entries, 2);
        assert!(run.output.exists());
        let written = std::fs::read_to_string(&run.output).unwrap();
        assert!(written.contains("-->"));
    }

    #[tokio::test]
    async fn convert_file_ass_to_vtt() {
        let run = convert_sample(default_config(), "test.ass", SAMPLE_ASS, "test.vtt", "vtt").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "ass");
        assert_eq!(report.output_format, "vtt");
        assert_eq!(report.converted_entries, 2);
        assert!(run.output.exists());
    }

    #[tokio::test]
    async fn convert_file_vtt_to_ass() {
        let run = convert_sample(default_config(), "test.vtt", SAMPLE_VTT, "test.ass", "ass").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "vtt");
        assert_eq!(report.output_format, "ass");
        assert_eq!(report.converted_entries, 2);
        assert!(run.output.exists());
    }

    #[tokio::test]
    async fn convert_file_with_validate_output_success() {
        let run =
            convert_sample(validating_config(), "test.srt", SAMPLE_SRT, "test.ass", "ass").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert!(report.errors.is_empty());
        assert_eq!(report.original_entries, report.converted_entries);
    }

    #[tokio::test]
    async fn convert_file_vtt_to_srt_with_validation() {
        let run =
            convert_sample(validating_config(), "test.vtt", SAMPLE_VTT, "test.srt", "srt").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.original_entries, report.converted_entries);
    }

    #[tokio::test]
    async fn convert_file_missing_input_returns_error() {
        // The input path is deliberately never created.
        let scratch = TempDir::new().unwrap();
        let missing = scratch.path().join("nonexistent.srt");
        let output = scratch.path().join("out.ass");

        let converter = FormatConverter::new(default_config());
        let outcome = converter.convert_file(&missing, &output, "ass").await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn convert_file_unsupported_target_format_returns_error() {
        let run = convert_sample(default_config(), "test.srt", SAMPLE_SRT, "test.xyz", "xyz").await;
        assert!(run.outcome.is_err());

        let message = run.outcome.unwrap_err().to_string();
        let mentions_problem = message.to_lowercase().contains("unsupported")
            || message.contains("xyz")
            || message.contains("format");
        assert!(mentions_problem);
    }

    #[tokio::test]
    async fn convert_file_unrecognized_format_returns_error() {
        let run = convert_sample(
            default_config(),
            "test.srt",
            "this is not a subtitle file",
            "out.ass",
            "ass",
        )
        .await;
        assert!(run.outcome.is_err());
    }

    #[tokio::test]
    async fn convert_file_output_is_valid_ass() {
        let run = convert_sample(default_config(), "test.srt", SAMPLE_SRT, "test.ass", "ass").await;
        run.outcome.unwrap();

        let written = std::fs::read_to_string(&run.output).unwrap();
        assert!(written.contains("[Events]"));
        assert!(written.contains("Dialogue:"));
    }

    #[tokio::test]
    async fn convert_file_srt_to_srt_same_format() {
        let run = convert_sample(default_config(), "test.srt", SAMPLE_SRT, "out.srt", "srt").await;
        let report = run.outcome.unwrap();

        assert!(report.success);
        assert_eq!(report.input_format, "srt");
        assert_eq!(report.output_format, "srt");
        assert_eq!(report.original_entries, 2);
    }

    #[tokio::test]
    async fn convert_batch_empty_directory_returns_empty_vec() {
        let scratch = TempDir::new().unwrap();

        let converter = FormatConverter::new(default_config());
        let reports = converter
            .convert_batch(scratch.path(), "ass", false)
            .await
            .unwrap();
        assert!(reports.is_empty());
    }

    #[tokio::test]
    async fn convert_batch_converts_srt_files_to_ass() {
        let scratch = TempDir::new().unwrap();
        for name in ["a.srt", "b.srt"] {
            write_temp_file(&scratch, name, SAMPLE_SRT);
        }

        let converter = FormatConverter::new(default_config());
        let reports = converter
            .convert_batch(scratch.path(), "ass", false)
            .await
            .unwrap();

        assert_eq!(reports.len(), 2);
        for report in &reports {
            assert!(report.success);
            assert_eq!(report.output_format, "ass");
        }
    }

    #[tokio::test]
    async fn convert_batch_recursive_discovers_nested_files() {
        let flat_dir = nested_fixture();
        let converter = FormatConverter::new(default_config());

        // Non-recursive scan only sees the top-level file.
        let flat = converter
            .convert_batch(flat_dir.path(), "vtt", false)
            .await
            .unwrap();
        assert_eq!(flat.len(), 1);

        // A second, untouched directory is used for the recursive scan.
        let deep_dir = nested_fixture();
        let recursive = converter
            .convert_batch(deep_dir.path(), "vtt", true)
            .await
            .unwrap();
        assert_eq!(recursive.len(), 2);
    }

    #[tokio::test]
    async fn convert_batch_converts_vtt_files_to_srt() {
        let scratch = TempDir::new().unwrap();
        write_temp_file(&scratch, "a.vtt", SAMPLE_VTT);

        let converter = FormatConverter::new(default_config());
        let reports = converter
            .convert_batch(scratch.path(), "srt", false)
            .await
            .unwrap();

        assert_eq!(reports.len(), 1);
        let only = &reports[0];
        assert!(only.success);
        assert_eq!(only.output_format, "srt");
    }
}