ass_editor/formats/srt/
mod.rs

1//! SRT (SubRip) format support with style preservation.
2//!
3//! This module provides import/export functionality for SRT files,
4//! with comprehensive style preservation through ASS-style tags.
5
6use crate::core::{EditorDocument, EditorError};
7use crate::formats::{
8    Format, FormatExporter, FormatImporter, FormatInfo, FormatOptions, FormatResult,
9};
10use ass_core::parser::Script;
11use std::io::{Read, Write};
12
13/// SRT format handler with style preservation
14#[derive(Debug)]
15pub struct SrtFormat {
16    info: FormatInfo,
17}
18
19impl SrtFormat {
20    /// Create a new SRT format handler
21    pub fn new() -> Self {
22        Self {
23            info: FormatInfo {
24                name: "SRT".to_string(),
25                extensions: vec!["srt".to_string()],
26                mime_type: "text/srt".to_string(),
27                description: "SubRip subtitle format with style preservation".to_string(),
28                supports_styling: true,
29                supports_positioning: false,
30            },
31        }
32    }
33
34    /// Parse SRT timestamp (HH:MM:SS,mmm)
35    fn parse_srt_time(time_str: &str) -> Result<String, EditorError> {
36        let time_str = time_str.trim();
37
38        // Convert SRT time format (HH:MM:SS,mmm) to ASS format (H:MM:SS.cc)
39        if let Some(comma_pos) = time_str.find(',') {
40            let (time_part, ms_part) = time_str.split_at(comma_pos);
41            let ms_part = &ms_part[1..]; // Remove comma
42
43            // Parse milliseconds and convert to centiseconds
44            let ms: u32 = ms_part.parse().map_err(|_| {
45                EditorError::InvalidFormat(format!("Invalid milliseconds: {ms_part}"))
46            })?;
47            let cs = ms / 10; // Convert to centiseconds
48
49            // Remove leading zero from hours if present for ASS format
50            let time_part = if time_part.starts_with("0") && time_part.len() > 1 {
51                &time_part[1..]
52            } else {
53                time_part
54            };
55
56            Ok(format!("{time_part}.{cs:02}"))
57        } else {
58            Err(EditorError::InvalidFormat(format!(
59                "Invalid SRT time format: {time_str}"
60            )))
61        }
62    }
63
64    /// Convert ASS timestamp to SRT format
65    fn format_srt_time(ass_time: &str) -> Result<String, EditorError> {
66        let ass_time = ass_time.trim();
67
68        // Convert ASS time format (H:MM:SS.cc) to SRT format (HH:MM:SS,mmm)
69        if let Some(dot_pos) = ass_time.find('.') {
70            let (time_part, cs_part) = ass_time.split_at(dot_pos);
71            let cs_part = &cs_part[1..]; // Remove dot
72
73            // Parse centiseconds and convert to milliseconds
74            let cs: u32 = cs_part.parse().map_err(|_| {
75                EditorError::InvalidFormat(format!("Invalid centiseconds: {cs_part}"))
76            })?;
77            let ms = cs * 10; // Convert to milliseconds
78
79            // Ensure hours are zero-padded for SRT format
80            let parts: Vec<&str> = time_part.split(':').collect();
81            if parts.len() == 3 {
82                let hours: u32 = parts[0].parse().map_err(|_| {
83                    EditorError::InvalidFormat(format!("Invalid hours: {}", parts[0]))
84                })?;
85                Ok(format!("{hours:02}:{}:{},{ms:03}", parts[1], parts[2]))
86            } else {
87                Err(EditorError::InvalidFormat(format!(
88                    "Invalid ASS time format: {ass_time}"
89                )))
90            }
91        } else {
92            Err(EditorError::InvalidFormat(format!(
93                "Invalid ASS time format: {ass_time}"
94            )))
95        }
96    }
97
98    /// Convert SRT styling to ASS override tags
99    fn convert_srt_to_ass_styling(text: &str) -> String {
100        let mut result = text.to_string();
101
102        // Convert HTML-like tags to ASS override tags
103        result = result.replace("<b>", r"{\b1}");
104        result = result.replace("</b>", r"{\b0}");
105        result = result.replace("<i>", r"{\i1}");
106        result = result.replace("</i>", r"{\i0}");
107        result = result.replace("<u>", r"{\u1}");
108        result = result.replace("</u>", r"{\u0}");
109        result = result.replace("<s>", r"{\s1}");
110        result = result.replace("</s>", r"{\s0}");
111
112        #[cfg(feature = "formats")]
113        {
114            // Handle font color tags
115            let color_regex = regex::Regex::new(r#"<font color="?#?([0-9A-Fa-f]{6})"?>"#).unwrap();
116            result = color_regex.replace_all(&result, r"{\c&H$1&}").to_string();
117            result = result.replace("</font>", r"{\c}");
118
119            // Handle font face tags
120            let font_regex = regex::Regex::new(r#"<font face="([^"]+)">"#).unwrap();
121            result = font_regex.replace_all(&result, r"{\fn$1}").to_string();
122        }
123
124        result
125    }
126
127    /// Convert ASS override tags to SRT styling
128    fn convert_ass_to_srt_styling(text: &str) -> String {
129        let mut result = text.to_string();
130
131        // Convert ASS override tags to HTML-like tags
132        result = result.replace(r"{\b1}", "<b>");
133        result = result.replace(r"{\b0}", "</b>");
134        result = result.replace(r"{\i1}", "<i>");
135        result = result.replace(r"{\i0}", "</i>");
136        result = result.replace(r"{\u1}", "<u>");
137        result = result.replace(r"{\u0}", "</u>");
138        result = result.replace(r"{\s1}", "<s>");
139        result = result.replace(r"{\s0}", "</s>");
140
141        #[cfg(feature = "formats")]
142        {
143            // Handle color tags
144            let color_regex = regex::Regex::new(r"\\c&H([0-9A-Fa-f]{6})&").unwrap();
145            result = color_regex
146                .replace_all(&result, "<font color=\"#$1\">")
147                .to_string();
148            result = result.replace(r"{\c}", "</font>");
149
150            // Handle font name tags
151            let font_regex = regex::Regex::new(r"\\fn([^}]+)").unwrap();
152            result = font_regex
153                .replace_all(&result, "<font face=\"$1\">")
154                .to_string();
155
156            // Remove any remaining ASS tags
157            let cleanup_regex = regex::Regex::new(r"\{[^}]*\}").unwrap();
158            result = cleanup_regex.replace_all(&result, "").to_string();
159        }
160
161        result
162    }
163
164    /// Parse SRT subtitle entry
165    fn parse_srt_subtitle(
166        lines: &[String],
167        start_idx: usize,
168    ) -> Result<(usize, String), EditorError> {
169        if start_idx >= lines.len() {
170            return Err(EditorError::InvalidFormat(
171                "Unexpected end of file".to_string(),
172            ));
173        }
174
175        let mut idx = start_idx;
176
177        // Skip empty lines
178        while idx < lines.len() && lines[idx].trim().is_empty() {
179            idx += 1;
180        }
181
182        if idx >= lines.len() {
183            return Err(EditorError::InvalidFormat(
184                "Unexpected end of file".to_string(),
185            ));
186        }
187
188        // Parse subtitle number (optional validation)
189        let _subtitle_num = lines[idx].trim();
190        idx += 1;
191
192        if idx >= lines.len() {
193            return Err(EditorError::InvalidFormat(
194                "Missing timestamp line".to_string(),
195            ));
196        }
197
198        // Parse timestamp line
199        let timestamp_line = &lines[idx];
200        if !timestamp_line.contains("-->") {
201            return Err(EditorError::InvalidFormat(format!(
202                "Invalid timestamp line: {timestamp_line}"
203            )));
204        }
205
206        let parts: Vec<&str> = timestamp_line.split("-->").collect();
207        if parts.len() != 2 {
208            return Err(EditorError::InvalidFormat(format!(
209                "Invalid timestamp format: {timestamp_line}"
210            )));
211        }
212
213        let start_time = Self::parse_srt_time(parts[0])?;
214        let end_time = Self::parse_srt_time(parts[1])?;
215
216        idx += 1;
217
218        // Collect subtitle text lines
219        let mut text_lines = Vec::new();
220        while idx < lines.len() && !lines[idx].trim().is_empty() {
221            let styled_text = Self::convert_srt_to_ass_styling(&lines[idx]);
222            text_lines.push(styled_text);
223            idx += 1;
224        }
225
226        if text_lines.is_empty() {
227            return Err(EditorError::InvalidFormat(
228                "Empty subtitle text".to_string(),
229            ));
230        }
231
232        let text = text_lines.join("\\N"); // ASS line break
233        let dialogue_line = format!("Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{text}");
234
235        Ok((idx, dialogue_line))
236    }
237}
238
239impl Default for SrtFormat {
240    fn default() -> Self {
241        Self::new()
242    }
243}
244
245impl FormatImporter for SrtFormat {
246    fn format_info(&self) -> &FormatInfo {
247        &self.info
248    }
249
250    fn import_from_reader(
251        &self,
252        reader: &mut dyn Read,
253        options: &FormatOptions,
254    ) -> Result<(EditorDocument, FormatResult), EditorError> {
255        // Read the entire content
256        let mut content = String::new();
257        reader
258            .read_to_string(&mut content)
259            .map_err(|e| EditorError::IoError(format!("Failed to read SRT content: {e}")))?;
260
261        let lines: Vec<String> = content.lines().map(|s| s.to_string()).collect();
262        let mut warnings = Vec::new();
263        let mut dialogues = Vec::new();
264        let mut idx = 0;
265        let mut subtitle_count = 0;
266
267        // Parse all SRT subtitles
268        while idx < lines.len() {
269            match Self::parse_srt_subtitle(&lines, idx) {
270                Ok((next_idx, dialogue)) => {
271                    dialogues.push(dialogue);
272                    idx = next_idx;
273                    subtitle_count += 1;
274                }
275                Err(e) => {
276                    if idx < lines.len() {
277                        warnings.push(format!(
278                            "Skipping invalid subtitle at line {}: {e}",
279                            idx + 1
280                        ));
281                        idx += 1;
282                    } else {
283                        break;
284                    }
285                }
286            }
287        }
288
289        // Build ASS script content
290        let mut ass_content = String::new();
291
292        // Add script info section
293        ass_content.push_str("[Script Info]\n");
294        ass_content.push_str("Title: Converted from SRT\n");
295        ass_content.push_str("ScriptType: v4.00+\n");
296        ass_content.push_str("Collisions: Normal\n");
297        ass_content.push_str("PlayDepth: 0\n");
298        ass_content.push_str("Timer: 100.0000\n");
299        ass_content.push_str("Video Aspect Ratio: 0\n");
300        ass_content.push_str("Video Zoom: 6\n");
301        ass_content.push_str("Video Position: 0\n\n");
302
303        // Add styles section with basic default style
304        ass_content.push_str("[V4+ Styles]\n");
305        ass_content.push_str("Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n");
306        ass_content.push_str("Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,10,1\n\n");
307
308        // Add events section
309        ass_content.push_str("[Events]\n");
310        ass_content.push_str(
311            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
312        );
313
314        for dialogue in dialogues {
315            ass_content.push_str(&dialogue);
316            ass_content.push('\n');
317        }
318
319        // Validate the generated ASS content
320        let _script = Script::parse(&ass_content)?;
321
322        // Create EditorDocument
323        let document = EditorDocument::from_content(&ass_content)?;
324
325        // Create result with metadata
326        let mut result = FormatResult::success(subtitle_count)
327            .with_metadata("original_format".to_string(), "SRT".to_string())
328            .with_metadata("subtitles_count".to_string(), subtitle_count.to_string())
329            .with_metadata("encoding".to_string(), options.encoding.clone());
330
331        if !warnings.is_empty() {
332            result = result.with_warnings(warnings);
333        }
334
335        Ok((document, result))
336    }
337}
338
339impl FormatExporter for SrtFormat {
340    fn format_info(&self) -> &FormatInfo {
341        &self.info
342    }
343
344    fn export_to_writer(
345        &self,
346        document: &EditorDocument,
347        writer: &mut dyn Write,
348        options: &FormatOptions,
349    ) -> Result<FormatResult, EditorError> {
350        // Parse the ASS content to extract events
351        let events = document.parse_script_with(|script| {
352            // Find events section and collect owned data
353            if let Some(ass_core::parser::ast::Section::Events(events)) =
354                script.find_section(ass_core::parser::ast::SectionType::Events)
355            {
356                // Convert to owned data to avoid lifetime issues
357                events
358                    .iter()
359                    .map(|event| {
360                        (
361                            event.event_type,
362                            event.start.to_string(),
363                            event.end.to_string(),
364                            event.text.to_string(),
365                        )
366                    })
367                    .collect::<Vec<_>>()
368            } else {
369                Vec::new()
370            }
371        })?;
372
373        let mut srt_content = String::new();
374        let mut subtitle_num = 1;
375        let mut warnings = Vec::new();
376
377        for (event_type, start, end, text) in &events {
378            // Only export dialogue events
379            if event_type.as_str() != "Dialogue" {
380                continue;
381            }
382
383            // Parse start and end times
384            let start_time = match Self::format_srt_time(start) {
385                Ok(time) => time,
386                Err(e) => {
387                    warnings.push(format!(
388                        "Invalid start time for subtitle {subtitle_num}: {e}"
389                    ));
390                    continue;
391                }
392            };
393
394            let end_time = match Self::format_srt_time(end) {
395                Ok(time) => time,
396                Err(e) => {
397                    warnings.push(format!("Invalid end time for subtitle {subtitle_num}: {e}"));
398                    continue;
399                }
400            };
401
402            // Convert ASS text to SRT format
403            let mut text = text.clone();
404
405            // Convert ASS line breaks to actual line breaks
406            text = text.replace("\\N", "\n");
407            text = text.replace("\\n", "\n");
408
409            // Convert ASS styling to SRT styling
410            text = Self::convert_ass_to_srt_styling(&text);
411
412            // Write SRT subtitle entry
413            srt_content.push_str(&format!("{subtitle_num}\n"));
414            srt_content.push_str(&format!("{start_time} --> {end_time}\n"));
415            srt_content.push_str(&text);
416            srt_content.push_str("\n\n");
417
418            subtitle_num += 1;
419        }
420
421        // Write content with proper encoding
422        let bytes = if options.encoding.eq_ignore_ascii_case("UTF-8") {
423            srt_content.into_bytes()
424        } else {
425            warnings.push(format!(
426                "Encoding '{}' not supported, using UTF-8 instead",
427                options.encoding
428            ));
429            srt_content.into_bytes()
430        };
431
432        writer
433            .write_all(&bytes)
434            .map_err(|e| EditorError::IoError(format!("Failed to write SRT content: {e}")))?;
435
436        let mut result = FormatResult::success(subtitle_num - 1)
437            .with_metadata("exported_format".to_string(), "SRT".to_string())
438            .with_metadata(
439                "subtitles_exported".to_string(),
440                (subtitle_num - 1).to_string(),
441            );
442
443        if !warnings.is_empty() {
444            result = result.with_warnings(warnings);
445        }
446
447        Ok(result)
448    }
449}
450
451impl Format for SrtFormat {
452    fn as_importer(&self) -> &dyn FormatImporter {
453        self
454    }
455
456    fn as_exporter(&self) -> &dyn FormatExporter {
457        self
458    }
459}
460
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;
    #[cfg(not(feature = "std"))]
    use alloc::{format, string::String, vec};

    /// Three entries: inline bold/italic, underline plus a coloured font tag,
    /// and a three-line cue.
    const SAMPLE_SRT: &str = "1\n00:00:00,000 --> 00:00:05,000\n<b>Hello</b> <i>World</i>!\n\n2\n00:00:06,000 --> 00:00:10,000\nThis is a <u>subtitle</u> with <font color=\"#FF0000\">red text</font>.\n\n3\n00:00:12,500 --> 00:00:15,750\nMultiple\nlines\nhere\n\n";

    /// The descriptor advertises SRT with styling but without positioning.
    #[test]
    fn test_srt_format_creation() {
        let format = SrtFormat::new();
        let info = FormatImporter::format_info(&format);
        assert_eq!(info.name, "SRT");
        assert!(info.supports_styling);
        assert!(!info.supports_positioning);
        assert!(format.can_import("srt"));
        assert!(format.can_export("srt"));
    }

    /// SRT -> ASS timestamps: hours unpadded, milliseconds truncated.
    #[test]
    fn test_parse_srt_time() {
        let cases = [
            ("00:01:23,456", "0:01:23.45"),
            ("01:00:00,000", "1:00:00.00"),
            ("10:30:45,123", "10:30:45.12"),
        ];
        for (srt, ass) in cases.iter() {
            assert_eq!(SrtFormat::parse_srt_time(srt).unwrap(), *ass);
        }

        assert!(SrtFormat::parse_srt_time("invalid").is_err());
        assert!(SrtFormat::parse_srt_time("00:01:23").is_err());
    }

    /// ASS -> SRT timestamps: hours padded, centiseconds widened to ms.
    #[test]
    fn test_format_srt_time() {
        let cases = [
            ("0:01:23.45", "00:01:23,450"),
            ("1:00:00.00", "01:00:00,000"),
            ("10:30:45.12", "10:30:45,120"),
        ];
        for (ass, srt) in cases.iter() {
            assert_eq!(SrtFormat::format_srt_time(ass).unwrap(), *srt);
        }

        assert!(SrtFormat::format_srt_time("invalid").is_err());
        assert!(SrtFormat::format_srt_time("00:01:23").is_err());
    }

    #[test]
    fn test_convert_srt_to_ass_styling() {
        assert_eq!(
            SrtFormat::convert_srt_to_ass_styling("<b>Bold</b> text"),
            r"{\b1}Bold{\b0} text"
        );
        assert_eq!(
            SrtFormat::convert_srt_to_ass_styling("<i>Italic</i> and <u>underlined</u>"),
            r"{\i1}Italic{\i0} and {\u1}underlined{\u0}"
        );

        // Font tags are only converted when the regex-backed code path is
        // compiled in, so the expectation is gated on the same feature.
        #[cfg(feature = "formats")]
        {
            assert_eq!(
                SrtFormat::convert_srt_to_ass_styling("<font color=\"#FF0000\">Red text</font>"),
                r"{\c&HFF0000&}Red text{\c}"
            );
        }
    }

    #[test]
    fn test_convert_ass_to_srt_styling() {
        assert_eq!(
            SrtFormat::convert_ass_to_srt_styling(r"{\b1}Bold{\b0} text"),
            "<b>Bold</b> text"
        );
        assert_eq!(
            SrtFormat::convert_ass_to_srt_styling(r"{\i1}Italic{\i0} and {\u1}underlined{\u0}"),
            "<i>Italic</i> and <u>underlined</u>"
        );
    }

    #[test]
    fn test_srt_import_from_string() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        let (document, format_result) = format
            .import_from_string(SAMPLE_SRT, &options)
            .expect("sample SRT should import");

        assert!(format_result.success);
        assert_eq!(format_result.lines_processed, 3); // 3 subtitles

        let text = document.text();
        for needle in ["Hello", "World", r"{\b1}", r"{\i1}"].iter() {
            assert!(text.contains(needle), "missing {needle:?} in imported script");
        }
    }

    #[test]
    fn test_srt_export_to_string() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        // First import
        let (document, _) = format.import_from_string(SAMPLE_SRT, &options).unwrap();

        // Then export
        let (exported_content, format_result) = format
            .export_to_string(&document, &options)
            .expect("imported document should export");

        assert!(format_result.success);
        assert!(exported_content.contains("Hello"));
        assert!(exported_content.contains("<b>"));
        assert!(exported_content.contains("<i>"));
        assert!(exported_content.contains("00:00:00,000 --> 00:00:05,000"));
    }

    #[test]
    fn test_srt_roundtrip_basic() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        let simple_srt = "1\n00:00:01,000 --> 00:00:03,000\nHello World\n\n";

        // Import -> Export
        let (document1, _) = format.import_from_string(simple_srt, &options).unwrap();
        let (exported_content, _) = format.export_to_string(&document1, &options).unwrap();

        // Verify basic structure is preserved
        assert!(exported_content.contains("Hello World"));
        assert!(exported_content.contains("00:00:01,000 --> 00:00:03,000"));
    }

    #[test]
    fn test_srt_style_preservation() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        let styled_srt =
            "1\n00:00:00,000 --> 00:00:02,000\n<b>Bold</b> and <i>italic</i> text\n\n";

        let (document, _) = format.import_from_string(styled_srt, &options).unwrap();
        let (exported_content, _) = format.export_to_string(&document, &options).unwrap();

        // Verify styles are preserved
        assert!(exported_content.contains("<b>Bold</b>"));
        assert!(exported_content.contains("<i>italic</i>"));
    }

    #[test]
    fn test_srt_multiline_handling() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        let multiline_srt =
            "1\n00:00:00,000 --> 00:00:02,000\nLine one\nLine two\nLine three\n\n";

        let (document, _) = format.import_from_string(multiline_srt, &options).unwrap();
        let (exported_content, _) = format.export_to_string(&document, &options).unwrap();

        // Verify multiline content is preserved
        for line in ["Line one", "Line two", "Line three"].iter() {
            assert!(exported_content.contains(line), "missing {line:?} in export");
        }
    }

    /// Garbage input must not panic. If the import succeeds, the unparseable
    /// content has to show up as a warning; a returned error is also accepted.
    #[test]
    fn test_srt_error_handling() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        let invalid_srt = "Invalid SRT content";
        let result = format.import_from_string(invalid_srt, &options);

        // Should handle gracefully and return warnings
        if let Ok((_, format_result)) = result {
            assert!(!format_result.warnings.is_empty());
        }
    }

    #[test]
    fn test_srt_metadata_extraction() {
        let format = SrtFormat::new();
        let options = FormatOptions::default();

        let (_, format_result) = format.import_from_string(SAMPLE_SRT, &options).unwrap();

        let expected = [
            ("original_format", "SRT"),
            ("subtitles_count", "3"),
            ("encoding", "UTF-8"),
        ];
        for (key, value) in expected.iter() {
            assert_eq!(
                format_result.metadata.get(*key),
                Some(&value.to_string()),
                "metadata key {key:?}"
            );
        }
    }
}