Skip to main content

ff_format/subtitle/
mod.rs

1//! Subtitle format parser — SRT, ASS/SSA, `WebVTT`.
2//!
3//! Provides pure-Rust parsing for the three most common text subtitle formats.
4//! Malformed events are skipped with a `log::warn!`; a file with zero valid
5//! events returns [`SubtitleError::NoEvents`].
6//!
7//! # Example
8//!
9//! ```
10//! use ff_format::subtitle::{SubtitleTrack, SubtitleError};
11//!
12//! let srt = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n\n";
13//! let track = SubtitleTrack::from_srt(srt).unwrap();
14//! assert_eq!(track.events.len(), 1);
15//! assert_eq!(track.events[0].text, "Hello world");
16//! ```
17
18use std::collections::HashMap;
19use std::path::Path;
20use std::time::Duration;
21
22use thiserror::Error;
23
24/// Error type for subtitle parsing operations.
25#[derive(Debug, Error)]
26pub enum SubtitleError {
27    /// I/O error reading a subtitle file.
28    #[error("io error: {0}")]
29    Io(#[from] std::io::Error),
30
31    /// File extension is not a recognized subtitle format.
32    #[error("unsupported subtitle format: {extension}")]
33    UnsupportedFormat {
34        /// The unrecognized file extension.
35        extension: String,
36    },
37
38    /// A structural parse error prevents processing the file.
39    #[error("parse error at line {line}: {reason}")]
40    ParseError {
41        /// 1-based line number where the error was detected.
42        line: usize,
43        /// Human-readable description of the problem.
44        reason: String,
45    },
46
47    /// The input contained no valid subtitle events.
48    #[error("no valid subtitle events found")]
49    NoEvents,
50}
51
/// One timed subtitle cue.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubtitleEvent {
    /// 0-based position of this event within its track.
    pub index: usize,
    /// Time at which the cue becomes visible.
    pub start: Duration,
    /// Time at which the cue disappears.
    pub end: Duration,
    /// Cue text with style/override tags removed.
    pub text: String,
    /// Cue text exactly as it appeared in the source, tags included.
    pub raw: String,
    /// Extra per-event fields (e.g. ASS `Actor`, `Style`).
    pub metadata: HashMap<String, String>,
}
68
69/// A parsed subtitle track containing ordered events.
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct SubtitleTrack {
72    /// Ordered list of subtitle events.
73    pub events: Vec<SubtitleEvent>,
74    /// BCP-47 language tag when available (e.g. `"en"`, `"ja"`).
75    pub language: Option<String>,
76}
77
78impl SubtitleTrack {
79    /// Parse a `SubRip` (`.srt`) subtitle string.
80    ///
81    /// Supports multi-line cues and HTML-style tags (`<i>`, `<b>`, `<u>`).
82    /// Malformed blocks are skipped with `log::warn!`.
83    ///
84    /// # Errors
85    ///
86    /// Returns [`SubtitleError::NoEvents`] when no valid events are found.
87    pub fn from_srt(input: &str) -> Result<Self, SubtitleError> {
88        parse_srt(input)
89    }
90
91    /// Parse an ASS/SSA subtitle string.
92    ///
93    /// Reads the `[Events]` section only. Override tags (`{...}`) are
94    /// preserved in [`SubtitleEvent::raw`] and stripped for
95    /// [`SubtitleEvent::text`]. Malformed `Dialogue:` lines are skipped.
96    ///
97    /// # Errors
98    ///
99    /// Returns [`SubtitleError::NoEvents`] when no valid events are found.
100    pub fn from_ass(input: &str) -> Result<Self, SubtitleError> {
101        parse_ass(input)
102    }
103
104    /// Parse a `WebVTT` (`.vtt`) subtitle string.
105    ///
106    /// Cue identifiers are optional. Voice span tags (`<v Speaker>`) and
107    /// other HTML tags are stripped for [`SubtitleEvent::text`]. Malformed
108    /// cues are skipped with `log::warn!`.
109    ///
110    /// # Errors
111    ///
112    /// Returns [`SubtitleError::ParseError`] when the `WEBVTT` header is
113    /// missing, or [`SubtitleError::NoEvents`] when no valid cues are found.
114    pub fn from_vtt(input: &str) -> Result<Self, SubtitleError> {
115        parse_vtt(input)
116    }
117
118    /// Serialize this track to a `SubRip` (`.srt`) string.
119    ///
120    /// Events are numbered sequentially starting at `1`. The `raw` field is
121    /// written as the cue body so that style tags round-trip intact.
122    /// Events with empty text produce a blank-line body so that the sequential
123    /// index is preserved.
124    ///
125    /// Timestamp format: `HH:MM:SS,mmm --> HH:MM:SS,mmm`.
126    #[must_use]
127    pub fn to_srt(&self) -> String {
128        use std::fmt::Write as _;
129        let mut out = String::new();
130        for (seq, ev) in self.events.iter().enumerate() {
131            let _ = writeln!(out, "{}", seq + 1);
132            let _ = writeln!(
133                out,
134                "{} --> {}",
135                duration_to_srt_timestamp(ev.start),
136                duration_to_srt_timestamp(ev.end),
137            );
138            out.push_str(&ev.raw);
139            out.push('\n');
140            out.push('\n');
141        }
142        out
143    }
144
145    /// Serialize this track to an ASS/SSA string.
146    ///
147    /// Writes a minimal but valid file containing `[Script Info]`,
148    /// `[V4+ Styles]` (one default style), and `[Events]`. The `raw` field
149    /// is written as the `Text` column so that override tags round-trip intact.
150    /// `Style` and `Name` metadata fields are restored from
151    /// [`SubtitleEvent::metadata`] when present.
152    ///
153    /// Timestamp format: `H:MM:SS.cc` (centiseconds).
154    #[must_use]
155    pub fn to_ass(&self) -> String {
156        use std::fmt::Write as _;
157        let mut out = String::new();
158        out.push_str("[Script Info]\n");
159        out.push_str("ScriptType: v4.00+\n");
160        out.push_str("PlayResX: 384\n");
161        out.push_str("PlayResY: 288\n");
162        out.push('\n');
163        out.push_str("[V4+ Styles]\n");
164        out.push_str(
165            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, \
166             OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, \
167             ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, \
168             Alignment, MarginL, MarginR, MarginV, Encoding\n",
169        );
170        out.push_str(
171            "Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,\
172             &H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1\n",
173        );
174        out.push('\n');
175        out.push_str("[Events]\n");
176        out.push_str(
177            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
178        );
179        for ev in &self.events {
180            let style = ev.metadata.get("Style").map_or("Default", String::as_str);
181            let name = ev.metadata.get("Name").map_or("", String::as_str);
182            let _ = writeln!(
183                out,
184                "Dialogue: 0,{},{},{},{},0,0,0,,{}",
185                duration_to_ass_timestamp(ev.start),
186                duration_to_ass_timestamp(ev.end),
187                style,
188                name,
189                ev.raw,
190            );
191        }
192        out
193    }
194
195    /// Serialize this track to a `WebVTT` (`.vtt`) string.
196    ///
197    /// Writes the mandatory `WEBVTT` header followed by one cue per event.
198    /// The `raw` field is written as the cue body so that voice span tags
199    /// round-trip intact.
200    ///
201    /// Timestamp format: `HH:MM:SS.mmm --> HH:MM:SS.mmm`.
202    #[must_use]
203    pub fn to_vtt(&self) -> String {
204        use std::fmt::Write as _;
205        let mut out = String::from("WEBVTT\n");
206        for ev in &self.events {
207            out.push('\n');
208            let _ = writeln!(
209                out,
210                "{} --> {}",
211                duration_to_vtt_timestamp(ev.start),
212                duration_to_vtt_timestamp(ev.end),
213            );
214            out.push_str(&ev.raw);
215            out.push('\n');
216        }
217        out
218    }
219
220    /// Write this track to `path`, choosing the serializer by file extension.
221    ///
222    /// Supported extensions: `.srt`, `.ass`, `.ssa`, `.vtt`.
223    ///
224    /// # Errors
225    ///
226    /// Returns [`SubtitleError::UnsupportedFormat`] for unrecognized extensions,
227    /// or [`SubtitleError::Io`] when the file cannot be written.
228    pub fn write_to_file(&self, path: impl AsRef<Path>) -> Result<(), SubtitleError> {
229        let path = path.as_ref();
230        let ext = path
231            .extension()
232            .and_then(|e| e.to_str())
233            .unwrap_or("")
234            .to_ascii_lowercase();
235
236        let content = match ext.as_str() {
237            "srt" => self.to_srt(),
238            "ass" | "ssa" => self.to_ass(),
239            "vtt" => self.to_vtt(),
240            _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
241        };
242
243        std::fs::write(path, content)?;
244        Ok(())
245    }
246
247    /// Load and parse a subtitle file, auto-detecting the format by extension.
248    ///
249    /// Supported extensions: `.srt`, `.ass`, `.ssa`, `.vtt`.
250    ///
251    /// # Errors
252    ///
253    /// Returns [`SubtitleError::UnsupportedFormat`] for unrecognized extensions,
254    /// [`SubtitleError::Io`] on read failure, or a format-specific error when
255    /// parsing fails.
256    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SubtitleError> {
257        let path = path.as_ref();
258        let ext = path
259            .extension()
260            .and_then(|e| e.to_str())
261            .unwrap_or("")
262            .to_ascii_lowercase();
263
264        // Validate extension before performing I/O.
265        match ext.as_str() {
266            "srt" | "ass" | "ssa" | "vtt" => {}
267            _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
268        }
269
270        let content = std::fs::read_to_string(path)?;
271
272        match ext.as_str() {
273            "srt" => parse_srt(&content),
274            "ass" | "ssa" => parse_ass(&content),
275            "vtt" => parse_vtt(&content),
276            _ => unreachable!("extension validated above"),
277        }
278    }
279}
280
281// ── SRT parser ────────────────────────────────────────────────────────────────
282
283fn parse_srt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
284    let mut events: Vec<SubtitleEvent> = Vec::new();
285    let mut current_block: Vec<String> = Vec::new();
286
287    for line in input.lines() {
288        let trimmed = line.trim();
289        if trimmed.is_empty() {
290            if !current_block.is_empty() {
291                if let Some(ev) = parse_srt_block(&current_block, events.len()) {
292                    events.push(ev);
293                }
294                current_block.clear();
295            }
296        } else {
297            current_block.push(trimmed.to_string());
298        }
299    }
300
301    // Handle last block without a trailing blank line.
302    if !current_block.is_empty()
303        && let Some(ev) = parse_srt_block(&current_block, events.len())
304    {
305        events.push(ev);
306    }
307
308    if events.is_empty() {
309        return Err(SubtitleError::NoEvents);
310    }
311
312    Ok(SubtitleTrack {
313        events,
314        language: None,
315    })
316}
317
318fn parse_srt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
319    // A valid block needs at least an index line and a timestamp line.
320    // A missing text line produces an empty-text event (intentional for
321    // round-trip preservation of sequential indices).
322    if block.len() < 2 {
323        log::warn!(
324            "srt block has too few lines, skipping count={}",
325            block.len()
326        );
327        return None;
328    }
329
330    // First line: 1-based sequence number.
331    if block[0].parse::<usize>().is_err() {
332        log::warn!(
333            "srt block index is not a number, skipping value={}",
334            block[0]
335        );
336        return None;
337    }
338
339    let Some((start, end)) = parse_srt_timestamp_line(&block[1]) else {
340        log::warn!("srt malformed timestamp line, skipping line={}", block[1]);
341        return None;
342    };
343
344    let raw = block[2..].join("\n");
345    let text = strip_html_tags(&raw);
346
347    Some(SubtitleEvent {
348        index,
349        start,
350        end,
351        text,
352        raw,
353        metadata: HashMap::new(),
354    })
355}
356
357fn parse_srt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
358    let mut parts = line.splitn(2, " --> ");
359    let start = parse_srt_timestamp(parts.next()?.trim())?;
360    let end = parse_srt_timestamp(parts.next()?.trim())?;
361    Some((start, end))
362}
363
/// Parse `HH:MM:SS,mmm` (comma or period separator) into a [`Duration`].
///
/// The part after the first `,`/`.` is read as an integer number of
/// milliseconds; when there is no separator it defaults to zero. Exactly three
/// colon-separated numeric fields are required before it.
///
/// Returns `None` for malformed input, including values so large that the
/// total number of milliseconds does not fit in a `u64`.
fn parse_srt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, ms_str) = s
        .split_once(|c: char| matches!(c, ',' | '.'))
        .unwrap_or((s, "0"));
    let ms: u64 = ms_str.parse().ok()?;

    let mut fields = hms_str.split(':').map(|p| p.parse::<u64>().ok());
    let (hours, minutes, seconds) = (fields.next()??, fields.next()??, fields.next()??);
    if fields.next().is_some() {
        return None;
    }

    // Checked arithmetic: the input is untrusted, and plain `*`/`+` would
    // panic in debug builds and silently wrap in release builds.
    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
382
383// ── ASS/SSA parser ─────────────────────────────────────────────────────────────
384
385fn parse_ass(input: &str) -> Result<SubtitleTrack, SubtitleError> {
386    let mut events: Vec<SubtitleEvent> = Vec::new();
387    let mut in_events = false;
388    let mut format_cols: Vec<String> = Vec::new();
389
390    for (line_no, line) in input.lines().enumerate() {
391        let line = line.trim();
392
393        if line.eq_ignore_ascii_case("[Events]") {
394            in_events = true;
395            continue;
396        }
397
398        // New section header ends the [Events] block.
399        if line.starts_with('[') && in_events {
400            break;
401        }
402
403        if !in_events {
404            continue;
405        }
406
407        if let Some(rest) = line.strip_prefix("Format:") {
408            format_cols = rest.split(',').map(|c| c.trim().to_string()).collect();
409            continue;
410        }
411
412        let Some(rest) = line.strip_prefix("Dialogue:") else {
413            continue;
414        };
415
416        if format_cols.is_empty() {
417            log::warn!(
418                "ass dialogue line found before Format line at line={}",
419                line_no + 1
420            );
421            continue;
422        }
423
424        let num_cols = format_cols.len();
425        let parts: Vec<&str> = rest.splitn(num_cols, ',').collect();
426        if parts.len() < num_cols {
427            log::warn!(
428                "ass dialogue has fewer fields than format at line={}",
429                line_no + 1
430            );
431            continue;
432        }
433
434        let col_map: HashMap<&str, &str> = format_cols
435            .iter()
436            .zip(parts.iter())
437            .map(|(k, v)| (k.as_str(), v.trim()))
438            .collect();
439
440        let Some(start) = col_map.get("Start").and_then(|s| parse_ass_timestamp(s)) else {
441            log::warn!("ass malformed start timestamp at line={}", line_no + 1);
442            continue;
443        };
444
445        let Some(end) = col_map.get("End").and_then(|s| parse_ass_timestamp(s)) else {
446            log::warn!("ass malformed end timestamp at line={}", line_no + 1);
447            continue;
448        };
449
450        let raw = col_map.get("Text").copied().unwrap_or("").to_string();
451        let text = strip_ass_tags(&raw);
452
453        let mut metadata = HashMap::new();
454        for key in &["Style", "Name", "Actor", "Layer", "Effect"] {
455            if let Some(val) = col_map.get(key)
456                && !val.is_empty()
457            {
458                metadata.insert((*key).to_string(), (*val).to_string());
459            }
460        }
461
462        events.push(SubtitleEvent {
463            index: events.len(),
464            start,
465            end,
466            text,
467            raw,
468            metadata,
469        });
470    }
471
472    if events.is_empty() {
473        return Err(SubtitleError::NoEvents);
474    }
475
476    Ok(SubtitleTrack {
477        events,
478        language: None,
479    })
480}
481
/// Parse `H:MM:SS.cc` (centiseconds) into a [`Duration`].
///
/// The part after the first `.` is read as an integer number of centiseconds;
/// when there is no `.` it defaults to zero. Exactly three colon-separated
/// numeric fields are required before it.
///
/// Returns `None` for malformed input, including values so large that the
/// total number of milliseconds does not fit in a `u64`.
fn parse_ass_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, cs_str) = s.split_once('.').unwrap_or((s, "0"));
    let cs: u64 = cs_str.parse().ok()?;

    let mut fields = hms_str.split(':').map(|p| p.parse::<u64>().ok());
    let (hours, minutes, seconds) = (fields.next()??, fields.next()??, fields.next()??);
    if fields.next().is_some() {
        return None;
    }

    // Checked arithmetic: the input is untrusted, and plain `*`/`+` would
    // panic in debug builds and silently wrap in release builds.
    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(cs.checked_mul(10)?)?;
    Some(Duration::from_millis(total_ms))
}
499
500// ── WebVTT parser ──────────────────────────────────────────────────────────────
501
502fn parse_vtt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
503    let mut lines_iter = input.lines();
504
505    // The first line must start with "WEBVTT".
506    match lines_iter.next() {
507        Some(first) if first.trim_start_matches('\u{FEFF}').starts_with("WEBVTT") => {}
508        _ => {
509            return Err(SubtitleError::ParseError {
510                line: 1,
511                reason: "WebVTT file must begin with WEBVTT".to_string(),
512            });
513        }
514    }
515
516    let mut events: Vec<SubtitleEvent> = Vec::new();
517    let mut current_block: Vec<String> = Vec::new();
518
519    for line in lines_iter {
520        let trimmed = line.trim();
521        if trimmed.is_empty() {
522            if !current_block.is_empty() {
523                if let Some(ev) = parse_vtt_block(&current_block, events.len()) {
524                    events.push(ev);
525                }
526                current_block.clear();
527            }
528        } else {
529            current_block.push(trimmed.to_string());
530        }
531    }
532
533    // Handle last block without a trailing blank line.
534    if !current_block.is_empty()
535        && let Some(ev) = parse_vtt_block(&current_block, events.len())
536    {
537        events.push(ev);
538    }
539
540    if events.is_empty() {
541        return Err(SubtitleError::NoEvents);
542    }
543
544    Ok(SubtitleTrack {
545        events,
546        language: None,
547    })
548}
549
550fn parse_vtt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
551    // Skip metadata blocks.
552    let first = block[0].as_str();
553    if first.starts_with("NOTE") || first.starts_with("STYLE") || first.starts_with("REGION") {
554        return None;
555    }
556
557    // Find the line containing "-->".
558    let Some(ts_idx) = block.iter().position(|l| l.contains("-->")) else {
559        log::warn!("vtt block has no timestamp line, skipping block_start={first}");
560        return None;
561    };
562
563    let Some((start, end)) = parse_vtt_timestamp_line(&block[ts_idx]) else {
564        log::warn!(
565            "vtt malformed timestamp line, skipping line={}",
566            block[ts_idx]
567        );
568        return None;
569    };
570
571    if ts_idx + 1 >= block.len() {
572        log::warn!("vtt cue has no text start={start:?}");
573        return None;
574    }
575
576    let raw = block[ts_idx + 1..].join("\n");
577    let text = strip_html_tags(&raw);
578
579    Some(SubtitleEvent {
580        index,
581        start,
582        end,
583        text,
584        raw,
585        metadata: HashMap::new(),
586    })
587}
588
589fn parse_vtt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
590    let mut parts = line.splitn(2, " --> ");
591    let start = parse_vtt_timestamp(parts.next()?.trim())?;
592    // End timestamp may be followed by cue settings (e.g. `align:center`).
593    let end_part = parts.next()?.trim();
594    let end_str = end_part.split_whitespace().next().unwrap_or("");
595    let end = parse_vtt_timestamp(end_str)?;
596    Some((start, end))
597}
598
/// Parse `HH:MM:SS.mmm` or `MM:SS.mmm` into a [`Duration`].
///
/// The fraction after the first `.` is interpreted positionally: it is
/// right-padded with zeros or truncated to three digits, so `.5` means 500 ms
/// and `.1239` means 123 ms. A missing or empty fraction counts as zero.
///
/// Returns `None` for malformed input: a fraction containing anything other
/// than ASCII digits, non-numeric fields, a field count other than two or
/// three, or a total that does not fit in a `u64` of milliseconds.
fn parse_vtt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, fraction) = s.split_once('.').unwrap_or((s, "0"));

    // Validate per byte instead of slicing the string: slicing at a fixed byte
    // offset panics when the fraction contains multi-byte characters.
    if !fraction.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let ms = fraction
        .bytes()
        .chain(std::iter::repeat(b'0'))
        .take(3)
        .fold(0_u64, |acc, digit| acc * 10 + u64::from(digit - b'0'));

    let fields: Vec<u64> = hms_str
        .split(':')
        .map(|p| p.parse().ok())
        .collect::<Option<Vec<_>>>()?;
    let (hours, minutes, seconds) = match fields[..] {
        [m, sec] => (0, m, sec),
        [h, m, sec] => (h, m, sec),
        _ => return None,
    };

    // Checked arithmetic: the input is untrusted, and plain `*`/`+` would
    // panic in debug builds and silently wrap in release builds.
    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
619
620// ── Timestamp serialisation helpers ───────────────────────────────────────────
621
/// Render a [`Duration`] as an SRT timestamp, `HH:MM:SS,mmm`.
///
/// Sub-millisecond precision is discarded. Hours are not wrapped, so values
/// of 100 hours or more simply use more than two hour digits.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_srt_timestamp(d: Duration) -> String {
    let total_ms = d.as_millis() as u64;
    let (whole_secs, millis) = (total_ms / 1_000, total_ms % 1_000);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours:02}:{mins:02}:{secs:02},{millis:03}")
}
633
/// Render a [`Duration`] as an ASS timestamp, `H:MM:SS.cc`.
///
/// Precision below one centisecond is discarded (truncated, not rounded).
/// The hour field is not zero-padded.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_ass_timestamp(d: Duration) -> String {
    let total_cs = (d.as_millis() as u64) / 10;
    let (whole_secs, centis) = (total_cs / 100, total_cs % 100);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours}:{mins:02}:{secs:02}.{centis:02}")
}
645
/// Render a [`Duration`] as a `WebVTT` timestamp, `HH:MM:SS.mmm`.
///
/// Sub-millisecond precision is discarded. Hours are not wrapped, so values
/// of 100 hours or more simply use more than two hour digits.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_vtt_timestamp(d: Duration) -> String {
    let total_ms = d.as_millis() as u64;
    let (whole_secs, millis) = (total_ms / 1_000, total_ms % 1_000);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours:02}:{mins:02}:{secs:02}.{millis:03}")
}
657
658// ── Tag stripping helpers ──────────────────────────────────────────────────────
659
/// Strip HTML-style tags (`<tag>`, `</tag>`) from `s`.
///
/// A tag is everything from a `<` up to and including the next `>`. Characters
/// that are not part of such a span are kept verbatim: a `>` with no preceding
/// `<` stays in the output, and a `<` that is never closed is treated as
/// literal text together with everything after it.
fn strip_html_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find('<') {
        result.push_str(&rest[..open]);
        match rest[open..].find('>') {
            // `close` is relative to `open`; resume just past the '>'.
            Some(close) => rest = &rest[open + close + 1..],
            None => {
                // Unterminated '<': not a tag, keep the remainder as text.
                rest = &rest[open..];
                break;
            }
        }
    }

    result.push_str(rest);
    result
}
674
/// Remove ASS override blocks (`{...}`) and turn the `\N` / `\n` escapes into
/// real newlines.
///
/// Braces themselves are never emitted. Escapes are only recognised outside
/// override blocks; any other backslash sequence is passed through unchanged.
fn strip_ass_tags(s: &str) -> String {
    let mut plain = String::with_capacity(s.len());
    let mut inside_override = false;
    let mut stream = s.chars().peekable();

    while let Some(ch) = stream.next() {
        match ch {
            '{' => inside_override = true,
            '}' => inside_override = false,
            // Everything between the braces is discarded.
            _ if inside_override => {}
            '\\' if matches!(stream.peek(), Some('N' | 'n')) => {
                // Consume the escape letter as well.
                stream.next();
                plain.push('\n');
            }
            other => plain.push(other),
        }
    }

    plain
}
712
713#[cfg(test)]
714#[allow(clippy::unwrap_used)]
715mod tests {
716    use super::*;
717
718    // ── SRT ───────────────────────────────────────────────────────────────────
719
720    #[test]
721    fn from_srt_should_parse_single_event() {
722        let input = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n";
723        let track = SubtitleTrack::from_srt(input).unwrap();
724        assert_eq!(track.events.len(), 1);
725        let ev = &track.events[0];
726        assert_eq!(ev.index, 0);
727        assert_eq!(ev.start, Duration::from_millis(1_000));
728        assert_eq!(ev.end, Duration::from_millis(4_000));
729        assert_eq!(ev.text, "Hello world");
730        assert_eq!(ev.raw, "Hello world");
731    }
732
733    #[test]
734    fn from_srt_should_parse_multiline_text() {
735        let input = "1\n00:00:01,000 --> 00:00:04,000\nLine one\nLine two\n\n2\n00:00:05,000 --> 00:00:07,000\nSecond\n";
736        let track = SubtitleTrack::from_srt(input).unwrap();
737        assert_eq!(track.events.len(), 2);
738        assert_eq!(track.events[0].text, "Line one\nLine two");
739        assert_eq!(track.events[1].text, "Second");
740    }
741
742    #[test]
743    fn from_srt_should_strip_html_tags_preserving_raw() {
744        let input = "1\n00:00:01,000 --> 00:00:04,000\n<i>Italic</i> and <b>bold</b>\n";
745        let track = SubtitleTrack::from_srt(input).unwrap();
746        let ev = &track.events[0];
747        assert_eq!(ev.text, "Italic and bold");
748        assert_eq!(ev.raw, "<i>Italic</i> and <b>bold</b>");
749    }
750
751    #[test]
752    fn from_srt_should_skip_malformed_event_and_parse_rest() {
753        let input = "1\n00:00:01,000 --> 00:00:04,000\nGood\n\nNOT_NUM\nbad ts\ntext\n\n2\n00:00:05,000 --> 00:00:07,000\nAlso good\n";
754        let track = SubtitleTrack::from_srt(input).unwrap();
755        assert_eq!(track.events.len(), 2);
756        assert_eq!(track.events[0].text, "Good");
757        assert_eq!(track.events[1].text, "Also good");
758    }
759
760    #[test]
761    fn from_srt_should_return_no_events_for_empty_input() {
762        let result = SubtitleTrack::from_srt("");
763        assert!(matches!(result, Err(SubtitleError::NoEvents)));
764    }
765
766    #[test]
767    fn from_srt_should_return_no_events_when_all_blocks_malformed() {
768        let result = SubtitleTrack::from_srt("NOT_NUM\n00:00:01,000 --> 00:00:04,000\ntext\n");
769        assert!(matches!(result, Err(SubtitleError::NoEvents)));
770    }
771
772    // ── ASS ───────────────────────────────────────────────────────────────────
773
774    const ASS_SAMPLE: &str = "\
775[Script Info]
776Title: Test
777
778[Events]
779Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
780Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Hello {\\i1}world{\\i0}
781Dialogue: 0,0:00:05.00,0:00:07.00,Default,,0,0,0,,Second line
782";
783
784    #[test]
785    fn from_ass_should_parse_dialogue_events() {
786        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
787        assert_eq!(track.events.len(), 2);
788        let ev = &track.events[0];
789        assert_eq!(ev.start, Duration::from_millis(1_000));
790        assert_eq!(ev.end, Duration::from_millis(4_000));
791        assert!(ev.raw.contains("{\\i1}"));
792        assert!(!ev.text.contains('{'));
793    }
794
795    #[test]
796    fn from_ass_should_strip_override_tags_preserving_raw() {
797        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\pos(100,200)}Hello\n";
798        let track = SubtitleTrack::from_ass(input).unwrap();
799        let ev = &track.events[0];
800        assert_eq!(ev.text, "Hello");
801        assert!(ev.raw.contains("{\\pos"));
802    }
803
804    #[test]
805    fn from_ass_should_populate_metadata_fields() {
806        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Signs,Actor1,0,0,0,,text\n";
807        let track = SubtitleTrack::from_ass(input).unwrap();
808        let ev = &track.events[0];
809        assert_eq!(ev.metadata.get("Style"), Some(&"Signs".to_string()));
810        assert_eq!(ev.metadata.get("Name"), Some(&"Actor1".to_string()));
811    }
812
813    #[test]
814    fn from_ass_should_return_no_events_for_empty_events_section() {
815        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
816        let result = SubtitleTrack::from_ass(input);
817        assert!(matches!(result, Err(SubtitleError::NoEvents)));
818    }
819
820    // ── VTT ───────────────────────────────────────────────────────────────────
821
822    const VTT_SAMPLE: &str = "\
823WEBVTT
824
8251
82600:00:01.000 --> 00:00:04.000
827Hello world
828
82900:00:05.000 --> 00:00:07.000 align:center
830<v Speaker>Voice tagged text</v>
831";
832
833    #[test]
834    fn from_vtt_should_parse_cues_with_and_without_identifiers() {
835        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
836        assert_eq!(track.events.len(), 2);
837        let ev = &track.events[0];
838        assert_eq!(ev.start, Duration::from_millis(1_000));
839        assert_eq!(ev.end, Duration::from_millis(4_000));
840        assert_eq!(ev.text, "Hello world");
841    }
842
843    #[test]
844    fn from_vtt_should_strip_voice_tags_preserving_raw() {
845        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
846        let ev = &track.events[1];
847        assert_eq!(ev.text, "Voice tagged text");
848        assert_eq!(ev.raw, "<v Speaker>Voice tagged text</v>");
849    }
850
851    #[test]
852    fn from_vtt_should_ignore_cue_settings_in_timestamp_line() {
853        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
854        // Second cue has "align:center" setting — end must still parse correctly.
855        assert_eq!(track.events[1].end, Duration::from_millis(7_000));
856    }
857
858    #[test]
859    fn from_vtt_should_return_parse_error_for_missing_header() {
860        let result = SubtitleTrack::from_vtt("not a vtt file\ncontent");
861        assert!(matches!(result, Err(SubtitleError::ParseError { .. })));
862    }
863
864    #[test]
865    fn from_vtt_should_return_no_events_for_empty_content() {
866        let result = SubtitleTrack::from_vtt("WEBVTT\n\n");
867        assert!(matches!(result, Err(SubtitleError::NoEvents)));
868    }
869
870    // ── from_file ─────────────────────────────────────────────────────────────
871
872    #[test]
873    fn from_file_should_return_unsupported_for_unknown_extension() {
874        let result = SubtitleTrack::from_file("subtitle.xyz");
875        assert!(matches!(
876            result,
877            Err(SubtitleError::UnsupportedFormat { .. })
878        ));
879    }
880
881    // ── timestamp helpers ─────────────────────────────────────────────────────
882
883    #[test]
884    fn parse_srt_timestamp_should_parse_millisecond_precision() {
885        let ts = parse_srt_timestamp("01:23:45,678").unwrap();
886        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678;
887        assert_eq!(ts, Duration::from_millis(expected_ms));
888    }
889
890    #[test]
891    fn parse_srt_timestamp_should_parse_zero_timestamp() {
892        let ts = parse_srt_timestamp("00:00:00,000").unwrap();
893        assert_eq!(ts, Duration::from_millis(0));
894    }
895
896    #[test]
897    fn parse_ass_timestamp_should_parse_centisecond_precision() {
898        let ts = parse_ass_timestamp("1:23:45.67").unwrap();
899        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670;
900        assert_eq!(ts, Duration::from_millis(expected_ms));
901    }
902
903    #[test]
904    fn parse_vtt_timestamp_should_accept_mm_ss_format() {
905        let ts = parse_vtt_timestamp("05:30.500").unwrap();
906        assert_eq!(ts, Duration::from_millis(5 * 60_000 + 30 * 1_000 + 500));
907    }
908
909    #[test]
910    fn parse_vtt_timestamp_should_accept_hh_mm_ss_format() {
911        let ts = parse_vtt_timestamp("01:02:03.456").unwrap();
912        let expected_ms = 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456;
913        assert_eq!(ts, Duration::from_millis(expected_ms));
914    }
915
916    // ── tag stripping helpers ─────────────────────────────────────────────────
917
918    #[test]
919    fn strip_html_tags_should_remove_italic_bold_underline() {
920        assert_eq!(strip_html_tags("<i>italic</i>"), "italic");
921        assert_eq!(strip_html_tags("<b>bold</b>"), "bold");
922        assert_eq!(strip_html_tags("<u>under</u>"), "under");
923    }
924
925    #[test]
926    fn strip_html_tags_should_remove_voice_span() {
927        assert_eq!(strip_html_tags("<v Speaker>text</v>"), "text");
928    }
929
930    #[test]
931    fn strip_ass_tags_should_remove_curly_brace_overrides() {
932        assert_eq!(strip_ass_tags("{\\an8}text"), "text");
933        assert_eq!(strip_ass_tags("before{\\pos(100,200)}after"), "beforeafter");
934    }
935
936    #[test]
937    fn strip_ass_tags_should_convert_soft_line_breaks() {
938        assert_eq!(strip_ass_tags("line1\\Nline2"), "line1\nline2");
939        assert_eq!(strip_ass_tags("line1\\nline2"), "line1\nline2");
940    }
941
942    // ── timestamp serialisation helpers ───────────────────────────────────────
943
944    #[test]
945    fn duration_to_srt_timestamp_should_format_correctly() {
946        let d = Duration::from_millis(1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678);
947        assert_eq!(duration_to_srt_timestamp(d), "01:23:45,678");
948    }
949
950    #[test]
951    fn duration_to_ass_timestamp_should_use_centiseconds() {
952        let d = Duration::from_millis(1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670);
953        assert_eq!(duration_to_ass_timestamp(d), "1:23:45.67");
954    }
955
956    #[test]
957    fn duration_to_vtt_timestamp_should_format_correctly() {
958        let d = Duration::from_millis(1 * 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456);
959        assert_eq!(duration_to_vtt_timestamp(d), "01:02:03.456");
960    }
961
962    // ── to_srt ────────────────────────────────────────────────────────────────
963
964    #[test]
965    fn to_srt_should_produce_1_based_sequential_indices() {
966        let track = SubtitleTrack {
967            events: vec![
968                make_event(0, 1_000, 4_000, "First"),
969                make_event(1, 5_000, 7_000, "Second"),
970            ],
971            language: None,
972        };
973        let srt = track.to_srt();
974        let lines: Vec<&str> = srt.lines().collect();
975        assert_eq!(lines[0], "1");
976        assert_eq!(lines[4], "2");
977    }
978
979    #[test]
980    fn to_srt_should_use_comma_separated_timestamps() {
981        let track = SubtitleTrack {
982            events: vec![make_event(0, 1_000, 4_000, "Hello")],
983            language: None,
984        };
985        let srt = track.to_srt();
986        assert!(srt.contains("00:00:01,000 --> 00:00:04,000"));
987    }
988
989    #[test]
990    fn to_srt_should_write_empty_text_event_preserving_index_sequence() {
991        let empty = SubtitleEvent {
992            index: 1,
993            start: Duration::from_millis(5_000),
994            end: Duration::from_millis(7_000),
995            text: String::new(),
996            raw: String::new(),
997            metadata: HashMap::new(),
998        };
999        let track = SubtitleTrack {
1000            events: vec![make_event(0, 1_000, 4_000, "First"), empty],
1001            language: None,
1002        };
1003        let srt = track.to_srt();
1004        let reparsed = SubtitleTrack::from_srt(&srt).unwrap();
1005        // Empty-text event must survive the round-trip and keep the index intact.
1006        assert_eq!(reparsed.events.len(), 2);
1007        assert_eq!(reparsed.events[1].start, Duration::from_millis(5_000));
1008    }
1009
1010    #[test]
1011    fn srt_round_trip_should_preserve_start_end_and_text() {
1012        let srt_in = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n\n2\n00:00:05,500 --> 00:00:07,250\nSecond\n\n";
1013        let track = SubtitleTrack::from_srt(srt_in).unwrap();
1014        let written = track.to_srt();
1015        let reparsed = SubtitleTrack::from_srt(&written).unwrap();
1016        assert_eq!(reparsed.events.len(), track.events.len());
1017        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
1018            assert_eq!(a.start, b.start);
1019            assert_eq!(a.end, b.end);
1020            assert_eq!(a.text, b.text);
1021        }
1022    }
1023
1024    // ── to_ass ────────────────────────────────────────────────────────────────
1025
1026    #[test]
1027    fn to_ass_should_contain_required_sections() {
1028        let track = SubtitleTrack {
1029            events: vec![make_event(0, 1_000, 4_000, "Hello")],
1030            language: None,
1031        };
1032        let ass = track.to_ass();
1033        assert!(ass.contains("[Script Info]"));
1034        assert!(ass.contains("[V4+ Styles]"));
1035        assert!(ass.contains("[Events]"));
1036        assert!(ass.contains("Format: Layer, Start, End,"));
1037        assert!(ass.contains("Dialogue:"));
1038    }
1039
1040    #[test]
1041    fn to_ass_should_use_centisecond_timestamps() {
1042        let track = SubtitleTrack {
1043            events: vec![make_event(0, 1_000, 4_000, "Hello")],
1044            language: None,
1045        };
1046        let ass = track.to_ass();
1047        assert!(ass.contains("0:00:01.00,0:00:04.00"));
1048    }
1049
1050    #[test]
1051    fn ass_round_trip_should_preserve_start_end_and_text() {
1052        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
1053        let written = track.to_ass();
1054        let reparsed = SubtitleTrack::from_ass(&written).unwrap();
1055        assert_eq!(reparsed.events.len(), track.events.len());
1056        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
1057            assert_eq!(a.start, b.start, "start mismatch");
1058            assert_eq!(a.end, b.end, "end mismatch");
1059            assert_eq!(a.text, b.text, "text mismatch");
1060        }
1061    }
1062
1063    // ── to_vtt ────────────────────────────────────────────────────────────────
1064
1065    #[test]
1066    fn to_vtt_should_start_with_webvtt_header() {
1067        let track = SubtitleTrack {
1068            events: vec![make_event(0, 1_000, 4_000, "Hello")],
1069            language: None,
1070        };
1071        let vtt = track.to_vtt();
1072        assert!(vtt.starts_with("WEBVTT\n"));
1073    }
1074
1075    #[test]
1076    fn to_vtt_should_use_dot_separated_timestamps() {
1077        let track = SubtitleTrack {
1078            events: vec![make_event(0, 1_000, 4_000, "Hello")],
1079            language: None,
1080        };
1081        let vtt = track.to_vtt();
1082        assert!(vtt.contains("00:00:01.000 --> 00:00:04.000"));
1083    }
1084
1085    #[test]
1086    fn vtt_round_trip_should_preserve_start_end_and_text() {
1087        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
1088        let written = track.to_vtt();
1089        let reparsed = SubtitleTrack::from_vtt(&written).unwrap();
1090        assert_eq!(reparsed.events.len(), track.events.len());
1091        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
1092            assert_eq!(a.start, b.start, "start mismatch");
1093            assert_eq!(a.end, b.end, "end mismatch");
1094            assert_eq!(a.text, b.text, "text mismatch");
1095        }
1096    }
1097
1098    // ── write_to_file ─────────────────────────────────────────────────────────
1099
1100    #[test]
1101    fn write_to_file_should_return_unsupported_for_unknown_extension() {
1102        let track = SubtitleTrack {
1103            events: vec![make_event(0, 1_000, 4_000, "Hello")],
1104            language: None,
1105        };
1106        let result = track.write_to_file("output.xyz");
1107        assert!(matches!(
1108            result,
1109            Err(SubtitleError::UnsupportedFormat { .. })
1110        ));
1111    }
1112
1113    // ── helpers ───────────────────────────────────────────────────────────────
1114
1115    fn make_event(index: usize, start_ms: u64, end_ms: u64, text: &str) -> SubtitleEvent {
1116        SubtitleEvent {
1117            index,
1118            start: Duration::from_millis(start_ms),
1119            end: Duration::from_millis(end_ms),
1120            text: text.to_string(),
1121            raw: text.to_string(),
1122            metadata: HashMap::new(),
1123        }
1124    }
1125}