Skip to main content

ff_format/subtitle/
mod.rs

//! Subtitle format parser — SRT, ASS/SSA, `WebVTT`.
//!
//! Provides pure-Rust parsing for the three most common text subtitle formats.
//! Malformed events are skipped with a `log::warn!`; a file with zero valid
//! events returns [`SubtitleError::NoEvents`].
//!
//! # Example
//!
//! ```
//! use ff_format::subtitle::{SubtitleTrack, SubtitleError};
//!
//! let srt = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n\n";
//! let track = SubtitleTrack::from_srt(srt).unwrap();
//! assert_eq!(track.events.len(), 1);
//! assert_eq!(track.events[0].text, "Hello world");
//! ```
17
18use std::collections::HashMap;
19use std::path::Path;
20use std::time::Duration;
21
22pub use crate::error::SubtitleError;
23
/// One timed cue of a subtitle track.
///
/// `text` carries the cue with style/override tags removed, while `raw`
/// keeps those tags so that serializers can write them back out.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SubtitleEvent {
    /// Zero-based, sequential position of the event.
    pub index: usize,
    /// Time at which the cue starts being presented.
    pub start: Duration,
    /// Time at which the cue stops being presented.
    pub end: Duration,
    /// Cue text with every style/override tag stripped.
    pub text: String,
    /// Cue text as read, including any style or override tags.
    pub raw: String,
    /// Extra per-event fields (for example the ASS `Actor` or `Style` column).
    pub metadata: HashMap<String, String>,
}
40
41/// A parsed subtitle track containing ordered events.
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct SubtitleTrack {
44    /// Ordered list of subtitle events.
45    pub events: Vec<SubtitleEvent>,
46    /// BCP-47 language tag when available (e.g. `"en"`, `"ja"`).
47    pub language: Option<String>,
48}
49
50impl SubtitleTrack {
51    /// Parse a `SubRip` (`.srt`) subtitle string.
52    ///
53    /// Supports multi-line cues and HTML-style tags (`<i>`, `<b>`, `<u>`).
54    /// Malformed blocks are skipped with `log::warn!`.
55    ///
56    /// # Errors
57    ///
58    /// Returns [`SubtitleError::NoEvents`] when no valid events are found.
59    pub fn from_srt(input: &str) -> Result<Self, SubtitleError> {
60        parse_srt(input)
61    }
62
63    /// Parse an ASS/SSA subtitle string.
64    ///
65    /// Reads the `[Events]` section only. Override tags (`{...}`) are
66    /// preserved in [`SubtitleEvent::raw`] and stripped for
67    /// [`SubtitleEvent::text`]. Malformed `Dialogue:` lines are skipped.
68    ///
69    /// # Errors
70    ///
71    /// Returns [`SubtitleError::NoEvents`] when no valid events are found.
72    pub fn from_ass(input: &str) -> Result<Self, SubtitleError> {
73        parse_ass(input)
74    }
75
76    /// Parse a `WebVTT` (`.vtt`) subtitle string.
77    ///
78    /// Cue identifiers are optional. Voice span tags (`<v Speaker>`) and
79    /// other HTML tags are stripped for [`SubtitleEvent::text`]. Malformed
80    /// cues are skipped with `log::warn!`.
81    ///
82    /// # Errors
83    ///
84    /// Returns [`SubtitleError::ParseError`] when the `WEBVTT` header is
85    /// missing, or [`SubtitleError::NoEvents`] when no valid cues are found.
86    pub fn from_vtt(input: &str) -> Result<Self, SubtitleError> {
87        parse_vtt(input)
88    }
89
90    /// Serialize this track to a `SubRip` (`.srt`) string.
91    ///
92    /// Events are numbered sequentially starting at `1`. The `raw` field is
93    /// written as the cue body so that style tags round-trip intact.
94    /// Events with empty text produce a blank-line body so that the sequential
95    /// index is preserved.
96    ///
97    /// Timestamp format: `HH:MM:SS,mmm --> HH:MM:SS,mmm`.
98    #[must_use]
99    pub fn to_srt(&self) -> String {
100        use std::fmt::Write as _;
101        let mut out = String::new();
102        for (seq, ev) in self.events.iter().enumerate() {
103            let _ = writeln!(out, "{}", seq + 1);
104            let _ = writeln!(
105                out,
106                "{} --> {}",
107                duration_to_srt_timestamp(ev.start),
108                duration_to_srt_timestamp(ev.end),
109            );
110            out.push_str(&ev.raw);
111            out.push('\n');
112            out.push('\n');
113        }
114        out
115    }
116
117    /// Serialize this track to an ASS/SSA string.
118    ///
119    /// Writes a minimal but valid file containing `[Script Info]`,
120    /// `[V4+ Styles]` (one default style), and `[Events]`. The `raw` field
121    /// is written as the `Text` column so that override tags round-trip intact.
122    /// `Style` and `Name` metadata fields are restored from
123    /// [`SubtitleEvent::metadata`] when present.
124    ///
125    /// Timestamp format: `H:MM:SS.cc` (centiseconds).
126    #[must_use]
127    pub fn to_ass(&self) -> String {
128        use std::fmt::Write as _;
129        let mut out = String::new();
130        out.push_str("[Script Info]\n");
131        out.push_str("ScriptType: v4.00+\n");
132        out.push_str("PlayResX: 384\n");
133        out.push_str("PlayResY: 288\n");
134        out.push('\n');
135        out.push_str("[V4+ Styles]\n");
136        out.push_str(
137            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, \
138             OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, \
139             ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, \
140             Alignment, MarginL, MarginR, MarginV, Encoding\n",
141        );
142        out.push_str(
143            "Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,\
144             &H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1\n",
145        );
146        out.push('\n');
147        out.push_str("[Events]\n");
148        out.push_str(
149            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
150        );
151        for ev in &self.events {
152            let style = ev.metadata.get("Style").map_or("Default", String::as_str);
153            let name = ev.metadata.get("Name").map_or("", String::as_str);
154            let _ = writeln!(
155                out,
156                "Dialogue: 0,{},{},{},{},0,0,0,,{}",
157                duration_to_ass_timestamp(ev.start),
158                duration_to_ass_timestamp(ev.end),
159                style,
160                name,
161                ev.raw,
162            );
163        }
164        out
165    }
166
167    /// Serialize this track to a `WebVTT` (`.vtt`) string.
168    ///
169    /// Writes the mandatory `WEBVTT` header followed by one cue per event.
170    /// The `raw` field is written as the cue body so that voice span tags
171    /// round-trip intact.
172    ///
173    /// Timestamp format: `HH:MM:SS.mmm --> HH:MM:SS.mmm`.
174    #[must_use]
175    pub fn to_vtt(&self) -> String {
176        use std::fmt::Write as _;
177        let mut out = String::from("WEBVTT\n");
178        for ev in &self.events {
179            out.push('\n');
180            let _ = writeln!(
181                out,
182                "{} --> {}",
183                duration_to_vtt_timestamp(ev.start),
184                duration_to_vtt_timestamp(ev.end),
185            );
186            out.push_str(&ev.raw);
187            out.push('\n');
188        }
189        out
190    }
191
192    /// Write this track to `path`, choosing the serializer by file extension.
193    ///
194    /// Supported extensions: `.srt`, `.ass`, `.ssa`, `.vtt`.
195    ///
196    /// # Errors
197    ///
198    /// Returns [`SubtitleError::UnsupportedFormat`] for unrecognized extensions,
199    /// or [`SubtitleError::Io`] when the file cannot be written.
200    pub fn write_to_file(&self, path: impl AsRef<Path>) -> Result<(), SubtitleError> {
201        let path = path.as_ref();
202        let ext = path
203            .extension()
204            .and_then(|e| e.to_str())
205            .unwrap_or("")
206            .to_ascii_lowercase();
207
208        let content = match ext.as_str() {
209            "srt" => self.to_srt(),
210            "ass" | "ssa" => self.to_ass(),
211            "vtt" => self.to_vtt(),
212            _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
213        };
214
215        std::fs::write(path, content)?;
216        Ok(())
217    }
218
219    /// Load and parse a subtitle file, auto-detecting the format by extension.
220    ///
221    /// Supported extensions: `.srt`, `.ass`, `.ssa`, `.vtt`.
222    ///
223    /// # Errors
224    ///
225    /// Returns [`SubtitleError::UnsupportedFormat`] for unrecognized extensions,
226    /// [`SubtitleError::Io`] on read failure, or a format-specific error when
227    /// parsing fails.
228    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SubtitleError> {
229        let path = path.as_ref();
230        let ext = path
231            .extension()
232            .and_then(|e| e.to_str())
233            .unwrap_or("")
234            .to_ascii_lowercase();
235
236        // Validate extension before performing I/O.
237        match ext.as_str() {
238            "srt" | "ass" | "ssa" | "vtt" => {}
239            _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
240        }
241
242        let content = std::fs::read_to_string(path)?;
243
244        match ext.as_str() {
245            "srt" => parse_srt(&content),
246            "ass" | "ssa" => parse_ass(&content),
247            "vtt" => parse_vtt(&content),
248            _ => unreachable!("extension validated above"),
249        }
250    }
251}
252
253// ── SRT parser ────────────────────────────────────────────────────────────────
254
255fn parse_srt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
256    let mut events: Vec<SubtitleEvent> = Vec::new();
257    let mut current_block: Vec<String> = Vec::new();
258
259    for line in input.lines() {
260        let trimmed = line.trim();
261        if trimmed.is_empty() {
262            if !current_block.is_empty() {
263                if let Some(ev) = parse_srt_block(&current_block, events.len()) {
264                    events.push(ev);
265                }
266                current_block.clear();
267            }
268        } else {
269            current_block.push(trimmed.to_string());
270        }
271    }
272
273    // Handle last block without a trailing blank line.
274    if !current_block.is_empty()
275        && let Some(ev) = parse_srt_block(&current_block, events.len())
276    {
277        events.push(ev);
278    }
279
280    if events.is_empty() {
281        return Err(SubtitleError::NoEvents);
282    }
283
284    Ok(SubtitleTrack {
285        events,
286        language: None,
287    })
288}
289
290fn parse_srt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
291    // A valid block needs at least an index line and a timestamp line.
292    // A missing text line produces an empty-text event (intentional for
293    // round-trip preservation of sequential indices).
294    if block.len() < 2 {
295        log::warn!(
296            "srt block has too few lines, skipping count={}",
297            block.len()
298        );
299        return None;
300    }
301
302    // First line: 1-based sequence number.
303    if block[0].parse::<usize>().is_err() {
304        log::warn!(
305            "srt block index is not a number, skipping value={}",
306            block[0]
307        );
308        return None;
309    }
310
311    let Some((start, end)) = parse_srt_timestamp_line(&block[1]) else {
312        log::warn!("srt malformed timestamp line, skipping line={}", block[1]);
313        return None;
314    };
315
316    let raw = block[2..].join("\n");
317    let text = strip_html_tags(&raw);
318
319    Some(SubtitleEvent {
320        index,
321        start,
322        end,
323        text,
324        raw,
325        metadata: HashMap::new(),
326    })
327}
328
329fn parse_srt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
330    let mut parts = line.splitn(2, " --> ");
331    let start = parse_srt_timestamp(parts.next()?.trim())?;
332    let end = parse_srt_timestamp(parts.next()?.trim())?;
333    Some((start, end))
334}
335
/// Parse `HH:MM:SS,mmm` (comma or period separator) into a [`Duration`].
///
/// The fractional part is optional and is read as an integer number of
/// milliseconds. Exactly three `:`-separated numeric fields are required.
///
/// Returns `None` when any field is not an unsigned integer, when the field
/// count is wrong, or when the total number of milliseconds does not fit in
/// a `u64` (so hostile input cannot trigger an arithmetic overflow).
fn parse_srt_timestamp(s: &str) -> Option<Duration> {
    // Split at the first `,` or `.`; without one the millisecond part is zero.
    let (hms_str, ms_str) = s
        .split_once(|c: char| c == ',' || c == '.')
        .unwrap_or((s, "0"));
    let ms: u64 = ms_str.parse().ok()?;

    let mut fields = hms_str.split(':').map(|p| p.parse::<u64>().ok());
    let hours = fields.next()??;
    let minutes = fields.next()??;
    let seconds = fields.next()??;
    if fields.next().is_some() {
        return None;
    }

    // Checked arithmetic: the fields come straight from the file.
    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
354
355// ── ASS/SSA parser ─────────────────────────────────────────────────────────────
356
357fn parse_ass(input: &str) -> Result<SubtitleTrack, SubtitleError> {
358    let mut events: Vec<SubtitleEvent> = Vec::new();
359    let mut in_events = false;
360    let mut format_cols: Vec<String> = Vec::new();
361
362    for (line_no, line) in input.lines().enumerate() {
363        let line = line.trim();
364
365        if line.eq_ignore_ascii_case("[Events]") {
366            in_events = true;
367            continue;
368        }
369
370        // New section header ends the [Events] block.
371        if line.starts_with('[') && in_events {
372            break;
373        }
374
375        if !in_events {
376            continue;
377        }
378
379        if let Some(rest) = line.strip_prefix("Format:") {
380            format_cols = rest.split(',').map(|c| c.trim().to_string()).collect();
381            continue;
382        }
383
384        let Some(rest) = line.strip_prefix("Dialogue:") else {
385            continue;
386        };
387
388        if format_cols.is_empty() {
389            log::warn!(
390                "ass dialogue line found before Format line at line={}",
391                line_no + 1
392            );
393            continue;
394        }
395
396        let num_cols = format_cols.len();
397        let parts: Vec<&str> = rest.splitn(num_cols, ',').collect();
398        if parts.len() < num_cols {
399            log::warn!(
400                "ass dialogue has fewer fields than format at line={}",
401                line_no + 1
402            );
403            continue;
404        }
405
406        let col_map: HashMap<&str, &str> = format_cols
407            .iter()
408            .zip(parts.iter())
409            .map(|(k, v)| (k.as_str(), v.trim()))
410            .collect();
411
412        let Some(start) = col_map.get("Start").and_then(|s| parse_ass_timestamp(s)) else {
413            log::warn!("ass malformed start timestamp at line={}", line_no + 1);
414            continue;
415        };
416
417        let Some(end) = col_map.get("End").and_then(|s| parse_ass_timestamp(s)) else {
418            log::warn!("ass malformed end timestamp at line={}", line_no + 1);
419            continue;
420        };
421
422        let raw = col_map.get("Text").copied().unwrap_or("").to_string();
423        let text = strip_ass_tags(&raw);
424
425        let mut metadata = HashMap::new();
426        for key in &["Style", "Name", "Actor", "Layer", "Effect"] {
427            if let Some(val) = col_map.get(key)
428                && !val.is_empty()
429            {
430                metadata.insert((*key).to_string(), (*val).to_string());
431            }
432        }
433
434        events.push(SubtitleEvent {
435            index: events.len(),
436            start,
437            end,
438            text,
439            raw,
440            metadata,
441        });
442    }
443
444    if events.is_empty() {
445        return Err(SubtitleError::NoEvents);
446    }
447
448    Ok(SubtitleTrack {
449        events,
450        language: None,
451    })
452}
453
/// Parse `H:MM:SS.cc` (centiseconds) into a [`Duration`].
///
/// The fractional part is optional and is read as an integer number of
/// centiseconds. Exactly three `:`-separated numeric fields are required.
///
/// Returns `None` when any field is not an unsigned integer, when the field
/// count is wrong, or when the total number of milliseconds does not fit in
/// a `u64` (so hostile input cannot trigger an arithmetic overflow).
fn parse_ass_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, cs_str) = s.split_once('.').unwrap_or((s, "0"));
    let cs: u64 = cs_str.parse().ok()?;

    let mut fields = hms_str.split(':').map(|p| p.parse::<u64>().ok());
    let hours = fields.next()??;
    let minutes = fields.next()??;
    let seconds = fields.next()??;
    if fields.next().is_some() {
        return None;
    }

    // Checked arithmetic: the fields come straight from the file.
    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(cs.checked_mul(10)?)?;
    Some(Duration::from_millis(total_ms))
}
471
472// ── WebVTT parser ──────────────────────────────────────────────────────────────
473
474fn parse_vtt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
475    let mut lines_iter = input.lines();
476
477    // The first line must start with "WEBVTT".
478    match lines_iter.next() {
479        Some(first) if first.trim_start_matches('\u{FEFF}').starts_with("WEBVTT") => {}
480        _ => {
481            return Err(SubtitleError::ParseError {
482                line: 1,
483                reason: "WebVTT file must begin with WEBVTT".to_string(),
484            });
485        }
486    }
487
488    let mut events: Vec<SubtitleEvent> = Vec::new();
489    let mut current_block: Vec<String> = Vec::new();
490
491    for line in lines_iter {
492        let trimmed = line.trim();
493        if trimmed.is_empty() {
494            if !current_block.is_empty() {
495                if let Some(ev) = parse_vtt_block(&current_block, events.len()) {
496                    events.push(ev);
497                }
498                current_block.clear();
499            }
500        } else {
501            current_block.push(trimmed.to_string());
502        }
503    }
504
505    // Handle last block without a trailing blank line.
506    if !current_block.is_empty()
507        && let Some(ev) = parse_vtt_block(&current_block, events.len())
508    {
509        events.push(ev);
510    }
511
512    if events.is_empty() {
513        return Err(SubtitleError::NoEvents);
514    }
515
516    Ok(SubtitleTrack {
517        events,
518        language: None,
519    })
520}
521
522fn parse_vtt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
523    // Skip metadata blocks.
524    let first = block[0].as_str();
525    if first.starts_with("NOTE") || first.starts_with("STYLE") || first.starts_with("REGION") {
526        return None;
527    }
528
529    // Find the line containing "-->".
530    let Some(ts_idx) = block.iter().position(|l| l.contains("-->")) else {
531        log::warn!("vtt block has no timestamp line, skipping block_start={first}");
532        return None;
533    };
534
535    let Some((start, end)) = parse_vtt_timestamp_line(&block[ts_idx]) else {
536        log::warn!(
537            "vtt malformed timestamp line, skipping line={}",
538            block[ts_idx]
539        );
540        return None;
541    };
542
543    if ts_idx + 1 >= block.len() {
544        log::warn!("vtt cue has no text start={start:?}");
545        return None;
546    }
547
548    let raw = block[ts_idx + 1..].join("\n");
549    let text = strip_html_tags(&raw);
550
551    Some(SubtitleEvent {
552        index,
553        start,
554        end,
555        text,
556        raw,
557        metadata: HashMap::new(),
558    })
559}
560
561fn parse_vtt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
562    let mut parts = line.splitn(2, " --> ");
563    let start = parse_vtt_timestamp(parts.next()?.trim())?;
564    // End timestamp may be followed by cue settings (e.g. `align:center`).
565    let end_part = parts.next()?.trim();
566    let end_str = end_part.split_whitespace().next().unwrap_or("");
567    let end = parse_vtt_timestamp(end_str)?;
568    Some((start, end))
569}
570
/// Parse `HH:MM:SS.mmm` or `MM:SS.mmm` into a [`Duration`].
///
/// The fractional part is optional. It is normalised to milliseconds by
/// right-padding with zeros and truncating after three digits, so `.5` is
/// 500 ms and `.1234` is 123 ms.
///
/// Returns `None` when the fraction contains anything other than ASCII
/// digits, when a time field is not an unsigned integer, when there are not
/// exactly two or three time fields, or when the total number of
/// milliseconds does not fit in a `u64`.
fn parse_vtt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, frac) = s.split_once('.').unwrap_or((s, "0"));

    // Validate before taking digits: byte-slicing an arbitrary string here
    // could land inside a multi-byte character and panic.
    if !frac.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let ms = frac
        .bytes()
        .chain(std::iter::repeat(b'0'))
        .take(3)
        .fold(0_u64, |acc, digit| acc * 10 + u64::from(digit - b'0'));

    let fields: Vec<u64> = hms_str
        .split(':')
        .map(|p| p.parse().ok())
        .collect::<Option<Vec<_>>>()?;
    let (hours, minutes, seconds) = match fields.as_slice() {
        [m, sec] => (0, *m, *sec),
        [h, m, sec] => (*h, *m, *sec),
        _ => return None,
    };

    // Checked arithmetic: the fields come straight from the file.
    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
591
592// ── Timestamp serialisation helpers ───────────────────────────────────────────
593
/// Render a [`Duration`] as an SRT (`SubRip`) timestamp, `HH:MM:SS,mmm`.
///
/// Hours are zero-padded to two digits and grow beyond that when needed.
// The millisecond count is deliberately narrowed to `u64`.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_srt_timestamp(d: Duration) -> String {
    let millis_total = d.as_millis() as u64;
    let (whole_secs, millis) = (millis_total / 1_000, millis_total % 1_000);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours:02}:{mins:02}:{secs:02},{millis:03}")
}
605
/// Render a [`Duration`] as an ASS timestamp, `H:MM:SS.cc`.
///
/// The fraction is in centiseconds; milliseconds are truncated, not rounded.
/// Hours are not zero-padded.
// The millisecond count is deliberately narrowed to `u64`.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_ass_timestamp(d: Duration) -> String {
    let millis_total = d.as_millis() as u64;
    let centis = (millis_total % 1_000) / 10;
    let whole_secs = millis_total / 1_000;
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours}:{mins:02}:{secs:02}.{centis:02}")
}
617
/// Render a [`Duration`] as a `WebVTT` timestamp, `HH:MM:SS.mmm`.
///
/// Hours are zero-padded to two digits and grow beyond that when needed.
// The millisecond count is deliberately narrowed to `u64`.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_vtt_timestamp(d: Duration) -> String {
    let millis_total = d.as_millis() as u64;
    let (whole_secs, millis) = (millis_total / 1_000, millis_total % 1_000);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours:02}:{mins:02}:{secs:02}.{millis:03}")
}
629
630// ── Tag stripping helpers ──────────────────────────────────────────────────────
631
/// Remove HTML-style tags (`<tag>`, `</tag>`) from `s`.
///
/// Everything from a `<` up to and including the next `>` is dropped. A `<`
/// that is never closed therefore swallows the rest of the string, and a
/// stray `>` is removed on its own.
fn strip_html_tags(s: &str) -> String {
    let mut inside_tag = false;
    s.chars()
        .filter(|&ch| match ch {
            '<' => {
                inside_tag = true;
                false
            }
            '>' => {
                inside_tag = false;
                false
            }
            _ => !inside_tag,
        })
        .collect()
}
646
/// Remove ASS override blocks (`{...}`) from `s` and turn the line-break
/// codes `\N` and `\n` into real newlines.
///
/// Braces themselves are always dropped, balanced or not. Outside an override
/// block, a backslash that is not followed by `N`/`n` is kept as-is; inside
/// a block everything is discarded.
fn strip_ass_tags(s: &str) -> String {
    let mut plain = String::with_capacity(s.len());
    let mut inside_override = false;
    let mut stream = s.chars().peekable();

    while let Some(ch) = stream.next() {
        match ch {
            '{' => inside_override = true,
            '}' => inside_override = false,
            _ if inside_override => {}
            '\\' if matches!(stream.peek().copied(), Some('N' | 'n')) => {
                // Consume the `N`/`n` that completes the line-break code.
                stream.next();
                plain.push('\n');
            }
            other => plain.push(other),
        }
    }
    plain
}
684
685#[cfg(test)]
686#[allow(clippy::unwrap_used)]
687mod tests {
688    use super::*;
689
690    // ── SRT ───────────────────────────────────────────────────────────────────
691
692    #[test]
693    fn from_srt_should_parse_single_event() {
694        let input = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n";
695        let track = SubtitleTrack::from_srt(input).unwrap();
696        assert_eq!(track.events.len(), 1);
697        let ev = &track.events[0];
698        assert_eq!(ev.index, 0);
699        assert_eq!(ev.start, Duration::from_millis(1_000));
700        assert_eq!(ev.end, Duration::from_millis(4_000));
701        assert_eq!(ev.text, "Hello world");
702        assert_eq!(ev.raw, "Hello world");
703    }
704
705    #[test]
706    fn from_srt_should_parse_multiline_text() {
707        let input = "1\n00:00:01,000 --> 00:00:04,000\nLine one\nLine two\n\n2\n00:00:05,000 --> 00:00:07,000\nSecond\n";
708        let track = SubtitleTrack::from_srt(input).unwrap();
709        assert_eq!(track.events.len(), 2);
710        assert_eq!(track.events[0].text, "Line one\nLine two");
711        assert_eq!(track.events[1].text, "Second");
712    }
713
714    #[test]
715    fn from_srt_should_strip_html_tags_preserving_raw() {
716        let input = "1\n00:00:01,000 --> 00:00:04,000\n<i>Italic</i> and <b>bold</b>\n";
717        let track = SubtitleTrack::from_srt(input).unwrap();
718        let ev = &track.events[0];
719        assert_eq!(ev.text, "Italic and bold");
720        assert_eq!(ev.raw, "<i>Italic</i> and <b>bold</b>");
721    }
722
723    #[test]
724    fn from_srt_should_skip_malformed_event_and_parse_rest() {
725        let input = "1\n00:00:01,000 --> 00:00:04,000\nGood\n\nNOT_NUM\nbad ts\ntext\n\n2\n00:00:05,000 --> 00:00:07,000\nAlso good\n";
726        let track = SubtitleTrack::from_srt(input).unwrap();
727        assert_eq!(track.events.len(), 2);
728        assert_eq!(track.events[0].text, "Good");
729        assert_eq!(track.events[1].text, "Also good");
730    }
731
732    #[test]
733    fn from_srt_should_return_no_events_for_empty_input() {
734        let result = SubtitleTrack::from_srt("");
735        assert!(matches!(result, Err(SubtitleError::NoEvents)));
736    }
737
738    #[test]
739    fn from_srt_should_return_no_events_when_all_blocks_malformed() {
740        let result = SubtitleTrack::from_srt("NOT_NUM\n00:00:01,000 --> 00:00:04,000\ntext\n");
741        assert!(matches!(result, Err(SubtitleError::NoEvents)));
742    }
743
744    // ── ASS ───────────────────────────────────────────────────────────────────
745
746    const ASS_SAMPLE: &str = "\
747[Script Info]
748Title: Test
749
750[Events]
751Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
752Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Hello {\\i1}world{\\i0}
753Dialogue: 0,0:00:05.00,0:00:07.00,Default,,0,0,0,,Second line
754";
755
756    #[test]
757    fn from_ass_should_parse_dialogue_events() {
758        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
759        assert_eq!(track.events.len(), 2);
760        let ev = &track.events[0];
761        assert_eq!(ev.start, Duration::from_millis(1_000));
762        assert_eq!(ev.end, Duration::from_millis(4_000));
763        assert!(ev.raw.contains("{\\i1}"));
764        assert!(!ev.text.contains('{'));
765    }
766
767    #[test]
768    fn from_ass_should_strip_override_tags_preserving_raw() {
769        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\pos(100,200)}Hello\n";
770        let track = SubtitleTrack::from_ass(input).unwrap();
771        let ev = &track.events[0];
772        assert_eq!(ev.text, "Hello");
773        assert!(ev.raw.contains("{\\pos"));
774    }
775
776    #[test]
777    fn from_ass_should_populate_metadata_fields() {
778        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Signs,Actor1,0,0,0,,text\n";
779        let track = SubtitleTrack::from_ass(input).unwrap();
780        let ev = &track.events[0];
781        assert_eq!(ev.metadata.get("Style"), Some(&"Signs".to_string()));
782        assert_eq!(ev.metadata.get("Name"), Some(&"Actor1".to_string()));
783    }
784
785    #[test]
786    fn from_ass_should_return_no_events_for_empty_events_section() {
787        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
788        let result = SubtitleTrack::from_ass(input);
789        assert!(matches!(result, Err(SubtitleError::NoEvents)));
790    }
791
792    // ── VTT ───────────────────────────────────────────────────────────────────
793
794    const VTT_SAMPLE: &str = "\
795WEBVTT
796
7971
79800:00:01.000 --> 00:00:04.000
799Hello world
800
80100:00:05.000 --> 00:00:07.000 align:center
802<v Speaker>Voice tagged text</v>
803";
804
805    #[test]
806    fn from_vtt_should_parse_cues_with_and_without_identifiers() {
807        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
808        assert_eq!(track.events.len(), 2);
809        let ev = &track.events[0];
810        assert_eq!(ev.start, Duration::from_millis(1_000));
811        assert_eq!(ev.end, Duration::from_millis(4_000));
812        assert_eq!(ev.text, "Hello world");
813    }
814
815    #[test]
816    fn from_vtt_should_strip_voice_tags_preserving_raw() {
817        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
818        let ev = &track.events[1];
819        assert_eq!(ev.text, "Voice tagged text");
820        assert_eq!(ev.raw, "<v Speaker>Voice tagged text</v>");
821    }
822
823    #[test]
824    fn from_vtt_should_ignore_cue_settings_in_timestamp_line() {
825        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
826        // Second cue has "align:center" setting — end must still parse correctly.
827        assert_eq!(track.events[1].end, Duration::from_millis(7_000));
828    }
829
830    #[test]
831    fn from_vtt_should_return_parse_error_for_missing_header() {
832        let result = SubtitleTrack::from_vtt("not a vtt file\ncontent");
833        assert!(matches!(result, Err(SubtitleError::ParseError { .. })));
834    }
835
836    #[test]
837    fn from_vtt_should_return_no_events_for_empty_content() {
838        let result = SubtitleTrack::from_vtt("WEBVTT\n\n");
839        assert!(matches!(result, Err(SubtitleError::NoEvents)));
840    }
841
842    // ── from_file ─────────────────────────────────────────────────────────────
843
844    #[test]
845    fn from_file_should_return_unsupported_for_unknown_extension() {
846        let result = SubtitleTrack::from_file("subtitle.xyz");
847        assert!(matches!(
848            result,
849            Err(SubtitleError::UnsupportedFormat { .. })
850        ));
851    }
852
853    // ── timestamp helpers ─────────────────────────────────────────────────────
854
855    #[test]
856    fn parse_srt_timestamp_should_parse_millisecond_precision() {
857        let ts = parse_srt_timestamp("01:23:45,678").unwrap();
858        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678;
859        assert_eq!(ts, Duration::from_millis(expected_ms));
860    }
861
862    #[test]
863    fn parse_srt_timestamp_should_parse_zero_timestamp() {
864        let ts = parse_srt_timestamp("00:00:00,000").unwrap();
865        assert_eq!(ts, Duration::from_millis(0));
866    }
867
868    #[test]
869    fn parse_ass_timestamp_should_parse_centisecond_precision() {
870        let ts = parse_ass_timestamp("1:23:45.67").unwrap();
871        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670;
872        assert_eq!(ts, Duration::from_millis(expected_ms));
873    }
874
875    #[test]
876    fn parse_vtt_timestamp_should_accept_mm_ss_format() {
877        let ts = parse_vtt_timestamp("05:30.500").unwrap();
878        assert_eq!(ts, Duration::from_millis(5 * 60_000 + 30 * 1_000 + 500));
879    }
880
881    #[test]
882    fn parse_vtt_timestamp_should_accept_hh_mm_ss_format() {
883        let ts = parse_vtt_timestamp("01:02:03.456").unwrap();
884        let expected_ms = 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456;
885        assert_eq!(ts, Duration::from_millis(expected_ms));
886    }
887
888    // ── tag stripping helpers ─────────────────────────────────────────────────
889
/// `<i>`, `<b>` and `<u>` pairs must be removed, leaving only the inner text.
#[test]
fn strip_html_tags_should_remove_italic_bold_underline() {
    let cases = [
        ("<i>italic</i>", "italic"),
        ("<b>bold</b>", "bold"),
        ("<u>under</u>", "under"),
    ];
    for (tagged, plain) in cases {
        assert_eq!(strip_html_tags(tagged), plain);
    }
}
896
/// A `<v Speaker>` voice span must be stripped, attribute included.
#[test]
fn strip_html_tags_should_remove_voice_span() {
    let stripped = strip_html_tags("<v Speaker>text</v>");
    assert_eq!(stripped, "text");
}
901
/// `{...}` override blocks must disappear whether they lead the text or sit
/// in the middle of it.
#[test]
fn strip_ass_tags_should_remove_curly_brace_overrides() {
    let cases = [
        ("{\\an8}text", "text"),
        ("before{\\pos(100,200)}after", "beforeafter"),
    ];
    for (tagged, plain) in cases {
        assert_eq!(strip_ass_tags(tagged), plain);
    }
}
907
/// Both the `\N` and `\n` escape sequences must turn into a real newline.
#[test]
fn strip_ass_tags_should_convert_soft_line_breaks() {
    for escaped in ["line1\\Nline2", "line1\\nline2"] {
        assert_eq!(strip_ass_tags(escaped), "line1\nline2");
    }
}
913
914    // ── timestamp serialisation helpers ───────────────────────────────────────
915
/// SRT output must be zero-padded `HH:MM:SS,mmm` with a comma before the millis.
#[test]
fn duration_to_srt_timestamp_should_format_correctly() {
    // 1 h + 23 min + 45 s + 678 ms.
    let total_ms = 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678;
    let formatted = duration_to_srt_timestamp(Duration::from_millis(total_ms));
    assert_eq!(formatted, "01:23:45,678");
}
921
/// ASS output must be `H:MM:SS.cc`: a single-digit hour and two fractional digits.
#[test]
fn duration_to_ass_timestamp_should_use_centiseconds() {
    // 1 h + 23 min + 45 s + 670 ms.
    let total_ms = 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670;
    let formatted = duration_to_ass_timestamp(Duration::from_millis(total_ms));
    assert_eq!(formatted, "1:23:45.67");
}
927
/// WebVTT output must be zero-padded `HH:MM:SS.mmm` with a dot before the millis.
#[test]
fn duration_to_vtt_timestamp_should_format_correctly() {
    // 1 h + 2 min + 3 s + 456 ms.
    let total_ms = 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456;
    let formatted = duration_to_vtt_timestamp(Duration::from_millis(total_ms));
    assert_eq!(formatted, "01:02:03.456");
}
933
934    // ── to_srt ────────────────────────────────────────────────────────────────
935
/// Written cue numbers start at 1 even though the in-memory events are
/// numbered from 0.
#[test]
fn to_srt_should_produce_1_based_sequential_indices() {
    let first = make_event(0, 1_000, 4_000, "First");
    let second = make_event(1, 5_000, 7_000, "Second");
    let track = SubtitleTrack {
        events: vec![first, second],
        language: None,
    };

    let rendered = track.to_srt();

    // Line 0 is expected to hold the first cue number and line 4 the second.
    assert_eq!(rendered.lines().next(), Some("1"));
    assert_eq!(rendered.lines().nth(4), Some("2"));
}
950
/// The SRT timing line must use a comma between seconds and milliseconds.
#[test]
fn to_srt_should_use_comma_separated_timestamps() {
    let only_event = make_event(0, 1_000, 4_000, "Hello");
    let track = SubtitleTrack {
        events: vec![only_event],
        language: None,
    };

    let rendered = track.to_srt();
    let has_timing_line = rendered.contains("00:00:01,000 --> 00:00:04,000");

    assert!(has_timing_line);
}
960
/// A cue with empty text must still be written and must come back when the
/// output is parsed again.
#[test]
fn to_srt_should_write_empty_text_event_preserving_index_sequence() {
    // `make_event` with "" yields empty `text`/`raw` and empty metadata.
    let blank = make_event(1, 5_000, 7_000, "");
    let track = SubtitleTrack {
        events: vec![make_event(0, 1_000, 4_000, "First"), blank],
        language: None,
    };

    let rendered = track.to_srt();
    let reparsed = SubtitleTrack::from_srt(&rendered).unwrap();

    // Both cues must survive the round-trip, with the empty one in second place.
    assert_eq!(reparsed.events.len(), 2);
    assert_eq!(reparsed.events[1].start, Duration::from_millis(5_000));
}
981
/// Parsing SRT, writing it, and parsing again must give the same timings and text.
#[test]
fn srt_round_trip_should_preserve_start_end_and_text() {
    let srt_in = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n\n2\n00:00:05,500 --> 00:00:07,250\nSecond\n\n";

    let track = SubtitleTrack::from_srt(srt_in).unwrap();
    let reparsed = SubtitleTrack::from_srt(&track.to_srt()).unwrap();

    assert_eq!(reparsed.events.len(), track.events.len());
    let pairs = track.events.iter().zip(reparsed.events.iter());
    for (before, after) in pairs {
        assert_eq!(before.start, after.start);
        assert_eq!(before.end, after.end);
        assert_eq!(before.text, after.text);
    }
}
995
996    // ── to_ass ────────────────────────────────────────────────────────────────
997
/// The ASS output must contain the three section headers, the events format
/// line prefix, and a `Dialogue:` entry.
#[test]
fn to_ass_should_contain_required_sections() {
    let only_event = make_event(0, 1_000, 4_000, "Hello");
    let track = SubtitleTrack {
        events: vec![only_event],
        language: None,
    };

    let rendered = track.to_ass();

    let required = [
        "[Script Info]",
        "[V4+ Styles]",
        "[Events]",
        "Format: Layer, Start, End,",
        "Dialogue:",
    ];
    for marker in required {
        assert!(rendered.contains(marker));
    }
}
1011
/// ASS start/end fields must be written as `H:MM:SS.cc`, separated by a comma.
#[test]
fn to_ass_should_use_centisecond_timestamps() {
    let only_event = make_event(0, 1_000, 4_000, "Hello");
    let track = SubtitleTrack {
        events: vec![only_event],
        language: None,
    };

    let rendered = track.to_ass();
    let has_timing_fields = rendered.contains("0:00:01.00,0:00:04.00");

    assert!(has_timing_fields);
}
1021
/// Parsing the ASS sample, writing it, and parsing again must give the same
/// timings and text.
#[test]
fn ass_round_trip_should_preserve_start_end_and_text() {
    let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
    let reparsed = SubtitleTrack::from_ass(&track.to_ass()).unwrap();

    assert_eq!(reparsed.events.len(), track.events.len());
    let pairs = track.events.iter().zip(reparsed.events.iter());
    for (before, after) in pairs {
        assert_eq!(before.start, after.start, "start mismatch");
        assert_eq!(before.end, after.end, "end mismatch");
        assert_eq!(before.text, after.text, "text mismatch");
    }
}
1034
1035    // ── to_vtt ────────────────────────────────────────────────────────────────
1036
/// WebVTT output must open with the `WEBVTT` signature line.
#[test]
fn to_vtt_should_start_with_webvtt_header() {
    let only_event = make_event(0, 1_000, 4_000, "Hello");
    let track = SubtitleTrack {
        events: vec![only_event],
        language: None,
    };

    let rendered = track.to_vtt();
    let has_header = rendered.starts_with("WEBVTT\n");

    assert!(has_header);
}
1046
/// The WebVTT timing line must use a dot between seconds and milliseconds.
#[test]
fn to_vtt_should_use_dot_separated_timestamps() {
    let only_event = make_event(0, 1_000, 4_000, "Hello");
    let track = SubtitleTrack {
        events: vec![only_event],
        language: None,
    };

    let rendered = track.to_vtt();
    let has_timing_line = rendered.contains("00:00:01.000 --> 00:00:04.000");

    assert!(has_timing_line);
}
1056
/// Parsing the WebVTT sample, writing it, and parsing again must give the
/// same timings and text.
#[test]
fn vtt_round_trip_should_preserve_start_end_and_text() {
    let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
    let reparsed = SubtitleTrack::from_vtt(&track.to_vtt()).unwrap();

    assert_eq!(reparsed.events.len(), track.events.len());
    let pairs = track.events.iter().zip(reparsed.events.iter());
    for (before, after) in pairs {
        assert_eq!(before.start, after.start, "start mismatch");
        assert_eq!(before.end, after.end, "end mismatch");
        assert_eq!(before.text, after.text, "text mismatch");
    }
}
1069
1070    // ── write_to_file ─────────────────────────────────────────────────────────
1071
/// Writing to a path with the unrecognised `.xyz` extension must be rejected
/// with `SubtitleError::UnsupportedFormat`.
#[test]
fn write_to_file_should_return_unsupported_for_unknown_extension() {
    let only_event = make_event(0, 1_000, 4_000, "Hello");
    let track = SubtitleTrack {
        events: vec![only_event],
        language: None,
    };

    let outcome = track.write_to_file("output.xyz");
    let rejected = matches!(outcome, Err(SubtitleError::UnsupportedFormat { .. }));

    assert!(rejected);
}
1084
1085    // ── helpers ───────────────────────────────────────────────────────────────
1086
1087    fn make_event(index: usize, start_ms: u64, end_ms: u64, text: &str) -> SubtitleEvent {
1088        SubtitleEvent {
1089            index,
1090            start: Duration::from_millis(start_ms),
1091            end: Duration::from_millis(end_ms),
1092            text: text.to_string(),
1093            raw: text.to_string(),
1094            metadata: HashMap::new(),
1095        }
1096    }
1097}