1use std::collections::HashMap;
19use std::path::Path;
20use std::time::Duration;
21
22use thiserror::Error;
23
/// Errors produced while reading, parsing, or writing subtitle tracks.
#[derive(Debug, Error)]
pub enum SubtitleError {
    /// An underlying I/O operation failed; converted from
    /// [`std::io::Error`] through the `#[from]` attribute.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),

    /// The file extension does not map to a format handled by this module.
    #[error("unsupported subtitle format: {extension}")]
    UnsupportedFormat {
        /// The rejected extension. The callers in this file pass it
        /// lowercased, and it is empty when the path has no extension.
        extension: String,
    },

    /// The input was structurally invalid at a specific line.
    #[error("parse error at line {line}: {reason}")]
    ParseError {
        /// Line number reported for the failure (the WebVTT header check
        /// reports `1` for the first line).
        line: usize,
        /// Human-readable description of what was wrong.
        reason: String,
    },

    /// Parsing completed without producing a single usable event.
    #[error("no valid subtitle events found")]
    NoEvents,
}
51
/// A single timed subtitle cue.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubtitleEvent {
    /// Position of the event within its track. The parsers in this file
    /// assign it from the number of events accepted so far, so it is
    /// zero-based.
    pub index: usize,
    /// Time at which the cue becomes visible.
    pub start: Duration,
    /// Time at which the cue disappears.
    pub end: Duration,
    /// Cue text with markup removed (HTML-style tags for SRT/WebVTT,
    /// `{...}` override blocks for ASS).
    pub text: String,
    /// Cue text as read from the source, markup included. The serializers
    /// in this file write this field, not `text`.
    pub raw: String,
    /// Extra per-event fields. The ASS parser stores the non-empty `Style`,
    /// `Name`, `Actor`, `Layer` and `Effect` columns here; the SRT and
    /// WebVTT parsers leave it empty.
    pub metadata: HashMap<String, String>,
}
68
/// An ordered collection of subtitle events belonging to one track.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubtitleTrack {
    /// Events in the order they were parsed or supplied.
    pub events: Vec<SubtitleEvent>,
    /// Optional language tag for the track. The parsers in this file always
    /// set it to `None`.
    pub language: Option<String>,
}
77
78impl SubtitleTrack {
79 pub fn from_srt(input: &str) -> Result<Self, SubtitleError> {
88 parse_srt(input)
89 }
90
91 pub fn from_ass(input: &str) -> Result<Self, SubtitleError> {
101 parse_ass(input)
102 }
103
104 pub fn from_vtt(input: &str) -> Result<Self, SubtitleError> {
115 parse_vtt(input)
116 }
117
118 #[must_use]
127 pub fn to_srt(&self) -> String {
128 use std::fmt::Write as _;
129 let mut out = String::new();
130 for (seq, ev) in self.events.iter().enumerate() {
131 let _ = writeln!(out, "{}", seq + 1);
132 let _ = writeln!(
133 out,
134 "{} --> {}",
135 duration_to_srt_timestamp(ev.start),
136 duration_to_srt_timestamp(ev.end),
137 );
138 out.push_str(&ev.raw);
139 out.push('\n');
140 out.push('\n');
141 }
142 out
143 }
144
145 #[must_use]
155 pub fn to_ass(&self) -> String {
156 use std::fmt::Write as _;
157 let mut out = String::new();
158 out.push_str("[Script Info]\n");
159 out.push_str("ScriptType: v4.00+\n");
160 out.push_str("PlayResX: 384\n");
161 out.push_str("PlayResY: 288\n");
162 out.push('\n');
163 out.push_str("[V4+ Styles]\n");
164 out.push_str(
165 "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, \
166 OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, \
167 ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, \
168 Alignment, MarginL, MarginR, MarginV, Encoding\n",
169 );
170 out.push_str(
171 "Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,\
172 &H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1\n",
173 );
174 out.push('\n');
175 out.push_str("[Events]\n");
176 out.push_str(
177 "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
178 );
179 for ev in &self.events {
180 let style = ev.metadata.get("Style").map_or("Default", String::as_str);
181 let name = ev.metadata.get("Name").map_or("", String::as_str);
182 let _ = writeln!(
183 out,
184 "Dialogue: 0,{},{},{},{},0,0,0,,{}",
185 duration_to_ass_timestamp(ev.start),
186 duration_to_ass_timestamp(ev.end),
187 style,
188 name,
189 ev.raw,
190 );
191 }
192 out
193 }
194
195 #[must_use]
203 pub fn to_vtt(&self) -> String {
204 use std::fmt::Write as _;
205 let mut out = String::from("WEBVTT\n");
206 for ev in &self.events {
207 out.push('\n');
208 let _ = writeln!(
209 out,
210 "{} --> {}",
211 duration_to_vtt_timestamp(ev.start),
212 duration_to_vtt_timestamp(ev.end),
213 );
214 out.push_str(&ev.raw);
215 out.push('\n');
216 }
217 out
218 }
219
220 pub fn write_to_file(&self, path: impl AsRef<Path>) -> Result<(), SubtitleError> {
229 let path = path.as_ref();
230 let ext = path
231 .extension()
232 .and_then(|e| e.to_str())
233 .unwrap_or("")
234 .to_ascii_lowercase();
235
236 let content = match ext.as_str() {
237 "srt" => self.to_srt(),
238 "ass" | "ssa" => self.to_ass(),
239 "vtt" => self.to_vtt(),
240 _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
241 };
242
243 std::fs::write(path, content)?;
244 Ok(())
245 }
246
247 pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SubtitleError> {
257 let path = path.as_ref();
258 let ext = path
259 .extension()
260 .and_then(|e| e.to_str())
261 .unwrap_or("")
262 .to_ascii_lowercase();
263
264 match ext.as_str() {
266 "srt" | "ass" | "ssa" | "vtt" => {}
267 _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
268 }
269
270 let content = std::fs::read_to_string(path)?;
271
272 match ext.as_str() {
273 "srt" => parse_srt(&content),
274 "ass" | "ssa" => parse_ass(&content),
275 "vtt" => parse_vtt(&content),
276 _ => unreachable!("extension validated above"),
277 }
278 }
279}
280
281fn parse_srt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
284 let mut events: Vec<SubtitleEvent> = Vec::new();
285 let mut current_block: Vec<String> = Vec::new();
286
287 for line in input.lines() {
288 let trimmed = line.trim();
289 if trimmed.is_empty() {
290 if !current_block.is_empty() {
291 if let Some(ev) = parse_srt_block(¤t_block, events.len()) {
292 events.push(ev);
293 }
294 current_block.clear();
295 }
296 } else {
297 current_block.push(trimmed.to_string());
298 }
299 }
300
301 if !current_block.is_empty()
303 && let Some(ev) = parse_srt_block(¤t_block, events.len())
304 {
305 events.push(ev);
306 }
307
308 if events.is_empty() {
309 return Err(SubtitleError::NoEvents);
310 }
311
312 Ok(SubtitleTrack {
313 events,
314 language: None,
315 })
316}
317
318fn parse_srt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
319 if block.len() < 2 {
323 log::warn!(
324 "srt block has too few lines, skipping count={}",
325 block.len()
326 );
327 return None;
328 }
329
330 if block[0].parse::<usize>().is_err() {
332 log::warn!(
333 "srt block index is not a number, skipping value={}",
334 block[0]
335 );
336 return None;
337 }
338
339 let Some((start, end)) = parse_srt_timestamp_line(&block[1]) else {
340 log::warn!("srt malformed timestamp line, skipping line={}", block[1]);
341 return None;
342 };
343
344 let raw = block[2..].join("\n");
345 let text = strip_html_tags(&raw);
346
347 Some(SubtitleEvent {
348 index,
349 start,
350 end,
351 text,
352 raw,
353 metadata: HashMap::new(),
354 })
355}
356
357fn parse_srt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
358 let mut parts = line.splitn(2, " --> ");
359 let start = parse_srt_timestamp(parts.next()?.trim())?;
360 let end = parse_srt_timestamp(parts.next()?.trim())?;
361 Some((start, end))
362}
363
/// Parses an SRT timestamp (`HH:MM:SS,mmm`) into a [`Duration`].
///
/// Either `,` or `.` is accepted as the fraction separator, and a missing
/// fraction counts as zero. The fraction is read as an integer number of
/// milliseconds. Exactly three colon-separated fields are required.
///
/// Returns `None` when a field is not an unsigned integer, when the field
/// count is wrong, or when the total does not fit in `u64` milliseconds
/// (the input is untrusted, so the arithmetic is checked instead of being
/// allowed to overflow).
fn parse_srt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, ms_str) = s
        .split_once(|c: char| matches!(c, ',' | '.'))
        .unwrap_or((s, "0"));
    let ms: u64 = ms_str.parse().ok()?;

    let mut fields = hms_str.split(':').map(|part| part.parse::<u64>().ok());
    let hours = fields.next()??;
    let minutes = fields.next()??;
    let seconds = fields.next()??;
    if fields.next().is_some() {
        return None;
    }

    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
382
383fn parse_ass(input: &str) -> Result<SubtitleTrack, SubtitleError> {
386 let mut events: Vec<SubtitleEvent> = Vec::new();
387 let mut in_events = false;
388 let mut format_cols: Vec<String> = Vec::new();
389
390 for (line_no, line) in input.lines().enumerate() {
391 let line = line.trim();
392
393 if line.eq_ignore_ascii_case("[Events]") {
394 in_events = true;
395 continue;
396 }
397
398 if line.starts_with('[') && in_events {
400 break;
401 }
402
403 if !in_events {
404 continue;
405 }
406
407 if let Some(rest) = line.strip_prefix("Format:") {
408 format_cols = rest.split(',').map(|c| c.trim().to_string()).collect();
409 continue;
410 }
411
412 let Some(rest) = line.strip_prefix("Dialogue:") else {
413 continue;
414 };
415
416 if format_cols.is_empty() {
417 log::warn!(
418 "ass dialogue line found before Format line at line={}",
419 line_no + 1
420 );
421 continue;
422 }
423
424 let num_cols = format_cols.len();
425 let parts: Vec<&str> = rest.splitn(num_cols, ',').collect();
426 if parts.len() < num_cols {
427 log::warn!(
428 "ass dialogue has fewer fields than format at line={}",
429 line_no + 1
430 );
431 continue;
432 }
433
434 let col_map: HashMap<&str, &str> = format_cols
435 .iter()
436 .zip(parts.iter())
437 .map(|(k, v)| (k.as_str(), v.trim()))
438 .collect();
439
440 let Some(start) = col_map.get("Start").and_then(|s| parse_ass_timestamp(s)) else {
441 log::warn!("ass malformed start timestamp at line={}", line_no + 1);
442 continue;
443 };
444
445 let Some(end) = col_map.get("End").and_then(|s| parse_ass_timestamp(s)) else {
446 log::warn!("ass malformed end timestamp at line={}", line_no + 1);
447 continue;
448 };
449
450 let raw = col_map.get("Text").copied().unwrap_or("").to_string();
451 let text = strip_ass_tags(&raw);
452
453 let mut metadata = HashMap::new();
454 for key in &["Style", "Name", "Actor", "Layer", "Effect"] {
455 if let Some(val) = col_map.get(key)
456 && !val.is_empty()
457 {
458 metadata.insert((*key).to_string(), (*val).to_string());
459 }
460 }
461
462 events.push(SubtitleEvent {
463 index: events.len(),
464 start,
465 end,
466 text,
467 raw,
468 metadata,
469 });
470 }
471
472 if events.is_empty() {
473 return Err(SubtitleError::NoEvents);
474 }
475
476 Ok(SubtitleTrack {
477 events,
478 language: None,
479 })
480}
481
/// Parses an ASS timestamp (`H:MM:SS.cc`) into a [`Duration`].
///
/// The part after `.` is read as an integer number of centiseconds, and a
/// missing fraction counts as zero. Exactly three colon-separated fields are
/// required.
///
/// Returns `None` when a field is not an unsigned integer, when the field
/// count is wrong, or when the total does not fit in `u64` milliseconds
/// (the input is untrusted, so the arithmetic is checked instead of being
/// allowed to overflow).
fn parse_ass_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, cs_str) = s.split_once('.').unwrap_or((s, "0"));
    let cs: u64 = cs_str.parse().ok()?;

    let mut fields = hms_str.split(':').map(|part| part.parse::<u64>().ok());
    let hours = fields.next()??;
    let minutes = fields.next()??;
    let seconds = fields.next()??;
    if fields.next().is_some() {
        return None;
    }

    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(cs.checked_mul(10)?)?;
    Some(Duration::from_millis(total_ms))
}
499
500fn parse_vtt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
503 let mut lines_iter = input.lines();
504
505 match lines_iter.next() {
507 Some(first) if first.trim_start_matches('\u{FEFF}').starts_with("WEBVTT") => {}
508 _ => {
509 return Err(SubtitleError::ParseError {
510 line: 1,
511 reason: "WebVTT file must begin with WEBVTT".to_string(),
512 });
513 }
514 }
515
516 let mut events: Vec<SubtitleEvent> = Vec::new();
517 let mut current_block: Vec<String> = Vec::new();
518
519 for line in lines_iter {
520 let trimmed = line.trim();
521 if trimmed.is_empty() {
522 if !current_block.is_empty() {
523 if let Some(ev) = parse_vtt_block(¤t_block, events.len()) {
524 events.push(ev);
525 }
526 current_block.clear();
527 }
528 } else {
529 current_block.push(trimmed.to_string());
530 }
531 }
532
533 if !current_block.is_empty()
535 && let Some(ev) = parse_vtt_block(¤t_block, events.len())
536 {
537 events.push(ev);
538 }
539
540 if events.is_empty() {
541 return Err(SubtitleError::NoEvents);
542 }
543
544 Ok(SubtitleTrack {
545 events,
546 language: None,
547 })
548}
549
550fn parse_vtt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
551 let first = block[0].as_str();
553 if first.starts_with("NOTE") || first.starts_with("STYLE") || first.starts_with("REGION") {
554 return None;
555 }
556
557 let Some(ts_idx) = block.iter().position(|l| l.contains("-->")) else {
559 log::warn!("vtt block has no timestamp line, skipping block_start={first}");
560 return None;
561 };
562
563 let Some((start, end)) = parse_vtt_timestamp_line(&block[ts_idx]) else {
564 log::warn!(
565 "vtt malformed timestamp line, skipping line={}",
566 block[ts_idx]
567 );
568 return None;
569 };
570
571 if ts_idx + 1 >= block.len() {
572 log::warn!("vtt cue has no text start={start:?}");
573 return None;
574 }
575
576 let raw = block[ts_idx + 1..].join("\n");
577 let text = strip_html_tags(&raw);
578
579 Some(SubtitleEvent {
580 index,
581 start,
582 end,
583 text,
584 raw,
585 metadata: HashMap::new(),
586 })
587}
588
589fn parse_vtt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
590 let mut parts = line.splitn(2, " --> ");
591 let start = parse_vtt_timestamp(parts.next()?.trim())?;
592 let end_part = parts.next()?.trim();
594 let end_str = end_part.split_whitespace().next().unwrap_or("");
595 let end = parse_vtt_timestamp(end_str)?;
596 Some((start, end))
597}
598
/// Parses a WebVTT timestamp (`MM:SS.mmm` or `HH:MM:SS.mmm`) into a
/// [`Duration`].
///
/// The fraction is interpreted as decimal digits of a second: it is
/// right-padded with zeros to three digits and anything beyond the third
/// digit is discarded (`.5` is 500 ms, `.1234` is 123 ms). A missing or
/// empty fraction counts as zero.
///
/// Returns `None` when the fraction contains anything other than ASCII
/// digits, when a time field is not an unsigned integer, when there are not
/// two or three fields, or when the total does not fit in `u64`
/// milliseconds. The digit check also prevents slicing a multi-byte
/// character in the middle, which would panic.
fn parse_vtt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, frac) = s.split_once('.').unwrap_or((s, "0"));

    if !frac.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    // Take the first three digits, padding on the right with zeros.
    let ms = frac
        .bytes()
        .chain(std::iter::repeat(b'0'))
        .take(3)
        .fold(0_u64, |acc, digit| acc * 10 + u64::from(digit - b'0'));

    let fields: Vec<u64> = hms_str
        .split(':')
        .map(|part| part.parse().ok())
        .collect::<Option<Vec<_>>>()?;
    let (hours, minutes, seconds) = match fields.as_slice() {
        [m, sec] => (0, *m, *sec),
        [h, m, sec] => (*h, *m, *sec),
        _ => return None,
    };

    let total_ms = hours
        .checked_mul(3_600_000)?
        .checked_add(minutes.checked_mul(60_000)?)?
        .checked_add(seconds.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
619
/// Formats a duration as an SRT timestamp, `HH:MM:SS,mmm`.
///
/// Hours are zero-padded to at least two digits and are not capped.
// `as_millis` returns `u128`; the cast only truncates for durations longer
// than `u64::MAX` milliseconds.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_srt_timestamp(d: Duration) -> String {
    let whole_ms = d.as_millis() as u64;
    let (whole_secs, millis) = (whole_ms / 1_000, whole_ms % 1_000);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours:02}:{mins:02}:{secs:02},{millis:03}")
}
633
/// Formats a duration as an ASS timestamp, `H:MM:SS.cc`.
///
/// The sub-second part is expressed in centiseconds and truncated, not
/// rounded. Hours are written without padding.
// `as_millis` returns `u128`; the cast only truncates for durations longer
// than `u64::MAX` milliseconds.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_ass_timestamp(d: Duration) -> String {
    let whole_ms = d.as_millis() as u64;
    let centis = (whole_ms % 1_000) / 10;
    let whole_secs = whole_ms / 1_000;
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours}:{mins:02}:{secs:02}.{centis:02}")
}
645
/// Formats a duration as a WebVTT timestamp, `HH:MM:SS.mmm`.
///
/// Hours are always written, zero-padded to at least two digits.
// `as_millis` returns `u128`; the cast only truncates for durations longer
// than `u64::MAX` milliseconds.
#[allow(clippy::cast_possible_truncation)]
fn duration_to_vtt_timestamp(d: Duration) -> String {
    let whole_ms = d.as_millis() as u64;
    let (whole_secs, millis) = (whole_ms / 1_000, whole_ms % 1_000);
    let (whole_mins, secs) = (whole_secs / 60, whole_secs % 60);
    let (hours, mins) = (whole_mins / 60, whole_mins % 60);
    format!("{hours:02}:{mins:02}:{secs:02}.{millis:03}")
}
657
/// Removes `<...>` tags from `s`, keeping the text between them.
///
/// A tag is everything from a `<` up to and including the next `>`.
/// Characters that only look like markup are preserved: a `>` with no
/// preceding `<` is kept as-is (for example the `>>` speaker marker used in
/// captions), and a `<` that is never closed is kept together with the text
/// that follows it instead of swallowing the rest of the cue.
fn strip_html_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find('<') {
        result.push_str(&rest[..open]);
        match rest[open..].find('>') {
            // Skip the tag, including its closing `>` (one byte).
            Some(close) => rest = &rest[open + close + 1..],
            None => {
                // Unterminated `<`: not a tag, so keep it verbatim.
                result.push_str(&rest[open..]);
                return result;
            }
        }
    }

    result.push_str(rest);
    result
}
674
/// Produces the plain text of an ASS dialogue field.
///
/// Everything between `{` and `}` (override blocks) is dropped, together
/// with the braces themselves. Outside such a block the escapes `\N` and
/// `\n` become a newline; any other backslash is kept literally.
fn strip_ass_tags(s: &str) -> String {
    let mut plain = String::with_capacity(s.len());
    let mut inside_override = false;
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '{' => inside_override = true,
            '}' => inside_override = false,
            // Content of an override block is discarded wholesale.
            _ if inside_override => {}
            '\\' if matches!(chars.peek().copied(), Some('N' | 'n')) => {
                // Consume the escape letter as well as the backslash.
                chars.next();
                plain.push('\n');
            }
            _ => plain.push(ch),
        }
    }

    plain
}
712
// Unit tests for the parsers, the formatters, and the round trips between
// them. `unwrap` is allowed here because a failed unwrap is a test failure.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    // ---- SRT parsing ----

    #[test]
    fn from_srt_should_parse_single_event() {
        let input = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        assert_eq!(track.events.len(), 1);
        let ev = &track.events[0];
        assert_eq!(ev.index, 0);
        assert_eq!(ev.start, Duration::from_millis(1_000));
        assert_eq!(ev.end, Duration::from_millis(4_000));
        assert_eq!(ev.text, "Hello world");
        assert_eq!(ev.raw, "Hello world");
    }

    #[test]
    fn from_srt_should_parse_multiline_text() {
        let input = "1\n00:00:01,000 --> 00:00:04,000\nLine one\nLine two\n\n2\n00:00:05,000 --> 00:00:07,000\nSecond\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        assert_eq!(track.events.len(), 2);
        assert_eq!(track.events[0].text, "Line one\nLine two");
        assert_eq!(track.events[1].text, "Second");
    }

    #[test]
    fn from_srt_should_strip_html_tags_preserving_raw() {
        let input = "1\n00:00:01,000 --> 00:00:04,000\n<i>Italic</i> and <b>bold</b>\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        let ev = &track.events[0];
        assert_eq!(ev.text, "Italic and bold");
        assert_eq!(ev.raw, "<i>Italic</i> and <b>bold</b>");
    }

    #[test]
    fn from_srt_should_skip_malformed_event_and_parse_rest() {
        // The middle block has neither a numeric index nor a valid timing line.
        let input = "1\n00:00:01,000 --> 00:00:04,000\nGood\n\nNOT_NUM\nbad ts\ntext\n\n2\n00:00:05,000 --> 00:00:07,000\nAlso good\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        assert_eq!(track.events.len(), 2);
        assert_eq!(track.events[0].text, "Good");
        assert_eq!(track.events[1].text, "Also good");
    }

    #[test]
    fn from_srt_should_return_no_events_for_empty_input() {
        let result = SubtitleTrack::from_srt("");
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    #[test]
    fn from_srt_should_return_no_events_when_all_blocks_malformed() {
        let result = SubtitleTrack::from_srt("NOT_NUM\n00:00:01,000 --> 00:00:04,000\ntext\n");
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    // ---- ASS parsing ----

    // Minimal ASS script: one ignored section followed by two dialogue lines,
    // the first of which carries inline override tags.
    const ASS_SAMPLE: &str = "\
[Script Info]
Title: Test

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Hello {\\i1}world{\\i0}
Dialogue: 0,0:00:05.00,0:00:07.00,Default,,0,0,0,,Second line
";

    #[test]
    fn from_ass_should_parse_dialogue_events() {
        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
        assert_eq!(track.events.len(), 2);
        let ev = &track.events[0];
        assert_eq!(ev.start, Duration::from_millis(1_000));
        assert_eq!(ev.end, Duration::from_millis(4_000));
        assert!(ev.raw.contains("{\\i1}"));
        assert!(!ev.text.contains('{'));
    }

    #[test]
    fn from_ass_should_strip_override_tags_preserving_raw() {
        // The override tag contains a comma, which must stay inside the Text column.
        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\pos(100,200)}Hello\n";
        let track = SubtitleTrack::from_ass(input).unwrap();
        let ev = &track.events[0];
        assert_eq!(ev.text, "Hello");
        assert!(ev.raw.contains("{\\pos"));
    }

    #[test]
    fn from_ass_should_populate_metadata_fields() {
        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Signs,Actor1,0,0,0,,text\n";
        let track = SubtitleTrack::from_ass(input).unwrap();
        let ev = &track.events[0];
        assert_eq!(ev.metadata.get("Style"), Some(&"Signs".to_string()));
        assert_eq!(ev.metadata.get("Name"), Some(&"Actor1".to_string()));
    }

    #[test]
    fn from_ass_should_return_no_events_for_empty_events_section() {
        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
        let result = SubtitleTrack::from_ass(input);
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    // ---- WebVTT parsing ----

    // Two cues: the first has an identifier line, the second has cue settings
    // after the end timestamp and a voice tag in its text.
    const VTT_SAMPLE: &str = "\
WEBVTT

1
00:00:01.000 --> 00:00:04.000
Hello world

00:00:05.000 --> 00:00:07.000 align:center
<v Speaker>Voice tagged text</v>
";

    #[test]
    fn from_vtt_should_parse_cues_with_and_without_identifiers() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        assert_eq!(track.events.len(), 2);
        let ev = &track.events[0];
        assert_eq!(ev.start, Duration::from_millis(1_000));
        assert_eq!(ev.end, Duration::from_millis(4_000));
        assert_eq!(ev.text, "Hello world");
    }

    #[test]
    fn from_vtt_should_strip_voice_tags_preserving_raw() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        let ev = &track.events[1];
        assert_eq!(ev.text, "Voice tagged text");
        assert_eq!(ev.raw, "<v Speaker>Voice tagged text</v>");
    }

    #[test]
    fn from_vtt_should_ignore_cue_settings_in_timestamp_line() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        // `align:center` follows the end timestamp and must not affect it.
        assert_eq!(track.events[1].end, Duration::from_millis(7_000));
    }

    #[test]
    fn from_vtt_should_return_parse_error_for_missing_header() {
        let result = SubtitleTrack::from_vtt("not a vtt file\ncontent");
        assert!(matches!(result, Err(SubtitleError::ParseError { .. })));
    }

    #[test]
    fn from_vtt_should_return_no_events_for_empty_content() {
        let result = SubtitleTrack::from_vtt("WEBVTT\n\n");
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    // ---- File loading ----

    #[test]
    fn from_file_should_return_unsupported_for_unknown_extension() {
        // The extension is rejected before any file access, so the path need not exist.
        let result = SubtitleTrack::from_file("subtitle.xyz");
        assert!(matches!(
            result,
            Err(SubtitleError::UnsupportedFormat { .. })
        ));
    }

    // ---- Timestamp parsing ----

    #[test]
    fn parse_srt_timestamp_should_parse_millisecond_precision() {
        let ts = parse_srt_timestamp("01:23:45,678").unwrap();
        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678;
        assert_eq!(ts, Duration::from_millis(expected_ms));
    }

    #[test]
    fn parse_srt_timestamp_should_parse_zero_timestamp() {
        let ts = parse_srt_timestamp("00:00:00,000").unwrap();
        assert_eq!(ts, Duration::from_millis(0));
    }

    #[test]
    fn parse_ass_timestamp_should_parse_centisecond_precision() {
        let ts = parse_ass_timestamp("1:23:45.67").unwrap();
        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670;
        assert_eq!(ts, Duration::from_millis(expected_ms));
    }

    #[test]
    fn parse_vtt_timestamp_should_accept_mm_ss_format() {
        let ts = parse_vtt_timestamp("05:30.500").unwrap();
        assert_eq!(ts, Duration::from_millis(5 * 60_000 + 30 * 1_000 + 500));
    }

    #[test]
    fn parse_vtt_timestamp_should_accept_hh_mm_ss_format() {
        let ts = parse_vtt_timestamp("01:02:03.456").unwrap();
        let expected_ms = 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456;
        assert_eq!(ts, Duration::from_millis(expected_ms));
    }

    // ---- Markup stripping ----

    #[test]
    fn strip_html_tags_should_remove_italic_bold_underline() {
        assert_eq!(strip_html_tags("<i>italic</i>"), "italic");
        assert_eq!(strip_html_tags("<b>bold</b>"), "bold");
        assert_eq!(strip_html_tags("<u>under</u>"), "under");
    }

    #[test]
    fn strip_html_tags_should_remove_voice_span() {
        assert_eq!(strip_html_tags("<v Speaker>text</v>"), "text");
    }

    #[test]
    fn strip_ass_tags_should_remove_curly_brace_overrides() {
        assert_eq!(strip_ass_tags("{\\an8}text"), "text");
        assert_eq!(strip_ass_tags("before{\\pos(100,200)}after"), "beforeafter");
    }

    #[test]
    fn strip_ass_tags_should_convert_soft_line_breaks() {
        assert_eq!(strip_ass_tags("line1\\Nline2"), "line1\nline2");
        assert_eq!(strip_ass_tags("line1\\nline2"), "line1\nline2");
    }

    // ---- Timestamp formatting ----

    #[test]
    fn duration_to_srt_timestamp_should_format_correctly() {
        let d = Duration::from_millis(1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678);
        assert_eq!(duration_to_srt_timestamp(d), "01:23:45,678");
    }

    #[test]
    fn duration_to_ass_timestamp_should_use_centiseconds() {
        let d = Duration::from_millis(1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670);
        assert_eq!(duration_to_ass_timestamp(d), "1:23:45.67");
    }

    #[test]
    fn duration_to_vtt_timestamp_should_format_correctly() {
        let d = Duration::from_millis(1 * 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456);
        assert_eq!(duration_to_vtt_timestamp(d), "01:02:03.456");
    }

    // ---- SRT writing ----

    #[test]
    fn to_srt_should_produce_1_based_sequential_indices() {
        let track = SubtitleTrack {
            events: vec![
                make_event(0, 1_000, 4_000, "First"),
                make_event(1, 5_000, 7_000, "Second"),
            ],
            language: None,
        };
        let srt = track.to_srt();
        let lines: Vec<&str> = srt.lines().collect();
        // Each cue spans four lines: index, timing, text, blank separator.
        assert_eq!(lines[0], "1");
        assert_eq!(lines[4], "2");
    }

    #[test]
    fn to_srt_should_use_comma_separated_timestamps() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let srt = track.to_srt();
        assert!(srt.contains("00:00:01,000 --> 00:00:04,000"));
    }

    #[test]
    fn to_srt_should_write_empty_text_event_preserving_index_sequence() {
        let empty = SubtitleEvent {
            index: 1,
            start: Duration::from_millis(5_000),
            end: Duration::from_millis(7_000),
            text: String::new(),
            raw: String::new(),
            metadata: HashMap::new(),
        };
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "First"), empty],
            language: None,
        };
        let srt = track.to_srt();
        let reparsed = SubtitleTrack::from_srt(&srt).unwrap();
        assert_eq!(reparsed.events.len(), 2);
        // The text-less cue must survive the round trip with its timing intact.
        assert_eq!(reparsed.events[1].start, Duration::from_millis(5_000));
    }

    #[test]
    fn srt_round_trip_should_preserve_start_end_and_text() {
        let srt_in = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n\n2\n00:00:05,500 --> 00:00:07,250\nSecond\n\n";
        let track = SubtitleTrack::from_srt(srt_in).unwrap();
        let written = track.to_srt();
        let reparsed = SubtitleTrack::from_srt(&written).unwrap();
        assert_eq!(reparsed.events.len(), track.events.len());
        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
            assert_eq!(a.start, b.start);
            assert_eq!(a.end, b.end);
            assert_eq!(a.text, b.text);
        }
    }

    // ---- ASS writing ----

    #[test]
    fn to_ass_should_contain_required_sections() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let ass = track.to_ass();
        assert!(ass.contains("[Script Info]"));
        assert!(ass.contains("[V4+ Styles]"));
        assert!(ass.contains("[Events]"));
        assert!(ass.contains("Format: Layer, Start, End,"));
        assert!(ass.contains("Dialogue:"));
    }

    #[test]
    fn to_ass_should_use_centisecond_timestamps() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let ass = track.to_ass();
        assert!(ass.contains("0:00:01.00,0:00:04.00"));
    }

    #[test]
    fn ass_round_trip_should_preserve_start_end_and_text() {
        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
        let written = track.to_ass();
        let reparsed = SubtitleTrack::from_ass(&written).unwrap();
        assert_eq!(reparsed.events.len(), track.events.len());
        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
            assert_eq!(a.start, b.start, "start mismatch");
            assert_eq!(a.end, b.end, "end mismatch");
            assert_eq!(a.text, b.text, "text mismatch");
        }
    }

    // ---- WebVTT writing ----

    #[test]
    fn to_vtt_should_start_with_webvtt_header() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let vtt = track.to_vtt();
        assert!(vtt.starts_with("WEBVTT\n"));
    }

    #[test]
    fn to_vtt_should_use_dot_separated_timestamps() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let vtt = track.to_vtt();
        assert!(vtt.contains("00:00:01.000 --> 00:00:04.000"));
    }

    #[test]
    fn vtt_round_trip_should_preserve_start_end_and_text() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        let written = track.to_vtt();
        let reparsed = SubtitleTrack::from_vtt(&written).unwrap();
        assert_eq!(reparsed.events.len(), track.events.len());
        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
            assert_eq!(a.start, b.start, "start mismatch");
            assert_eq!(a.end, b.end, "end mismatch");
            assert_eq!(a.text, b.text, "text mismatch");
        }
    }

    // ---- File writing ----

    #[test]
    fn write_to_file_should_return_unsupported_for_unknown_extension() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        // The extension is rejected before anything is written to disk.
        let result = track.write_to_file("output.xyz");
        assert!(matches!(
            result,
            Err(SubtitleError::UnsupportedFormat { .. })
        ));
    }

    /// Builds an event whose `text` and `raw` are both `text`, with times
    /// given in milliseconds and no metadata.
    fn make_event(index: usize, start_ms: u64, end_ms: u64, text: &str) -> SubtitleEvent {
        SubtitleEvent {
            index,
            start: Duration::from_millis(start_ms),
            end: Duration::from_millis(end_ms),
            text: text.to_string(),
            raw: text.to_string(),
            metadata: HashMap::new(),
        }
    }
}