1use std::collections::HashMap;
19use std::path::Path;
20use std::time::Duration;
21
22pub use crate::error::SubtitleError;
23
/// A single timed subtitle cue.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubtitleEvent {
    /// Position assigned by the parser: the number of events already accepted
    /// when this one was parsed (zero-based). The writers do not read it.
    pub index: usize,
    /// Time at which the cue starts.
    pub start: Duration,
    /// Time at which the cue ends.
    pub end: Duration,
    /// Cue text with format-specific markup stripped (`<...>` tags for
    /// SRT/WebVTT, `{...}` override blocks for ASS).
    pub text: String,
    /// Cue text as it appeared in the source, markup included. This is the
    /// field the writers serialize.
    pub raw: String,
    /// Extra per-event fields. The ASS parser stores non-empty `Style`,
    /// `Name`, `Actor`, `Layer` and `Effect` columns here; the SRT and WebVTT
    /// parsers leave it empty.
    pub metadata: HashMap<String, String>,
}
40
/// An ordered collection of subtitle events, optionally tagged with a language.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubtitleTrack {
    /// Events in the order they were parsed (or supplied by the caller).
    pub events: Vec<SubtitleEvent>,
    /// Language of the track. The parsers in this file always leave it `None`.
    // NOTE(review): the expected tag format (e.g. BCP 47) is not established here — confirm.
    pub language: Option<String>,
}
49
50impl SubtitleTrack {
51 pub fn from_srt(input: &str) -> Result<Self, SubtitleError> {
60 parse_srt(input)
61 }
62
63 pub fn from_ass(input: &str) -> Result<Self, SubtitleError> {
73 parse_ass(input)
74 }
75
76 pub fn from_vtt(input: &str) -> Result<Self, SubtitleError> {
87 parse_vtt(input)
88 }
89
90 #[must_use]
99 pub fn to_srt(&self) -> String {
100 use std::fmt::Write as _;
101 let mut out = String::new();
102 for (seq, ev) in self.events.iter().enumerate() {
103 let _ = writeln!(out, "{}", seq + 1);
104 let _ = writeln!(
105 out,
106 "{} --> {}",
107 duration_to_srt_timestamp(ev.start),
108 duration_to_srt_timestamp(ev.end),
109 );
110 out.push_str(&ev.raw);
111 out.push('\n');
112 out.push('\n');
113 }
114 out
115 }
116
117 #[must_use]
127 pub fn to_ass(&self) -> String {
128 use std::fmt::Write as _;
129 let mut out = String::new();
130 out.push_str("[Script Info]\n");
131 out.push_str("ScriptType: v4.00+\n");
132 out.push_str("PlayResX: 384\n");
133 out.push_str("PlayResY: 288\n");
134 out.push('\n');
135 out.push_str("[V4+ Styles]\n");
136 out.push_str(
137 "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, \
138 OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, \
139 ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, \
140 Alignment, MarginL, MarginR, MarginV, Encoding\n",
141 );
142 out.push_str(
143 "Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,\
144 &H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1\n",
145 );
146 out.push('\n');
147 out.push_str("[Events]\n");
148 out.push_str(
149 "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
150 );
151 for ev in &self.events {
152 let style = ev.metadata.get("Style").map_or("Default", String::as_str);
153 let name = ev.metadata.get("Name").map_or("", String::as_str);
154 let _ = writeln!(
155 out,
156 "Dialogue: 0,{},{},{},{},0,0,0,,{}",
157 duration_to_ass_timestamp(ev.start),
158 duration_to_ass_timestamp(ev.end),
159 style,
160 name,
161 ev.raw,
162 );
163 }
164 out
165 }
166
167 #[must_use]
175 pub fn to_vtt(&self) -> String {
176 use std::fmt::Write as _;
177 let mut out = String::from("WEBVTT\n");
178 for ev in &self.events {
179 out.push('\n');
180 let _ = writeln!(
181 out,
182 "{} --> {}",
183 duration_to_vtt_timestamp(ev.start),
184 duration_to_vtt_timestamp(ev.end),
185 );
186 out.push_str(&ev.raw);
187 out.push('\n');
188 }
189 out
190 }
191
192 pub fn write_to_file(&self, path: impl AsRef<Path>) -> Result<(), SubtitleError> {
201 let path = path.as_ref();
202 let ext = path
203 .extension()
204 .and_then(|e| e.to_str())
205 .unwrap_or("")
206 .to_ascii_lowercase();
207
208 let content = match ext.as_str() {
209 "srt" => self.to_srt(),
210 "ass" | "ssa" => self.to_ass(),
211 "vtt" => self.to_vtt(),
212 _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
213 };
214
215 std::fs::write(path, content)?;
216 Ok(())
217 }
218
219 pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SubtitleError> {
229 let path = path.as_ref();
230 let ext = path
231 .extension()
232 .and_then(|e| e.to_str())
233 .unwrap_or("")
234 .to_ascii_lowercase();
235
236 match ext.as_str() {
238 "srt" | "ass" | "ssa" | "vtt" => {}
239 _ => return Err(SubtitleError::UnsupportedFormat { extension: ext }),
240 }
241
242 let content = std::fs::read_to_string(path)?;
243
244 match ext.as_str() {
245 "srt" => parse_srt(&content),
246 "ass" | "ssa" => parse_ass(&content),
247 "vtt" => parse_vtt(&content),
248 _ => unreachable!("extension validated above"),
249 }
250 }
251}
252
253fn parse_srt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
256 let mut events: Vec<SubtitleEvent> = Vec::new();
257 let mut current_block: Vec<String> = Vec::new();
258
259 for line in input.lines() {
260 let trimmed = line.trim();
261 if trimmed.is_empty() {
262 if !current_block.is_empty() {
263 if let Some(ev) = parse_srt_block(¤t_block, events.len()) {
264 events.push(ev);
265 }
266 current_block.clear();
267 }
268 } else {
269 current_block.push(trimmed.to_string());
270 }
271 }
272
273 if !current_block.is_empty()
275 && let Some(ev) = parse_srt_block(¤t_block, events.len())
276 {
277 events.push(ev);
278 }
279
280 if events.is_empty() {
281 return Err(SubtitleError::NoEvents);
282 }
283
284 Ok(SubtitleTrack {
285 events,
286 language: None,
287 })
288}
289
290fn parse_srt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
291 if block.len() < 2 {
295 log::warn!(
296 "srt block has too few lines, skipping count={}",
297 block.len()
298 );
299 return None;
300 }
301
302 if block[0].parse::<usize>().is_err() {
304 log::warn!(
305 "srt block index is not a number, skipping value={}",
306 block[0]
307 );
308 return None;
309 }
310
311 let Some((start, end)) = parse_srt_timestamp_line(&block[1]) else {
312 log::warn!("srt malformed timestamp line, skipping line={}", block[1]);
313 return None;
314 };
315
316 let raw = block[2..].join("\n");
317 let text = strip_html_tags(&raw);
318
319 Some(SubtitleEvent {
320 index,
321 start,
322 end,
323 text,
324 raw,
325 metadata: HashMap::new(),
326 })
327}
328
329fn parse_srt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
330 let mut parts = line.splitn(2, " --> ");
331 let start = parse_srt_timestamp(parts.next()?.trim())?;
332 let end = parse_srt_timestamp(parts.next()?.trim())?;
333 Some((start, end))
334}
335
/// Parses a single SubRip timestamp such as `01:23:45,678`.
///
/// A `.` is accepted in place of the `,`. When no separator is present the
/// millisecond part counts as zero; otherwise it is read as a plain unsigned
/// integer number of milliseconds.
///
/// Returns `None` when a component is not an unsigned integer, when there are
/// not exactly three `:`-separated components before the separator, or when
/// the total number of milliseconds does not fit in a `u64`.
fn parse_srt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, ms_str) = s
        .split_once(|c: char| c == ',' || c == '.')
        .unwrap_or((s, "0"));
    let ms: u64 = ms_str.parse().ok()?;
    let hms = hms_str
        .split(':')
        .map(|p| p.parse::<u64>().ok())
        .collect::<Option<Vec<_>>>()?;
    let &[h, m, sec] = hms.as_slice() else {
        return None;
    };
    // The input comes from a file, so an absurdly large field must be
    // rejected instead of overflowing.
    let total_ms = h
        .checked_mul(3_600_000)?
        .checked_add(m.checked_mul(60_000)?)?
        .checked_add(sec.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
354
355fn parse_ass(input: &str) -> Result<SubtitleTrack, SubtitleError> {
358 let mut events: Vec<SubtitleEvent> = Vec::new();
359 let mut in_events = false;
360 let mut format_cols: Vec<String> = Vec::new();
361
362 for (line_no, line) in input.lines().enumerate() {
363 let line = line.trim();
364
365 if line.eq_ignore_ascii_case("[Events]") {
366 in_events = true;
367 continue;
368 }
369
370 if line.starts_with('[') && in_events {
372 break;
373 }
374
375 if !in_events {
376 continue;
377 }
378
379 if let Some(rest) = line.strip_prefix("Format:") {
380 format_cols = rest.split(',').map(|c| c.trim().to_string()).collect();
381 continue;
382 }
383
384 let Some(rest) = line.strip_prefix("Dialogue:") else {
385 continue;
386 };
387
388 if format_cols.is_empty() {
389 log::warn!(
390 "ass dialogue line found before Format line at line={}",
391 line_no + 1
392 );
393 continue;
394 }
395
396 let num_cols = format_cols.len();
397 let parts: Vec<&str> = rest.splitn(num_cols, ',').collect();
398 if parts.len() < num_cols {
399 log::warn!(
400 "ass dialogue has fewer fields than format at line={}",
401 line_no + 1
402 );
403 continue;
404 }
405
406 let col_map: HashMap<&str, &str> = format_cols
407 .iter()
408 .zip(parts.iter())
409 .map(|(k, v)| (k.as_str(), v.trim()))
410 .collect();
411
412 let Some(start) = col_map.get("Start").and_then(|s| parse_ass_timestamp(s)) else {
413 log::warn!("ass malformed start timestamp at line={}", line_no + 1);
414 continue;
415 };
416
417 let Some(end) = col_map.get("End").and_then(|s| parse_ass_timestamp(s)) else {
418 log::warn!("ass malformed end timestamp at line={}", line_no + 1);
419 continue;
420 };
421
422 let raw = col_map.get("Text").copied().unwrap_or("").to_string();
423 let text = strip_ass_tags(&raw);
424
425 let mut metadata = HashMap::new();
426 for key in &["Style", "Name", "Actor", "Layer", "Effect"] {
427 if let Some(val) = col_map.get(key)
428 && !val.is_empty()
429 {
430 metadata.insert((*key).to_string(), (*val).to_string());
431 }
432 }
433
434 events.push(SubtitleEvent {
435 index: events.len(),
436 start,
437 end,
438 text,
439 raw,
440 metadata,
441 });
442 }
443
444 if events.is_empty() {
445 return Err(SubtitleError::NoEvents);
446 }
447
448 Ok(SubtitleTrack {
449 events,
450 language: None,
451 })
452}
453
/// Parses a single ASS timestamp such as `1:23:45.67`.
///
/// The part after the `.` is read as a plain unsigned integer number of
/// centiseconds; when there is no `.` it counts as zero.
///
/// Returns `None` when a component is not an unsigned integer, when there are
/// not exactly three `:`-separated components before the `.`, or when the
/// total number of milliseconds does not fit in a `u64`.
fn parse_ass_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, cs_str) = s.split_once('.').unwrap_or((s, "0"));
    let cs: u64 = cs_str.parse().ok()?;
    let hms = hms_str
        .split(':')
        .map(|p| p.parse::<u64>().ok())
        .collect::<Option<Vec<_>>>()?;
    let &[h, m, sec] = hms.as_slice() else {
        return None;
    };
    // The input comes from a file, so an absurdly large field must be
    // rejected instead of overflowing.
    let total_ms = h
        .checked_mul(3_600_000)?
        .checked_add(m.checked_mul(60_000)?)?
        .checked_add(sec.checked_mul(1_000)?)?
        .checked_add(cs.checked_mul(10)?)?;
    Some(Duration::from_millis(total_ms))
}
471
472fn parse_vtt(input: &str) -> Result<SubtitleTrack, SubtitleError> {
475 let mut lines_iter = input.lines();
476
477 match lines_iter.next() {
479 Some(first) if first.trim_start_matches('\u{FEFF}').starts_with("WEBVTT") => {}
480 _ => {
481 return Err(SubtitleError::ParseError {
482 line: 1,
483 reason: "WebVTT file must begin with WEBVTT".to_string(),
484 });
485 }
486 }
487
488 let mut events: Vec<SubtitleEvent> = Vec::new();
489 let mut current_block: Vec<String> = Vec::new();
490
491 for line in lines_iter {
492 let trimmed = line.trim();
493 if trimmed.is_empty() {
494 if !current_block.is_empty() {
495 if let Some(ev) = parse_vtt_block(¤t_block, events.len()) {
496 events.push(ev);
497 }
498 current_block.clear();
499 }
500 } else {
501 current_block.push(trimmed.to_string());
502 }
503 }
504
505 if !current_block.is_empty()
507 && let Some(ev) = parse_vtt_block(¤t_block, events.len())
508 {
509 events.push(ev);
510 }
511
512 if events.is_empty() {
513 return Err(SubtitleError::NoEvents);
514 }
515
516 Ok(SubtitleTrack {
517 events,
518 language: None,
519 })
520}
521
522fn parse_vtt_block(block: &[String], index: usize) -> Option<SubtitleEvent> {
523 let first = block[0].as_str();
525 if first.starts_with("NOTE") || first.starts_with("STYLE") || first.starts_with("REGION") {
526 return None;
527 }
528
529 let Some(ts_idx) = block.iter().position(|l| l.contains("-->")) else {
531 log::warn!("vtt block has no timestamp line, skipping block_start={first}");
532 return None;
533 };
534
535 let Some((start, end)) = parse_vtt_timestamp_line(&block[ts_idx]) else {
536 log::warn!(
537 "vtt malformed timestamp line, skipping line={}",
538 block[ts_idx]
539 );
540 return None;
541 };
542
543 if ts_idx + 1 >= block.len() {
544 log::warn!("vtt cue has no text start={start:?}");
545 return None;
546 }
547
548 let raw = block[ts_idx + 1..].join("\n");
549 let text = strip_html_tags(&raw);
550
551 Some(SubtitleEvent {
552 index,
553 start,
554 end,
555 text,
556 raw,
557 metadata: HashMap::new(),
558 })
559}
560
561fn parse_vtt_timestamp_line(line: &str) -> Option<(Duration, Duration)> {
562 let mut parts = line.splitn(2, " --> ");
563 let start = parse_vtt_timestamp(parts.next()?.trim())?;
564 let end_part = parts.next()?.trim();
566 let end_str = end_part.split_whitespace().next().unwrap_or("");
567 let end = parse_vtt_timestamp(end_str)?;
568 Some((start, end))
569}
570
/// Parses a single WebVTT timestamp, either `MM:SS.mmm` or `HH:MM:SS.mmm`.
///
/// The fraction after the `.` is read as a decimal fraction of a second with
/// millisecond resolution: it is right-padded with zeros to three characters
/// and anything beyond the third character is ignored. Without a `.` the
/// fraction counts as zero.
///
/// Returns `None` when a component is not an unsigned integer, when the part
/// before the `.` does not have two or three `:`-separated components, or
/// when the total number of milliseconds does not fit in a `u64`.
fn parse_vtt_timestamp(s: &str) -> Option<Duration> {
    let (hms_str, frac) = s.split_once('.').unwrap_or((s, "0"));

    // Cut after at most three *characters*. Slicing at byte offset 3 would
    // panic when malformed input puts a multi-byte character there.
    let cut = frac.char_indices().nth(3).map_or(frac.len(), |(i, _)| i);
    let ms: u64 = format!("{:0<3}", &frac[..cut]).parse().ok()?;

    let hms = hms_str
        .split(':')
        .map(|p| p.parse::<u64>().ok())
        .collect::<Option<Vec<_>>>()?;
    let (h, m, sec) = match hms.as_slice() {
        &[m, sec] => (0, m, sec),
        &[h, m, sec] => (h, m, sec),
        _ => return None,
    };

    // The input comes from a file, so an absurdly large field must be
    // rejected instead of overflowing.
    let total_ms = h
        .checked_mul(3_600_000)?
        .checked_add(m.checked_mul(60_000)?)?
        .checked_add(sec.checked_mul(1_000)?)?
        .checked_add(ms)?;
    Some(Duration::from_millis(total_ms))
}
591
/// Formats `d` as a SubRip timestamp, `HH:MM:SS,mmm`.
///
/// Hours are zero-padded to at least two digits and grow as needed.
/// Sub-millisecond precision is truncated.
fn duration_to_srt_timestamp(d: Duration) -> String {
    // Work from whole seconds plus the millisecond remainder: both are exact
    // for every `Duration`, whereas narrowing `as_millis()` to `u64` silently
    // truncates very large values.
    let secs = d.as_secs();
    let ms = d.subsec_millis();
    let h = secs / 3_600;
    let m = (secs / 60) % 60;
    let s = secs % 60;
    format!("{h:02}:{m:02}:{s:02},{ms:03}")
}
605
/// Formats `d` as an ASS timestamp, `H:MM:SS.cc`.
///
/// Hours are not zero-padded and the fraction is in centiseconds, so
/// anything finer than 10 ms is truncated.
fn duration_to_ass_timestamp(d: Duration) -> String {
    // Work from whole seconds plus the millisecond remainder: both are exact
    // for every `Duration`, whereas narrowing `as_millis()` to `u64` silently
    // truncates very large values.
    let secs = d.as_secs();
    let cs = d.subsec_millis() / 10;
    let h = secs / 3_600;
    let m = (secs / 60) % 60;
    let s = secs % 60;
    format!("{h}:{m:02}:{s:02}.{cs:02}")
}
617
/// Formats `d` as a WebVTT timestamp, `HH:MM:SS.mmm`.
///
/// The hours field is always written, zero-padded to at least two digits.
/// Sub-millisecond precision is truncated.
fn duration_to_vtt_timestamp(d: Duration) -> String {
    // Work from whole seconds plus the millisecond remainder: both are exact
    // for every `Duration`, whereas narrowing `as_millis()` to `u64` silently
    // truncates very large values.
    let secs = d.as_secs();
    let ms = d.subsec_millis();
    let h = secs / 3_600;
    let m = (secs / 60) % 60;
    let s = secs % 60;
    format!("{h:02}:{m:02}:{s:02}.{ms:03}")
}
629
/// Removes `<...>` tags from `s`, keeping everything else.
///
/// A tag is the span from a `<` to the next `>`. Characters that merely look
/// like markup are kept: a `>` with no opening `<` before it (for example the
/// `>>` speaker-change marker) and a `<` that is never closed are both copied
/// through unchanged. Character entities such as `&amp;` are not decoded.
fn strip_html_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find('<') {
        result.push_str(&rest[..open]);
        match rest[open..].find('>') {
            // Skip the whole tag, including the closing `>` (one byte).
            Some(close) => rest = &rest[open + close + 1..],
            // No closing `>`: this `<` is ordinary text, not a tag.
            None => {
                result.push_str(&rest[open..]);
                return result;
            }
        }
    }

    result.push_str(rest);
    result
}
646
/// Converts ASS dialogue text to plain text.
///
/// `{...}` override blocks are removed (braces included). Outside of such
/// blocks the escapes `\N` and `\n` become a newline and `\h` becomes a
/// non-breaking space (U+00A0); any other backslash is kept as is.
fn strip_ass_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut in_tag = false;
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '{' => in_tag = true,
            '}' => in_tag = false,
            // Everything inside an override block is dropped.
            _ if in_tag => {}
            '\\' => {
                let replacement = match chars.peek().copied() {
                    Some('N') | Some('n') => Some('\n'),
                    Some('h') => Some('\u{00A0}'),
                    _ => None,
                };
                match replacement {
                    Some(r) => {
                        result.push(r);
                        // Consume the escape letter as well.
                        chars.next();
                    }
                    // Not a known escape: keep the backslash and let the next
                    // character be handled on its own.
                    None => result.push('\\'),
                }
            }
            _ => result.push(c),
        }
    }

    result
}
684
// Unit tests for the subtitle parsers, writers and their helpers.
// `unwrap` is allowed here because a failed unwrap is itself a test failure.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    // ---- SRT parsing ----

    #[test]
    fn from_srt_should_parse_single_event() {
        let input = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        assert_eq!(track.events.len(), 1);
        let ev = &track.events[0];
        // The event index is zero-based even though the SRT sequence number is 1.
        assert_eq!(ev.index, 0);
        assert_eq!(ev.start, Duration::from_millis(1_000));
        assert_eq!(ev.end, Duration::from_millis(4_000));
        assert_eq!(ev.text, "Hello world");
        assert_eq!(ev.raw, "Hello world");
    }

    #[test]
    fn from_srt_should_parse_multiline_text() {
        let input = "1\n00:00:01,000 --> 00:00:04,000\nLine one\nLine two\n\n2\n00:00:05,000 --> 00:00:07,000\nSecond\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        assert_eq!(track.events.len(), 2);
        assert_eq!(track.events[0].text, "Line one\nLine two");
        assert_eq!(track.events[1].text, "Second");
    }

    #[test]
    fn from_srt_should_strip_html_tags_preserving_raw() {
        let input = "1\n00:00:01,000 --> 00:00:04,000\n<i>Italic</i> and <b>bold</b>\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        let ev = &track.events[0];
        assert_eq!(ev.text, "Italic and bold");
        assert_eq!(ev.raw, "<i>Italic</i> and <b>bold</b>");
    }

    #[test]
    fn from_srt_should_skip_malformed_event_and_parse_rest() {
        // The middle block has a non-numeric index and must be dropped
        // without affecting its neighbours.
        let input = "1\n00:00:01,000 --> 00:00:04,000\nGood\n\nNOT_NUM\nbad ts\ntext\n\n2\n00:00:05,000 --> 00:00:07,000\nAlso good\n";
        let track = SubtitleTrack::from_srt(input).unwrap();
        assert_eq!(track.events.len(), 2);
        assert_eq!(track.events[0].text, "Good");
        assert_eq!(track.events[1].text, "Also good");
    }

    #[test]
    fn from_srt_should_return_no_events_for_empty_input() {
        let result = SubtitleTrack::from_srt("");
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    #[test]
    fn from_srt_should_return_no_events_when_all_blocks_malformed() {
        let result = SubtitleTrack::from_srt("NOT_NUM\n00:00:01,000 --> 00:00:04,000\ntext\n");
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    // ---- ASS parsing ----

    // Minimal script: one ignored section followed by two dialogue lines,
    // the first of which carries inline override tags.
    const ASS_SAMPLE: &str = "\
[Script Info]
Title: Test

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Hello {\\i1}world{\\i0}
Dialogue: 0,0:00:05.00,0:00:07.00,Default,,0,0,0,,Second line
";

    #[test]
    fn from_ass_should_parse_dialogue_events() {
        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
        assert_eq!(track.events.len(), 2);
        let ev = &track.events[0];
        assert_eq!(ev.start, Duration::from_millis(1_000));
        assert_eq!(ev.end, Duration::from_millis(4_000));
        assert!(ev.raw.contains("{\\i1}"));
        assert!(!ev.text.contains('{'));
    }

    #[test]
    fn from_ass_should_strip_override_tags_preserving_raw() {
        // The override block contains a comma, which must stay inside the
        // text column rather than being treated as a field separator.
        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,{\\pos(100,200)}Hello\n";
        let track = SubtitleTrack::from_ass(input).unwrap();
        let ev = &track.events[0];
        assert_eq!(ev.text, "Hello");
        assert!(ev.raw.contains("{\\pos"));
    }

    #[test]
    fn from_ass_should_populate_metadata_fields() {
        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\nDialogue: 0,0:00:01.00,0:00:04.00,Signs,Actor1,0,0,0,,text\n";
        let track = SubtitleTrack::from_ass(input).unwrap();
        let ev = &track.events[0];
        assert_eq!(ev.metadata.get("Style"), Some(&"Signs".to_string()));
        assert_eq!(ev.metadata.get("Name"), Some(&"Actor1".to_string()));
    }

    #[test]
    fn from_ass_should_return_no_events_for_empty_events_section() {
        let input = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
        let result = SubtitleTrack::from_ass(input);
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    // ---- WebVTT parsing ----

    // Two cues: the first has a cue identifier, the second has cue settings
    // after the end timestamp and a voice tag in its text.
    const VTT_SAMPLE: &str = "\
WEBVTT

1
00:00:01.000 --> 00:00:04.000
Hello world

00:00:05.000 --> 00:00:07.000 align:center
<v Speaker>Voice tagged text</v>
";

    #[test]
    fn from_vtt_should_parse_cues_with_and_without_identifiers() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        assert_eq!(track.events.len(), 2);
        let ev = &track.events[0];
        assert_eq!(ev.start, Duration::from_millis(1_000));
        assert_eq!(ev.end, Duration::from_millis(4_000));
        assert_eq!(ev.text, "Hello world");
    }

    #[test]
    fn from_vtt_should_strip_voice_tags_preserving_raw() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        let ev = &track.events[1];
        assert_eq!(ev.text, "Voice tagged text");
        assert_eq!(ev.raw, "<v Speaker>Voice tagged text</v>");
    }

    #[test]
    fn from_vtt_should_ignore_cue_settings_in_timestamp_line() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        // `align:center` follows the end timestamp and must not break parsing.
        assert_eq!(track.events[1].end, Duration::from_millis(7_000));
    }

    #[test]
    fn from_vtt_should_return_parse_error_for_missing_header() {
        let result = SubtitleTrack::from_vtt("not a vtt file\ncontent");
        assert!(matches!(result, Err(SubtitleError::ParseError { .. })));
    }

    #[test]
    fn from_vtt_should_return_no_events_for_empty_content() {
        let result = SubtitleTrack::from_vtt("WEBVTT\n\n");
        assert!(matches!(result, Err(SubtitleError::NoEvents)));
    }

    // ---- File loading ----

    #[test]
    fn from_file_should_return_unsupported_for_unknown_extension() {
        // The extension is rejected before the (non-existent) file is opened.
        let result = SubtitleTrack::from_file("subtitle.xyz");
        assert!(matches!(
            result,
            Err(SubtitleError::UnsupportedFormat { .. })
        ));
    }

    // ---- Timestamp parsing ----

    #[test]
    fn parse_srt_timestamp_should_parse_millisecond_precision() {
        let ts = parse_srt_timestamp("01:23:45,678").unwrap();
        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678;
        assert_eq!(ts, Duration::from_millis(expected_ms));
    }

    #[test]
    fn parse_srt_timestamp_should_parse_zero_timestamp() {
        let ts = parse_srt_timestamp("00:00:00,000").unwrap();
        assert_eq!(ts, Duration::from_millis(0));
    }

    #[test]
    fn parse_ass_timestamp_should_parse_centisecond_precision() {
        let ts = parse_ass_timestamp("1:23:45.67").unwrap();
        let expected_ms = 1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670;
        assert_eq!(ts, Duration::from_millis(expected_ms));
    }

    #[test]
    fn parse_vtt_timestamp_should_accept_mm_ss_format() {
        let ts = parse_vtt_timestamp("05:30.500").unwrap();
        assert_eq!(ts, Duration::from_millis(5 * 60_000 + 30 * 1_000 + 500));
    }

    #[test]
    fn parse_vtt_timestamp_should_accept_hh_mm_ss_format() {
        let ts = parse_vtt_timestamp("01:02:03.456").unwrap();
        let expected_ms = 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456;
        assert_eq!(ts, Duration::from_millis(expected_ms));
    }

    // ---- Markup stripping ----

    #[test]
    fn strip_html_tags_should_remove_italic_bold_underline() {
        assert_eq!(strip_html_tags("<i>italic</i>"), "italic");
        assert_eq!(strip_html_tags("<b>bold</b>"), "bold");
        assert_eq!(strip_html_tags("<u>under</u>"), "under");
    }

    #[test]
    fn strip_html_tags_should_remove_voice_span() {
        assert_eq!(strip_html_tags("<v Speaker>text</v>"), "text");
    }

    #[test]
    fn strip_ass_tags_should_remove_curly_brace_overrides() {
        assert_eq!(strip_ass_tags("{\\an8}text"), "text");
        assert_eq!(strip_ass_tags("before{\\pos(100,200)}after"), "beforeafter");
    }

    #[test]
    fn strip_ass_tags_should_convert_soft_line_breaks() {
        assert_eq!(strip_ass_tags("line1\\Nline2"), "line1\nline2");
        assert_eq!(strip_ass_tags("line1\\nline2"), "line1\nline2");
    }

    // ---- Timestamp formatting ----

    #[test]
    fn duration_to_srt_timestamp_should_format_correctly() {
        let d = Duration::from_millis(1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 678);
        assert_eq!(duration_to_srt_timestamp(d), "01:23:45,678");
    }

    #[test]
    fn duration_to_ass_timestamp_should_use_centiseconds() {
        let d = Duration::from_millis(1 * 3_600_000 + 23 * 60_000 + 45 * 1_000 + 670);
        assert_eq!(duration_to_ass_timestamp(d), "1:23:45.67");
    }

    #[test]
    fn duration_to_vtt_timestamp_should_format_correctly() {
        let d = Duration::from_millis(1 * 3_600_000 + 2 * 60_000 + 3 * 1_000 + 456);
        assert_eq!(duration_to_vtt_timestamp(d), "01:02:03.456");
    }

    // ---- SRT writing ----

    #[test]
    fn to_srt_should_produce_1_based_sequential_indices() {
        let track = SubtitleTrack {
            events: vec![
                make_event(0, 1_000, 4_000, "First"),
                make_event(1, 5_000, 7_000, "Second"),
            ],
            language: None,
        };
        let srt = track.to_srt();
        let lines: Vec<&str> = srt.lines().collect();
        // Each single-line cue occupies four lines: number, timing, text, blank.
        assert_eq!(lines[0], "1");
        assert_eq!(lines[4], "2");
    }

    #[test]
    fn to_srt_should_use_comma_separated_timestamps() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let srt = track.to_srt();
        assert!(srt.contains("00:00:01,000 --> 00:00:04,000"));
    }

    #[test]
    fn to_srt_should_write_empty_text_event_preserving_index_sequence() {
        let empty = SubtitleEvent {
            index: 1,
            start: Duration::from_millis(5_000),
            end: Duration::from_millis(7_000),
            text: String::new(),
            raw: String::new(),
            metadata: HashMap::new(),
        };
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "First"), empty],
            language: None,
        };
        let srt = track.to_srt();
        let reparsed = SubtitleTrack::from_srt(&srt).unwrap();
        assert_eq!(reparsed.events.len(), 2);
        // The text-less cue must survive the round trip with its timing intact.
        assert_eq!(reparsed.events[1].start, Duration::from_millis(5_000));
    }

    #[test]
    fn srt_round_trip_should_preserve_start_end_and_text() {
        let srt_in = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n\n2\n00:00:05,500 --> 00:00:07,250\nSecond\n\n";
        let track = SubtitleTrack::from_srt(srt_in).unwrap();
        let written = track.to_srt();
        let reparsed = SubtitleTrack::from_srt(&written).unwrap();
        assert_eq!(reparsed.events.len(), track.events.len());
        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
            assert_eq!(a.start, b.start);
            assert_eq!(a.end, b.end);
            assert_eq!(a.text, b.text);
        }
    }

    // ---- ASS writing ----

    #[test]
    fn to_ass_should_contain_required_sections() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let ass = track.to_ass();
        assert!(ass.contains("[Script Info]"));
        assert!(ass.contains("[V4+ Styles]"));
        assert!(ass.contains("[Events]"));
        assert!(ass.contains("Format: Layer, Start, End,"));
        assert!(ass.contains("Dialogue:"));
    }

    #[test]
    fn to_ass_should_use_centisecond_timestamps() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let ass = track.to_ass();
        assert!(ass.contains("0:00:01.00,0:00:04.00"));
    }

    #[test]
    fn ass_round_trip_should_preserve_start_end_and_text() {
        let track = SubtitleTrack::from_ass(ASS_SAMPLE).unwrap();
        let written = track.to_ass();
        let reparsed = SubtitleTrack::from_ass(&written).unwrap();
        assert_eq!(reparsed.events.len(), track.events.len());
        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
            assert_eq!(a.start, b.start, "start mismatch");
            assert_eq!(a.end, b.end, "end mismatch");
            assert_eq!(a.text, b.text, "text mismatch");
        }
    }

    // ---- WebVTT writing ----

    #[test]
    fn to_vtt_should_start_with_webvtt_header() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let vtt = track.to_vtt();
        assert!(vtt.starts_with("WEBVTT\n"));
    }

    #[test]
    fn to_vtt_should_use_dot_separated_timestamps() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        let vtt = track.to_vtt();
        assert!(vtt.contains("00:00:01.000 --> 00:00:04.000"));
    }

    #[test]
    fn vtt_round_trip_should_preserve_start_end_and_text() {
        let track = SubtitleTrack::from_vtt(VTT_SAMPLE).unwrap();
        let written = track.to_vtt();
        let reparsed = SubtitleTrack::from_vtt(&written).unwrap();
        assert_eq!(reparsed.events.len(), track.events.len());
        for (a, b) in track.events.iter().zip(reparsed.events.iter()) {
            assert_eq!(a.start, b.start, "start mismatch");
            assert_eq!(a.end, b.end, "end mismatch");
            assert_eq!(a.text, b.text, "text mismatch");
        }
    }

    // ---- File writing ----

    #[test]
    fn write_to_file_should_return_unsupported_for_unknown_extension() {
        let track = SubtitleTrack {
            events: vec![make_event(0, 1_000, 4_000, "Hello")],
            language: None,
        };
        // The extension is rejected before anything is written to disk.
        let result = track.write_to_file("output.xyz");
        assert!(matches!(
            result,
            Err(SubtitleError::UnsupportedFormat { .. })
        ));
    }

    // Builds an event with identical `text` and `raw`, no metadata, and
    // times given in milliseconds.
    fn make_event(index: usize, start_ms: u64, end_ms: u64, text: &str) -> SubtitleEvent {
        SubtitleEvent {
            index,
            start: Duration::from_millis(start_ms),
            end: Duration::from_millis(end_ms),
            text: text.to_string(),
            raw: text.to_string(),
            metadata: HashMap::new(),
        }
    }
}