1use super::srt::{SRTLine, SRT};
7use super::ssa::{SSAEvent, SSAInfo, SSAStyle, SSA};
8use super::strip_bom;
9use crate::error;
10use crate::util::{Alignment, Color};
11use regex::Regex;
12use serde::Deserialize;
13use serde::Serialize;
14use std::collections::HashMap;
15use std::fmt::Display;
16use time::Time;
17
18#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
20pub struct VTTStyle {
21 pub selector: Option<String>,
22 pub entries: HashMap<String, String>,
23}
24
25#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
27pub struct VTTLine {
28 pub identifier: Option<String>,
29 pub start: Time,
30 pub end: Time,
31 pub settings: HashMap<String, Option<String>>,
32 pub text: String,
33}
34
35impl Default for VTTLine {
36 fn default() -> Self {
37 Self {
38 identifier: None,
39 start: Time::from_hms(0, 0, 0).unwrap(),
40 end: Time::from_hms(0, 0, 0).unwrap(),
41 settings: Default::default(),
42 text: "".to_string(),
43 }
44 }
45}
46
47#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
48pub struct VTTRegion {
49 pub id: Option<String>,
50 pub width: Option<f32>,
51 pub lines: Option<u32>,
52 pub region_anchor: Option<(f32, f32)>,
53 pub viewport_anchor: Option<(f32, f32)>,
54 pub scroll: bool,
55}
56impl Eq for VTTRegion {}
57
58#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
60pub struct VTT {
61 pub regions: Vec<VTTRegion>,
62 pub styles: Vec<VTTStyle>,
63 pub lines: Vec<VTTLine>,
64}
65
66impl VTT {
67 pub fn parse<S: AsRef<str>>(content: S) -> Result<VTT, VTTError> {
69 let mut line_num = 0;
70
71 let mut regions = vec![];
72 let mut styles = vec![];
73 let mut lines = vec![];
74
75 let mut blocks = vec![vec![]];
76 for line in strip_bom(&content).lines() {
77 if line.trim().is_empty() {
78 if !blocks.last().unwrap().is_empty() {
79 blocks.push(vec![])
80 }
81 } else {
82 blocks.last_mut().unwrap().push(line)
83 }
84 }
85 if blocks.last().is_some_and(|b| b.is_empty()) {
86 blocks.remove(blocks.len() - 1);
87 }
88
89 parse::parse_start(blocks.remove(0).into_iter())
90 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?;
91
92 line_num += 1;
93 for mut block in blocks {
94 line_num += 1;
95
96 let block_len = block.len();
97 let (first_word, _) = block[0].split_once(' ').unwrap_or((block[0], ""));
98
99 match first_word {
100 "NOTE" => (),
103 "REGION" => {
104 block.remove(0);
105 line_num += 1;
106 regions.push(
107 parse::parse_region_block(block.into_iter())
108 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?,
109 )
110 }
111 "STYLE" => {
112 block[0] = &block[0][5..];
113 styles.push(
114 parse::parse_style_block(block.join("\n").trim())
115 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?,
116 );
117 }
118 _ => lines.push(
119 parse::parse_cue_block(block.into_iter())
120 .map_err(|e| VTTError::new(e.kind, line_num + e.line))?,
121 ),
122 }
123
124 line_num += block_len
125 }
126
127 Ok(VTT {
128 regions,
129 styles,
130 lines,
131 })
132 }
133
134 pub fn to_ssa(&self) -> SSA {
136 let speaker_regex: Regex = Regex::new(r"(?m)^<v.*?\s(?P<speaker>.*?)>").unwrap();
137 let xml_replace_regex: Regex = Regex::new(r"(?m)<.*?>").unwrap();
138
139 let mut default_style = SSAStyle {
140 name: "Default".to_string(),
141 fontname: "Arial".to_string(),
142 fontsize: 20.0,
143 primary_color: None,
144 secondary_color: None,
145 outline_color: None,
146 back_color: None,
147 alignment: Alignment::BottomCenter,
148 ..Default::default()
149 };
150 for style in &self.styles {
151 if style.selector.is_some() {
153 continue;
154 }
155 if let Some(color) = style.entries.get("color") {
157 if let Ok(primary_color) = Color::from_vtt(color) {
158 default_style.primary_color = Some(primary_color)
159 }
160 }
161 if let Some(background_color) = style.entries.get("background-color") {
163 if let Ok(back_color) = Color::from_vtt(background_color) {
164 default_style.back_color = Some(back_color)
165 }
166 }
167 if let Some(font_size) = style.entries.get("font-size") {
170 let font_size = font_size.trim_end_matches("px");
171 if let Ok(font_size) = font_size.parse() {
172 default_style.fontsize = font_size
173 }
174 }
175 if style
177 .entries
178 .get("font-style")
179 .is_some_and(|fs| fs == "italic")
180 {
181 default_style.italic = true;
182 }
183 if style
185 .entries
186 .get("font-weight")
187 .is_some_and(|fw| fw.starts_with("bold"))
188 {
189 default_style.bold = true;
190 }
191 if let Some(text_decoration) = style.entries.get("text-decoration") {
193 if text_decoration.contains("underline") {
194 default_style.underline = true
195 }
196 if text_decoration.contains("line-through") {
197 default_style.strikeout = true
198 }
199 }
200 if let Some(letter_spacing) = style.entries.get("letter-spacing") {
203 let letter_spacing = letter_spacing.trim_end_matches("px");
204 if let Ok(letter_spacing) = letter_spacing.parse() {
205 default_style.spacing = letter_spacing
206 }
207 }
208 }
209
210 let mut events = vec![];
211 for line in &self.lines {
212 let mut captures = speaker_regex.captures_iter(&line.text);
213 let first_capture = captures.next();
214 let second_capture = captures.next();
215
216 let (mut text, speaker) = if first_capture.is_some() && second_capture.is_some() {
217 (speaker_regex.replace_all(&line.text, "").to_string(), None)
218 } else if let Some(c) = first_capture {
219 let text = line.text[c.get(0).unwrap().end()..].to_string();
220 let speaker = c.name("speaker").unwrap().as_str().to_string();
221 (text, Some(speaker))
222 } else {
223 (line.text.clone(), None)
224 };
225
226 text = text
227 .replace("<b>", "{\\b1}")
228 .replace("</b>", "{\\b0}")
229 .replace("<i>", "{\\i1}")
230 .replace("</i>", "{\\i0}")
231 .replace("<s>", "{\\s1}")
232 .replace("</s>", "{\\s0}")
233 .replace("<u>", "{\\u1}")
234 .replace("</u>", "{\\u0}");
235 text = xml_replace_regex.replace_all(&text, "").to_string();
236
237 events.push(SSAEvent {
238 start: line.start,
239 end: line.end,
240 style: "Default".to_string(),
241 name: speaker.unwrap_or_default(),
242 text: text.replace("\r\n", "\\N").replace('\n', "\\N"),
243 ..Default::default()
244 })
245 }
246
247 SSA {
248 info: SSAInfo {
249 ..Default::default()
250 },
251 styles: vec![default_style],
252 events,
253 fonts: vec![],
254 graphics: vec![],
255 }
256 }
257 pub fn to_srt(&self) -> SRT {
259 let speaker_regex: Regex = Regex::new(r"(?m)^<v.*?>").unwrap();
260
261 let mut lines = vec![];
262
263 for (i, line) in self.lines.iter().enumerate() {
264 let text = speaker_regex
265 .replace_all(line.text.as_str(), "")
266 .to_string();
267
268 lines.push(SRTLine {
269 sequence_number: i as u32 + 1,
270 start: line.start,
271 end: line.end,
272 text: text.replace('\n', "\r\n"),
273 })
274 }
275
276 SRT { lines }
277 }
278}
279
280impl Display for VTT {
281 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
282 let mut blocks = vec![];
283
284 blocks.push(vec!["WEBVTT".to_string()]);
285
286 for style in &self.styles {
287 let mut block = vec![];
288 block.push("STYLE".to_string());
289 if let Some(selector) = &style.selector {
290 block.push(format!("::cue({selector}) {{"))
291 } else {
292 block.push("::cue {".to_string())
293 }
294 for (id, value) in &style.entries {
295 block.push(format!("{id}: {value}"))
296 }
297 block.push("}".to_string());
298
299 blocks.push(block)
300 }
301
302 for line in &self.lines {
303 let mut block = vec![];
304 if let Some(identifier) = &line.identifier {
305 block.push(identifier.clone())
306 }
307
308 if !line.settings.is_empty() {
309 block.push(format!(
310 "{} --> {} {}",
311 line.start.format(parse::TIME_FORMAT).unwrap(),
312 line.end.format(parse::TIME_FORMAT).unwrap(),
313 line.settings
314 .iter()
315 .map(|(k, v)| v.as_ref().map_or(k.clone(), |v| format!("{k}: {v}")))
316 .collect::<Vec<String>>()
317 .join(" ")
318 ))
319 } else {
320 block.push(format!(
321 "{} --> {}",
322 line.start.format(parse::TIME_FORMAT).unwrap(),
323 line.end.format(parse::TIME_FORMAT).unwrap()
324 ))
325 }
326 block.push(line.text.clone());
327
328 blocks.push(block)
329 }
330
331 write!(
332 f,
333 "{}",
334 blocks
335 .into_iter()
336 .map(|b| b.join("\n"))
337 .collect::<Vec<String>>()
338 .join("\n\n")
339 )
340 }
341}
342
343mod parse {
344 use super::*;
345 use time::format_description::BorrowedFormatItem;
346 use time::macros::format_description;
347
348 pub(super) struct Error {
349 pub(super) line: usize,
350 pub(super) kind: VTTErrorKind,
351 }
352
353 pub(super) const TIME_FORMAT: &[BorrowedFormatItem] =
354 format_description!("[hour]:[minute]:[second].[subsecond digits:3]");
355
356 type Result<T> = std::result::Result<T, Error>;
357
358 pub(super) fn parse_start<'a, I: Iterator<Item = &'a str>>(mut block_lines: I) -> Result<()> {
359 let line = block_lines.next().unwrap();
360 if !line.starts_with("WEBVTT") {
361 return Err(Error {
362 line: 1,
363 kind: VTTErrorKind::InvalidFormat,
364 });
365 }
366 Ok(())
367 }
368 pub(super) fn parse_region_block<'a, I: Iterator<Item = &'a str>>(
369 block_lines: I,
370 ) -> Result<VTTRegion> {
371 let mut region = VTTRegion {
372 id: None,
373 width: None,
374 lines: None,
375 region_anchor: None,
376 viewport_anchor: None,
377 scroll: false,
378 };
379
380 for (i, line) in block_lines.enumerate() {
381 let (name, value) = line.split_once(':').ok_or(Error {
382 line: 1 + i,
383 kind: VTTErrorKind::Parse("delimiter ':' missing".to_string()),
384 })?;
385
386 match name {
387 "id" => region.id = Some(value.to_string()),
388 "width" => {
389 region.width = Some(parse_percentage(value).ok_or(Error {
390 line: 1 + i,
391 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
392 })?)
393 }
394 "lines" => {
395 region.lines = Some(value.parse::<u32>().map_err(|e| Error {
396 line: 1 + i,
397 kind: VTTErrorKind::Parse(e.to_string()),
398 })?)
399 }
400 "regionanchor" => {
401 let Some((a, b)) = value.split_once(',') else {
402 return Err(Error {
403 line: 1 + i,
404 kind: VTTErrorKind::Parse("delimiter ',' missing".to_string()),
405 });
406 };
407 region.region_anchor = Some((
408 parse_percentage(a).ok_or(Error {
409 line: 1 + i,
410 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
411 })?,
412 parse_percentage(b).ok_or(Error {
413 line: 1 + i,
414 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
415 })?,
416 ))
417 }
418 "viewportanchor" => {
419 let Some((a, b)) = value.split_once(',') else {
420 return Err(Error {
421 line: 1 + i,
422 kind: VTTErrorKind::Parse("delimiter ',' missing".to_string()),
423 });
424 };
425 region.viewport_anchor = Some((
426 parse_percentage(a).ok_or(Error {
427 line: 1 + i,
428 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
429 })?,
430 parse_percentage(b).ok_or(Error {
431 line: 1 + i,
432 kind: VTTErrorKind::Parse(format!("invalid percentage '{value}'")),
433 })?,
434 ))
435 }
436 "scroll" => {
437 region.scroll = if value == "up" {
438 true
439 } else {
440 return Err(Error {
441 line: 1 + i,
442 kind: VTTErrorKind::Parse("only allowed value is 'up'".to_string()),
443 });
444 }
445 }
446 _ => continue,
447 }
448 }
449
450 Ok(region)
451 }
452 pub(super) fn parse_style_block(block: &str) -> Result<VTTStyle> {
453 let mut selector = None;
454 let mut entries = HashMap::new();
455
456 let Some(mut block) = block.strip_prefix("::cue") else {
458 return Err(Error {
459 line: 1,
460 kind: VTTErrorKind::Parse("missing '::cue' prefix".to_string()),
461 });
462 };
463
464 if block.ends_with('}') {
466 block = &block[..block.len() - 1]
467 } else {
468 return Err(Error {
469 line: block.split('\n').count(),
470 kind: VTTErrorKind::Parse("missing '}' suffix".to_string()),
471 });
472 }
473
474 block = block.trim_start();
476 if block.starts_with('(') {
477 let Some(closing_idx) = block.find(|c| c == ')') else {
478 return Err(Error {
479 line: 1,
480 kind: VTTErrorKind::Parse("selector isn't closed".to_string()),
481 });
482 };
483 selector = Some(block[1..closing_idx].to_string());
484 block = &block[closing_idx + 1..]
485 }
486
487 let Some(mut block) = block.trim_start().strip_prefix('{') else {
489 return Err(Error {
490 line: 1,
491 kind: VTTErrorKind::Parse("missing '{'".to_string()),
492 });
493 };
494
495 let mut line_num = 0;
496 if block.starts_with('\n') {
498 line_num += 1;
499 block = &block[1..];
500 }
501
502 for line in block.split('\n') {
503 line_num += 1;
504
505 for item in line.split(';') {
506 if item.is_empty() {
507 continue;
508 }
509
510 let Some((name, value)) = item.split_once(':') else {
511 return Err(Error {
512 line: 1 + line_num,
513 kind: VTTErrorKind::Parse("delimiter ':' missing".to_string()),
514 });
515 };
516 entries.insert(name.trim().to_string(), value.trim().to_string());
517 }
518 }
519
520 Ok(VTTStyle { selector, entries })
521 }
522 pub(super) fn parse_cue_block<'a, I: Iterator<Item = &'a str>>(
523 mut block_lines: I,
524 ) -> Result<VTTLine> {
525 let mut identifier = None;
526 let mut settings = HashMap::new();
527
528 let mut timing_line = block_lines.next().unwrap();
531 if !timing_line.contains("-->") {
533 identifier = Some(timing_line.to_string());
534 timing_line = block_lines.next().ok_or(Error {
535 line: 2,
536 kind: VTTErrorKind::Parse("missing subtitle timing".to_string()),
537 })?;
538 }
539
540 let (start_str, mut end_str) = timing_line.split_once("-->").ok_or(Error {
543 line: 1 + identifier.is_some() as usize,
544 kind: VTTErrorKind::Parse("missing '-->'".to_string()),
545 })?;
546 let start = if start_str.chars().filter(|c| *c == ':').count() < 2 {
550 let start_str = format!("00:{}", start_str.trim());
551 Time::parse(&start_str, TIME_FORMAT).map_err(|e| Error {
552 line: 1 + identifier.is_some() as usize,
553 kind: VTTErrorKind::Parse(e.to_string()),
554 })?
555 } else {
556 Time::parse(start_str.trim(), TIME_FORMAT).map_err(|e| Error {
557 line: 1 + identifier.is_some() as usize,
558 kind: VTTErrorKind::Parse(e.to_string()),
559 })?
560 };
561 if end_str.trim().contains(' ') {
564 let settings_str;
565 (end_str, settings_str) = end_str.trim().split_once(' ').unwrap();
566
567 for setting in settings_str.split(' ') {
568 if let Some((id, value)) = setting.split_once(':') {
569 settings.insert(id.to_string(), Some(value.to_string()));
570 } else {
571 settings.insert(setting.to_string(), None);
572 }
573 }
574 }
575 let end = if end_str.chars().filter(|c| *c == ':').count() < 2 {
579 let end_str = format!("00:{}", end_str.trim());
580 Time::parse(&end_str, TIME_FORMAT).map_err(|e| Error {
581 line: 1 + identifier.is_some() as usize,
582 kind: VTTErrorKind::Parse(e.to_string()),
583 })?
584 } else {
585 Time::parse(end_str.trim(), TIME_FORMAT).map_err(|e| Error {
586 line: 1 + identifier.is_some() as usize,
587 kind: VTTErrorKind::Parse(e.to_string()),
588 })?
589 };
590
591 Ok(VTTLine {
592 identifier,
593 start,
594 end,
595 settings,
596 text: block_lines.collect::<Vec<&str>>().join("\n"),
597 })
598 }
599
600 fn parse_percentage(s: &str) -> Option<f32> {
601 if !s.ends_with('%') {
602 return None;
603 }
604 s[..s.len() - 1].parse().ok()
605 }
606}
607
// Declares the public error type `VTTError` and its kind enum `VTTErrorKind` through the
// crate's `error!` macro (the macro's expansion is not visible in this file).
// `InvalidFormat` is used when the `WEBVTT` header is missing; `Parse` carries a
// human-readable description of what could not be parsed.
error! {
    VTTError => VTTErrorKind {
        InvalidFormat,
        Parse(String),
    }
}