webvtt_parser/
lib.rs

1mod cue_settings_parser;
2pub mod error;
3mod vtt_parser;
4
5pub use error::VttError;
6use nom_locate::LocatedSpan;
7use std::collections::HashMap;
8use std::fmt::{self, Debug, Display, Formatter};
9
// The magic string at the start of each WebVTT file; it is also the first thing written
// when a `Vtt` is formatted with `Display`.
const START_MARKER: &str = "WEBVTT";
12
/// A cue timestamp (used for both the start and the end of a cue), stored as a whole
/// number of milliseconds.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct Time(pub(crate) u64);
16
17impl Time {
18    #[inline]
19    pub fn as_milliseconds(&self) -> u64 {
20        self.0
21    }
22
23    #[inline]
24    pub fn from_milliseconds(millis: u64) -> Self {
25        Self(millis)
26    }
27}
28
/// Divides `x` by `y` and returns `(quotient, remainder)`.
///
/// Works for any `Copy` type supporting `/` and `%`; like those operators, it panics on
/// division by zero for the primitive integer types.
fn div_rem<T>(x: T, y: T) -> (T, T)
where
    T: std::ops::Div<Output = T> + std::ops::Rem<Output = T> + Copy,
{
    (x / y, x % y)
}
34
35impl Display for Time {
36    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
37        // print hour if needed
38        let (hours, reminder) = div_rem(self.0, 3_600_000);
39        let (minutes, reminder) = div_rem(reminder, 60_000);
40        let (seconds, milliseconds) = div_rem(reminder, 1000);
41
42        if hours > 0 {
43            write!(
44                formatter,
45                "{hours:02}:{minutes:02}:{seconds:02}.{milliseconds:03}",
46            )
47        } else {
48            write!(formatter, "{minutes:02}:{seconds:02}.{milliseconds:03}",)
49        }
50    }
51}
52
/// Value of the `vertical` cue setting (see `VttCueSettings::vertical`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Vertical {
    /// Right to left.
    RightToLeft,
    /// Left to right.
    LeftToRight,
}
58
59impl Display for Vertical {
60    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
61        write!(
62            formatter,
63            "{}",
64            match self {
65                Vertical::RightToLeft => "vertical:rt",
66                Vertical::LeftToRight => "vertical:lr",
67            }
68        )
69    }
70}
71
/// A value that is either a plain integer or a percentage; used by the `line` cue setting
/// (see `VttCueSettings::line`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumberOrPercentage {
    /// A plain, possibly negative, integer. Displayed as-is.
    Number(i32),
    /// A percentage. Displayed with a trailing `%`.
    Percentage(u8),
}
77
78impl Display for NumberOrPercentage {
79    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
80        write!(
81            formatter,
82            "{}",
83            match self {
84                NumberOrPercentage::Number(number) => number.to_string(),
85                NumberOrPercentage::Percentage(percentage) => format!("{percentage}%"),
86            }
87        )
88    }
89}
90
/// Value of the `align` cue setting (see `VttCueSettings::align`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Align {
    /// Displayed as `start`.
    Start,
    /// Displayed as `middle`.
    Middle,
    /// Displayed as `end`.
    End,
}
97
98impl Display for Align {
99    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
100        write!(
101            formatter,
102            "{}",
103            match self {
104                Align::Start => "start",
105                Align::End => "end",
106                Align::Middle => "middle",
107            }
108        )
109    }
110}
111
/// Cue settings are optional components used to position where the cue payload text will be displayed over the video.
/// This includes whether the text is displayed horizontally or vertically.
/// There can be zero or more of them, and they can be used in any order so long as each setting is used no more than once.
///
/// Every field is `None` when the corresponding setting is absent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VttCueSettings {
    /// Value of the `vertical` setting, if present.
    pub vertical: Option<Vertical>,
    /// Specifies where text appears vertically. If vertical is set, line specifies where text appears horizontally.
    /// Value can be a line number:
    /// - The line height is the height of the first line of the cue as it appears on the video.
    /// - Positive numbers indicate top down.
    /// - Negative numbers indicate bottom up.
    ///
    /// Or value can be a percentage:
    /// - Must be an integer (i.e., no decimals) between 0 and 100 inclusive.
    /// - Must be followed by a percent sign (%).
    pub line: Option<NumberOrPercentage>,
    /// Specifies where the text will appear horizontally. If vertical is set, position specifies where the text will appear vertically. Value is percentage.
    pub position: Option<u8>,
    /// Specifies the width of the text area. If vertical is set, size specifies the height of the text area. Value is percentage.
    pub size: Option<u8>,
    /// Specifies the alignment of the text. Text is aligned within the space given by the size cue setting if it is set.
    pub align: Option<Align>,
}
135
136impl VttCueSettings {
137    pub(crate) fn is_empty(&self) -> bool {
138        self.size.is_none()
139            && self.position.is_none()
140            && self.vertical.is_none()
141            && self.line.is_none()
142            && self.align.is_none()
143    }
144}
145
146impl Display for VttCueSettings {
147    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
148        fn format_opt<T: Display>(name: &str, option: Option<T>) -> String {
149            option
150                .map(|value| format!(" {name}:{value}"))
151                .unwrap_or_else(|| "".to_owned())
152        }
153
154        write!(
155            formatter,
156            "{}{}{}{}{}",
157            format_opt("vertical", self.vertical),
158            format_opt("size", self.size),
159            format_opt("position", self.position),
160            format_opt("line", self.line),
161            format_opt("align", self.align)
162        )
163    }
164}
165
/// A subtitle and associated metadata.
///
/// All strings are borrowed; see `OwnedVttCue` for the owning counterpart.
#[derive(Debug, PartialEq, Clone, Copy, Eq)]
pub struct VttCue<'a> {
    /// Time at which the cue starts.
    pub start: Time,
    /// Time at which the cue ends.
    pub end: Time,
    /// The identifier is a name that identifies the cue. It can be used to reference the cue from a script. It must not contain a newline and cannot contain the string "-->". It must end with a single newline.
    ///
    /// Ref: https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_identifier
    pub name: Option<&'a str>,
    /// The cue payload text.
    pub text: &'a str,
    /// Optional comment attached to the cue; `Display` writes it as a `NOTE ...` line before the cue.
    pub note: Option<&'a str>,
    /// Optional cue settings that belong to this particular cue. If the value is `Some(VttCueSettings)` it means that at least one setting was passed.
    ///
    /// Ref: https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_settings
    pub cue_settings: Option<VttCueSettings>,
}
182
183impl<'a> From<VttCue<'a>> for &'a str {
184    fn from(value: VttCue<'a>) -> &'a str {
185        value.text
186    }
187}
188
/// Owning counterpart of `VttCue`: it has the same fields, but every string is a `String`
/// instead of a borrowed `&str`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedVttCue {
    /// Time at which the cue starts.
    pub start: Time,
    /// Time at which the cue ends.
    pub end: Time,
    /// Optional cue identifier.
    pub name: Option<String>,
    /// The cue payload text.
    pub text: String,
    /// Optional comment attached to the cue.
    pub note: Option<String>,
    /// Optional cue settings of this cue.
    pub cue_settings: Option<VttCueSettings>,
}
199
200impl<'a> From<&'a OwnedVttCue> for &'a str {
201    fn from(value: &'a OwnedVttCue) -> &'a str {
202        &value.text
203    }
204}
205
206impl OwnedVttCue {
207    pub fn as_ref(&self) -> VttCue {
208        VttCue {
209            start: self.start,
210            end: self.end,
211            name: self.name.as_deref(),
212            text: self.text.as_ref(),
213            note: self.note.as_deref(),
214            cue_settings: self.cue_settings,
215        }
216    }
217}
218
219impl VttCue<'_> {
220    pub fn to_owned(&self) -> OwnedVttCue {
221        OwnedVttCue {
222            start: self.start,
223            end: self.end,
224            name: self.name.map(|name| name.to_owned()),
225            text: self.text.to_owned(),
226            note: self.note.map(|note| note.to_owned()),
227            cue_settings: self.cue_settings,
228        }
229    }
230}
231
232impl Display for VttCue<'_> {
233    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
234        write!(
235            formatter,
236            "{}{}{} --> {}{}\n{}\n",
237            self.note
238                .as_ref()
239                .map(|comment| format!("NOTE {comment}\n"))
240                .unwrap_or_else(|| "".to_owned()),
241            self.name
242                .as_ref()
243                .map(|comment| format!("NOTE {comment}\n"))
244                .unwrap_or_else(|| "".to_owned()),
245            self.start,
246            self.end,
247            self.cue_settings
248                .as_ref()
249                .map(|setting| format!("{setting}"))
250                .unwrap_or_else(|| "".to_owned()),
251            self.text
252        )
253    }
254}
255
/// (web)VTT — Web Video Text Tracks
/// This struct represents a parsed VTT file. It contains a list of cues and optional metadata.
///
/// Use this version only when you need to own the data. Whenever possible, prefer the
/// borrowing `Vtt` struct instead.
///
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct OwnedVtt {
    /// Top level key-value metadata pairs that might be populated at the very top of the subtitles
    /// file. For example:
    ///
    /// ```text
    /// WEBVTT
    /// Kind: captions
    /// Language: en
    /// ```
    pub slugs: HashMap<String, String>,
    /// Optional global CSS style that can be populated at the very top of the file.
    /// If it is present it might be applied globally to all cues.
    ///
    /// For example:
    /// ```text
    /// STYLE
    /// ::cue {
    ///    background-image: linear-gradient(to bottom, dimgray, lightgray);
    ///    color: papayawhip;
    ///    font-size: 50px;
    ///    text-align: center;
    ///    font-family: monospace;
    ///  }
    /// ```
    pub style: Option<String>,
    /// A list of cues that are present in the file.
    /// Each cue contains a start and end time, text and optional cue settings.
    ///
    /// For example:
    /// ```text
    /// WEBVTT
    /// 00:00.000 --> 00:05.000
    /// Hey subtitle one
    /// ```
    pub cues: Vec<OwnedVttCue>,
}
299
/// (web)VTT — Web Video Text Tracks
/// This struct represents a parsed VTT file. It contains a list of cues and optional metadata.
///
/// All strings are borrowed for the lifetime `'a`; see `OwnedVtt` for the owning counterpart.
///
#[derive(Debug, PartialEq, Clone, Eq)]
pub struct Vtt<'a> {
    /// Top level key-value metadata pairs that might be populated at the very top of the subtitles
    /// file. For example:
    ///
    /// ```text
    /// WEBVTT
    /// Kind: captions
    /// Language: en
    /// ```
    pub slugs: HashMap<&'a str, &'a str>,
    /// Optional global CSS style that can be populated at the very top of the file.
    /// If it is present it might be applied globally to all cues.
    ///
    /// For example:
    /// ```text
    /// STYLE
    /// ::cue {
    ///    background-image: linear-gradient(to bottom, dimgray, lightgray);
    ///    color: papayawhip;
    ///    font-size: 50px;
    ///    text-align: center;
    ///    font-family: monospace;
    ///  }
    /// ```
    pub style: Option<&'a str>,
    /// A list of cues that are present in the file.
    /// Each cue contains a start and end time, text and optional cue settings.
    ///
    /// For example:
    /// ```text
    /// WEBVTT
    /// 00:00.000 --> 00:05.000
    /// Hey subtitle one
    /// ```
    pub cues: Vec<VttCue<'a>>,
}
340
341impl<'a> Vtt<'a> {
342    /// Parse [webvtt subtitles](https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) from provided string.
343    /// Make sure that it does not allocate any string data and only references parts of the original string.
344    ///
345    /// # Example
346    ///
347    /// ```rust
348    /// use webvtt_parser::{Vtt, VttCue, VttCueSettings, Align, Time};
349    ///
350    /// let vtt = Vtt::parse("WEBVTT
351    ///
352    /// 00:00.000 --> 00:05.000
353    /// Hey subtitle one
354    ///
355    /// 00:05.000 --> 00:08.000 align:end
356    /// Hey subtitle two
357    ///").unwrap();
358    ///
359    /// assert_eq!(vtt.cues.len(), 2);
360    /// assert_eq!(vtt.cues[0], VttCue { start: Time::from_milliseconds(0), end: Time::from_milliseconds(5000), text: "Hey subtitle one", name: None, note: None, cue_settings: None });
361    /// assert_eq!(vtt.cues[1].cue_settings, Some(VttCueSettings { align: Some(Align::End), position: None, vertical: None, size: None, line: None }));
362    /// ```
363    pub fn parse(content: &'a str) -> Result<Self, VttError> {
364        let content = Span::from(content);
365
366        let (_, vtt) = vtt_parser::parse(content)?;
367        Ok(vtt)
368    }
369
370    /// Clones all the borrowes strings and returns the owned oversion of the vtt data.
371    pub fn to_owned(&self) -> OwnedVtt {
372        OwnedVtt {
373            slugs: self
374                .slugs
375                .iter()
376                .map(|(key, value)| (key.to_string(), value.to_string()))
377                .collect(),
378            style: self.style.map(|style| style.to_owned()),
379            cues: self.cues.iter().map(|cue| cue.to_owned()).collect(),
380        }
381    }
382}
383
384impl OwnedVtt {
385    pub fn parse(content: &str) -> Result<Self, VttError> {
386        let borrowed_vtt = Vtt::parse(content)?;
387
388        Ok(borrowed_vtt.to_owned())
389    }
390}
391
392impl<'a> From<&'a OwnedVtt> for Vtt<'a> {
393    fn from(value: &'a OwnedVtt) -> Self {
394        Vtt {
395            slugs: value
396                .slugs
397                .iter()
398                .map(|(key, value)| (key.as_str(), value.as_str()))
399                .collect(),
400            style: value.style.as_deref(),
401            cues: value.cues.iter().map(|cue| cue.as_ref()).collect(),
402        }
403    }
404}
405
/// Marker trait (it declares no methods) implemented by both `OwnedVtt` and `Vtt`.
pub trait ASubtitle {}

impl ASubtitle for OwnedVtt {}
impl ASubtitle for Vtt<'_> {}
410
411use std::fmt::Write;
412impl Display for Vtt<'_> {
413    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
414        write!(
415            formatter,
416            "{}\n\n{}",
417            START_MARKER,
418            self.cues.iter().fold(String::new(), |mut out, subtitle| {
419                let _ = writeln!(out, "{subtitle}");
420                out
421            })
422        )
423    }
424}
425
/// Input type handed to the parser: a `nom_locate` `LocatedSpan` over the source `&str`.
pub type Span<'a> = LocatedSpan<&'a str>;
427
428#[cfg(test)]
429mod tests {
430    use super::*;
431    use std::fs;
432
    // Parses `tests/complex-vtt-example.vtt` and compares the result, field by field, with
    // the hand-written `Vtt` below (metadata slugs, cue settings, a note and a cue name).
    #[test]
    fn load_and_parse_vtt_file() {
        let content = fs::read_to_string("tests/complex-vtt-example.vtt").unwrap();

        let expected_vtt = Vtt {
            slugs: [("Kind", "captions"), ("Language", "en")]
                .iter()
                .cloned()
                .collect::<HashMap<&str, &str>>(),
            style: None,
            cues: vec![
                VttCue {
                    start: Time(9000),
                    end: Time(11000),
                    name: None,
                    text: "<v Roger Bingham>We are in New York City",
                    note: None,
                    cue_settings: Some(VttCueSettings {
                        vertical: Some(Vertical::RightToLeft),
                        line: None,
                        position: None,
                        size: Some(50),
                        align: Some(Align::End),
                    }),
                },
                VttCue {
                    start: Time(11000),
                    end: Time(13000),
                    name: None,
                    text: "<v Roger Bingham>We are in New York City",
                    note: None,
                    cue_settings: Some(VttCueSettings {
                        vertical: None,
                        line: Some(NumberOrPercentage::Number(1)),
                        position: Some(100),
                        size: None,
                        align: None,
                    }),
                },
                VttCue {
                    start: Time(13000),
                    end: Time(16000),
                    name: None,
                    text: "<v Roger Bingham>We're actually at the Lucern Hotel, just down the street",
                    note: None,
                    cue_settings: Some(VttCueSettings {
                        vertical: None,
                        line: Some(NumberOrPercentage::Percentage(0)),
                        position: None,
                        size: None,
                        align: None,
                    }),
                },
                VttCue {
                    start: Time(16000),
                    end: Time(18000),
                    name: None,
                    text: "<v Roger Bingham>from the American Museum of Natural History",
                    note: None,
                    cue_settings: None,
                },
                VttCue {
                    start: Time(18000),
                    end: Time(20000),
                    name: None,
                    text: "— It will perforate your stomach.",
                    note: None,
                    cue_settings: None,
                },
                VttCue {
                    start: Time(20000),
                    end: Time(22000),
                    name: None,
                    text: "<v Roger Bingham>Astrophysicist, Director of the Hayden Planetarium",
                    note: None,
                    cue_settings: None,
                },
                VttCue {
                    start: Time(22000),
                    end: Time(24000),
                    name: None,
                    text: "<v Roger Bingham>at the AMNH.",
                    note: None,
                    cue_settings: None,
                },
                // This cue is expected to carry the comment that precedes it in the file.
                VttCue {
                    start: Time(24000),
                    end: Time(26000),
                    name: None,
                    text: "<v Roger Bingham>Thank you for walking down here.",
                    note: Some("this is comment"),
                    cue_settings: None,
                },
                // This cue is expected to carry a cue identifier.
                VttCue {
                    start: Time(27000),
                    end: Time(30000),
                    name: Some("this is title"),
                    text: "<v Roger Bingham>And I want to do a follow-up on the last conversation we did.",
                    note: None,
                    cue_settings: None,
                },
                VttCue {
                    start: Time(30000),
                    end: Time(31500),
                    name: None,
                    text: "<v Roger Bingham>When we e-mailed—",
                    note: None,
                    cue_settings: None,
                },
                VttCue {
                    start: Time(30500),
                    end: Time(32500),
                    name: None,
                    text: "<v Neil deGrasse Tyson>Didn't we talk about enough in that conversation?",
                    note: None,
                    cue_settings: Some(VttCueSettings {
                        vertical: None,
                        line: None,
                        position: None,
                        size: Some(50),
                        align: None,
                    }),
                },
                VttCue {
                    start: Time(32000),
                    end: Time(35500),
                    name: None,
                    text: "<v Roger Bingham>No! No no no no; 'cos 'cos obviously 'cos",
                    note: None,
                    cue_settings: Some(VttCueSettings {
                        vertical: None,
                        line: None,
                        position: Some(30),
                        size: Some(50),
                        align: Some(Align::End),
                    }),
                },
                VttCue {
                    start: Time(32500),
                    end: Time(33500),
                    name: None,
                    text: "<v Neil deGrasse Tyson><i>Laughs</i>",
                    note: None,
                    cue_settings: Some(VttCueSettings {
                        vertical: None,
                        line: None,
                        position: None,
                        size: Some(50),
                        align: Some(Align::Start),
                    }),
                },
                VttCue {
                    start: Time(35500),
                    end: Time(38000),
                    name: None,
                    text: "<v Roger Bingham>You know I'm so excited my glasses are falling off here.",
                    note: None,
                    cue_settings: None,
                },
            ],
        };

        assert_eq!(Vtt::parse(&content).unwrap(), expected_vtt);
    }
597
598    #[test]
599    fn incomplete_file() {
600        let content = fs::read_to_string("tests/incomplete.vtt").unwrap();
601
602        match Vtt::parse(&content) {
603            Ok(_) => panic!("The data is incomplete, should fail."),
604            Err(error) => {
605                assert_eq!(error.looking_for, "Digit");
606                assert_eq!(&error.fragment, Span::from("").fragment());
607            }
608        }
609    }
610
611    #[test]
612    fn invalid_file() {
613        match Vtt::parse(include_str!("../tests/invalid.vtt")) {
614            Ok(_) => panic!("The data is invalid, should fail."),
615            Err(VttError {
616                looking_for,
617                fragment,
618                ..
619            }) => {
620                assert_eq!(looking_for, "Tag");
621                assert_eq!(
622                    fragment,
623                    Span::from(",000\nHey subtitle two\n\n")
624                        .fragment()
625                        .to_owned()
626                );
627            }
628        }
629    }
630
631    #[test]
632    fn simple_output() {
633        let content = include_str!("../tests/simple.vtt");
634
635        let vtt = Vtt::parse(content).unwrap();
636        assert_eq!(format!("{}", vtt), content)
637    }
638
639    #[test]
640    fn no_newline() {
641        match Vtt::parse(include_str!("../tests/no_newline.vtt")) {
642            Ok(_) => (),
643            Err(VttError { .. }) => panic!("The data is valid, shouldn't fail."),
644        }
645    }
646
647    #[test]
648    fn with_optional_hours_in_timestamps() {
649        let content = include_str!("../tests/hours.vtt");
650
651        assert_eq!(
652            Vtt::parse(content).unwrap(),
653            Vtt {
654                slugs: HashMap::new(),
655                style: None,
656                cues: vec![
657                    VttCue {
658                        start: Time(0),
659                        end: Time(2560),
660                        name: None,
661                        text: " Some people literally cannot go to the doctor.",
662                        note: None,
663                        cue_settings: None,
664                    },
665                    VttCue {
666                        start: Time(2560),
667                        end: Time(5040),
668                        name: None,
669                        text: " If they get sick, they just hope that they get better",
670                        note: None,
671                        cue_settings: None,
672                    },
673                ],
674            }
675        );
676    }
677}