webvtt_parser/
lib.rs

1mod cue_settings_parser;
2pub mod error;
3mod vtt_parser;
4
5pub use error::VttError;
6use nom_locate::LocatedSpan;
7use std::collections::HashMap;
8use std::fmt::{self, Debug, Display, Formatter};
9
// Signature string expected at the very start of each WebVTT file.
// The `Display` impl for `Vtt` writes it as the first line of its output.
const START_MARKER: &str = "WEBVTT";
12
/// A start or end time of a cue, stored as a whole number of milliseconds.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct Time(pub(crate) u64);
16
17impl Time {
18    #[inline]
19    pub fn as_milliseconds(&self) -> u64 {
20        self.0
21    }
22
23    #[inline]
24    pub fn from_milliseconds(millis: u64) -> Self {
25        Self(millis)
26    }
27}
28
/// Divides `x` by `y` and returns the `(quotient, remainder)` pair.
fn div_rem<T>(x: T, y: T) -> (T, T)
where
    T: std::ops::Div<Output = T> + std::ops::Rem<Output = T> + Copy,
{
    (x / y, x % y)
}
34
35impl Display for Time {
36    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
37        // print hour if needed
38        let (hours, reminder) = div_rem(self.0, 3_600_000);
39        let (minutes, reminder) = div_rem(reminder, 60_000);
40        let (seconds, milliseconds) = div_rem(reminder, 1000);
41
42        if hours > 0 {
43            write!(
44                formatter,
45                "{hours:02}:{minutes:02}:{seconds:02}.{milliseconds:03}",
46            )
47        } else {
48            write!(formatter, "{minutes:02}:{seconds:02}.{milliseconds:03}",)
49        }
50    }
51}
52
/// Direction value of the `vertical` cue setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Vertical {
    /// The right-to-left variant of the setting.
    RightToLeft,
    /// The left-to-right variant of the setting.
    LeftToRight,
}
58
59impl Display for Vertical {
60    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
61        write!(
62            formatter,
63            "{}",
64            match self {
65                Vertical::RightToLeft => "vertical:rt",
66                Vertical::LeftToRight => "vertical:lr",
67            }
68        )
69    }
70}
71
/// A value that is either a plain integer or a percentage.
/// It is the type of the `line` cue setting.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NumberOrPercentage {
    /// A signed integer; displayed as the bare number.
    Number(i32),
    /// A percentage; displayed as the number followed by `%`.
    Percentage(u8),
}
77
78impl Display for NumberOrPercentage {
79    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
80        write!(
81            formatter,
82            "{}",
83            match self {
84                NumberOrPercentage::Number(number) => number.to_string(),
85                NumberOrPercentage::Percentage(percentage) => format!("{percentage}%"),
86            }
87        )
88    }
89}
90
/// Value of the `align` cue setting, which specifies the alignment of the cue text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Align {
    /// Displayed as `start`.
    Start,
    /// Displayed as `middle`.
    Middle,
    /// Displayed as `end`.
    End,
}
97
98impl Display for Align {
99    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
100        write!(
101            formatter,
102            "{}",
103            match self {
104                Align::Start => "start",
105                Align::End => "end",
106                Align::Middle => "middle",
107            }
108        )
109    }
110}
111
/// Cue settings are optional components used to position where the cue payload text will be displayed over the video.
/// This includes whether the text is displayed horizontally or vertically.
/// There can be zero or more of them, and they can be used in any order so long as each setting is used no more than once.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VttCueSettings {
    /// Optional vertical text direction. When it is set, `line`, `position` and `size`
    /// apply to the other axis, as described on each of those fields.
    pub vertical: Option<Vertical>,
    /// Specifies where text appears vertically. If vertical is set, line specifies where text appears horizontally.
    /// Value can be a line number:
    /// - The line height is the height of the first line of the cue as it appears on the video.
    /// - Positive numbers indicate top down.
    /// - Negative numbers indicate bottom up.
    ///
    /// Or value can be a percentage:
    /// - Must be an integer (i.e., no decimals) between 0 and 100 inclusive.
    /// - Must be followed by a percent sign (%).
    pub line: Option<NumberOrPercentage>,
    /// Specifies where the text will appear horizontally. If vertical is set, position specifies where the text will appear vertically. Value is percentage.
    pub position: Option<u8>,
    /// Specifies the width of the text area. If vertical is set, size specifies the height of the text area. Value is percentage.
    pub size: Option<u8>,
    /// Specifies the alignment of the text. Text is aligned within the space given by the size cue setting if it is set.
    pub align: Option<Align>,
}
135
136impl VttCueSettings {
137    pub(crate) fn is_empty(&self) -> bool {
138        self.size.is_none()
139            && self.position.is_none()
140            && self.vertical.is_none()
141            && self.line.is_none()
142            && self.align.is_none()
143    }
144}
145
146impl Display for VttCueSettings {
147    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
148        fn format_opt<T: Display>(name: &str, option: Option<T>) -> String {
149            option
150                .map(|value| format!(" {name}:{value}"))
151                .unwrap_or_else(|| "".to_owned())
152        }
153
154        write!(
155            formatter,
156            "{}{}{}{}{}",
157            format_opt("vertical", self.vertical),
158            format_opt("size", self.size),
159            format_opt("position", self.position),
160            format_opt("line", self.line),
161            format_opt("align", self.align)
162        )
163    }
164}
165
/// A subtitle and associated metadata. All strings are borrowed for the lifetime `'a`.
#[derive(Debug, PartialEq, Clone, Copy, Eq)]
pub struct VttCue<'a> {
    /// Start time of the cue.
    pub start: Time,
    /// End time of the cue.
    pub end: Time,
    /// The identifier is a name that identifies the cue. It can be used to reference the cue from a script. It must not contain a newline and cannot contain the string "-->". It must end with a single newline.
    ///
    /// Ref: https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_identifier
    pub name: Option<&'a str>,
    /// The payload text of the cue.
    pub text: &'a str,
    /// Optional comment attached to the cue; `Display` writes it as a `NOTE` line before the cue.
    pub note: Option<&'a str>,
    /// Optional cue settings that belong to this particular cue. If the value is `Some(VttCueSettings)`, it means that at least one setting was passed.
    ///
    /// Ref: https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_settings
    pub cue_settings: Option<VttCueSettings>,
}
182
183impl<'a> From<VttCue<'a>> for &'a str {
184    fn from(value: VttCue<'a>) -> &'a str {
185        value.text
186    }
187}
188
/// Same as [`VttCue`], but owns its data: every borrowed string is replaced by a `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedVttCue {
    /// Start time of the cue.
    pub start: Time,
    /// End time of the cue.
    pub end: Time,
    /// Optional identifier of the cue.
    pub name: Option<String>,
    /// The payload text of the cue.
    pub text: String,
    /// Optional comment attached to the cue.
    pub note: Option<String>,
    /// Optional cue settings of this cue.
    pub cue_settings: Option<VttCueSettings>,
}
199
200impl<'a> From<&'a OwnedVttCue> for &'a str {
201    fn from(value: &'a OwnedVttCue) -> &'a str {
202        &value.text
203    }
204}
205
206impl OwnedVttCue {
207    pub fn as_ref(&self) -> VttCue {
208        VttCue {
209            start: self.start,
210            end: self.end,
211            name: self.name.as_deref(),
212            text: self.text.as_ref(),
213            note: self.note.as_deref(),
214            cue_settings: self.cue_settings,
215        }
216    }
217}
218
219impl VttCue<'_> {
220    pub fn to_owned(&self) -> OwnedVttCue {
221        OwnedVttCue {
222            start: self.start,
223            end: self.end,
224            name: self.name.map(|name| name.to_owned()),
225            text: self.text.to_owned(),
226            note: self.note.map(|note| note.to_owned()),
227            cue_settings: self.cue_settings,
228        }
229    }
230}
231
232impl Display for VttCue<'_> {
233    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
234        write!(
235            formatter,
236            "{}{}{} --> {}{}\n{}\n",
237            self.note
238                .as_ref()
239                .map(|comment| format!("NOTE {comment}\n"))
240                .unwrap_or_else(|| "".to_owned()),
241            self.name
242                .as_ref()
243                .map(|comment| format!("NOTE {comment}\n"))
244                .unwrap_or_else(|| "".to_owned()),
245            self.start,
246            self.end,
247            self.cue_settings
248                .as_ref()
249                .map(|setting| format!("{setting}"))
250                .unwrap_or_else(|| "".to_owned()),
251            self.text
252        )
253    }
254}
255
/// (web)VTT — Web Video Text Tracks
/// This struct represents a parsed VTT file. It contains a list of cues and optional metadata.
///
/// Use this version when you need to own the data. If it is possible, please use
/// the `Vtt` struct instead.
///
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct OwnedVtt {
    /// Top level key-value metadata pairs that might be populated at the very top of the subtitles
    /// file. For example:
    ///
    /// ```text
    /// WEBVTT
    /// Kind: captions
    /// Language: en
    /// ```
    pub slugs: HashMap<String, String>,
    /// Optional global css style that can be populated at the very top of the file.
    /// If it is present it might be applied globally to all cues.
    ///
    /// For example:
    /// ```text
    /// STYLE
    /// ::cue {
    ///    background-image: linear-gradient(to bottom, dimgray, lightgray);
    ///    color: papayawhip;
    ///    font-size: 50px;
    ///    text-align: center;
    ///    font-family: monospace;
    ///  }
    /// ```
    pub style: Option<String>,
    /// A list of cues that are present in the file.
    /// Each cue contains a start and end time, text and optional cue settings.
    ///
    /// For example:
    /// ```text
    /// WEBVTT
    /// 00:00.000 --> 00:05.000
    /// Hey subtitle one
    /// ```
    pub cues: Vec<OwnedVttCue>,
}
299
/// (web)VTT — Web Video Text Tracks
/// This struct represents a parsed VTT file. It contains a list of cues and optional metadata.
///
/// All strings are borrowed for the lifetime `'a`; `OwnedVtt` is the variant that owns its data.
///
#[derive(Debug, PartialEq, Clone, Eq)]
pub struct Vtt<'a> {
    /// Top level key-value metadata pairs that might be populated at the very top of the subtitles
    /// file. For example:
    ///
    /// ```text
    /// WEBVTT
    /// Kind: captions
    /// Language: en
    /// ```
    pub slugs: HashMap<&'a str, &'a str>,
    /// Optional global css style that can be populated at the very top of the file.
    /// If it is present it might be applied globally to all cues.
    ///
    /// For example:
    /// ```text
    /// STYLE
    /// ::cue {
    ///    background-image: linear-gradient(to bottom, dimgray, lightgray);
    ///    color: papayawhip;
    ///    font-size: 50px;
    ///    text-align: center;
    ///    font-family: monospace;
    ///  }
    /// ```
    pub style: Option<&'a str>,
    /// A list of cues that are present in the file.
    /// Each cue contains a start and end time, text and optional cue settings.
    ///
    /// For example:
    /// ```text
    /// WEBVTT
    /// 00:00.000 --> 00:05.000
    /// Hey subtitle one
    /// ```
    pub cues: Vec<VttCue<'a>>,
}
340
341impl<'a> Vtt<'a> {
342    /// Parse [webvtt subtitles](https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API) from provided string.
343    /// Make sure that it does not allocate any string data and only references parts of the original string.
344    ///
345    /// # Example
346    ///
347    /// ```rust
348    /// use webvtt_parser::{Vtt, VttCue, VttCueSettings, Align, Time};
349    ///
350    /// let vtt = Vtt::parse("WEBVTT
351    ///
352    /// 00:00.000 --> 00:05.000
353    /// Hey subtitle one
354    ///
355    /// 00:05.000 --> 00:08.000 align:end
356    /// Hey subtitle two
357    ///").unwrap();
358    ///
359    /// assert_eq!(vtt.cues.len(), 2);
360    /// assert_eq!(vtt.cues[0], VttCue { start: Time::from_milliseconds(0), end: Time::from_milliseconds(5000), text: "Hey subtitle one", name: None, note: None, cue_settings: None });
361    /// assert_eq!(vtt.cues[1].cue_settings, Some(VttCueSettings { align: Some(Align::End), position: None, vertical: None, size: None, line: None }));
362    /// ```
363    pub fn parse(content: &'a str) -> Result<Self, VttError> {
364        let content = Span::from(content);
365
366        let (_, vtt) = vtt_parser::parse(content)?;
367        Ok(vtt)
368    }
369
370    /// Clones all the borrowes strings and returns the owned oversion of the vtt data.
371    pub fn to_owned(&self) -> OwnedVtt {
372        OwnedVtt {
373            slugs: self
374                .slugs
375                .iter()
376                .map(|(key, value)| (key.to_string(), value.to_string()))
377                .collect(),
378            style: self.style.map(|style| style.to_owned()),
379            cues: self.cues.iter().map(|cue| cue.to_owned()).collect(),
380        }
381    }
382}
383
384impl OwnedVtt {
385    pub fn parse(content: &str) -> Result<Self, VttError> {
386        let borrowed_vtt = Vtt::parse(content)?;
387
388        Ok(borrowed_vtt.to_owned())
389    }
390}
391
392impl<'a> From<&'a OwnedVtt> for Vtt<'a> {
393    fn from(value: &'a OwnedVtt) -> Self {
394        Vtt {
395            slugs: value
396                .slugs
397                .iter()
398                .map(|(key, value)| (key.as_str(), value.as_str()))
399                .collect(),
400            style: value.style.as_deref(),
401            cues: value.cues.iter().map(|cue| cue.as_ref()).collect(),
402        }
403    }
404}
405
/// Marker trait without any methods, implemented by both the owned and the borrowed
/// representation of a parsed file.
pub trait ASubtitle {}

impl ASubtitle for OwnedVtt {}
impl ASubtitle for Vtt<'_> {}
410
411use std::fmt::Write;
412impl Display for Vtt<'_> {
413    fn fmt(&self, formatter: &mut Formatter) -> fmt::Result {
414        write!(
415            formatter,
416            "{}\n\n{}",
417            START_MARKER,
418            self.cues.iter().fold(String::new(), |mut out, subtitle| {
419                let _ = writeln!(out, "{subtitle}");
420                out
421            })
422        )
423    }
424}
425
/// Parser input type: a string slice wrapped in `nom_locate`'s `LocatedSpan`.
pub type Span<'a> = LocatedSpan<&'a str>;
427
428#[cfg(test)]
429mod tests {
430    use super::*;
431    use std::fs;
432
433    #[test]
434    fn load_and_parse_vtt_file() {
435        let content = fs::read_to_string("tests/complex-vtt-example.vtt").unwrap();
436
437        let expected_vtt = Vtt {
438        slugs: [
439            ("Kind", "captions"),
440            ("Language", "en"),
441        ]
442        .iter()
443        .cloned()
444        .collect::<HashMap<&str, &str>>(),
445        style: None,
446        cues: vec![
447            VttCue {
448                start: Time(9000),
449                end: Time(11000),
450                name: None,
451                text: "<v Roger Bingham>We are in New York City",
452                note: None,
453                cue_settings: Some(VttCueSettings {
454                    vertical: Some(Vertical::RightToLeft),
455                    line: None,
456                    position: None,
457                    size: Some(50),
458                    align: Some(Align::End),
459                }),
460            },
461            VttCue {
462                start: Time(11000),
463                end: Time(13000),
464                name: None,
465                text: "<v Roger Bingham>We are in New York City",
466                note: None,
467                cue_settings: Some(VttCueSettings {
468                    vertical: None,
469                    line: Some(NumberOrPercentage::Number(1)),
470                    position: Some(100),
471                    size: None,
472                    align: None,
473                }),
474            },
475            VttCue {
476                start: Time(13000),
477                end: Time(16000),
478                name: None,
479                text: "<v Roger Bingham>We're actually at the Lucern Hotel, just down the street",
480                note: None,
481                cue_settings: Some(VttCueSettings {
482                    vertical: None,
483                    line: Some(NumberOrPercentage::Percentage(0)),
484                    position: None,
485                    size: None,
486                    align: None,
487                }),
488            },
489            VttCue {
490                start: Time(16000),
491                end: Time(18000),
492                name: None,
493                text: "<v Roger Bingham>from the American Museum of Natural History",
494                note: None,
495                cue_settings: None,
496            },
497            VttCue {
498                start: Time(18000),
499                end: Time(20000),
500                name: None,
501                text: "— It will perforate your stomach.",
502                note: None,
503                cue_settings: None,
504            },
505            VttCue {
506                start: Time(20000),
507                end: Time(22000),
508                name: None,
509                text: "<v Roger Bingham>Astrophysicist, Director of the Hayden Planetarium",
510                note: None,
511                cue_settings: None,
512            },
513            VttCue {
514                start: Time(22000),
515                end: Time(24000),
516                name: None,
517                text: "<v Roger Bingham>at the AMNH.",
518                note: None,
519                cue_settings: None,
520            },
521            VttCue {
522                start: Time(24000),
523                end: Time(26000),
524                name: None,
525                text: "<v Roger Bingham>Thank you for walking down here.",
526                note: Some("this is comment"),
527                cue_settings: None,
528            },
529            VttCue {
530                start: Time(27000),
531                end: Time(30000),
532                name: Some("this is title"),
533                text: "<v Roger Bingham>And I want to do a follow-up on the last conversation we did.",
534                note: None,
535                cue_settings: None,
536            },
537            VttCue {
538                start: Time(30000),
539                end: Time(31500),
540                name: None,
541                text: "<v Roger Bingham>When we e-mailed—",
542                note: None,
543                cue_settings: None,
544            },
545            VttCue {
546                start: Time(30500),
547                end: Time(32500),
548                name: None,
549                text: "<v Neil deGrasse Tyson>Didn't we talk about enough in that conversation?",
550                note: None,
551                cue_settings: Some(VttCueSettings {
552                    vertical: None,
553                    line: None,
554                    position: None,
555                    size: Some(50),
556                    align: None,
557                }),
558            },
559            VttCue {
560                start: Time(32000),
561                end: Time(35500),
562                name: None,
563                text: "<v Roger Bingham>No! No no no no; 'cos 'cos obviously 'cos",
564                note: None,
565                cue_settings: Some(VttCueSettings {
566                    vertical: None,
567                    line: None,
568                    position: Some(30),
569                    size: Some(50),
570                    align: Some(Align::End),
571                }),
572            },
573            VttCue {
574                start: Time(32500),
575                end: Time(33500),
576                name: None,
577                text: "<v Neil deGrasse Tyson><i>Laughs</i>",
578                note: None,
579                cue_settings: Some(VttCueSettings {
580                    vertical: None,
581                    line: None,
582                    position: None,
583                    size: Some(50),
584                    align: Some(Align::Start),
585                }),
586            },
587            VttCue {
588                start: Time(35500),
589                end: Time(38000),
590                name: None,
591                text: "<v Roger Bingham>You know I'm so excited my glasses are falling off here.",
592                note: None,
593                cue_settings: None,
594            },
595        ],
596    };
597
598        assert_eq!(Vtt::parse(&content).unwrap(), expected_vtt);
599    }
600
601    #[test]
602    fn incomplete_file() {
603        let content = fs::read_to_string("tests/incomplete.vtt").unwrap();
604
605        match Vtt::parse(&content) {
606            Ok(_) => panic!("The data is incomplete, should fail."),
607            Err(error) => {
608                assert_eq!(error.looking_for, "Digit");
609                assert_eq!(&error.fragment, Span::from("").fragment());
610            }
611        }
612    }
613
614    #[test]
615    fn invalid_file() {
616        match Vtt::parse(include_str!("../tests/invalid.vtt")) {
617            Ok(_) => panic!("The data is invalid, should fail."),
618            Err(VttError {
619                looking_for,
620                fragment,
621                ..
622            }) => {
623                assert_eq!(looking_for, "Tag");
624                assert_eq!(
625                    fragment,
626                    Span::from(",000\nHey subtitle two\n\n")
627                        .fragment()
628                        .to_owned()
629                );
630            }
631        }
632    }
633
634    #[test]
635    fn simple_output() {
636        let content = include_str!("../tests/simple.vtt");
637
638        let vtt = Vtt::parse(content).unwrap();
639        assert_eq!(format!("{}", vtt), content)
640    }
641
642    #[test]
643    fn no_newline() {
644        match Vtt::parse(include_str!("../tests/no_newline.vtt")) {
645            Ok(_) => (),
646            Err(VttError { .. }) => panic!("The data is valid, shouldn't fail."),
647        }
648    }
649
650    #[test]
651    fn with_optional_hours_in_timestamps() {
652        let content = include_str!("../tests/hours.vtt");
653
654        assert_eq!(
655            Vtt::parse(content).unwrap(),
656            Vtt {
657                slugs: HashMap::new(),
658                style: None,
659                cues: vec![
660                    VttCue {
661                        start: Time(0),
662                        end: Time(2560),
663                        name: None,
664                        text: " Some people literally cannot go to the doctor.",
665                        note: None,
666                        cue_settings: None,
667                    },
668                    VttCue {
669                        start: Time(2560),
670                        end: Time(5040),
671                        name: None,
672                        text: " If they get sick, they just hope that they get better",
673                        note: None,
674                        cue_settings: None,
675                    },
676                ],
677            }
678        );
679    }
680}