use crate::style::{Alignment, Position};
use crate::{Subtitle, SubtitleError, SubtitleResult};
use nom::{
bytes::complete::{tag, take_until, take_while, take_while1},
character::complete::{char, digit1, line_ending, not_line_ending, space0},
combinator::{map, map_res, opt},
multi::{many0, many1},
sequence::{preceded, separated_pair, terminated},
IResult,
};
/// Parses WebVTT subtitles from raw bytes.
///
/// The bytes are decoded as UTF-8 lossily (invalid sequences become U+FFFD)
/// and the resulting text is handed to [`parse_webvtt`].
///
/// # Errors
///
/// Returns whatever error [`parse_webvtt`] reports for the decoded text.
pub fn parse(data: &[u8]) -> SubtitleResult<Vec<Subtitle>> {
    parse_webvtt(&String::from_utf8_lossy(data))
}
pub fn parse_webvtt(input: &str) -> SubtitleResult<Vec<Subtitle>> {
let normalized = input.replace("\r\n", "\n");
match parse_vtt_file(&normalized) {
Ok((_, subtitles)) => Ok(subtitles),
Err(e) => Err(SubtitleError::ParseError(format!(
"WebVTT parse error: {e}"
))),
}
}
/// Parses a complete WebVTT document into its cues.
///
/// Processing order: strip an optional leading BOM, parse the `WEBVTT`
/// header line, run `skip_metadata`, then repeatedly skip whitespace and
/// parse one block at a time until the input is exhausted.
///
/// A block that does not parse as a cue (for example a `NOTE`, `STYLE` or
/// `REGION` block, or a malformed cue) is skipped up to and including the
/// next blank line and parsing continues with the following block, instead
/// of silently discarding every cue after it.
///
/// Returns the unconsumed input (empty once the loop finishes) and the cues
/// in document order. Fails only if the header or `skip_metadata` fails.
fn parse_vtt_file(input: &str) -> IResult<&str, Vec<Subtitle>> {
    // Returns what follows the block starting at `block`: consumes lines up
    // to and including the first blank line, or everything when there is no
    // blank line. The first line is always consumed when it is non-blank,
    // which guarantees forward progress for the caller.
    fn skip_block(block: &str) -> &str {
        let mut rest = block;
        while !rest.is_empty() {
            let (line, tail) = rest.split_once('\n').unwrap_or((rest, ""));
            rest = tail;
            if line.trim().is_empty() {
                break;
            }
        }
        rest
    }

    let input = input.strip_prefix('\u{feff}').unwrap_or(input);
    let (input, _) = parse_header(input)?;
    let (mut input, _) = skip_metadata(input)?;

    let mut cues = Vec::new();
    loop {
        // Blocks are separated by blank lines; drop them and any other
        // leading whitespace so `input` starts at the next block.
        input = input.trim_start();
        if input.is_empty() {
            break;
        }
        match parse_cue_block(input) {
            Ok((rest, cue)) => {
                cues.push(cue);
                input = rest;
            }
            // Not a cue: skip this block only, keep parsing the rest.
            Err(_) => input = skip_block(input),
        }
    }
    Ok((input, cues))
}
fn parse_header(input: &str) -> IResult<&str, ()> {
let (input, _) = tag("WEBVTT")(input)?;
let (input, _) = if let Ok((rest, _)) = space0::<_, nom::error::Error<_>>(input) {
if let Ok((rest2, _)) = not_line_ending::<_, nom::error::Error<_>>(rest) {
(rest2, ())
} else {
(rest, ())
}
} else {
(input, ())
};
let (input, _) = line_ending(input)?;
Ok((input, ()))
}
fn skip_metadata(input: &str) -> IResult<&str, ()> {
let (input, _) = take_while(|c: char| c.is_whitespace())(input)?;
Ok((input, ()))
}
fn parse_cue_block(input: &str) -> IResult<&str, Subtitle> {
let (input, _) = skip_empty_lines(input)?;
let (input, _) = if let Ok((rest, _)) = cue_identifier(input) {
if let Ok((rest2, _)) = line_ending::<_, nom::error::Error<_>>(rest) {
(rest2, ())
} else {
(input, ())
}
} else {
(input, ())
};
let (input, (start, end, settings)) = cue_timings(input)?;
let (input, _) = line_ending(input)?;
let (input, text) = cue_payload(input)?;
let (position, _alignment) = parse_cue_settings(&settings);
let mut subtitle = Subtitle::new(start, end, text);
if let Some(pos) = position {
subtitle.position = Some(pos);
}
Ok((input, subtitle))
}
/// Parses a single cue block by forwarding `input` unchanged to
/// [`parse_cue_block`] and returning its result as-is.
fn cue_block(input: &str) -> IResult<&str, Subtitle> {
parse_cue_block(input)
}
fn skip_empty_lines(input: &str) -> IResult<&str, ()> {
let (input, _) = take_while(|c: char| c.is_whitespace())(input)?;
Ok((input, ()))
}
fn cue_identifier(input: &str) -> IResult<&str, &str> {
let (input, id) = take_while1(|c: char| c != '\n' && c != '\r')(input)?;
if id.contains("-->") {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)));
}
Ok((input, id))
}
fn cue_timings(input: &str) -> IResult<&str, (i64, i64, String)> {
let (input, start) = vtt_timestamp(input)?;
let (input, _) = space0(input)?;
let (input, _) = tag("-->")(input)?;
let (input, _) = space0(input)?;
let (input, end) = vtt_timestamp(input)?;
let (input, settings) = if let Ok((rest, _)) = space0::<_, nom::error::Error<_>>(input) {
if let Ok((rest2, s)) = not_line_ending::<_, nom::error::Error<_>>(rest) {
(rest2, s.to_string())
} else {
(rest, String::new())
}
} else {
(input, String::new())
};
Ok((input, (start, end, settings)))
}
/// Parses a WebVTT timestamp in either form.
///
/// The long form (`hh:mm:ss.ttt`) is tried first; if it does not match, the
/// result of the short form (`mm:ss.ttt`) is returned, including its error.
fn vtt_timestamp(input: &str) -> IResult<&str, i64> {
    timestamp_long(input).or_else(|_| timestamp_short(input))
}
fn timestamp_long(input: &str) -> IResult<&str, i64> {
let (input, hours) = digit1(input)?;
let (input, _) = char(':')(input)?;
let (input, minutes) = digit1(input)?;
let (input, _) = char(':')(input)?;
let (input, seconds) = digit1(input)?;
let (input, _) = char('.')(input)?;
let (input, millis) = digit1(input)?;
let result = parse_timestamp_parts(hours, minutes, seconds, millis)
.map_err(|_| nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Fail)))?;
Ok((input, result))
}
fn timestamp_short(input: &str) -> IResult<&str, i64> {
let (input, minutes) = digit1(input)?;
let (input, _) = char(':')(input)?;
let (input, seconds) = digit1(input)?;
let (input, _) = char('.')(input)?;
let (input, millis) = digit1(input)?;
let result = parse_timestamp_short(minutes, seconds, millis)
.map_err(|_| nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Fail)))?;
Ok((input, result))
}
/// Converts the textual components of an `hh:mm:ss.ttt` timestamp into a
/// total number of milliseconds.
///
/// The components are combined with saturating arithmetic, so absurdly
/// large (but still parseable) values clamp to the `i64` range instead of
/// overflowing.
///
/// # Errors
///
/// Returns the `ParseIntError` of the first component that is not a valid
/// `i64`.
fn parse_timestamp_parts(
    hours: &str,
    minutes: &str,
    seconds: &str,
    millis: &str,
) -> Result<i64, std::num::ParseIntError> {
    let h: i64 = hours.parse()?;
    let m: i64 = minutes.parse()?;
    let s: i64 = seconds.parse()?;
    let ms: i64 = millis.parse()?;
    // The input is untrusted text: never let a huge field overflow.
    Ok(h.saturating_mul(3_600_000)
        .saturating_add(m.saturating_mul(60_000))
        .saturating_add(s.saturating_mul(1_000))
        .saturating_add(ms))
}
/// Converts the textual components of an `mm:ss.ttt` timestamp into a total
/// number of milliseconds.
///
/// The components are combined with saturating arithmetic, so absurdly
/// large (but still parseable) values clamp to the `i64` range instead of
/// overflowing.
///
/// # Errors
///
/// Returns the `ParseIntError` of the first component that is not a valid
/// `i64`.
fn parse_timestamp_short(
    minutes: &str,
    seconds: &str,
    millis: &str,
) -> Result<i64, std::num::ParseIntError> {
    let m: i64 = minutes.parse()?;
    let s: i64 = seconds.parse()?;
    let ms: i64 = millis.parse()?;
    // The input is untrusted text: never let a huge field overflow.
    Ok(m.saturating_mul(60_000)
        .saturating_add(s.saturating_mul(1_000))
        .saturating_add(ms))
}
fn cue_payload(input: &str) -> IResult<&str, String> {
let mut text = String::new();
let mut remaining = input;
#[allow(clippy::while_let_loop)]
loop {
match not_line_ending::<_, nom::error::Error<_>>(remaining) {
Ok((rest, line)) => {
if line.trim().is_empty() {
let (rest, _) =
line_ending::<_, nom::error::Error<_>>(rest).unwrap_or((rest, ""));
return Ok((rest, text));
}
if !text.is_empty() {
text.push('\n');
}
text.push_str(line);
if let Ok((rest2, _)) = line_ending::<_, nom::error::Error<_>>(rest) {
remaining = rest2;
} else {
break;
}
}
Err(_) => break,
}
}
let cleaned = strip_vtt_tags(&text);
Ok((input, cleaned))
}
/// Extracts the position and alignment from a cue settings string.
///
/// `settings` is a whitespace-separated list of `name:value` pairs; unknown
/// names are ignored, as are individual settings whose value is invalid
/// (an invalid setting never erases an earlier valid one).
///
/// * `position:N%` or `position:N%,<alignment>` — only the percentage is
///   used. It must be a finite number in `0..=100`. The resulting
///   [`Position`] is built from `N / 100` and the fixed second argument
///   `0.9` (presumably a default vertical placement — confirm against
///   `Position::new`).
/// * `align:start|left`, `align:center|middle`, `align:end|right`.
///
/// Returns `(position, alignment)`, each `None` when no valid setting of
/// that kind was present.
fn parse_cue_settings(settings: &str) -> (Option<Position>, Option<Alignment>) {
    let mut position = None;
    let mut alignment = None;
    for setting in settings.split_whitespace() {
        if let Some(value) = setting.strip_prefix("position:") {
            // The value may carry an alignment suffix after a comma
            // ("10%,line-left"); only the percentage part is numeric.
            let number = value
                .split(',')
                .next()
                .unwrap_or(value)
                .trim_end_matches('%');
            if let Ok(percent) = number.parse::<f32>() {
                // The range check also rejects NaN and infinities, which
                // `f32::from_str` happily accepts.
                if (0.0..=100.0).contains(&percent) {
                    position = Some(Position::new(percent / 100.0, 0.9));
                }
            }
        } else if let Some(value) = setting.strip_prefix("align:") {
            let parsed = match value {
                "start" | "left" => Some(Alignment::Left),
                "center" | "middle" => Some(Alignment::Center),
                "end" | "right" => Some(Alignment::Right),
                _ => None,
            };
            if parsed.is_some() {
                alignment = parsed;
            }
        }
    }
    (position, alignment)
}
/// Removes WebVTT markup tags from cue text and decodes HTML entities.
///
/// Everything from a `<` up to and including the next `>` is dropped. A `>`
/// that does not close a tag is ordinary text and is kept. The remaining
/// text is passed through `crate::text::decode_html_entities`.
fn strip_vtt_tags(text: &str) -> String {
    let mut result = String::with_capacity(text.len());
    let mut in_tag = false;
    for c in text.chars() {
        match c {
            '<' => in_tag = true,
            // Only a '>' that closes an open tag is markup.
            '>' if in_tag => in_tag = false,
            _ if in_tag => {}
            _ => result.push(c),
        }
    }
    crate::text::decode_html_entities(&result)
}
/// Formats a duration in milliseconds as a WebVTT timestamp.
///
/// Produces `hh:mm:ss.ttt` when the value is one hour or more and the short
/// form `mm:ss.ttt` otherwise. WebVTT has no negative timestamps, so
/// negative input is clamped to zero rather than emitting malformed text
/// such as `00:00.-01`.
#[must_use]
pub fn format_timestamp(ms: i64) -> String {
    let ms = ms.max(0);
    let hours = ms / 3_600_000;
    let minutes = (ms % 3_600_000) / 60_000;
    let seconds = (ms % 60_000) / 1_000;
    let millis = ms % 1_000;
    if hours > 0 {
        format!("{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
    } else {
        format!("{minutes:02}:{seconds:02}.{millis:03}")
    }
}
/// Serializes subtitles as a WebVTT document.
///
/// The output starts with the `WEBVTT` signature and a blank line. Each
/// subtitle then contributes a timing line built with [`format_timestamp`],
/// its text verbatim, and a blank line. Cue identifiers and cue settings are
/// not written.
///
/// # Errors
///
/// This implementation always returns `Ok`.
pub fn write(subtitles: &[Subtitle]) -> SubtitleResult<String> {
    let document = subtitles
        .iter()
        .fold(String::from("WEBVTT\n\n"), |mut out, cue| {
            let timing_line = format!(
                "{} --> {}\n",
                format_timestamp(cue.start_time),
                format_timestamp(cue.end_time)
            );
            out.push_str(&timing_line);
            out.push_str(&cue.text);
            out.push_str("\n\n");
            out
        });
    Ok(document)
}