use std::time::Duration;
use nom::{
bytes::complete::{is_not, tag},
character::complete::{line_ending, one_of},
combinator::{map, map_res, opt, recognize},
multi::{many0, many1},
sequence::{preceded, terminated, tuple},
IResult,
};
use crate::{Error, Subtitle};
/// Recognizes one or more ASCII decimal digits and returns them as a slice
/// of the input.
///
/// Fails when `input` does not start with a digit.
fn decimal(input: &str) -> IResult<&str, &str> {
    const DIGITS: &str = "0123456789";
    recognize(many1(one_of(DIGITS)))(input)
}
/// Matches a single U+FEFF byte order mark at the start of `input`.
fn bom(input: &str) -> IResult<&str, &str> {
    const BYTE_ORDER_MARK: &str = "\u{feff}";
    tag(BYTE_ORDER_MARK)(input)
}
/// Parses a run of ASCII decimal digits into a `u32`.
///
/// Fails when there are no leading digits or when the digits do not fit in
/// a `u32`.
fn ascii_u32(input: &str) -> IResult<&str, u32> {
    map_res(decimal, str::parse::<u32>)(input)
}
/// Parses an SRT timestamp of the form `HH:MM:SS,mmm` into a [`Duration`].
///
/// Each field is any run of decimal digits that fits in a `u32`; the fields
/// are not range-checked, and the last field is taken as a literal count of
/// milliseconds (so `,5` means 5 ms).
///
/// The total is accumulated in `u64`. Accumulating in `u32` would overflow
/// once the timestamp exceeds `u32::MAX` milliseconds (about 1193 hours),
/// which panics in debug builds and silently wraps in release builds.
fn timestamp(input: &str) -> IResult<&str, Duration> {
    map(
        tuple((
            ascii_u32,
            tag(":"),
            ascii_u32,
            tag(":"),
            ascii_u32,
            tag(","),
            ascii_u32,
        )),
        |(hours, _, minutes, _, seconds, _, millis)| {
            // Widen before multiplying: with four `u32` fields the largest
            // possible total is roughly 1.6e16 ms, well inside `u64`.
            let total_minutes = u64::from(hours) * 60 + u64::from(minutes);
            let total_seconds = total_minutes * 60 + u64::from(seconds);
            Duration::from_millis(total_seconds * 1000 + u64::from(millis))
        },
    )(input)
}
/// Parses a timing line body, `<start> --> <end>`, into its two timestamps.
///
/// At least one space is required on each side of the `-->` arrow; only the
/// space character is accepted there.
fn timespan(input: &str) -> IResult<&str, (Duration, Duration)> {
    let (rest, (start, _, _, _, end)) = tuple((
        timestamp,
        many1(tag(" ")),
        tag("-->"),
        many1(tag(" ")),
        timestamp,
    ))(input)?;
    Ok((rest, (start, end)))
}
/// Recognizes the text body of a subtitle: zero or more non-empty lines,
/// each followed by a line ending.
///
/// The returned slice includes the line endings. Matching stops at the first
/// blank line, or at a final line that has no line ending (which is left
/// unconsumed).
fn text(input: &str) -> IResult<&str, &str> {
    let terminated_line = terminated(is_not("\n\r"), line_ending);
    recognize(many0(terminated_line))(input)
}
/// Parses one subtitle entry: an index line, a timing line, the text lines,
/// and any blank lines that follow.
///
/// Blank lines between the timing line and the text are skipped. Windows
/// line endings inside the text are normalized to `\n`; the text keeps the
/// line ending of each of its lines.
fn subtitle(input: &str) -> IResult<&str, Subtitle> {
    let (rest, (idx, _, (start, end), _, body, _)) = tuple((
        ascii_u32,
        line_ending,
        timespan,
        many0(line_ending),
        text,
        many0(line_ending),
    ))(input)?;

    let entry = Subtitle {
        idx,
        start,
        end,
        text: body.replace("\r\n", "\n"),
    };
    Ok((rest, entry))
}
/// Parses a whole SRT document: an optional byte order mark, any leading
/// blank lines, then as many subtitle entries as can be parsed.
///
/// Parsing stops at the first thing that is not a subtitle; whatever is left
/// is returned as the remaining input.
fn srt_file(input: &str) -> IResult<&str, Vec<Subtitle>> {
    let prologue = tuple((opt(bom), many0(line_ending)));
    preceded(prologue, many0(subtitle))(input)
}
pub fn parse(input: &str) -> Result<Vec<Subtitle>, Error> {
let (leftover, results) = srt_file(input).map_err(|_| Error::ParseError(vec![], 0))?;
if !leftover.is_empty() {
let start_remainder = input.len() - leftover.len();
Err(Error::ParseError(results, start_remainder))
} else {
Ok(results)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare run of digits parses to its value and leaves nothing behind.
    #[test]
    fn parse_numberline() {
        let (rest, value) = ascii_u32("231").unwrap();
        assert_eq!(value, 231);
        assert_eq!(rest, "");
    }

    /// Parses `raw` as a timestamp, panicking if the parser rejects it.
    fn parsed_timestamp(raw: &str) -> Duration {
        timestamp(raw).unwrap().1
    }

    #[test]
    fn timestamp_zero() {
        assert_eq!(parsed_timestamp("00:00:00,000"), Duration::from_millis(0));
    }

    #[test]
    fn timestamp_millis() {
        assert_eq!(parsed_timestamp("00:00:00,050"), Duration::from_millis(50));
    }

    #[test]
    fn timestamp_secs() {
        assert_eq!(parsed_timestamp("00:00:08,000"), Duration::from_secs(8));
    }

    #[test]
    fn timestamp_min() {
        assert_eq!(
            parsed_timestamp("00:14:00,000"),
            Duration::from_secs(14 * 60)
        );
    }

    #[test]
    fn timestamp_hrs() {
        assert_eq!(
            parsed_timestamp("02:00:00,000"),
            Duration::from_secs(2 * 60 * 60)
        );
    }

    #[test]
    fn timestamp_random() {
        let expected_millis = ((2 * 60 + 14) * 60 + 8) * 1000 + 50;
        assert_eq!(
            parsed_timestamp("02:14:08,050"),
            Duration::from_millis(expected_millis)
        );
    }

    /// One subtitle fixture: the raw pieces used to build its input text,
    /// plus the values those pieces are expected to parse to.
    struct TimeSpanTestCase {
        /// Index written on the first line of the entry.
        unit: u32,
        /// Timing line exactly as it appears in the input.
        raw: &'static str,
        /// Start time that `raw` should parse to.
        start: Duration,
        /// End time that `raw` should parse to.
        end: Duration,
        /// Text lines of the entry, without line endings.
        lines: &'static [&'static str],
    }

    /// Line terminator style used when rendering a fixture.
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum LineEnding {
        Unix,
        Windows,
    }

    impl LineEnding {
        /// The terminator characters for this style.
        fn as_str(&self) -> &'static str {
            match self {
                LineEnding::Windows => "\r\n",
                LineEnding::Unix => "\n",
            }
        }
    }

    impl TimeSpanTestCase {
        /// Renders the fixture as SRT input text.
        ///
        /// `text_newline` inserts a blank line between the timing line and
        /// the text; `final_newline` appends a blank line after the text.
        fn input(&self, ending: LineEnding, text_newline: bool, final_newline: bool) -> String {
            let eol = ending.as_str();
            let mut rendered = format!("{}{}{}{}", self.unit, eol, self.raw, eol);
            if text_newline {
                rendered.push_str(eol);
            }
            for line in self.lines.iter() {
                rendered.push_str(line);
                rendered.push_str(eol);
            }
            if final_newline {
                rendered.push_str(eol);
            }
            rendered
        }

        /// The `Subtitle` this fixture should parse to; its text always uses
        /// `\n` and ends with one.
        fn sub(&self) -> Subtitle {
            Subtitle {
                idx: self.unit,
                start: self.start,
                end: self.end,
                text: format!("{}\n", self.lines.join("\n")),
            }
        }
    }

    const EX_TS_1: TimeSpanTestCase = TimeSpanTestCase {
        unit: 1,
        raw: "00:00:02,002 --> 00:00:05,403",
        start: Duration::from_millis(2_002),
        end: Duration::from_millis(5_403),
        lines: &[
            "<i>Now the story of a wealthy family</i>",
            "<i>who lost everything...</i>",
        ],
    };

    const EX_TS_2: TimeSpanTestCase = TimeSpanTestCase {
        unit: 2,
        raw: "00:00:05,505 --> 00:00:07,496",
        start: Duration::from_millis(5_505),
        end: Duration::from_millis(7_496),
        lines: &["<i>and the one son</i>", "<i>who had no choice...</i>"],
    };

    const EX_TS_3: TimeSpanTestCase = TimeSpanTestCase {
        unit: 3,
        raw: "00:00:07,607 --> 00:00:09,598",
        start: Duration::from_millis(7_607),
        end: Duration::from_millis(9_598),
        lines: &["<i>but to keep them all together.</i>"],
    };

    /// Builds a three-entry SRT document together with the subtitles it is
    /// expected to parse to.
    ///
    /// The first two entries are always followed by a blank line; the last
    /// one is followed by a blank line unless `short_ending` is set.
    fn build_srt(
        ending: LineEnding,
        extra_text_line: bool,
        short_ending: bool,
        leading_bom: bool,
    ) -> (String, Vec<Subtitle>) {
        let mut input = String::new();
        if leading_bom {
            input.push('\u{feff}');
        }

        let cases = [
            (EX_TS_1, true),
            (EX_TS_2, true),
            (EX_TS_3, !short_ending),
        ];
        let mut expected = Vec::with_capacity(cases.len());
        for (case, final_newline) in cases.iter() {
            input.push_str(&case.input(ending, extra_text_line, *final_newline));
            expected.push(case.sub());
        }

        (input, expected)
    }

    /// Parses a generated document with `srt_file` and checks that all input
    /// is consumed and the expected subtitles come out.
    fn test_srt(ending: LineEnding, extra_text_line: bool, short_ending: bool, leading_bom: bool) {
        let (document, expected) = build_srt(ending, extra_text_line, short_ending, leading_bom);
        let (remaining, parsed) = srt_file(&document).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(parsed, expected);
    }

    /// Parses every fixture on its own with `subtitle` and checks the result.
    fn test_each_sub(ending: LineEnding, extra_text_line: bool, final_newline: bool) {
        for case in [EX_TS_1, EX_TS_2, EX_TS_3].iter() {
            let rendered = case.input(ending, extra_text_line, final_newline);
            let (remaining, parsed) = subtitle(&rendered).unwrap();
            assert_eq!(remaining, "");
            assert_eq!(parsed, case.sub());
        }
    }

    #[test]
    fn parse_sub() {
        test_each_sub(LineEnding::Unix, false, true);
    }

    #[test]
    fn parse_sub_windows() {
        test_each_sub(LineEnding::Windows, false, true);
    }

    #[test]
    fn parse_sub_last() {
        test_each_sub(LineEnding::Unix, false, false);
    }

    #[test]
    fn parse_sub_windows_last() {
        test_each_sub(LineEnding::Windows, false, false);
    }

    #[test]
    fn parse_sub_text_newline() {
        test_each_sub(LineEnding::Unix, true, false);
    }

    #[test]
    fn parse_sub_windows_text_newline() {
        test_each_sub(LineEnding::Windows, true, false);
    }

    #[test]
    fn parse_srt_leading_byte_order_mark() {
        test_srt(LineEnding::Unix, false, false, true);
    }

    #[test]
    fn parse_srt_unix() {
        test_srt(LineEnding::Unix, false, false, false);
    }

    #[test]
    fn parse_srt_windows() {
        test_srt(LineEnding::Windows, false, false, false);
    }

    #[test]
    fn parse_srt_unix_text_newline() {
        test_srt(LineEnding::Unix, true, false, false);
    }

    #[test]
    fn parse_srt_windows_text_newline() {
        test_srt(LineEnding::Windows, true, false, false);
    }

    #[test]
    fn parse_srt_unix_short() {
        test_srt(LineEnding::Unix, false, true, false);
    }

    #[test]
    fn parse_srt_windows_short() {
        test_srt(LineEnding::Windows, false, true, false);
    }

    #[test]
    fn parse_srt_unix_text_newline_short() {
        test_srt(LineEnding::Unix, true, true, false);
    }

    #[test]
    fn parse_srt_windows_text_newline_short() {
        test_srt(LineEnding::Windows, true, true, false);
    }

    /// Trailing junk makes `parse` fail, but the error still carries the
    /// subtitles read so far and the byte offset where the junk begins.
    #[test]
    fn parse_incomplete() {
        let junk = "this is extra junk at the end of the file";
        let (mut input, expected) = build_srt(LineEnding::Windows, true, true, false);
        input.push_str(junk);

        match parse(input.as_str()) {
            Err(Error::ParseError(res, offset)) => {
                assert_eq!(res, expected);
                let tail = &input.as_bytes()[offset..];
                assert_eq!(std::str::from_utf8(tail).unwrap(), junk);
            }
            Ok(_) => panic!("expected error"),
        }
    }
}