use super::{decoder::VobSubDecoder, img::VobSubIndexedImage, mpeg2::ps, VobSubError};
use crate::{
content::{Area, AreaValues},
time::TimeSpan,
util::BytesFormatter,
vobsub::{
img::{VobSubRleImage, VobSubRleImageData},
IResultExt as _,
},
};
use iter_fixed::IntoIteratorFixed as _;
use log::{trace, warn};
use nom::{
bits::{bits, complete::take as take_bits},
branch::alt,
bytes::complete::{tag as tag_bytes, take_until},
combinator::{map, value},
multi::{count, many_till},
number::complete::be_u16,
sequence::preceded,
IResult, Parser as _,
};
use std::{
cmp::Ordering, fmt::Debug, fs, iter::FusedIterator, marker::PhantomData, path::Path,
slice::from_ref,
};
use thiserror::Error;
fn palette_entries(input: &[u8]) -> IResult<&[u8], [u8; 4]> {
let (input, vec) = bits(count(
take_bits::<_, _, _, nom::error::Error<(&[u8], usize)>>(4usize),
4,
))(input)?;
let mut result = [0; 4];
<[u8; 4] as AsMut<_>>::as_mut(&mut result).clone_from_slice(&vec[0..4]);
Ok((input, result))
}
/// Reads one 12-bit value from a bit-level input.
fn coordinate(input: (&[u8], usize)) -> IResult<(&[u8], usize), u16> {
    /// Width, in bits, of a single coordinate.
    const COORDINATE_BITS: u8 = 12;

    take_bits(COORDINATE_BITS)(input)
}
fn area(input: &[u8]) -> IResult<&[u8], AreaValues> {
bits(|input| {
let (input, (x1, x2, y1, y2)) =
(coordinate, coordinate, coordinate, coordinate).parse(input)?;
Ok((input, AreaValues { x1, y1, x2, y2 }))
})(input)
}
fn rle_offsets(input: &[u8]) -> IResult<&[u8], [u16; 2]> {
let (input, vec) = bits(count(
take_bits::<_, _, _, nom::error::Error<(&[u8], usize)>>(16u16),
2,
))(input)?;
Ok((input, [vec[0], vec[1]]))
}
/// A single command parsed out of a control sequence.
#[derive(Clone, Debug, PartialEq, Eq)]
enum ControlCommand<'a> {
/// Marks the subtitle as forced.
Force,
/// The date of the enclosing control sequence is the subtitle start time.
StartDate,
/// The date of the enclosing control sequence is the subtitle end time.
StopDate,
/// Four 4-bit palette entries.
Palette([u8; 4]),
/// Four 4-bit alpha entries.
Alpha([u8; 4]),
/// Raw area coordinates, not yet validated as an `Area`.
Coordinates(AreaValues),
/// Two 16-bit offsets locating the RLE image data.
RleOffsets([u16; 2]),
/// Unrecognized bytes, up to (not including) the end-of-sequence tag.
Unsupported(&'a [u8]),
}
/// One-byte tags that introduce each control command, plus the tag that
/// terminates a control sequence.
#[repr(u8)]
#[derive(Debug, PartialEq, Eq)]
enum ControlCommandTag {
    Force = 0x00,
    StartDate = 0x01,
    StopDate = 0x02,
    Palette = 0x03,
    Alpha = 0x04,
    Coordinates = 0x05,
    RleOffsets = 0x06,
    End = 0xff,
}

impl ControlCommandTag {
    // One named constant per tag so that a `'static` reference to the tag
    // byte can be handed out from a `const fn`.
    const FORCE: u8 = Self::Force as u8;
    const START_DATE: u8 = Self::StartDate as u8;
    const STOP_DATE: u8 = Self::StopDate as u8;
    const PALETTE: u8 = Self::Palette as u8;
    const ALPHA: u8 = Self::Alpha as u8;
    const COORDINATES: u8 = Self::Coordinates as u8;
    const RLE_OFFSETS: u8 = Self::RleOffsets as u8;
    const END: u8 = Self::End as u8;

    /// Returns the tag as a one-byte `'static` slice, ready to be matched
    /// against input bytes.
    const fn as_slice(&self) -> &'static [u8] {
        from_ref(self.as_static_ref())
    }

    /// Returns a `'static` reference to the tag byte.
    const fn as_static_ref(&self) -> &'static u8 {
        match self {
            Self::Force => &Self::FORCE,
            Self::StartDate => &Self::START_DATE,
            Self::StopDate => &Self::STOP_DATE,
            Self::Palette => &Self::PALETTE,
            Self::Alpha => &Self::ALPHA,
            Self::Coordinates => &Self::COORDINATES,
            Self::RleOffsets => &Self::RLE_OFFSETS,
            Self::End => &Self::END,
        }
    }
}

impl From<ControlCommandTag> for u8 {
    /// Converts the tag into its raw byte value.
    fn from(tag: ControlCommandTag) -> Self {
        tag as u8
    }
}
/// Parses a single control command.
///
/// Known commands are recognized by their leading tag byte. Anything else
/// is captured as [`ControlCommand::Unsupported`], spanning every byte up to
/// (but not including) the next end-of-sequence tag.
fn control_command(input: &[u8]) -> IResult<&[u8], ControlCommand<'_>> {
    // Commands that consist of the tag byte alone.
    let force_cmd = value(
        ControlCommand::Force,
        tag_bytes(ControlCommandTag::Force.as_slice()),
    );
    let start_date_cmd = value(
        ControlCommand::StartDate,
        tag_bytes(ControlCommandTag::StartDate.as_slice()),
    );
    let stop_date_cmd = value(
        ControlCommand::StopDate,
        tag_bytes(ControlCommandTag::StopDate.as_slice()),
    );

    // Commands whose tag byte is followed by a payload.
    let palette_cmd = map(
        preceded(
            tag_bytes(ControlCommandTag::Palette.as_slice()),
            palette_entries,
        ),
        ControlCommand::Palette,
    );
    let alpha_cmd = map(
        preceded(
            tag_bytes(ControlCommandTag::Alpha.as_slice()),
            palette_entries,
        ),
        ControlCommand::Alpha,
    );
    let coordinates_cmd = map(
        preceded(tag_bytes(ControlCommandTag::Coordinates.as_slice()), area),
        ControlCommand::Coordinates,
    );
    let rle_offsets_cmd = map(
        preceded(
            tag_bytes(ControlCommandTag::RleOffsets.as_slice()),
            rle_offsets,
        ),
        ControlCommand::RleOffsets,
    );

    // Fallback: swallow everything before the end tag.
    let unsupported_cmd = map(
        take_until(ControlCommandTag::End.as_slice()),
        ControlCommand::Unsupported,
    );

    alt((
        force_cmd,
        start_date_cmd,
        stop_date_cmd,
        palette_cmd,
        alpha_cmd,
        coordinates_cmd,
        rle_offsets_cmd,
        unsupported_cmd,
    ))
    .parse(input)
}
/// Matches the one-byte tag that terminates a control sequence.
fn control_command_end(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let end_marker = ControlCommandTag::End.as_slice();
    tag_bytes(end_marker).parse(input)
}
/// One parsed control sequence: a header followed by its commands.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ControlSequence<'a> {
// Raw date field; `subtitle` divides it by 100 and adds it to the packet base time.
date: u16,
// Offset of the next control sequence; equal to this sequence's own offset on the last one.
next: u16,
// Commands in the order they were parsed, excluding the end tag.
commands: Vec<ControlCommand<'a>>,
}
fn control_sequence(input: &[u8]) -> IResult<&[u8], ControlSequence<'_>> {
let (input, (date, next, commands)) = (
be_u16,
be_u16,
many_till(control_command, control_command_end),
)
.parse(input)?;
Ok((
input,
ControlSequence {
date,
next,
commands: commands.0,
},
))
}
/// Reads a big-endian `u16` from the front of `buff` and widens it to `usize`.
///
/// Returns the remaining bytes together with the decoded value.
///
/// # Errors
/// Returns [`VobSubError::BufferTooSmallForU16`] when `buff` holds fewer than
/// two bytes.
fn parse_be_u16_as_usize(buff: &[u8]) -> Result<(&[u8], usize), VobSubError> {
    match buff {
        [high, low, rest @ ..] => {
            let value = u16::from_be_bytes([*high, *low]);
            Ok((rest, usize::from(value)))
        }
        _ => Err(VobSubError::BufferTooSmallForU16),
    }
}
/// A piece of information that `subtitle` requires but that no control
/// sequence of the packet provided.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ErrorMissing {
/// No start-date command was found.
#[error("no start time")]
StartTime,
/// No coordinates command was found.
#[error("no area coordinates")]
Area,
/// No palette command was found.
#[error("no palette")]
Palette,
/// No alpha command was found.
#[error("no alpha palette")]
AlphaPalette,
/// No RLE-offsets command was found.
#[error("no RLE offsets")]
RleOffset,
}
fn subtitle<'a, D, T>(raw_data: &'a [u8], base_time: f64) -> Result<T, VobSubError>
where
T: Debug,
D: VobSubDecoder<'a, Output = T>,
{
if raw_data.len() < 2 {
return Err(VobSubError::UnexpectedEndOfSubtitleData);
}
let (_, initial_control_offset) = parse_be_u16_as_usize(&raw_data[2..])?;
let mut start_time = None;
let mut end_time = None;
let mut force = false;
let mut area = None;
let mut palette = None;
let mut alpha = None;
let mut rle_offsets = None;
let mut control_offset = initial_control_offset;
loop {
trace!("looking for control sequence at: 0x{control_offset:x}");
if control_offset >= raw_data.len() {
return Err(VobSubError::ControlOffsetBiggerThanPacket {
offset: control_offset,
packet: raw_data.len(),
});
}
let control_data = &raw_data[control_offset..];
let (_, control) = control_sequence(control_data)
.to_result()
.map_err(VobSubError::ControlSequence)?;
trace!("parsed control sequence: {:?}", &control);
let time = base_time + f64::from(control.date) / 100.0;
for command in control.commands {
match command {
ControlCommand::Force => {
force = true;
}
ControlCommand::StartDate => {
start_time = start_time.or(Some(time));
}
ControlCommand::StopDate => {
end_time = end_time.or(Some(time));
}
ControlCommand::Palette(p) => {
palette = palette.or(Some(p));
}
ControlCommand::Alpha(a) => {
alpha = alpha.or(Some(a));
}
ControlCommand::Coordinates(c) => {
let cmd_area = Area::try_from(c)?;
area = area.or(Some(cmd_area));
}
ControlCommand::RleOffsets(r) => {
rle_offsets = Some(r);
}
ControlCommand::Unsupported(b) => {
warn!("unsupported control sequence: {:?}", BytesFormatter(b));
}
}
}
let next_control_offset = usize::from(control.next);
match control_offset.cmp(&next_control_offset) {
Ordering::Greater => {
return Err(VobSubError::ControlOffsetWentBackwards);
}
Ordering::Equal => {
break;
}
Ordering::Less => {
control_offset = next_control_offset;
}
}
}
let start_time = start_time.ok_or(ErrorMissing::StartTime)?;
let area = area.ok_or(ErrorMissing::Area)?;
let palette = palette.ok_or(ErrorMissing::Palette)?;
let alpha = alpha.ok_or(ErrorMissing::AlphaPalette)?;
let rle_offsets = rle_offsets.ok_or(ErrorMissing::RleOffset)?;
let end = initial_control_offset + 2;
let palette = palette.into_iter_fixed().rev().collect();
let alpha = alpha.into_iter_fixed().rev().collect();
let image_data = VobSubRleImageData::new(raw_data, rle_offsets, end)?;
let rle_image = VobSubRleImage::new(area, palette, alpha, image_data);
let result = D::from_data(start_time, end_time, force, rle_image);
trace!("Parsed subtitle: {:?}", &result);
Ok(result)
}
/// Unwraps an `Option<Result<T, E>>` inside a function that itself returns
/// `Option<Result<_, _>>`.
///
/// Evaluates to the inner value on `Some(Ok(_))`. On `None` the enclosing
/// function returns `None`; on `Some(Err(_))` it returns the error, converted
/// into the enclosing function's error type.
macro_rules! try_iter {
    ($next:expr) => {
        match $next {
            Some(Ok(item)) => item,
            Some(Err(err)) => return Some(Err(err.into())),
            None => return None,
        }
    };
}
/// The contents of a `.sub` file, held entirely in memory.
pub struct Sub {
    /// Raw bytes of the file.
    data: Vec<u8>,
}

impl Sub {
    /// Reads the whole file at `path` into memory.
    ///
    /// Any path-like value is accepted; it is only borrowed, so it does not
    /// need to be `Clone`.
    ///
    /// # Errors
    /// Returns [`VobSubError::Io`], carrying the underlying I/O error and the
    /// offending path, if the file cannot be read.
    pub fn open<P>(path: P) -> Result<Self, VobSubError>
    where
        P: AsRef<Path>,
    {
        let path = path.as_ref();
        let data = fs::read(path).map_err(|source| VobSubError::Io {
            source,
            path: path.to_path_buf(),
        })?;
        Ok(Self { data })
    }

    /// Returns a parser over the subtitles stored in this file.
    ///
    /// The parser borrows the file contents; `D` selects the decoder type
    /// parameter of the returned [`VobsubParser`].
    #[must_use]
    #[allow(clippy::missing_const_for_fn)]
    pub fn subtitles<D>(&self) -> VobsubParser<'_, D> {
        VobsubParser::new(&self.data)
    }
}
/// Iterator-style parser that reassembles subtitle packets from PES packets
/// and decodes them.
pub struct VobsubParser<'a, Decoder> {
// Source of PES packets borrowed from the input bytes.
pes_packets: ps::PesPackets<'a>,
// Carries the `Decoder` type parameter without storing a value of it.
phantom_data: PhantomData<Decoder>,
}
impl<'a, Decoder> VobsubParser<'a, Decoder> {
    /// Creates a parser reading PES packets from `input`.
    #[must_use]
    pub const fn new(input: &'a [u8]) -> Self {
        Self {
            phantom_data: PhantomData,
            pes_packets: ps::pes_packets(input),
        }
    }

    /// Reassembles the next complete subtitle packet.
    ///
    /// Returns the presentation time of the first PES packet, in seconds,
    /// together with the packet bytes. The first two bytes of the first PES
    /// payload give the total size; further PES packets of the same
    /// substream are appended until that size is reached.
    ///
    /// Returns `None` when the PES packet source is exhausted, including
    /// when it ends before the announced size has been collected.
    fn next_sub_packet(&mut self) -> Option<Result<(f64, Vec<u8>), VobSubError>> {
        profiling::scope!("VobsubParser next_sub_packet");

        let first: ps::PesPacket = try_iter!(self.pes_packets.next());
        let base_time = match first.pes_packet.header_data.pts_dts {
            Some(pts_dts) => pts_dts.pts.as_seconds(),
            None => return Some(Err(VobSubError::MissingTimingForSubtitle)),
        };
        let substream_id = first.pes_packet.substream_id;

        // The announced size is a big-endian u16 at the start of the payload.
        let first_chunk = first.pes_packet.data;
        if first_chunk.len() < 2 {
            return Some(Err(VobSubError::PacketTooShort));
        }
        let wanted = usize::from(u16::from_be_bytes([first_chunk[0], first_chunk[1]]));

        let mut sub_packet = Vec::with_capacity(wanted);
        sub_packet.extend_from_slice(first_chunk);

        while sub_packet.len() < wanted {
            let next: ps::PesPacket = try_iter!(self.pes_packets.next());
            if next.pes_packet.substream_id == substream_id {
                sub_packet.extend_from_slice(next.pes_packet.data);
            } else {
                // Packets of other substreams are skipped, not collected.
                warn!(
                    "Found subtitle for stream 0x{:x} while looking for 0x{:x}",
                    next.pes_packet.substream_id, substream_id
                );
            }
        }

        // Drop any bytes collected beyond the announced size.
        if wanted < sub_packet.len() {
            warn!(
                "Found 0x{:x} bytes of data in subtitle packet, wanted 0x{:x}",
                sub_packet.len(),
                wanted
            );
            sub_packet.truncate(wanted);
        }

        Some(Ok((base_time, sub_packet)))
    }
}
impl<D> Iterator for VobsubParser<'_, D> {
    type Item = Result<(TimeSpan, VobSubIndexedImage), VobSubError>;

    /// Reassembles the next subtitle packet and decodes it.
    ///
    /// The type parameter `D` is not consulted here: every item is decoded
    /// as a `(TimeSpan, VobSubIndexedImage)` pair.
    fn next(&mut self) -> Option<Self::Item> {
        profiling::scope!("VobsubParser next");
        let packet = self.next_sub_packet()?;
        Some(packet.and_then(|(base_time, sub_packet)| {
            subtitle::<(TimeSpan, VobSubIndexedImage), _>(&sub_packet, base_time)
        }))
    }
}
// Marker impl: promises that `next` keeps returning `None` once it has returned `None`.
// NOTE(review): this relies on `ps::PesPackets` staying exhausted after its first `None` — confirm.
impl<D> FusedIterator for VobsubParser<'_, D> {}
// Unit tests for the control-sequence parsers and the subtitle iterator.
// The iterator tests read fixture files from `./fixtures`.
#[cfg(test)]
mod tests {
use super::*;
// Two bytes are split into four nibbles, in input order.
#[test]
fn parse_palette_entries() {
assert_eq!(
palette_entries(&[0x03, 0x10][..]),
IResult::Ok((&[][..], [0x00, 0x03, 0x01, 0x00]))
);
}
// Full control sequences: header (date, next offset), commands, end tag.
#[test]
fn parse_control_sequence() {
// Start date, palette, alpha, coordinates and RLE offsets in one sequence.
let input_1 = &[
0x00, 0x00, 0x0f, 0x41, 0x01, 0x03, 0x03, 0x10, 0x04, 0xff, 0xf0, 0x05, 0x29, 0xb4,
0xe6, 0x3c, 0x54, 0x00, 0x06, 0x00, 0x04, 0x07, 0x7b, 0xff,
][..];
let expected_1 = ControlSequence {
date: 0x0000,
next: 0x0f41,
commands: vec![
ControlCommand::StartDate,
ControlCommand::Palette([0x0, 0x3, 0x1, 0x0]),
ControlCommand::Alpha([0xf, 0xf, 0xf, 0x0]),
ControlCommand::Coordinates(AreaValues {
x1: 0x29b,
x2: 0x4e6,
y1: 0x3c5,
y2: 0x400,
}),
ControlCommand::RleOffsets([0x0004, 0x077b]),
],
};
assert_eq!(
control_sequence(input_1),
IResult::Ok((&[][..], expected_1))
);
// A sequence holding only a stop date.
let input_2 = &[0x00, 0x77, 0x0f, 0x41, 0x02, 0xff][..];
let expected_2 = ControlSequence {
date: 0x0077,
next: 0x0f41,
commands: vec![ControlCommand::StopDate],
};
assert_eq!(
control_sequence(input_2),
IResult::Ok((&[][..], expected_2))
);
// A start date followed by the force command.
let input_3 = &[
0x00, 0x00, 0x0b, 0x30, 0x01, 0x00, 0xff,
][..];
let expected_3 = ControlSequence {
date: 0x0000,
next: 0x0b30,
commands: vec![ControlCommand::StartDate, ControlCommand::Force],
};
assert_eq!(
control_sequence(input_3),
IResult::Ok((&[][..], expected_3))
);
}
// End-to-end parse of the example fixture: exactly two subtitles are expected.
#[test]
fn parse_subtitles() {
use std::fs;
use crate::image::ImageArea as _;
let buffer = fs::read("./fixtures/example.sub").unwrap();
let mut subs = VobsubParser::<(TimeSpan, VobSubIndexedImage)>::new(&buffer);
let (time_span, img) = subs.next().expect("missing sub 1").unwrap();
// NOTE(review): these checks only bound the times from above; a value far
// below the expected one would also pass. Consider comparing the absolute
// difference instead.
assert!(time_span.start.to_secs() - 49.4 < 0.1);
assert!(time_span.end.to_secs() - 50.9 < 0.1);
assert_eq!(
img.area(),
Area::try_from(AreaValues {
x1: 750,
y1: 916,
x2: 1172,
y2: 966
})
.unwrap()
);
assert_eq!(*img.palette(), [0, 1, 3, 0]);
assert_eq!(*img.alpha(), [0, 15, 15, 15]);
subs.next().expect("missing sub 2").unwrap();
assert!(subs.next().is_none());
}
// Same fixture and expectations as `parse_subtitles`, with `TimeSpan` as the
// parser's type parameter.
#[test]
fn parse_subtitles_times() {
use std::fs;
use crate::image::ImageArea as _;
let buffer = fs::read("./fixtures/example.sub").unwrap();
let mut subs = VobsubParser::<TimeSpan>::new(&buffer);
let (time_span, img) = subs.next().expect("missing sub 1").unwrap();
// NOTE(review): upper-bound-only checks, as in `parse_subtitles`.
assert!(time_span.start.to_secs() - 49.4 < 0.1);
assert!(time_span.end.to_secs() - 50.9 < 0.1);
assert_eq!(
img.area(),
Area::try_from(AreaValues {
x1: 750,
y1: 916,
x2: 1172,
y2: 966
})
.unwrap()
);
assert_eq!(*img.palette(), [0, 1, 3, 0]);
assert_eq!(*img.alpha(), [0, 15, 15, 15]);
subs.next().expect("missing sub 2").unwrap();
assert!(subs.next().is_none());
}
// The tiny fixture is expected to hold exactly one subtitle.
#[test]
fn parse_subtitles_from_subtitle_edit() {
let idx = Sub::open("./fixtures/tiny.sub").unwrap();
let mut subs = idx.subtitles::<TimeSpan>();
subs.next().expect("missing sub").unwrap();
assert!(subs.next().is_none());
}
// The first subtitle of `tiny.sub` and of `tiny-split.sub` must decode to equal values.
#[test]
fn parse_fuzz_corpus_seeds() {
let tiny = Sub::open("./fixtures/tiny.sub")
.unwrap()
.subtitles::<TimeSpan>()
.next()
.unwrap()
.unwrap();
let split = Sub::open("./fixtures/tiny-split.sub")
.unwrap()
.subtitles::<TimeSpan>()
.next()
.unwrap()
.unwrap();
assert_eq!(tiny, split);
}
}