pub mod ebml;
pub mod error;
pub mod io;
pub(crate) mod lang;
pub mod m2ts;
pub mod mkv;
pub mod pgs;
pub mod sup;
use error::PgsError;
use io::SeekBufReader;
use m2ts::stream::M2tsExtractorState;
use mkv::stream::MkvExtractorState;
use pgs::DisplaySet;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;
use std::path::{Path, PathBuf};
use sup::stream::SupExtractorState;
/// Container formats that this module's format detection can report.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContainerFormat {
/// Matroska; reported when the file begins with the EBML magic `1A 45 DF A3`.
Matroska,
/// Transport stream for which the packet probe reported the M2TS packet format.
M2ts,
/// Transport stream for which the packet probe reported the raw TS packet format.
TransportStream,
/// Raw PGS stream; reported when the file begins with the bytes `50 47` ("PG").
Sup,
}
/// Strategy selector handed to the Matroska extractor state when it is built.
///
/// NOTE(review): the precise behaviour of each variant is implemented in
/// `mkv::stream` and is not visible from this file — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MkvStrategy {
/// Default strategy; used by `Extractor::open`.
#[default]
Auto,
/// Presumably forces a sequential read of the file — TODO confirm.
Sequential,
}
/// Description of one PGS subtitle track found in a container.
#[derive(Debug, Clone)]
pub struct PgsTrackInfo {
/// Track identifier: the Matroska track number, the M2TS/TS PID, or `0` for SUP files.
pub track_id: u32,
/// Track language as reported by the container, if any (always `None` for SUP).
pub language: Option<String>,
/// Container format the track was found in.
pub container: ContainerFormat,
/// Track name; only populated for Matroska tracks.
pub name: Option<String>,
/// "Default" flag; only populated for Matroska tracks.
pub flag_default: Option<bool>,
/// "Forced" flag; only populated for Matroska tracks.
pub flag_forced: Option<bool>,
/// Count looked up by track UID in the Matroska frame-count table; `None`
/// when the track has no UID, no entry exists, or the container is not Matroska.
pub display_set_count: Option<u64>,
/// For Matroska: whether any cue point references this track. `None` for
/// other containers.
pub has_cues: Option<bool>,
}
/// All display sets collected for a single track, paired with the track's metadata.
#[derive(Debug, Clone)]
pub struct TrackDisplaySets {
/// Metadata of the track the display sets belong to.
pub track: PgsTrackInfo,
/// Display sets of this track, in the order they were produced by the extractor.
pub display_sets: Vec<DisplaySet>,
}
/// I/O statistics reported by an `Extractor`.
#[derive(Debug, Clone)]
pub struct ExtractionStats {
/// Size of the input file in bytes as read from file metadata when the file
/// was opened (`0` if the metadata query failed).
pub file_size: u64,
/// Byte count reported by the format-specific extractor state; refreshed
/// after every step of the iterator.
pub bytes_read: u64,
}
/// A single display set tagged with the track it came from; this is the item
/// type yielded by the `Extractor` iterator.
#[derive(Debug, Clone)]
pub struct TrackDisplaySet {
/// Identifier of the originating track (compared against `PgsTrackInfo::track_id`
/// when grouping results).
pub track_id: u32,
/// Language of the originating track, if known — filled in by the
/// format-specific extractor state (not visible in this file).
pub language: Option<String>,
/// Container format the display set was read from.
pub container: ContainerFormat,
/// The decoded display set itself.
pub display_set: DisplaySet,
}
/// Format-specific state machine driving an `Extractor`.
enum ExtractorInner {
/// Matroska extraction state (boxed).
Mkv(Box<MkvExtractorState>),
/// M2TS / raw transport-stream extraction state.
M2ts(M2tsExtractorState),
/// SUP extraction state.
Sup(SupExtractorState),
/// Terminal state: set after the source is exhausted, an error was yielded,
/// a display set past the time-range end was seen, or a SUP file was opened
/// with a filter that excludes track `0`.
Done,
}
/// Streaming PGS extractor for a single input file.
///
/// Implements `Iterator`, yielding `Result<TrackDisplaySet, PgsError>` items.
pub struct Extractor {
/// Format-specific extraction state; `Done` once iteration has finished.
inner: ExtractorInner,
/// Clones of the display sets yielded so far, kept while `catalog_enabled` is true.
catalog: Vec<TrackDisplaySet>,
/// Whether yielded display sets are recorded in `catalog`.
catalog_enabled: bool,
/// Tracks exposed by this extractor (already narrowed when a track filter is applied).
tracks: Vec<PgsTrackInfo>,
/// File size and bytes-read counters.
stats: ExtractionStats,
/// Path of the input file; used to reopen it when options change.
path: PathBuf,
/// Detected container format.
format: ContainerFormat,
/// Matroska strategy currently recorded for this extractor.
mkv_strategy: MkvStrategy,
/// Lower bound (milliseconds) applied to display-set PTS, if any.
time_range_start_ms: Option<f64>,
/// Upper bound (milliseconds) applied to display-set PTS, if any.
time_range_end_ms: Option<f64>,
}
impl Extractor {
pub fn open(path: impl AsRef<Path>) -> Result<Self, PgsError> {
let path = path.as_ref();
let file = File::open(path)?;
let file_size = file.metadata().map(|m| m.len()).unwrap_or(0);
let mut reader = SeekBufReader::new(file);
let format = detect_format(&mut reader)?;
match format {
ContainerFormat::Matroska => {
let meta = mkv::prepare_mkv_metadata(&mut reader)?;
let tracks: Vec<PgsTrackInfo> = meta
.pgs_tracks
.iter()
.map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
.collect();
let state = MkvExtractorState::new(
reader,
path.to_path_buf(),
meta,
None,
MkvStrategy::Auto,
)?;
Ok(Extractor {
inner: ExtractorInner::Mkv(Box::new(state)),
catalog: Vec::new(),
catalog_enabled: true,
tracks,
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format: ContainerFormat::Matroska,
mkv_strategy: MkvStrategy::Auto,
time_range_start_ms: None,
time_range_end_ms: None,
})
}
format @ (ContainerFormat::M2ts | ContainerFormat::TransportStream) => {
let file = File::open(path)?;
let mut reader = SeekBufReader::with_capacity(M2TS_BUF_SIZE, file);
detect_format(&mut reader)?;
let meta = m2ts::prepare_m2ts_metadata(&mut reader, Some(path))?;
let tracks: Vec<PgsTrackInfo> = meta
.tracks
.iter()
.map(|t| m2ts_track_to_info(t, format))
.collect();
let state = M2tsExtractorState::new(reader, meta, format, None);
Ok(Extractor {
inner: ExtractorInner::M2ts(state),
catalog: Vec::new(),
catalog_enabled: true,
tracks,
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format,
mkv_strategy: MkvStrategy::Auto,
time_range_start_ms: None,
time_range_end_ms: None,
})
}
ContainerFormat::Sup => {
let tracks = vec![sup_track_info()];
let state = SupExtractorState::new(reader);
Ok(Extractor {
inner: ExtractorInner::Sup(state),
catalog: Vec::new(),
catalog_enabled: true,
tracks,
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format: ContainerFormat::Sup,
mkv_strategy: MkvStrategy::Auto,
time_range_start_ms: None,
time_range_end_ms: None,
})
}
}
}
/// Switches the Matroska extraction strategy by reopening the file.
///
/// Does nothing when the container is not Matroska or `strategy` is already
/// the recorded one. On success the history setting and any time range set
/// via [`Extractor::with_time_range`] are carried over to the reopened
/// extractor, so the result does not depend on builder-call order.
///
/// The file is reopened without a track filter, so call
/// [`Extractor::with_track_filter`] *after* this method if both are needed.
///
/// If reopening fails, the current extractor is kept and only the recorded
/// strategy is updated.
#[must_use]
pub fn with_mkv_strategy(mut self, strategy: MkvStrategy) -> Self {
    if self.format != ContainerFormat::Matroska || strategy == self.mkv_strategy {
        return self;
    }
    let reopened = Self::open_with_strategy(&self.path, self.stats.file_size, strategy, None);
    match reopened {
        Ok(mut ext) => {
            ext.catalog_enabled = self.catalog_enabled;
            // Re-apply the time range: the freshly opened extractor starts without one.
            ext.with_time_range(self.time_range_start_ms, self.time_range_end_ms)
        }
        Err(_) => {
            self.mkv_strategy = strategy;
            self
        }
    }
}
/// Restricts extraction to the given track ids by reopening the file.
///
/// An empty `track_ids` slice means "no filter" and returns the extractor
/// unchanged. On success the history setting and any time range set via
/// [`Extractor::with_time_range`] are carried over to the reopened extractor,
/// so the result does not depend on builder-call order.
///
/// If reopening fails, the current (unfiltered) extractor is returned as a
/// best-effort fallback.
#[must_use]
pub fn with_track_filter(self, track_ids: &[u32]) -> Self {
    if track_ids.is_empty() {
        return self;
    }
    let reopened = Self::open_filtered(
        &self.path,
        self.stats.file_size,
        self.format,
        track_ids,
        self.mkv_strategy,
    );
    match reopened {
        Ok(mut ext) => {
            ext.catalog_enabled = self.catalog_enabled;
            // Re-apply the time range: the freshly opened extractor starts without one.
            ext.with_time_range(self.time_range_start_ms, self.time_range_end_ms)
        }
        Err(_) => self,
    }
}
/// Enables or disables recording of yielded display sets in the history.
///
/// Disabling also discards whatever history has been recorded so far.
#[must_use]
pub fn with_history(mut self, enabled: bool) -> Self {
    if !enabled {
        self.catalog = Vec::new();
    }
    self.catalog_enabled = enabled;
    self
}
/// Limits extraction to display sets whose PTS lies within the given bounds
/// (milliseconds); either bound may be omitted.
///
/// Passing `None` for both bounds leaves the extractor untouched. Otherwise
/// the bounds are stored and also forwarded to the format-specific state.
#[must_use]
pub fn with_time_range(mut self, start_ms: Option<f64>, end_ms: Option<f64>) -> Self {
    if matches!((start_ms, end_ms), (None, None)) {
        return self;
    }
    match &mut self.inner {
        ExtractorInner::Done => {}
        ExtractorInner::Mkv(mkv_state) => mkv_state.set_time_range(start_ms, end_ms),
        ExtractorInner::M2ts(ts_state) => ts_state.set_time_range(start_ms, end_ms),
        ExtractorInner::Sup(sup_state) => sup_state.set_time_range(start_ms, end_ms),
    }
    self.time_range_end_ms = end_ms;
    self.time_range_start_ms = start_ms;
    self
}
fn open_with_strategy(
path: &Path,
file_size: u64,
strategy: MkvStrategy,
track_ids: Option<&[u32]>,
) -> Result<Self, PgsError> {
let file = File::open(path)?;
let mut reader = SeekBufReader::new(file);
detect_format(&mut reader)?;
let meta = mkv::prepare_mkv_metadata(&mut reader)?;
let tracks: Vec<PgsTrackInfo> = if let Some(ids) = track_ids {
meta.pgs_tracks
.iter()
.filter(|t| ids.contains(&(t.track_number as u32)))
.map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
.collect()
} else {
meta.pgs_tracks
.iter()
.map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
.collect()
};
let state = MkvExtractorState::new(reader, path.to_path_buf(), meta, track_ids, strategy)?;
Ok(Extractor {
inner: ExtractorInner::Mkv(Box::new(state)),
catalog: Vec::new(),
catalog_enabled: true,
tracks,
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format: ContainerFormat::Matroska,
mkv_strategy: strategy,
time_range_start_ms: None,
time_range_end_ms: None,
})
}
fn open_filtered(
path: &Path,
file_size: u64,
format: ContainerFormat,
track_ids: &[u32],
mkv_strategy: MkvStrategy,
) -> Result<Self, PgsError> {
match format {
ContainerFormat::Matroska => {
Self::open_with_strategy(path, file_size, mkv_strategy, Some(track_ids))
}
fmt @ (ContainerFormat::M2ts | ContainerFormat::TransportStream) => {
let file = File::open(path)?;
let mut reader = SeekBufReader::with_capacity(M2TS_BUF_SIZE, file);
detect_format(&mut reader)?;
let meta = m2ts::prepare_m2ts_metadata(&mut reader, Some(path))?;
let tracks: Vec<PgsTrackInfo> = meta
.tracks
.iter()
.filter(|t| track_ids.contains(&(t.pid as u32)))
.map(|t| m2ts_track_to_info(t, fmt))
.collect();
let state = M2tsExtractorState::new(reader, meta, fmt, Some(track_ids));
Ok(Extractor {
inner: ExtractorInner::M2ts(state),
catalog: Vec::new(),
catalog_enabled: true,
tracks,
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format: fmt,
mkv_strategy: MkvStrategy::Auto,
time_range_start_ms: None,
time_range_end_ms: None,
})
}
ContainerFormat::Sup => {
if !track_ids.contains(&0) {
return Ok(Extractor {
inner: ExtractorInner::Done,
catalog: Vec::new(),
catalog_enabled: true,
tracks: Vec::new(),
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format: ContainerFormat::Sup,
mkv_strategy: MkvStrategy::Auto,
time_range_start_ms: None,
time_range_end_ms: None,
});
}
let file = File::open(path)?;
let mut reader = SeekBufReader::new(file);
detect_format(&mut reader)?;
let tracks = vec![sup_track_info()];
let state = SupExtractorState::new(reader);
Ok(Extractor {
inner: ExtractorInner::Sup(state),
catalog: Vec::new(),
catalog_enabled: true,
tracks,
stats: ExtractionStats {
file_size,
bytes_read: 0,
},
path: path.to_path_buf(),
format: ContainerFormat::Sup,
mkv_strategy: MkvStrategy::Auto,
time_range_start_ms: None,
time_range_end_ms: None,
})
}
}
}
/// Returns the container format recorded for this extractor.
pub fn format(&self) -> ContainerFormat {
self.format
}
/// Returns the tracks exposed by this extractor (after any track filter).
pub fn tracks(&self) -> &[PgsTrackInfo] {
    self.tracks.as_slice()
}
/// Returns the display sets recorded so far, in the order they were yielded.
pub fn history(&self) -> &[TrackDisplaySet] {
    self.catalog.as_slice()
}
/// Returns references to the recorded display sets that belong to `track_id`,
/// preserving their recorded order.
pub fn history_for_track(&self, track_id: u32) -> Vec<&TrackDisplaySet> {
    let mut matching = Vec::new();
    for recorded in &self.catalog {
        if recorded.track_id == track_id {
            matching.push(recorded);
        }
    }
    matching
}
/// Moves the recorded history out of the extractor, leaving an empty history behind.
pub fn drain_history(&mut self) -> Vec<TrackDisplaySet> {
std::mem::take(&mut self.catalog)
}
/// Discards all recorded history entries; recording itself stays enabled or
/// disabled as before.
pub fn clear_history(&mut self) {
self.catalog.clear();
}
/// Returns the current I/O statistics (file size and bytes read so far).
pub fn stats(&self) -> &ExtractionStats {
&self.stats
}
/// Consumes the extractor and returns every remaining display set grouped by track.
///
/// When no time range is set and the source is Matroska, the Matroska state
/// is first asked for a parallel collection; if it provides one, that result
/// is returned directly. Otherwise the extractor is drained sequentially and
/// the items are grouped in order of first appearance.
///
/// # Errors
///
/// Returns the first error produced during extraction.
pub fn collect_by_track(mut self) -> Result<Vec<TrackDisplaySets>, PgsError> {
    let unbounded = self.time_range_start_ms.is_none() && self.time_range_end_ms.is_none();
    if unbounded {
        if let ExtractorInner::Mkv(state) = &self.inner {
            if let Some(outcome) = state.try_collect_parallel() {
                return outcome;
            }
        }
    }

    let mut track_info_map: HashMap<u32, PgsTrackInfo> = HashMap::new();
    for info in &self.tracks {
        track_info_map.insert(info.track_id, info.clone());
    }

    let mut collected = Vec::new();
    for item in &mut self {
        collected.push(item?);
    }
    Ok(group_by_track(collected, &track_info_map))
}
/// Refreshes `stats.bytes_read` from the active extractor state.
///
/// Once the extractor is `Done` the last recorded value is kept.
fn update_stats(&mut self) {
    let latest = match &self.inner {
        ExtractorInner::Done => None,
        ExtractorInner::Mkv(mkv_state) => Some(mkv_state.bytes_read()),
        ExtractorInner::M2ts(ts_state) => Some(ts_state.bytes_read()),
        ExtractorInner::Sup(sup_state) => Some(sup_state.bytes_read()),
    };
    if let Some(bytes) = latest {
        self.stats.bytes_read = bytes;
    }
}
}
/// Streams display sets from the underlying format-specific state.
///
/// Iteration ends permanently after the source is exhausted, after an error
/// has been yielded, or once a display set past the time-range end is seen.
impl Iterator for Extractor {
    type Item = Result<TrackDisplaySet, PgsError>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let pulled = match &mut self.inner {
                ExtractorInner::Done => return None,
                ExtractorInner::Mkv(mkv_state) => mkv_state.next_display_set(),
                ExtractorInner::M2ts(ts_state) => ts_state.next_display_set(),
                ExtractorInner::Sup(sup_state) => sup_state.next_display_set(),
            };
            self.update_stats();

            let tds = match pulled {
                Some(Ok(tds)) => tds,
                Some(Err(err)) => {
                    // Errors are terminal: report once, then stop.
                    self.inner = ExtractorInner::Done;
                    return Some(Err(err));
                }
                None => {
                    self.inner = ExtractorInner::Done;
                    return None;
                }
            };

            let pts_ms = tds.display_set.pts_ms;
            // Past the end bound: stop for good instead of scanning further.
            if self.time_range_end_ms.is_some_and(|end| pts_ms > end) {
                self.inner = ExtractorInner::Done;
                return None;
            }
            // Before the start bound: skip and keep pulling.
            if self.time_range_start_ms.is_some_and(|start| pts_ms < start) {
                continue;
            }
            if self.catalog_enabled {
                self.catalog.push(tds.clone());
            }
            return Some(Ok(tds));
        }
    }
}
/// Sniffs the container format from the first bytes of the file.
///
/// Checks, in order: the EBML magic (Matroska), a transport-stream sync byte
/// at offset 0 or 4 confirmed by the packet probe (the offset-4 case
/// presumably covers the 4-byte prefix of M2TS packets), and the "PG" magic
/// of a SUP stream.
///
/// The reader is rewound to offset 0 before probing, and again when the
/// transport-stream probe fails, so a SUP file that merely has `0x47` as its
/// fifth byte is handed to the SUP extractor from the start of the file
/// regardless of where the probe left the reader.
///
/// # Errors
///
/// Returns [`PgsError::UnknownFormat`] when no signature matches, or the
/// underlying error if seeking/reading the first five bytes fails.
fn detect_format(reader: &mut SeekBufReader<File>) -> Result<ContainerFormat, PgsError> {
    const EBML_MAGIC: [u8; 4] = [0x1A, 0x45, 0xDF, 0xA3];
    const TS_SYNC_BYTE: u8 = 0x47;
    const SUP_MAGIC: [u8; 2] = [0x50, 0x47];

    reader.seek_to(0)?;
    let mut magic = [0u8; 5];
    reader.read_exact(&mut magic)?;
    reader.seek_to(0)?;

    if magic[..4] == EBML_MAGIC {
        return Ok(ContainerFormat::Matroska);
    }
    if magic[0] == TS_SYNC_BYTE || magic[4] == TS_SYNC_BYTE {
        match m2ts::ts_packet::detect_packet_format(reader) {
            Ok(m2ts::ts_packet::PacketFormat::M2ts) => return Ok(ContainerFormat::M2ts),
            Ok(m2ts::ts_packet::PacketFormat::RawTs) => {
                return Ok(ContainerFormat::TransportStream);
            }
            Err(_) => {
                // Not a transport stream after all: undo whatever the probe
                // consumed before trying the remaining signatures.
                reader.seek_to(0)?;
            }
        }
    }
    if magic[..2] == SUP_MAGIC {
        return Ok(ContainerFormat::Sup);
    }
    Err(PgsError::UnknownFormat)
}
/// Builds the public track description for a Matroska PGS track.
///
/// `has_cues` is always `Some`: true when any cue point references the
/// track's number, false otherwise (including when there are no cue points).
/// `display_set_count` is looked up in `frame_counts` by the track's UID.
fn mkv_track_to_info(
    t: &mkv::tracks::MkvPgsTrack,
    frame_counts: &HashMap<u64, u64>,
    cue_points: &Option<Vec<mkv::cues::PgsCuePoint>>,
) -> PgsTrackInfo {
    let track_number = t.track_number;
    let referenced_by_cue = match cue_points {
        Some(cues) => cues.iter().any(|cue| cue.track_number == track_number),
        None => false,
    };
    let display_set_count = match t.track_uid {
        Some(uid) => frame_counts.get(&uid).copied(),
        None => None,
    };

    PgsTrackInfo {
        track_id: track_number as u32,
        language: t.language.clone(),
        container: ContainerFormat::Matroska,
        name: t.name.clone(),
        flag_default: t.flag_default,
        flag_forced: t.flag_forced,
        display_set_count,
        has_cues: Some(referenced_by_cue),
    }
}
/// Describes the single implicit track of a SUP file: id `0`, container
/// `Sup`, and no further metadata.
fn sup_track_info() -> PgsTrackInfo {
    PgsTrackInfo {
        container: ContainerFormat::Sup,
        track_id: 0,
        // A raw SUP stream carries none of the optional metadata.
        language: None,
        name: None,
        flag_default: None,
        flag_forced: None,
        display_set_count: None,
        has_cues: None,
    }
}
/// Builds the public track description for a transport-stream PGS track.
///
/// The track's PID becomes the track id; `format` is recorded as the
/// container. Only the language is carried over as metadata.
fn m2ts_track_to_info(t: &m2ts::M2tsPgsTrack, format: ContainerFormat) -> PgsTrackInfo {
    let track_id = t.pid as u32;
    let language = t.language.clone();
    PgsTrackInfo {
        track_id,
        language,
        container: format,
        name: None,
        flag_default: None,
        flag_forced: None,
        display_set_count: None,
        has_cues: None,
    }
}
/// Lists the PGS tracks contained in the file at `path` without extracting them.
///
/// SUP files always report exactly one track with id `0`.
///
/// # Errors
///
/// Fails if the file cannot be opened, its format is not recognised, or the
/// container metadata cannot be read.
pub fn list_pgs_tracks(path: &Path) -> Result<Vec<PgsTrackInfo>, PgsError> {
    let mut reader = SeekBufReader::new(File::open(path)?);
    let format = detect_format(&mut reader)?;

    let infos = match format {
        ContainerFormat::Sup => vec![sup_track_info()],
        ContainerFormat::Matroska => {
            let meta = mkv::prepare_mkv_metadata(&mut reader)?;
            let mut infos = Vec::new();
            for track in meta.pgs_tracks.iter() {
                infos.push(mkv_track_to_info(track, &meta.frame_counts, &meta.cue_points));
            }
            infos
        }
        ContainerFormat::M2ts | ContainerFormat::TransportStream => {
            let found = m2ts::list_pgs_tracks_m2ts(&mut reader, Some(path))?;
            let mut infos = Vec::new();
            for track in found.iter() {
                infos.push(m2ts_track_to_info(track, format));
            }
            infos
        }
    };
    Ok(infos)
}
/// Extracts every display set from the file at `path`, grouped by track.
///
/// # Errors
///
/// Fails if the file cannot be opened or extraction reports an error.
pub fn extract_all_display_sets(path: &Path) -> Result<Vec<TrackDisplaySets>, PgsError> {
    let extractor = Extractor::open(path)?;
    extractor.collect_by_track()
}
/// Read-buffer capacity (2 MiB) used when opening transport-stream files.
const M2TS_BUF_SIZE: usize = 2 << 20;
fn group_by_track(
results: Vec<TrackDisplaySet>,
track_info_map: &HashMap<u32, PgsTrackInfo>,
) -> Vec<TrackDisplaySets> {
let mut track_map: HashMap<u32, Vec<DisplaySet>> = HashMap::new();
let mut track_order: Vec<u32> = Vec::new();
for tds in results {
let entry = track_map.entry(tds.track_id).or_insert_with(|| {
track_order.push(tds.track_id);
Vec::new()
});
entry.push(tds.display_set);
}
track_order
.into_iter()
.filter_map(|id| {
let display_sets = track_map.remove(&id)?;
if display_sets.is_empty() {
return None;
}
let track = track_info_map.get(&id)?.clone();
Some(TrackDisplaySets {
track,
display_sets,
})
})
.collect()
}
/// Extracts every display set from the file at `path`, grouped by track, and
/// also returns the I/O statistics gathered while reading.
///
/// Extraction is always performed by draining the extractor sequentially.
///
/// # Errors
///
/// Fails if the file cannot be opened or extraction reports an error.
pub fn extract_all_display_sets_with_stats(
    path: &Path,
) -> Result<(Vec<TrackDisplaySets>, ExtractionStats), PgsError> {
    let mut extractor = Extractor::open(path)?;

    let mut track_info_map: HashMap<u32, PgsTrackInfo> = HashMap::new();
    for info in extractor.tracks() {
        track_info_map.insert(info.track_id, info.clone());
    }

    let mut collected = Vec::new();
    for item in &mut extractor {
        collected.push(item?);
    }

    let stats = extractor.stats().clone();
    Ok((group_by_track(collected, &track_info_map), stats))
}
/// Extracts the display sets of one track from the file at `path`.
///
/// With `track_id == None` the first listed track is used. This is
/// `extract_display_sets_with_stats` with the statistics discarded.
///
/// # Errors
///
/// Fails if the file cannot be opened or extraction reports an error.
pub fn extract_display_sets(
    path: &Path,
    track_id: Option<u32>,
) -> Result<Vec<DisplaySet>, PgsError> {
    extract_display_sets_with_stats(path, track_id).map(|(display_sets, _stats)| display_sets)
}
pub fn extract_display_sets_with_stats(
path: &Path,
track_id: Option<u32>,
) -> Result<(Vec<DisplaySet>, ExtractionStats), PgsError> {
let extractor = Extractor::open(path)?;
let mut extractor = if let Some(id) = track_id {
extractor.with_track_filter(&[id])
} else {
extractor
};
let target_id = track_id.or_else(|| extractor.tracks().first().map(|t| t.track_id));
let mut display_sets = Vec::new();
for result in extractor.by_ref() {
let tds = result?;
if target_id.is_none_or(|id| tds.track_id == id) {
display_sets.push(tds.display_set);
}
}
let stats = extractor.stats().clone();
Ok((display_sets, stats))
}
/// Writes the segments of `display_sets`, in order, to a new file at `output`.
///
/// The file is created (or truncated) and written through a buffered writer
/// that is flushed explicitly before returning.
///
/// # Errors
///
/// Fails if the file cannot be created or any write/flush fails.
pub fn write_sup_file(display_sets: &[DisplaySet], output: &Path) -> Result<(), PgsError> {
    let mut writer = std::io::BufWriter::new(File::create(output)?);
    let all_segments = display_sets.iter().flat_map(|ds| &ds.segments);
    for segment in all_segments {
        writer.write_all(&segment.to_bytes())?;
    }
    writer.flush()?;
    Ok(())
}