1pub mod ebml;
2pub mod error;
3pub mod io;
4pub(crate) mod lang;
5pub mod m2ts;
6pub mod mkv;
7pub mod pgs;
8pub mod sup;
9
10use error::PgsError;
11use io::SeekBufReader;
12use m2ts::stream::M2tsExtractorState;
13use mkv::stream::MkvExtractorState;
14use pgs::DisplaySet;
15use std::collections::HashMap;
16use std::fs::File;
17use std::io::Write;
18use std::path::{Path, PathBuf};
19use sup::stream::SupExtractorState;
20
/// Container formats this crate can tell apart when probing a file.
///
/// The variant is chosen by the format probe from the first bytes of the file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ContainerFormat {
    /// Matroska container; recognised by the leading bytes `1A 45 DF A3`.
    Matroska,
    /// Transport stream for which the packet probe reports `PacketFormat::M2ts`.
    M2ts,
    /// Transport stream for which the packet probe reports `PacketFormat::RawTs`.
    TransportStream,
    /// Raw PGS stream; recognised by the leading bytes `50 47` (`"PG"`).
    Sup,
}
29
/// Strategy selector handed to the Matroska extractor state.
///
/// Only meaningful for [`ContainerFormat::Matroska`] input; other formats always
/// record [`MkvStrategy::Auto`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum MkvStrategy {
    /// Default strategy; the MKV extractor decides how to read the file.
    #[default]
    Auto,
    /// NOTE(review): presumably forces a linear, front-to-back scan — confirm
    /// against `mkv::stream::MkvExtractorState`.
    Sequential,
}
43
44#[derive(Debug, Clone)]
46pub struct PgsTrackInfo {
47 pub track_id: u32,
49 pub language: Option<String>,
51 pub container: ContainerFormat,
53 pub name: Option<String>,
55 pub flag_default: Option<bool>,
57 pub flag_forced: Option<bool>,
59 pub display_set_count: Option<u64>,
61 pub has_cues: Option<bool>,
63}
64
65#[derive(Debug, Clone)]
67pub struct TrackDisplaySets {
68 pub track: PgsTrackInfo,
70 pub display_sets: Vec<DisplaySet>,
72}
73
/// I/O counters reported by an [`Extractor`].
#[derive(Clone, Debug)]
pub struct ExtractionStats {
    /// Size of the source file in bytes (`0` when the metadata query failed).
    pub file_size: u64,
    /// Byte count most recently reported by the container-specific extractor state.
    pub bytes_read: u64,
}
82
83#[derive(Debug, Clone)]
85pub struct TrackDisplaySet {
86 pub track_id: u32,
88 pub language: Option<String>,
90 pub container: ContainerFormat,
92 pub display_set: DisplaySet,
94}
95
96enum ExtractorInner {
98 Mkv(Box<MkvExtractorState>),
99 M2ts(M2tsExtractorState),
100 Sup(SupExtractorState),
101 Done,
102}
103
104pub struct Extractor {
149 inner: ExtractorInner,
150 catalog: Vec<TrackDisplaySet>,
151 catalog_enabled: bool,
152 tracks: Vec<PgsTrackInfo>,
153 stats: ExtractionStats,
154 path: PathBuf,
155 format: ContainerFormat,
156 mkv_strategy: MkvStrategy,
157 time_range_start_ms: Option<f64>,
158 time_range_end_ms: Option<f64>,
159}
160
161impl Extractor {
162 pub fn open(path: impl AsRef<Path>) -> Result<Self, PgsError> {
168 let path = path.as_ref();
169 let file = File::open(path)?;
170 let file_size = file.metadata().map(|m| m.len()).unwrap_or(0);
171 let mut reader = SeekBufReader::new(file);
172
173 let format = detect_format(&mut reader)?;
174
175 match format {
176 ContainerFormat::Matroska => {
177 let meta = mkv::prepare_mkv_metadata(&mut reader)?;
178 let tracks: Vec<PgsTrackInfo> = meta
179 .pgs_tracks
180 .iter()
181 .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
182 .collect();
183
184 let state = MkvExtractorState::new(
185 reader,
186 path.to_path_buf(),
187 meta,
188 None,
189 MkvStrategy::Auto,
190 )?;
191
192 Ok(Extractor {
193 inner: ExtractorInner::Mkv(Box::new(state)),
194 catalog: Vec::new(),
195 catalog_enabled: true,
196 tracks,
197 stats: ExtractionStats {
198 file_size,
199 bytes_read: 0,
200 },
201 path: path.to_path_buf(),
202 format: ContainerFormat::Matroska,
203 mkv_strategy: MkvStrategy::Auto,
204 time_range_start_ms: None,
205 time_range_end_ms: None,
206 })
207 }
208 format @ (ContainerFormat::M2ts | ContainerFormat::TransportStream) => {
209 let file = File::open(path)?;
211 let mut reader = SeekBufReader::with_capacity(M2TS_BUF_SIZE, file);
212 detect_format(&mut reader)?;
213
214 let meta = m2ts::prepare_m2ts_metadata(&mut reader, Some(path))?;
215 let tracks: Vec<PgsTrackInfo> = meta
216 .tracks
217 .iter()
218 .map(|t| m2ts_track_to_info(t, format))
219 .collect();
220
221 let state = M2tsExtractorState::new(reader, meta, format, None);
222
223 Ok(Extractor {
224 inner: ExtractorInner::M2ts(state),
225 catalog: Vec::new(),
226 catalog_enabled: true,
227 tracks,
228 stats: ExtractionStats {
229 file_size,
230 bytes_read: 0,
231 },
232 path: path.to_path_buf(),
233 format,
234 mkv_strategy: MkvStrategy::Auto,
235 time_range_start_ms: None,
236 time_range_end_ms: None,
237 })
238 }
239 ContainerFormat::Sup => {
240 let tracks = vec![sup_track_info()];
241 let state = SupExtractorState::new(reader);
242
243 Ok(Extractor {
244 inner: ExtractorInner::Sup(state),
245 catalog: Vec::new(),
246 catalog_enabled: true,
247 tracks,
248 stats: ExtractionStats {
249 file_size,
250 bytes_read: 0,
251 },
252 path: path.to_path_buf(),
253 format: ContainerFormat::Sup,
254 mkv_strategy: MkvStrategy::Auto,
255 time_range_start_ms: None,
256 time_range_end_ms: None,
257 })
258 }
259 }
260 }
261
262 #[must_use]
267 pub fn with_mkv_strategy(mut self, strategy: MkvStrategy) -> Self {
268 if self.format != ContainerFormat::Matroska || strategy == self.mkv_strategy {
269 return self;
270 }
271
272 let path = self.path.clone();
273 let file_size = self.stats.file_size;
274
275 let catalog_enabled = self.catalog_enabled;
276 match Self::open_with_strategy(&path, file_size, strategy, None) {
277 Ok(mut ext) => {
278 ext.catalog_enabled = catalog_enabled;
279 ext
280 }
281 Err(_) => {
282 self.mkv_strategy = strategy;
283 self
284 }
285 }
286 }
287
288 #[must_use]
294 pub fn with_track_filter(self, track_ids: &[u32]) -> Self {
303 if track_ids.is_empty() {
304 return self;
305 }
306
307 let path = self.path.clone();
308 let file_size = self.stats.file_size;
309 let format = self.format;
310 let mkv_strategy = self.mkv_strategy;
311
312 let catalog_enabled = self.catalog_enabled;
316 match Self::open_filtered(&path, file_size, format, track_ids, mkv_strategy) {
317 Ok(mut ext) => {
318 ext.catalog_enabled = catalog_enabled;
319 ext
320 }
321 Err(_) => self,
322 }
323 }
324
325 #[must_use]
340 pub fn with_history(mut self, enabled: bool) -> Self {
341 self.catalog_enabled = enabled;
342 if !enabled {
343 self.catalog = Vec::new();
344 }
345 self
346 }
347
348 #[must_use]
360 pub fn with_time_range(mut self, start_ms: Option<f64>, end_ms: Option<f64>) -> Self {
361 if start_ms.is_none() && end_ms.is_none() {
362 return self;
363 }
364 self.time_range_start_ms = start_ms;
365 self.time_range_end_ms = end_ms;
366 match &mut self.inner {
367 ExtractorInner::Mkv(state) => state.set_time_range(start_ms, end_ms),
368 ExtractorInner::M2ts(state) => state.set_time_range(start_ms, end_ms),
369 ExtractorInner::Sup(state) => state.set_time_range(start_ms, end_ms),
370 ExtractorInner::Done => {}
371 }
372 self
373 }
374
375 fn open_with_strategy(
377 path: &Path,
378 file_size: u64,
379 strategy: MkvStrategy,
380 track_ids: Option<&[u32]>,
381 ) -> Result<Self, PgsError> {
382 let file = File::open(path)?;
383 let mut reader = SeekBufReader::new(file);
384 detect_format(&mut reader)?;
385
386 let meta = mkv::prepare_mkv_metadata(&mut reader)?;
387 let tracks: Vec<PgsTrackInfo> = if let Some(ids) = track_ids {
388 meta.pgs_tracks
389 .iter()
390 .filter(|t| ids.contains(&(t.track_number as u32)))
391 .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
392 .collect()
393 } else {
394 meta.pgs_tracks
395 .iter()
396 .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
397 .collect()
398 };
399
400 let state = MkvExtractorState::new(reader, path.to_path_buf(), meta, track_ids, strategy)?;
401
402 Ok(Extractor {
403 inner: ExtractorInner::Mkv(Box::new(state)),
404 catalog: Vec::new(),
405 catalog_enabled: true,
406 tracks,
407 stats: ExtractionStats {
408 file_size,
409 bytes_read: 0,
410 },
411 path: path.to_path_buf(),
412 format: ContainerFormat::Matroska,
413 mkv_strategy: strategy,
414 time_range_start_ms: None,
415 time_range_end_ms: None,
416 })
417 }
418
419 fn open_filtered(
420 path: &Path,
421 file_size: u64,
422 format: ContainerFormat,
423 track_ids: &[u32],
424 mkv_strategy: MkvStrategy,
425 ) -> Result<Self, PgsError> {
426 match format {
427 ContainerFormat::Matroska => {
428 Self::open_with_strategy(path, file_size, mkv_strategy, Some(track_ids))
429 }
430 fmt @ (ContainerFormat::M2ts | ContainerFormat::TransportStream) => {
431 let file = File::open(path)?;
432 let mut reader = SeekBufReader::with_capacity(M2TS_BUF_SIZE, file);
433 detect_format(&mut reader)?;
434
435 let meta = m2ts::prepare_m2ts_metadata(&mut reader, Some(path))?;
436 let tracks: Vec<PgsTrackInfo> = meta
437 .tracks
438 .iter()
439 .filter(|t| track_ids.contains(&(t.pid as u32)))
440 .map(|t| m2ts_track_to_info(t, fmt))
441 .collect();
442
443 let state = M2tsExtractorState::new(reader, meta, fmt, Some(track_ids));
444
445 Ok(Extractor {
446 inner: ExtractorInner::M2ts(state),
447 catalog: Vec::new(),
448 catalog_enabled: true,
449 tracks,
450 stats: ExtractionStats {
451 file_size,
452 bytes_read: 0,
453 },
454 path: path.to_path_buf(),
455 format: fmt,
456 mkv_strategy: MkvStrategy::Auto,
457 time_range_start_ms: None,
458 time_range_end_ms: None,
459 })
460 }
461 ContainerFormat::Sup => {
462 if !track_ids.contains(&0) {
463 return Ok(Extractor {
464 inner: ExtractorInner::Done,
465 catalog: Vec::new(),
466 catalog_enabled: true,
467 tracks: Vec::new(),
468 stats: ExtractionStats {
469 file_size,
470 bytes_read: 0,
471 },
472 path: path.to_path_buf(),
473 format: ContainerFormat::Sup,
474 mkv_strategy: MkvStrategy::Auto,
475 time_range_start_ms: None,
476 time_range_end_ms: None,
477 });
478 }
479
480 let file = File::open(path)?;
481 let mut reader = SeekBufReader::new(file);
482 detect_format(&mut reader)?;
483
484 let tracks = vec![sup_track_info()];
485 let state = SupExtractorState::new(reader);
486
487 Ok(Extractor {
488 inner: ExtractorInner::Sup(state),
489 catalog: Vec::new(),
490 catalog_enabled: true,
491 tracks,
492 stats: ExtractionStats {
493 file_size,
494 bytes_read: 0,
495 },
496 path: path.to_path_buf(),
497 format: ContainerFormat::Sup,
498 mkv_strategy: MkvStrategy::Auto,
499 time_range_start_ms: None,
500 time_range_end_ms: None,
501 })
502 }
503 }
504 }
505
506 pub fn format(&self) -> ContainerFormat {
508 self.format
509 }
510
511 pub fn tracks(&self) -> &[PgsTrackInfo] {
513 &self.tracks
514 }
515
516 pub fn history(&self) -> &[TrackDisplaySet] {
518 &self.catalog
519 }
520
521 pub fn history_for_track(&self, track_id: u32) -> Vec<&TrackDisplaySet> {
523 self.catalog
524 .iter()
525 .filter(|ds| ds.track_id == track_id)
526 .collect()
527 }
528
529 pub fn drain_history(&mut self) -> Vec<TrackDisplaySet> {
533 std::mem::take(&mut self.catalog)
534 }
535
536 pub fn clear_history(&mut self) {
538 self.catalog.clear();
539 }
540
541 pub fn stats(&self) -> &ExtractionStats {
543 &self.stats
544 }
545
546 pub fn collect_by_track(mut self) -> Result<Vec<TrackDisplaySets>, PgsError> {
552 if self.time_range_start_ms.is_none() && self.time_range_end_ms.is_none() {
555 if let ExtractorInner::Mkv(ref state) = self.inner
556 && let Some(result) = state.try_collect_parallel()
557 {
558 return result;
559 }
560 }
561
562 let track_info_map: HashMap<u32, PgsTrackInfo> = self
564 .tracks
565 .iter()
566 .map(|t| (t.track_id, t.clone()))
567 .collect();
568
569 let results = self.by_ref().collect::<Result<Vec<_>, _>>()?;
571 Ok(group_by_track(results, &track_info_map))
572 }
573
574 fn update_stats(&mut self) {
576 self.stats.bytes_read = match &self.inner {
577 ExtractorInner::Mkv(state) => state.bytes_read(),
578 ExtractorInner::M2ts(state) => state.bytes_read(),
579 ExtractorInner::Sup(state) => state.bytes_read(),
580 ExtractorInner::Done => self.stats.bytes_read,
581 };
582 }
583}
584
585impl Iterator for Extractor {
586 type Item = Result<TrackDisplaySet, PgsError>;
587
588 fn next(&mut self) -> Option<Self::Item> {
589 loop {
590 let result = match &mut self.inner {
591 ExtractorInner::Mkv(state) => state.next_display_set(),
592 ExtractorInner::M2ts(state) => state.next_display_set(),
593 ExtractorInner::Sup(state) => state.next_display_set(),
594 ExtractorInner::Done => return None,
595 };
596
597 self.update_stats();
598
599 match result {
600 Some(Ok(tds)) => {
601 let pts_ms = tds.display_set.pts_ms;
602
603 if let Some(end) = self.time_range_end_ms {
605 if pts_ms > end {
606 self.inner = ExtractorInner::Done;
607 return None;
608 }
609 }
610
611 if let Some(start) = self.time_range_start_ms {
613 if pts_ms < start {
614 continue;
615 }
616 }
617
618 if self.catalog_enabled {
619 self.catalog.push(tds.clone());
620 }
621 return Some(Ok(tds));
622 }
623 Some(Err(e)) => {
624 self.inner = ExtractorInner::Done;
625 return Some(Err(e));
626 }
627 None => {
628 self.inner = ExtractorInner::Done;
629 return None;
630 }
631 }
632 }
633 }
634}
635
636fn detect_format(reader: &mut SeekBufReader<File>) -> Result<ContainerFormat, PgsError> {
638 reader.seek_to(0)?;
639 let mut magic = [0u8; 5];
640 reader.read_exact(&mut magic)?;
641 reader.seek_to(0)?;
642
643 if magic[0..4] == [0x1A, 0x45, 0xDF, 0xA3] {
645 return Ok(ContainerFormat::Matroska);
646 }
647
648 if magic[0] == 0x47 || magic[4] == 0x47 {
650 match m2ts::ts_packet::detect_packet_format(reader) {
651 Ok(m2ts::ts_packet::PacketFormat::M2ts) => return Ok(ContainerFormat::M2ts),
652 Ok(m2ts::ts_packet::PacketFormat::RawTs) => {
653 return Ok(ContainerFormat::TransportStream);
654 }
655 Err(_) => {}
656 }
657 }
658
659 if magic[0] == 0x50 && magic[1] == 0x47 {
661 return Ok(ContainerFormat::Sup);
662 }
663
664 Err(PgsError::UnknownFormat)
665}
666
667fn mkv_track_to_info(
669 t: &mkv::tracks::MkvPgsTrack,
670 frame_counts: &HashMap<u64, u64>,
671 cue_points: &Option<Vec<mkv::cues::PgsCuePoint>>,
672) -> PgsTrackInfo {
673 let has_cues = Some(
674 cue_points
675 .as_ref()
676 .is_some_and(|cues| cues.iter().any(|cp| cp.track_number == t.track_number)),
677 );
678 PgsTrackInfo {
679 track_id: t.track_number as u32,
680 language: t.language.clone(),
681 container: ContainerFormat::Matroska,
682 name: t.name.clone(),
683 flag_default: t.flag_default,
684 flag_forced: t.flag_forced,
685 display_set_count: t.track_uid.and_then(|uid| frame_counts.get(&uid).copied()),
686 has_cues,
687 }
688}
689
690fn sup_track_info() -> PgsTrackInfo {
692 PgsTrackInfo {
693 track_id: 0,
694 language: None,
695 container: ContainerFormat::Sup,
696 name: None,
697 flag_default: None,
698 flag_forced: None,
699 display_set_count: None,
700 has_cues: None,
701 }
702}
703
704fn m2ts_track_to_info(t: &m2ts::M2tsPgsTrack, format: ContainerFormat) -> PgsTrackInfo {
706 PgsTrackInfo {
707 track_id: t.pid as u32,
708 language: t.language.clone(),
709 container: format,
710 name: None,
711 flag_default: None,
712 flag_forced: None,
713 display_set_count: None,
714 has_cues: None,
715 }
716}
717
718pub fn list_pgs_tracks(path: &Path) -> Result<Vec<PgsTrackInfo>, PgsError> {
720 let file = File::open(path)?;
721 let mut reader = SeekBufReader::new(file);
722
723 let format = detect_format(&mut reader)?;
724
725 match format {
726 ContainerFormat::Matroska => {
727 let meta = mkv::prepare_mkv_metadata(&mut reader)?;
728 Ok(meta
729 .pgs_tracks
730 .iter()
731 .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
732 .collect())
733 }
734 ContainerFormat::M2ts | ContainerFormat::TransportStream => {
735 let tracks = m2ts::list_pgs_tracks_m2ts(&mut reader, Some(path))?;
736 Ok(tracks
737 .iter()
738 .map(|t| m2ts_track_to_info(t, format))
739 .collect())
740 }
741 ContainerFormat::Sup => Ok(vec![sup_track_info()]),
742 }
743}
744
745pub fn extract_all_display_sets(path: &Path) -> Result<Vec<TrackDisplaySets>, PgsError> {
749 Extractor::open(path)?.collect_by_track()
750}
751
/// Read-buffer capacity used for transport-stream input: 2 MiB.
const M2TS_BUF_SIZE: usize = 2 << 20;
755
756fn group_by_track(
759 results: Vec<TrackDisplaySet>,
760 track_info_map: &HashMap<u32, PgsTrackInfo>,
761) -> Vec<TrackDisplaySets> {
762 let mut track_map: HashMap<u32, Vec<DisplaySet>> = HashMap::new();
763 let mut track_order: Vec<u32> = Vec::new();
764
765 for tds in results {
766 let entry = track_map.entry(tds.track_id).or_insert_with(|| {
767 track_order.push(tds.track_id);
768 Vec::new()
769 });
770 entry.push(tds.display_set);
771 }
772
773 track_order
774 .into_iter()
775 .filter_map(|id| {
776 let display_sets = track_map.remove(&id)?;
777 if display_sets.is_empty() {
778 return None;
779 }
780 let track = track_info_map.get(&id)?.clone();
781 Some(TrackDisplaySets {
782 track,
783 display_sets,
784 })
785 })
786 .collect()
787}
788
789pub fn extract_all_display_sets_with_stats(
791 path: &Path,
792) -> Result<(Vec<TrackDisplaySets>, ExtractionStats), PgsError> {
793 let mut extractor = Extractor::open(path)?;
794 let track_info_map: HashMap<u32, PgsTrackInfo> = extractor
795 .tracks()
796 .iter()
797 .map(|t| (t.track_id, t.clone()))
798 .collect();
799
800 let results = extractor.by_ref().collect::<Result<Vec<_>, _>>()?;
801 let stats = extractor.stats().clone();
802 let grouped = group_by_track(results, &track_info_map);
803
804 Ok((grouped, stats))
805}
806
807pub fn extract_display_sets(
811 path: &Path,
812 track_id: Option<u32>,
813) -> Result<Vec<DisplaySet>, PgsError> {
814 let (display_sets, _) = extract_display_sets_with_stats(path, track_id)?;
815 Ok(display_sets)
816}
817
818pub fn extract_display_sets_with_stats(
824 path: &Path,
825 track_id: Option<u32>,
826) -> Result<(Vec<DisplaySet>, ExtractionStats), PgsError> {
827 let extractor = Extractor::open(path)?;
828 let mut extractor = if let Some(id) = track_id {
829 extractor.with_track_filter(&[id])
830 } else {
831 extractor
832 };
833
834 let target_id = track_id.or_else(|| extractor.tracks().first().map(|t| t.track_id));
835
836 let mut display_sets = Vec::new();
837 for result in extractor.by_ref() {
838 let tds = result?;
839 if target_id.is_none_or(|id| tds.track_id == id) {
840 display_sets.push(tds.display_set);
841 }
842 }
843
844 let stats = extractor.stats().clone();
845 Ok((display_sets, stats))
846}
847
848pub fn write_sup_file(display_sets: &[DisplaySet], output: &Path) -> Result<(), PgsError> {
850 let file = File::create(output)?;
851 let mut writer = std::io::BufWriter::new(file);
852
853 for ds in display_sets {
854 for segment in &ds.segments {
855 let bytes = segment.to_bytes();
856 writer.write_all(&bytes)?;
857 }
858 }
859
860 writer.flush()?;
861 Ok(())
862}