Skip to main content

libpgs/
lib.rs

1pub mod ebml;
2pub mod error;
3pub mod io;
4pub(crate) mod lang;
5pub mod m2ts;
6pub mod mkv;
7pub mod pgs;
8pub mod sup;
9
10use error::PgsError;
11use io::SeekBufReader;
12use m2ts::stream::M2tsExtractorState;
13use mkv::stream::MkvExtractorState;
14use pgs::DisplaySet;
15use std::collections::HashMap;
16use std::fs::File;
17use std::io::Write;
18use std::path::{Path, PathBuf};
19use sup::stream::SupExtractorState;
20
/// Container format of the source file.
///
/// Determined once, when the file is opened, by inspecting its leading bytes
/// (see `detect_format`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContainerFormat {
    /// Matroska, recognised by the EBML magic `0x1A45DFA3`.
    Matroska,
    /// Transport stream that the packet-format probe classified as M2TS.
    M2ts,
    /// Transport stream that the packet-format probe classified as raw TS.
    TransportStream,
    /// Raw PGS segment stream (`.sup`), recognised by the leading "PG" magic.
    Sup,
}
29
/// MKV extraction strategy override.
///
/// Controls how the extractor navigates Clusters in an MKV file.
/// Used for benchmarking and tuning NAS performance. Has no effect on
/// non-Matroska containers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MkvStrategy {
    /// Automatic: use Cues if available, otherwise Sequential.
    /// This is the default strategy.
    #[default]
    Auto,
    /// Single-pass linear scan through the Segment, processing Clusters
    /// as they're encountered without building a map first.
    Sequential,
}
43
/// Metadata about a PGS track found in the container.
///
/// Fields that a container cannot provide are `None`: the MKV-specific
/// fields (`name`, `flag_default`, `flag_forced`, `display_set_count`,
/// `has_cues`) are always `None` for M2TS/TS and SUP sources.
#[derive(Debug, Clone)]
pub struct PgsTrackInfo {
    /// Track number (MKV) or PID (M2TS). Always `0` for SUP files, which
    /// hold a single synthetic track.
    pub track_id: u32,
    /// Language code, if available.
    pub language: Option<String>,
    /// Container format.
    pub container: ContainerFormat,
    /// Track name / title (MKV TrackName).
    pub name: Option<String>,
    /// Whether this track is flagged as default (MKV FlagDefault).
    pub flag_default: Option<bool>,
    /// Whether this track contains forced subtitles (MKV FlagForced).
    pub flag_forced: Option<bool>,
    /// Total number of display sets / frames, if known from container metadata.
    pub display_set_count: Option<u64>,
    /// Whether the container has cue/index entries for this track (MKV only).
    pub has_cues: Option<bool>,
}
64
/// Display sets extracted from a single PGS track.
///
/// This is the grouped form returned by the batch helpers such as
/// `Extractor::collect_by_track`; the streaming iterator yields
/// `TrackDisplaySet` (singular) instead.
#[derive(Debug, Clone)]
pub struct TrackDisplaySets {
    /// Track metadata.
    pub track: PgsTrackInfo,
    /// All display sets for this track, in presentation order.
    pub display_sets: Vec<DisplaySet>,
}
73
/// I/O statistics from an extraction operation.
#[derive(Debug, Clone)]
pub struct ExtractionStats {
    /// Total size of the source file in bytes. Reported as `0` when the
    /// file metadata could not be queried at open time.
    pub file_size: u64,
    /// Total bytes actually read from the file during extraction.
    /// Starts at `0` and is refreshed after every iterator step.
    pub bytes_read: u64,
}
82
/// A display set annotated with its source track.
///
/// This is the item type yielded by the streaming `Extractor` iterator.
#[derive(Debug, Clone)]
pub struct TrackDisplaySet {
    /// Track number (MKV) or PID (M2TS).
    pub track_id: u32,
    /// Language code, if available.
    pub language: Option<String>,
    /// Container format of the source file.
    pub container: ContainerFormat,
    /// The display set itself.
    pub display_set: DisplaySet,
}
95
/// Internal dispatch to format-specific streaming state machines.
enum ExtractorInner {
    /// Matroska state machine (boxed).
    Mkv(Box<MkvExtractorState>),
    /// M2TS / raw transport-stream state machine.
    M2ts(M2tsExtractorState),
    /// Raw `.sup` state machine.
    Sup(SupExtractorState),
    /// Terminal state: the source is exhausted, an error was yielded, the
    /// end of the time range was passed, or the track filter excluded
    /// everything. The iterator returns `None` from here on.
    Done,
}
103
/// Streaming PGS extractor that yields display sets incrementally.
///
/// Created via [`Extractor::open`]. Implements
/// `Iterator<Item = Result<TrackDisplaySet, PgsError>>`.
///
/// # Streaming
///
/// Display sets are yielded one at a time in file order (interleaved across
/// tracks for multi-track files). Only the I/O needed to produce the next
/// display set is performed on each call to `next()`.
///
/// # Early Termination
///
/// Simply drop the `Extractor` to stop extraction. No further I/O occurs.
///
/// # History
///
/// Processed display sets are cataloged internally. Use [`history()`](Extractor::history)
/// to access all display sets yielded so far, or
/// [`history_for_track()`](Extractor::history_for_track) for a specific track.
/// Use [`drain_history()`](Extractor::drain_history) or
/// [`clear_history()`](Extractor::clear_history) to manage memory during long extractions.
///
/// # Example
///
/// ```no_run
/// # use std::path::Path;
/// let mut extractor = libpgs::Extractor::open("movie.mkv").unwrap();
///
/// // Read first 5 display sets from the English track
/// let eng_id = extractor.tracks().iter()
///     .find(|t| t.language.as_deref() == Some("eng"))
///     .unwrap()
///     .track_id;
///
/// let mut extractor = extractor.with_track_filter(&[eng_id]);
///
/// for ds in extractor.by_ref().take(5) {
///     let ds = ds.unwrap();
///     println!("PTS: {}ms", ds.display_set.pts_ms);
/// }
///
/// println!("Bytes read: {}", extractor.stats().bytes_read);
/// ```
pub struct Extractor {
    /// Format-specific state machine, or `Done` once iteration has ended.
    inner: ExtractorInner,
    /// Clones of every display set yielded so far (the "history").
    catalog: Vec<TrackDisplaySet>,
    /// When `false`, yielded display sets are not cloned into `catalog`.
    catalog_enabled: bool,
    /// Tracks selected for extraction (all PGS tracks unless filtered).
    tracks: Vec<PgsTrackInfo>,
    /// File size plus the running count of bytes read.
    stats: ExtractionStats,
    /// Source path, kept so the builder methods can reopen the file.
    path: PathBuf,
    /// Container format detected at open time.
    format: ContainerFormat,
    /// Requested MKV strategy; stays `Auto` for non-Matroska sources.
    mkv_strategy: MkvStrategy,
    /// Inclusive lower PTS bound in milliseconds, if a time range is set.
    time_range_start_ms: Option<f64>,
    /// Inclusive upper PTS bound in milliseconds, if a time range is set.
    time_range_end_ms: Option<f64>,
}
160
161impl Extractor {
162    /// Open a file and prepare for streaming extraction.
163    ///
164    /// Performs initial metadata parsing (format detection, track discovery)
165    /// but does NOT extract any display sets yet. All PGS tracks are selected
166    /// by default; use [`with_track_filter`](Extractor::with_track_filter) to restrict.
167    pub fn open(path: impl AsRef<Path>) -> Result<Self, PgsError> {
168        let path = path.as_ref();
169        let file = File::open(path)?;
170        let file_size = file.metadata().map(|m| m.len()).unwrap_or(0);
171        let mut reader = SeekBufReader::new(file);
172
173        let format = detect_format(&mut reader)?;
174
175        match format {
176            ContainerFormat::Matroska => {
177                let meta = mkv::prepare_mkv_metadata(&mut reader)?;
178                let tracks: Vec<PgsTrackInfo> = meta
179                    .pgs_tracks
180                    .iter()
181                    .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
182                    .collect();
183
184                let state = MkvExtractorState::new(
185                    reader,
186                    path.to_path_buf(),
187                    meta,
188                    None,
189                    MkvStrategy::Auto,
190                )?;
191
192                Ok(Extractor {
193                    inner: ExtractorInner::Mkv(Box::new(state)),
194                    catalog: Vec::new(),
195                    catalog_enabled: true,
196                    tracks,
197                    stats: ExtractionStats {
198                        file_size,
199                        bytes_read: 0,
200                    },
201                    path: path.to_path_buf(),
202                    format: ContainerFormat::Matroska,
203                    mkv_strategy: MkvStrategy::Auto,
204                    time_range_start_ms: None,
205                    time_range_end_ms: None,
206                })
207            }
208            format @ (ContainerFormat::M2ts | ContainerFormat::TransportStream) => {
209                // Reopen with large buffer for M2TS throughput.
210                let file = File::open(path)?;
211                let mut reader = SeekBufReader::with_capacity(M2TS_BUF_SIZE, file);
212                detect_format(&mut reader)?;
213
214                let meta = m2ts::prepare_m2ts_metadata(&mut reader, Some(path))?;
215                let tracks: Vec<PgsTrackInfo> = meta
216                    .tracks
217                    .iter()
218                    .map(|t| m2ts_track_to_info(t, format))
219                    .collect();
220
221                let state = M2tsExtractorState::new(reader, meta, format, None);
222
223                Ok(Extractor {
224                    inner: ExtractorInner::M2ts(state),
225                    catalog: Vec::new(),
226                    catalog_enabled: true,
227                    tracks,
228                    stats: ExtractionStats {
229                        file_size,
230                        bytes_read: 0,
231                    },
232                    path: path.to_path_buf(),
233                    format,
234                    mkv_strategy: MkvStrategy::Auto,
235                    time_range_start_ms: None,
236                    time_range_end_ms: None,
237                })
238            }
239            ContainerFormat::Sup => {
240                let tracks = vec![sup_track_info()];
241                let state = SupExtractorState::new(reader);
242
243                Ok(Extractor {
244                    inner: ExtractorInner::Sup(state),
245                    catalog: Vec::new(),
246                    catalog_enabled: true,
247                    tracks,
248                    stats: ExtractionStats {
249                        file_size,
250                        bytes_read: 0,
251                    },
252                    path: path.to_path_buf(),
253                    format: ContainerFormat::Sup,
254                    mkv_strategy: MkvStrategy::Auto,
255                    time_range_start_ms: None,
256                    time_range_end_ms: None,
257                })
258            }
259        }
260    }
261
262    /// Override the MKV extraction strategy. Chainable.
263    ///
264    /// Must be called before the first call to `next()`. Only affects MKV files.
265    /// Useful for benchmarking different strategies on NAS storage.
266    #[must_use]
267    pub fn with_mkv_strategy(mut self, strategy: MkvStrategy) -> Self {
268        if self.format != ContainerFormat::Matroska || strategy == self.mkv_strategy {
269            return self;
270        }
271
272        let path = self.path.clone();
273        let file_size = self.stats.file_size;
274
275        let catalog_enabled = self.catalog_enabled;
276        match Self::open_with_strategy(&path, file_size, strategy, None) {
277            Ok(mut ext) => {
278                ext.catalog_enabled = catalog_enabled;
279                ext
280            }
281            Err(_) => {
282                self.mkv_strategy = strategy;
283                self
284            }
285        }
286    }
287
288    /// Restrict extraction to specific tracks. Chainable.
289    ///
290    /// Must be called before the first call to `next()`. Configures the
291    /// internal state machine to only create assemblers for matching tracks
292    /// and skip non-matching blocks at the source level.
293    #[must_use]
294    ///
295    /// # Example
296    ///
297    /// ```no_run
298    /// let mut ext = libpgs::Extractor::open("movie.mkv").unwrap();
299    /// let id = ext.tracks()[0].track_id;
300    /// let mut ext = ext.with_track_filter(&[id]);
301    /// ```
302    pub fn with_track_filter(self, track_ids: &[u32]) -> Self {
303        if track_ids.is_empty() {
304            return self;
305        }
306
307        let path = self.path.clone();
308        let file_size = self.stats.file_size;
309        let format = self.format;
310        let mkv_strategy = self.mkv_strategy;
311
312        // Reconstruct with the filter applied. Reopens the file and
313        // re-parses metadata so the state machine is initialized with
314        // only the requested tracks from the start.
315        let catalog_enabled = self.catalog_enabled;
316        match Self::open_filtered(&path, file_size, format, track_ids, mkv_strategy) {
317            Ok(mut ext) => {
318                ext.catalog_enabled = catalog_enabled;
319                ext
320            }
321            Err(_) => self,
322        }
323    }
324
325    /// Disable the internal history catalog to avoid cloning each yielded
326    /// display set. Chainable.
327    ///
328    /// By default the `Extractor` retains a clone of every yielded
329    /// [`TrackDisplaySet`] so callers can revisit them via [`history`](Self::history)
330    /// and [`history_for_track`](Self::history_for_track). For pure-streaming
331    /// consumers (e.g. the `stream` CLI subcommand) that never read history,
332    /// calling `with_history(false)` skips the clone and reduces per-frame cost
333    /// for graphically dense subtitles.
334    ///
335    /// When disabled: `history()` returns an empty slice, `drain_history()`
336    /// returns an empty `Vec`, and `clear_history()` is a no-op.
337    ///
338    /// Must be called before the first call to `next()`.
339    #[must_use]
340    pub fn with_history(mut self, enabled: bool) -> Self {
341        self.catalog_enabled = enabled;
342        if !enabled {
343            self.catalog = Vec::new();
344        }
345        self
346    }
347
348    /// Restrict extraction to a time range. Chainable.
349    ///
350    /// Display sets with `pts_ms` before `start_ms` are skipped.
351    /// Iteration stops when `pts_ms` exceeds `end_ms`.
352    /// Pass `None` for either bound to leave it open.
353    ///
354    /// For MKV files with Cues, cue points outside the range are filtered out
355    /// entirely (zero I/O). For M2TS and SUP files, the reader seeks to an
356    /// estimated byte position based on bitrate estimation.
357    ///
358    /// Must be called before the first call to `next()`.
359    #[must_use]
360    pub fn with_time_range(mut self, start_ms: Option<f64>, end_ms: Option<f64>) -> Self {
361        if start_ms.is_none() && end_ms.is_none() {
362            return self;
363        }
364        self.time_range_start_ms = start_ms;
365        self.time_range_end_ms = end_ms;
366        match &mut self.inner {
367            ExtractorInner::Mkv(state) => state.set_time_range(start_ms, end_ms),
368            ExtractorInner::M2ts(state) => state.set_time_range(start_ms, end_ms),
369            ExtractorInner::Sup(state) => state.set_time_range(start_ms, end_ms),
370            ExtractorInner::Done => {}
371        }
372        self
373    }
374
375    /// Open an MKV file with a specific strategy (no track filter).
376    fn open_with_strategy(
377        path: &Path,
378        file_size: u64,
379        strategy: MkvStrategy,
380        track_ids: Option<&[u32]>,
381    ) -> Result<Self, PgsError> {
382        let file = File::open(path)?;
383        let mut reader = SeekBufReader::new(file);
384        detect_format(&mut reader)?;
385
386        let meta = mkv::prepare_mkv_metadata(&mut reader)?;
387        let tracks: Vec<PgsTrackInfo> = if let Some(ids) = track_ids {
388            meta.pgs_tracks
389                .iter()
390                .filter(|t| ids.contains(&(t.track_number as u32)))
391                .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
392                .collect()
393        } else {
394            meta.pgs_tracks
395                .iter()
396                .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
397                .collect()
398        };
399
400        let state = MkvExtractorState::new(reader, path.to_path_buf(), meta, track_ids, strategy)?;
401
402        Ok(Extractor {
403            inner: ExtractorInner::Mkv(Box::new(state)),
404            catalog: Vec::new(),
405                    catalog_enabled: true,
406            tracks,
407            stats: ExtractionStats {
408                file_size,
409                bytes_read: 0,
410            },
411            path: path.to_path_buf(),
412            format: ContainerFormat::Matroska,
413            mkv_strategy: strategy,
414            time_range_start_ms: None,
415            time_range_end_ms: None,
416        })
417    }
418
419    fn open_filtered(
420        path: &Path,
421        file_size: u64,
422        format: ContainerFormat,
423        track_ids: &[u32],
424        mkv_strategy: MkvStrategy,
425    ) -> Result<Self, PgsError> {
426        match format {
427            ContainerFormat::Matroska => {
428                Self::open_with_strategy(path, file_size, mkv_strategy, Some(track_ids))
429            }
430            fmt @ (ContainerFormat::M2ts | ContainerFormat::TransportStream) => {
431                let file = File::open(path)?;
432                let mut reader = SeekBufReader::with_capacity(M2TS_BUF_SIZE, file);
433                detect_format(&mut reader)?;
434
435                let meta = m2ts::prepare_m2ts_metadata(&mut reader, Some(path))?;
436                let tracks: Vec<PgsTrackInfo> = meta
437                    .tracks
438                    .iter()
439                    .filter(|t| track_ids.contains(&(t.pid as u32)))
440                    .map(|t| m2ts_track_to_info(t, fmt))
441                    .collect();
442
443                let state = M2tsExtractorState::new(reader, meta, fmt, Some(track_ids));
444
445                Ok(Extractor {
446                    inner: ExtractorInner::M2ts(state),
447                    catalog: Vec::new(),
448                    catalog_enabled: true,
449                    tracks,
450                    stats: ExtractionStats {
451                        file_size,
452                        bytes_read: 0,
453                    },
454                    path: path.to_path_buf(),
455                    format: fmt,
456                    mkv_strategy: MkvStrategy::Auto,
457                    time_range_start_ms: None,
458                    time_range_end_ms: None,
459                })
460            }
461            ContainerFormat::Sup => {
462                if !track_ids.contains(&0) {
463                    return Ok(Extractor {
464                        inner: ExtractorInner::Done,
465                        catalog: Vec::new(),
466                    catalog_enabled: true,
467                        tracks: Vec::new(),
468                        stats: ExtractionStats {
469                            file_size,
470                            bytes_read: 0,
471                        },
472                        path: path.to_path_buf(),
473                        format: ContainerFormat::Sup,
474                        mkv_strategy: MkvStrategy::Auto,
475                        time_range_start_ms: None,
476                        time_range_end_ms: None,
477                    });
478                }
479
480                let file = File::open(path)?;
481                let mut reader = SeekBufReader::new(file);
482                detect_format(&mut reader)?;
483
484                let tracks = vec![sup_track_info()];
485                let state = SupExtractorState::new(reader);
486
487                Ok(Extractor {
488                    inner: ExtractorInner::Sup(state),
489                    catalog: Vec::new(),
490                    catalog_enabled: true,
491                    tracks,
492                    stats: ExtractionStats {
493                        file_size,
494                        bytes_read: 0,
495                    },
496                    path: path.to_path_buf(),
497                    format: ContainerFormat::Sup,
498                    mkv_strategy: MkvStrategy::Auto,
499                    time_range_start_ms: None,
500                    time_range_end_ms: None,
501                })
502            }
503        }
504    }
505
506    /// Container format detected at open time.
507    pub fn format(&self) -> ContainerFormat {
508        self.format
509    }
510
511    /// PGS tracks discovered in the file.
512    pub fn tracks(&self) -> &[PgsTrackInfo] {
513        &self.tracks
514    }
515
516    /// All display sets yielded so far, in order.
517    pub fn history(&self) -> &[TrackDisplaySet] {
518        &self.catalog
519    }
520
521    /// Display sets yielded so far for a specific track.
522    pub fn history_for_track(&self, track_id: u32) -> Vec<&TrackDisplaySet> {
523        self.catalog
524            .iter()
525            .filter(|ds| ds.track_id == track_id)
526            .collect()
527    }
528
529    /// Take all cataloged display sets, clearing the internal history.
530    ///
531    /// Useful for periodic memory management during long extractions.
532    pub fn drain_history(&mut self) -> Vec<TrackDisplaySet> {
533        std::mem::take(&mut self.catalog)
534    }
535
536    /// Discard all cataloged display sets to free memory.
537    pub fn clear_history(&mut self) {
538        self.catalog.clear();
539    }
540
541    /// Current I/O statistics. Updates as extraction progresses.
542    pub fn stats(&self) -> &ExtractionStats {
543        &self.stats
544    }
545
546    /// Exhaust the iterator and return all display sets grouped by track.
547    ///
548    /// For MKV files with Cues and enough cue points that haven't been
549    /// partially consumed, this uses parallel extraction with multiple
550    /// file handles for maximum throughput.
551    pub fn collect_by_track(mut self) -> Result<Vec<TrackDisplaySets>, PgsError> {
552        // Parallel optimization bypasses iterator filtering, so skip when a
553        // time range is set.
554        if self.time_range_start_ms.is_none() && self.time_range_end_ms.is_none() {
555            if let ExtractorInner::Mkv(ref state) = self.inner
556                && let Some(result) = state.try_collect_parallel()
557            {
558                return result;
559            }
560        }
561
562        // Build track info lookup from the pre-parsed metadata.
563        let track_info_map: HashMap<u32, PgsTrackInfo> = self
564            .tracks
565            .iter()
566            .map(|t| (t.track_id, t.clone()))
567            .collect();
568
569        // Sequential drain and group.
570        let results = self.by_ref().collect::<Result<Vec<_>, _>>()?;
571        Ok(group_by_track(results, &track_info_map))
572    }
573
574    /// Update stats from the inner reader.
575    fn update_stats(&mut self) {
576        self.stats.bytes_read = match &self.inner {
577            ExtractorInner::Mkv(state) => state.bytes_read(),
578            ExtractorInner::M2ts(state) => state.bytes_read(),
579            ExtractorInner::Sup(state) => state.bytes_read(),
580            ExtractorInner::Done => self.stats.bytes_read,
581        };
582    }
583}
584
585impl Iterator for Extractor {
586    type Item = Result<TrackDisplaySet, PgsError>;
587
588    fn next(&mut self) -> Option<Self::Item> {
589        loop {
590            let result = match &mut self.inner {
591                ExtractorInner::Mkv(state) => state.next_display_set(),
592                ExtractorInner::M2ts(state) => state.next_display_set(),
593                ExtractorInner::Sup(state) => state.next_display_set(),
594                ExtractorInner::Done => return None,
595            };
596
597            self.update_stats();
598
599            match result {
600                Some(Ok(tds)) => {
601                    let pts_ms = tds.display_set.pts_ms;
602
603                    // Past end time — stop iteration entirely.
604                    if let Some(end) = self.time_range_end_ms {
605                        if pts_ms > end {
606                            self.inner = ExtractorInner::Done;
607                            return None;
608                        }
609                    }
610
611                    // Before start time — skip (safety net for estimation overshoot).
612                    if let Some(start) = self.time_range_start_ms {
613                        if pts_ms < start {
614                            continue;
615                        }
616                    }
617
618                    if self.catalog_enabled {
619                        self.catalog.push(tds.clone());
620                    }
621                    return Some(Ok(tds));
622                }
623                Some(Err(e)) => {
624                    self.inner = ExtractorInner::Done;
625                    return Some(Err(e));
626                }
627                None => {
628                    self.inner = ExtractorInner::Done;
629                    return None;
630                }
631            }
632        }
633    }
634}
635
636/// Detect the container format by reading the first few bytes.
637fn detect_format(reader: &mut SeekBufReader<File>) -> Result<ContainerFormat, PgsError> {
638    reader.seek_to(0)?;
639    let mut magic = [0u8; 5];
640    reader.read_exact(&mut magic)?;
641    reader.seek_to(0)?;
642
643    // EBML magic: 0x1A45DFA3
644    if magic[0..4] == [0x1A, 0x45, 0xDF, 0xA3] {
645        return Ok(ContainerFormat::Matroska);
646    }
647
648    // TS/M2TS: 0x47 at offset 0 (raw TS) or offset 4 (M2TS).
649    if magic[0] == 0x47 || magic[4] == 0x47 {
650        match m2ts::ts_packet::detect_packet_format(reader) {
651            Ok(m2ts::ts_packet::PacketFormat::M2ts) => return Ok(ContainerFormat::M2ts),
652            Ok(m2ts::ts_packet::PacketFormat::RawTs) => {
653                return Ok(ContainerFormat::TransportStream);
654            }
655            Err(_) => {}
656        }
657    }
658
659    // SUP: raw PGS segments starting with "PG" magic (0x50, 0x47).
660    if magic[0] == 0x50 && magic[1] == 0x47 {
661        return Ok(ContainerFormat::Sup);
662    }
663
664    Err(PgsError::UnknownFormat)
665}
666
667/// Convert an MKV track to public track info.
668fn mkv_track_to_info(
669    t: &mkv::tracks::MkvPgsTrack,
670    frame_counts: &HashMap<u64, u64>,
671    cue_points: &Option<Vec<mkv::cues::PgsCuePoint>>,
672) -> PgsTrackInfo {
673    let has_cues = Some(
674        cue_points
675            .as_ref()
676            .is_some_and(|cues| cues.iter().any(|cp| cp.track_number == t.track_number)),
677    );
678    PgsTrackInfo {
679        track_id: t.track_number as u32,
680        language: t.language.clone(),
681        container: ContainerFormat::Matroska,
682        name: t.name.clone(),
683        flag_default: t.flag_default,
684        flag_forced: t.flag_forced,
685        display_set_count: t.track_uid.and_then(|uid| frame_counts.get(&uid).copied()),
686        has_cues,
687    }
688}
689
690/// Build synthetic track info for a .sup file (always a single track).
691fn sup_track_info() -> PgsTrackInfo {
692    PgsTrackInfo {
693        track_id: 0,
694        language: None,
695        container: ContainerFormat::Sup,
696        name: None,
697        flag_default: None,
698        flag_forced: None,
699        display_set_count: None,
700        has_cues: None,
701    }
702}
703
704/// Convert an M2TS track to public track info.
705fn m2ts_track_to_info(t: &m2ts::M2tsPgsTrack, format: ContainerFormat) -> PgsTrackInfo {
706    PgsTrackInfo {
707        track_id: t.pid as u32,
708        language: t.language.clone(),
709        container: format,
710        name: None,
711        flag_default: None,
712        flag_forced: None,
713        display_set_count: None,
714        has_cues: None,
715    }
716}
717
718/// List all PGS tracks in a container file.
719pub fn list_pgs_tracks(path: &Path) -> Result<Vec<PgsTrackInfo>, PgsError> {
720    let file = File::open(path)?;
721    let mut reader = SeekBufReader::new(file);
722
723    let format = detect_format(&mut reader)?;
724
725    match format {
726        ContainerFormat::Matroska => {
727            let meta = mkv::prepare_mkv_metadata(&mut reader)?;
728            Ok(meta
729                .pgs_tracks
730                .iter()
731                .map(|t| mkv_track_to_info(t, &meta.frame_counts, &meta.cue_points))
732                .collect())
733        }
734        ContainerFormat::M2ts | ContainerFormat::TransportStream => {
735            let tracks = m2ts::list_pgs_tracks_m2ts(&mut reader, Some(path))?;
736            Ok(tracks
737                .iter()
738                .map(|t| m2ts_track_to_info(t, format))
739                .collect())
740        }
741        ContainerFormat::Sup => Ok(vec![sup_track_info()]),
742    }
743}
744
745/// Extract all PGS Display Sets from all tracks in a container file.
746///
747/// Returns display sets grouped by track, with track metadata.
748pub fn extract_all_display_sets(path: &Path) -> Result<Vec<TrackDisplaySets>, PgsError> {
749    Extractor::open(path)?.collect_by_track()
750}
751
/// Buffer size for M2TS sequential scanning (2 MB).
/// Larger buffers reduce OS-level read calls and improve NAS throughput.
/// Passed to `SeekBufReader::with_capacity` whenever an M2TS/TS file is opened.
const M2TS_BUF_SIZE: usize = 2 * 1024 * 1024;
755
756/// Group a flat list of `TrackDisplaySet` into per-track `TrackDisplaySets`,
757/// preserving insertion order of tracks.
758fn group_by_track(
759    results: Vec<TrackDisplaySet>,
760    track_info_map: &HashMap<u32, PgsTrackInfo>,
761) -> Vec<TrackDisplaySets> {
762    let mut track_map: HashMap<u32, Vec<DisplaySet>> = HashMap::new();
763    let mut track_order: Vec<u32> = Vec::new();
764
765    for tds in results {
766        let entry = track_map.entry(tds.track_id).or_insert_with(|| {
767            track_order.push(tds.track_id);
768            Vec::new()
769        });
770        entry.push(tds.display_set);
771    }
772
773    track_order
774        .into_iter()
775        .filter_map(|id| {
776            let display_sets = track_map.remove(&id)?;
777            if display_sets.is_empty() {
778                return None;
779            }
780            let track = track_info_map.get(&id)?.clone();
781            Some(TrackDisplaySets {
782                track,
783                display_sets,
784            })
785        })
786        .collect()
787}
788
789/// Extract all PGS Display Sets from all tracks and return I/O statistics.
790pub fn extract_all_display_sets_with_stats(
791    path: &Path,
792) -> Result<(Vec<TrackDisplaySets>, ExtractionStats), PgsError> {
793    let mut extractor = Extractor::open(path)?;
794    let track_info_map: HashMap<u32, PgsTrackInfo> = extractor
795        .tracks()
796        .iter()
797        .map(|t| (t.track_id, t.clone()))
798        .collect();
799
800    let results = extractor.by_ref().collect::<Result<Vec<_>, _>>()?;
801    let stats = extractor.stats().clone();
802    let grouped = group_by_track(results, &track_info_map);
803
804    Ok((grouped, stats))
805}
806
807/// Extract PGS Display Sets from a container file for a single track.
808///
809/// If `track_id` is `None`, extracts from the first PGS track found.
810pub fn extract_display_sets(
811    path: &Path,
812    track_id: Option<u32>,
813) -> Result<Vec<DisplaySet>, PgsError> {
814    let (display_sets, _) = extract_display_sets_with_stats(path, track_id)?;
815    Ok(display_sets)
816}
817
818/// Extract PGS Display Sets for a single track and return I/O statistics.
819///
820/// Same as `extract_display_sets`, but also returns `ExtractionStats`
821/// with file size and bytes actually read — useful for benchmarking
822/// and verifying the library's I/O efficiency.
823pub fn extract_display_sets_with_stats(
824    path: &Path,
825    track_id: Option<u32>,
826) -> Result<(Vec<DisplaySet>, ExtractionStats), PgsError> {
827    let extractor = Extractor::open(path)?;
828    let mut extractor = if let Some(id) = track_id {
829        extractor.with_track_filter(&[id])
830    } else {
831        extractor
832    };
833
834    let target_id = track_id.or_else(|| extractor.tracks().first().map(|t| t.track_id));
835
836    let mut display_sets = Vec::new();
837    for result in extractor.by_ref() {
838        let tds = result?;
839        if target_id.is_none_or(|id| tds.track_id == id) {
840            display_sets.push(tds.display_set);
841        }
842    }
843
844    let stats = extractor.stats().clone();
845    Ok((display_sets, stats))
846}
847
848/// Write Display Sets as a raw .sup file (concatenated PGS segments with headers).
849pub fn write_sup_file(display_sets: &[DisplaySet], output: &Path) -> Result<(), PgsError> {
850    let file = File::create(output)?;
851    let mut writer = std::io::BufWriter::new(file);
852
853    for ds in display_sets {
854        for segment in &ds.segments {
855            let bytes = segment.to_bytes();
856            writer.write_all(&bytes)?;
857        }
858    }
859
860    writer.flush()?;
861    Ok(())
862}