Skip to main content

mkv_element/
view.rs

//! A view of a Matroska file that parses its metadata without loading Clusters into memory.
2
3use std::mem::take;
4
5use crate::element::Element;
6use crate::master::*;
7
/// View of a Matroska file, parsing the EBML and Segment headers, but not loading Clusters.
///
/// Holds the parsed EBML header together with one [`SegmentView`] per Segment that was
/// encountered while parsing.
#[derive(Debug, Clone, PartialEq)]
pub struct MatroskaView {
    /// The EBML header, read before any Segment.
    pub ebml: Ebml,
    /// The Segment views, as there can be multiple segments in a Matroska file.
    /// They are stored in the order in which the segments were parsed.
    pub segments: Vec<SegmentView>,
}
16
17impl MatroskaView {
18    /// Create a new MatroskaView by parsing the EBML header and all Segment headers,
19    /// but skipping Cluster data to avoid loading it into memory.
20    pub fn new<R>(reader: &mut R) -> crate::Result<Self>
21    where
22        R: std::io::Read + std::io::Seek + ?Sized,
23    {
24        use crate::io::blocking_impl::*;
25
26        // Read the EBML header
27        let ebml = Ebml::read_from(reader)?;
28
29        // Parse all segments in the file
30        let segments = SegmentView::new(reader)?;
31
32        // At least one segment is required
33        if segments.is_empty() {
34            return Err(crate::Error::MissingElement(Segment::ID));
35        }
36
37        Ok(MatroskaView { ebml, segments })
38    }
39
40    /// Create a new MatroskaView by parsing the EBML header and all Segment headers,
41    /// but skipping Cluster data to avoid loading it into memory.
42    #[cfg(feature = "tokio")]
43    #[cfg_attr(docsrs, doc(cfg(feature = "tokio")))]
44    pub async fn new_async<R>(reader: &mut R) -> crate::Result<Self>
45    where
46        R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin + ?Sized,
47    {
48        use crate::io::tokio_impl::*;
49
50        // Read the EBML header
51        let ebml = Ebml::async_read_from(reader).await?;
52
53        // Parse all segments in the file
54        let segments = SegmentView::new_async(reader).await?;
55
56        Ok(MatroskaView { ebml, segments })
57    }
58}
59
/// View of a Segment, parsing the Segment header, but not loading Clusters.
///
/// Only the metadata elements of the Segment are kept; Cluster payloads are skipped
/// while parsing.
#[derive(Debug, Clone, PartialEq)]
pub struct SegmentView {
    /// Contains seeking information of Top-Level Elements.
    /// Every SeekHead encountered in the Segment is collected here.
    pub seek_head: Vec<SeekHead>,
    /// Contains general information about the Segment.
    pub info: Info,
    /// A Top-Level Element of information with many tracks described.
    pub tracks: Option<Tracks>,
    /// A Top-Level Element to speed seeking access. All entries are local to the Segment.
    /// This Element **SHOULD** be set when the Segment is not transmitted as a live stream.
    pub cues: Option<Cues>,
    /// Contain attached files.
    pub attachments: Option<Attachments>,
    /// A system to define basic menus and partition data.
    pub chapters: Option<Chapters>,
    /// Element containing metadata describing Tracks, Editions, Chapters, Attachments, or the Segment as a whole. A list of valid tags can be found in [Matroska tagging RFC](https://www.matroska.org/technical/tagging.html).
    pub tags: Vec<Tags>,
    /// The position of the Segment data (after the Segment header), i.e. the reader's
    /// stream position right after the Segment element header was read.
    pub segment_data_position: u64,
    /// The position of the first Cluster in the Segment, i.e. the reader's stream position
    /// at which that Cluster's element header starts. 0 if no Cluster found.
    pub first_cluster_position: u64,
}
82
83impl SegmentView {
84    /// Create a new SegmentView by parsing the Segment header and metadata elements,
85    /// but skipping Cluster data to avoid loading it into memory.
86    pub fn new<R>(reader: &mut R) -> crate::Result<Vec<Self>>
87    where
88        R: std::io::Read + std::io::Seek + ?Sized,
89    {
90        let mut out = vec![];
91
92        use crate::io::blocking_impl::*;
93        use std::io::SeekFrom;
94
95        // Read the Segment header
96        let segment_header = crate::base::Header::read_from(reader)?;
97        if segment_header.id != Segment::ID {
98            return Err(crate::Error::MissingElement(Segment::ID));
99        }
100
101        let mut segment_data_position = reader.stream_position()?;
102
103        let mut seek_head = Vec::new();
104        let mut info = None;
105        let mut tracks = None;
106        let mut cues = None;
107        let mut attachments = None;
108        let mut chapters = None;
109        let mut tags = Vec::new();
110        let mut first_cluster_position = 0;
111
112        // Parse segment elements
113        loop {
114            use crate::base::Header;
115
116            let current_position = reader.stream_position()?;
117            let Ok(header) = Header::read_from(reader) else {
118                break;
119            };
120            if header.id == Cluster::ID && first_cluster_position == 0 {
121                first_cluster_position = current_position;
122            }
123
124            // Check if we've reached the end of the segment
125            match header.id {
126                SeekHead::ID => seek_head.push(SeekHead::read_element(&header, reader)?),
127                Info::ID => info = Some(Info::read_element(&header, reader)?),
128                Tracks::ID => tracks = Some(Tracks::read_element(&header, reader)?),
129                Cues::ID => cues = Some(Cues::read_element(&header, reader)?),
130                Attachments::ID => attachments = Some(Attachments::read_element(&header, reader)?),
131                Chapters::ID => chapters = Some(Chapters::read_element(&header, reader)?),
132                Tags::ID => tags.push(Tags::read_element(&header, reader)?),
133                Cluster::ID => {
134                    // try to skip, or else break
135                    use crate::base::VInt64;
136                    let mut seeks: Vec<(VInt64, u64)> = seek_head
137                        .iter()
138                        .flat_map(|sh| {
139                            sh.seek.iter().flat_map(|s| {
140                                let mut id = &s.seek_id[..];
141                                let a = VInt64::read_from(&mut id);
142                                match a {
143                                    Ok(v) => Some((v, *s.seek_position + segment_data_position)),
144                                    Err(e) => {
145                                        log::warn!("Failed to read seek_id as VInt: {e}, skip...");
146                                        None
147                                    }
148                                }
149                            })
150                        })
151                        .collect();
152
153                    seeks.sort_by_key(|a| a.1);
154
155                    // find position larger than first_cluster_position
156                    if let Some(pos) = seeks.iter().find(|(_, pos)| *pos > first_cluster_position) {
157                        reader.seek(SeekFrom::Start(pos.1))?;
158                        continue;
159                    }
160
161                    if segment_header.size.is_unknown {
162                        break;
163                    } else {
164                        let eos = segment_data_position + *segment_header.size;
165                        reader.seek(SeekFrom::Start(eos))?;
166                        continue;
167                    }
168                }
169                Segment::ID => {
170                    out.push(SegmentView {
171                        seek_head: take(&mut seek_head),
172                        // Info is required in a valid Matroska file
173                        info: info.take().ok_or(crate::Error::MissingElement(Info::ID))?,
174                        tracks: tracks.take(),
175                        cues: cues.take(),
176                        attachments: attachments.take(),
177                        chapters: chapters.take(),
178                        tags: take(&mut tags),
179                        first_cluster_position: take(&mut first_cluster_position),
180                        segment_data_position: take(&mut segment_data_position),
181                    });
182                    segment_data_position = reader.stream_position()?;
183                }
184                _ => {
185                    use log::warn;
186                    use std::io::Read;
187                    // Skip unknown elements, here we read and discard the data for efficiency
188                    std::io::copy(&mut reader.take(*header.size), &mut std::io::sink())?;
189                    warn!("Skipped unknown element with ID: {}", header.id);
190                }
191            }
192        }
193
194        // Info is required in a valid Matroska file
195        let info = info.ok_or(crate::Error::MissingElement(Info::ID))?;
196
197        out.push(SegmentView {
198            seek_head,
199            info,
200            tracks,
201            cues,
202            attachments,
203            chapters,
204            tags,
205            first_cluster_position,
206            segment_data_position,
207        });
208        Ok(out)
209    }
210
211    /// Create a new SegmentView by parsing the Segment header and metadata elements,
212    /// but skipping Cluster data to avoid loading it into memory.
213    #[cfg(feature = "tokio")]
214    #[cfg_attr(docsrs, doc(cfg(feature = "tokio")))]
215    pub async fn new_async<R>(reader: &mut R) -> crate::Result<Vec<Self>>
216    where
217        R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin + ?Sized,
218    {
219        let mut out = vec![];
220
221        use crate::io::tokio_impl::*;
222        use tokio::io::AsyncSeekExt;
223
224        // Read the Segment header
225        let segment_header = crate::base::Header::async_read_from(reader).await?;
226        if segment_header.id != Segment::ID {
227            return Err(crate::Error::MissingElement(Segment::ID));
228        }
229
230        let mut segment_data_position = reader.stream_position().await?;
231
232        let mut seek_head = Vec::new();
233        let mut info = None;
234        let mut tracks = None;
235        let mut cues = None;
236        let mut attachments = None;
237        let mut chapters = None;
238        let mut tags = Vec::new();
239        let mut first_cluster_position = 0;
240
241        // Parse segment elements
242        loop {
243            use crate::base::Header;
244
245            let current_position = reader.stream_position().await?;
246            let Ok(header) = Header::async_read_from(reader).await else {
247                break;
248            };
249            if header.id == Cluster::ID && first_cluster_position == 0 {
250                first_cluster_position = current_position;
251            }
252
253            // Check if we've reached the end of the segment
254            match header.id {
255                SeekHead::ID => {
256                    seek_head.push(SeekHead::async_read_element(&header, reader).await?)
257                }
258                Info::ID => info = Some(Info::async_read_element(&header, reader).await?),
259                Tracks::ID => tracks = Some(Tracks::async_read_element(&header, reader).await?),
260                Cues::ID => cues = Some(Cues::async_read_element(&header, reader).await?),
261                Attachments::ID => {
262                    attachments = Some(Attachments::async_read_element(&header, reader).await?)
263                }
264                Chapters::ID => {
265                    chapters = Some(Chapters::async_read_element(&header, reader).await?)
266                }
267                Tags::ID => tags.push(Tags::async_read_element(&header, reader).await?),
268                Cluster::ID => {
269                    // try to skip, or else break
270                    use crate::base::VInt64;
271                    let mut seeks: Vec<(VInt64, u64)> = seek_head
272                        .iter()
273                        .flat_map(|sh| {
274                            sh.seek.iter().flat_map(|s| {
275                                use crate::io::blocking_impl::ReadFrom;
276
277                                let mut id = &s.seek_id[..];
278                                let a = VInt64::read_from(&mut id);
279                                match a {
280                                    Ok(v) => Some((v, *s.seek_position + segment_data_position)),
281                                    Err(e) => {
282                                        log::warn!("Failed to read seek_id as VInt: {e}, skip...");
283                                        None
284                                    }
285                                }
286                            })
287                        })
288                        .collect();
289
290                    seeks.sort_by_key(|a| a.1);
291
292                    // find position larger than first_cluster_position
293                    if let Some(pos) = seeks.iter().find(|(_, pos)| *pos > first_cluster_position) {
294                        reader.seek(std::io::SeekFrom::Start(pos.1)).await?;
295                        continue;
296                    }
297
298                    if segment_header.size.is_unknown {
299                        break;
300                    } else {
301                        let eos = segment_data_position + *segment_header.size;
302                        reader.seek(std::io::SeekFrom::Start(eos)).await?;
303                        continue;
304                    }
305                }
306                Segment::ID => {
307                    out.push(SegmentView {
308                        seek_head: take(&mut seek_head),
309                        // Info is required in a valid Matroska file
310                        info: info.take().ok_or(crate::Error::MissingElement(Info::ID))?,
311                        tracks: tracks.take(),
312                        cues: cues.take(),
313                        attachments: attachments.take(),
314                        chapters: chapters.take(),
315                        tags: take(&mut tags),
316                        first_cluster_position: take(&mut first_cluster_position),
317                        segment_data_position: take(&mut segment_data_position),
318                    });
319                    segment_data_position = reader.stream_position().await?;
320                }
321                _ => {
322                    use log::warn;
323                    use tokio::io::AsyncReadExt;
324                    // Skip unknown elements, here we read and discard the data for efficiency
325                    tokio::io::copy(&mut reader.take(*header.size), &mut tokio::io::sink()).await?;
326                    warn!("Skipped unknown element with ID: {}", header.id);
327                }
328            }
329        }
330
331        // Info is required in a valid Matroska file
332        let info = info.ok_or(crate::Error::MissingElement(Info::ID))?;
333
334        out.push(SegmentView {
335            seek_head,
336            info,
337            tracks,
338            cues,
339            attachments,
340            chapters,
341            tags,
342            first_cluster_position,
343            segment_data_position,
344        });
345        Ok(out)
346    }
347}