ebml_iterable/
tag_iterator.rs

1use std::io::Read;
2use std::collections::{HashSet, VecDeque};
3
4use crate::spec_util::validate_tag_path;
5use crate::tag_iterator_util::EBMLSize::{Known, Unknown};
6use crate::tag_iterator_util::{DEFAULT_BUFFER_LEN, EBMLSize, ProcessingTag, AllowableErrors};
7
8use super::tools;
9use super::specs::{EbmlSpecification, EbmlTag, Master, TagDataType, PathPart};
10use super::errors::tag_iterator::{CorruptedFileError, TagIteratorError};
11use super::errors::tool::ToolError;
12
/// Bit flag in `allowed_errors`: tag ids missing from the specification are tolerated.
const INVALID_TAG_ID_ERROR: u8 = 1 << 0;
/// Bit flag in `allowed_errors`: tags that fail the specification's path validation are tolerated.
const INVALID_HIERARCHY_ERROR: u8 = 1 << 1;
/// Bit flag in `allowed_errors`: tags extending past the end of a known-size parent are tolerated.
const OVERSIZED_CHILD_ERROR: u8 = 1 << 2;
16
17///
18/// Provides an iterator over EBML files (read from a source implementing the [`std::io::Read`] trait). Can be configured to read specific "Master" tags as complete objects rather than just emitting when they start and end.
19///
/// This is a generic struct that requires a specification implementing [`EbmlSpecification`] and [`EbmlTag`]. No specifications are included in this crate - you will need to either use another crate providing a spec (such as the Matroska spec implemented in the [webm-iterable](https://crates.io/crates/webm_iterable) crate) or write your own spec if you want to iterate over a custom EBML file. The iterator outputs `TSpec` variants representing the type of tag (defined by the specification) and the accompanying tag data. "Master" tags (defined by the specification) usually will be read as `Start` and `End` variants, but the iterator can be configured to buffer Master tags into a `Full` variant using the `tags_to_buffer` parameter.
21///
22/// Note: The [`Self::with_capacity()`] method can be used to construct a `TagIterator` with a specified default buffer size.  This is only useful as a microoptimization to memory management if you know the maximum tag size of the file you're reading.
23///
24/// ## Example
25///
26/// ```no_run
27/// use std::fs::File;
28/// use ebml_iterable::TagIterator;
29/// #
30/// # use ebml_iterable::specs::{EbmlSpecification, TagDataType};
31/// # use ebml_iterable_specification::empty_spec::EmptySpec;
32///
33/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
34/// let file = File::open("my_ebml_file.ebml")?;
35/// let mut my_iterator: TagIterator<_, EmptySpec> = TagIterator::new(file, &[]);
36/// for tag in my_iterator {
37///   println!("{:?}", tag?);
38/// }
39/// # Ok(())
40/// # }
41/// ```
42///
43/// ## Errors
44///
45/// The `Item` type for the associated [`Iterator`] implementation is a [`Result<TSpec, TagIteratorError>`], meaning each `next()` call has the potential to fail.  This is because the source data is not parsed all at once - it is incrementally parsed as the iterator progresses.  If the iterator runs into an error (such as corrupted data or an unexpected end-of-file), it needs to be propagated to the logic trying to read the tags.  The different possible error states are enumerated in [`TagIteratorError`].
46///
47/// ## Panics
48///
49/// The iterator can panic if `<TSpec>` is an internally inconsistent specification (i.e. it claims that a specific tag id has a specific data type but fails to produce a tag variant using data of that type).  This won't happen if the specification being used was created using the [`#[ebml_specification]`](https://docs.rs/ebml-iterable-specification-derive/latest/ebml_iterable_specification_derive/attr.ebml_specification.html) attribute macro.
50///
51pub struct TagIterator<R: Read, TSpec>
52    where
53    TSpec: EbmlSpecification<TSpec> + EbmlTag<TSpec> + Clone
54{
55    source: R,
56    tag_ids_to_buffer: HashSet<u64>,
57    allowed_errors: u8,
58    max_allowed_tag_size: Option<usize>,
59
60    buffer: Box<[u8]>,
61    buffer_offset: Option<usize>,
62    buffered_byte_length: usize,
63    internal_buffer_position: usize,
64    tag_stack: Vec<ProcessingTag<TSpec>>,
65    emission_queue: VecDeque<Result<(TSpec, usize), TagIteratorError>>,
66    last_emitted_tag_offset: usize,
67    has_determined_doc_path: bool,
68
69    emit_master_end_when_eof: bool,
70}
71
72impl<R: Read, TSpec> TagIterator<R, TSpec>
73    where
74    TSpec: EbmlSpecification<TSpec> + EbmlTag<TSpec> + Clone
75{
76
77    ///
78    /// Returns a new [`TagIterator<TSpec>`] instance.
79    ///
80    /// The `source` parameter must implement [`std::io::Read`].  The second argument, `tags_to_buffer`, specifies which "Master" tags should be read as [`Master::Full`]s rather than as [`Master::Start`] and [`Master::End`]s.  Refer to the documentation on [`TagIterator`] for more explanation of how to use the returned instance.
81    ///
82    pub fn new(source: R, tags_to_buffer: &[TSpec]) -> Self {
83        TagIterator::with_capacity(source, tags_to_buffer, DEFAULT_BUFFER_LEN)
84    }
85
86    ///
87    /// Returns a new [`TagIterator<TSpec>`] instance with the specified internal buffer capacity.
88    ///
89    /// This initializes the [`TagIterator`] with a specific byte capacity.  The iterator will still reallocate if necessary. (Reallocation occurs if the iterator comes across a tag that should be output as a [`Master::Full`] and its size in bytes is greater than the iterator's current buffer capacity.)
90    ///
91    pub fn with_capacity(source: R, tags_to_buffer: &[TSpec], capacity: usize) -> Self {
92        let buffer = vec![0;capacity];
93
94        TagIterator {
95            source,
96            tag_ids_to_buffer: tags_to_buffer.iter().map(|tag| tag.get_id()).collect(),
97            allowed_errors: 0,
98            max_allowed_tag_size: Some(4 * usize::pow(1000, 3)), // 4GB
99            buffer: buffer.into_boxed_slice(),
100            buffered_byte_length: 0,
101            buffer_offset: None,
102            internal_buffer_position: 0,
103            tag_stack: Vec::new(),
104            emission_queue: VecDeque::new(),
105            last_emitted_tag_offset: 0,
106            has_determined_doc_path: false,
107            emit_master_end_when_eof: true,
108        }
109    }
110
111    ///
112    /// Configures how strictly the iterator abides `<TSpec>`.
113    /// 
114    /// By default (as of v0.5.0), the iterator assumes `<TSpec>` is complete and that any tags that do not conform to `<TSpec>` are due to corrupted file data.  This method can be used to relax some of these checks so that fewer [`TagIteratorError::CorruptedFileData`] errors occur.
115    /// 
116    /// # Important
117    /// 
    /// Relaxing these checks does not necessarily make the iterator more robust.  If all errors are allowed, the iterator will assume any incoming tag id and tag data size are valid, and it will produce "RawTag"s containing binary contents for any tag ids not in `<TSpec>`.  However, if the file truly has corrupted data, the "size" of these elements will likely be corrupt as well.  This can typically result in requests for massive allocations, causing delays and eventual crashing.  By eagerly returning errors (the default), applications can decide how to handle corrupted elements more quickly and efficiently.
119    /// 
120    /// tldr; allow errors at your own risk
121    /// 
122    /// > Note: TagIterators returned by [`Self::new()`] and [`Self::with_capacity()`] allow no errors by default.
123    /// 
124    pub fn allow_errors(&mut self, errors: &[AllowableErrors]) {
125        self.allowed_errors = errors.iter().fold(0u8, |a, c| match c {
126            AllowableErrors::InvalidTagIds => a | INVALID_TAG_ID_ERROR ,
127            AllowableErrors::HierarchyProblems => a | INVALID_HIERARCHY_ERROR,
128            AllowableErrors::OversizedTags => a | OVERSIZED_CHILD_ERROR,
129        });
130    }
131
132    ///
133    /// Configures the maximum size a tag is allowed to be before the iterator considers it invalid.
134    ///
    /// By default (as of v0.6.1), the iterator will throw a [`CorruptedFileError::InvalidTagSize`] error if it comes across any tags that declare their data to be more than 4GB.  This method can be used to change (and optionally remove) this behavior.  Note that increasing this size can potentially result in massive allocations, causing delays and panics.
136    ///
137    pub fn set_max_allowable_tag_size(&mut self, size: Option<usize>) {
138        self.max_allowed_tag_size = size;
139    }
140
141    ///
142    /// Instructs the iterator to attempt to recover after reaching corrupted file data.
143    /// 
144    /// This method can be used to skip over corrupted sections of a read stream without recreating a new iterator.  The iterator will seek forward from its current internal position until it reaches either a valid EBML tag id or EOF.  After recovery, [`Iterator::next()`] *should* return an [`Ok`] result.
145    /// 
146    pub fn try_recover(&mut self) -> Result<(), TagIteratorError> {
147        let original_position = self.current_offset();        
148        loop {
149            if !self.ensure_data_read(1)? {
150                return Err(TagIteratorError::UnexpectedEOF { tag_start: self.current_offset(), tag_id: None, tag_size: None, partial_data: None });
151            }
152
153            self.internal_buffer_position += 1;
154            if self.peek_valid_tag_header().is_ok() {
155                break;
156            }
157        }
158
159        // As part of recovery, update internal tag stack sizes so that we don't get "oversized children" errors after skipping corrupted data
160        let diff = self.current_offset() - original_position;
161        for tag in self.tag_stack.iter_mut() {
162            if let EBMLSize::Known(size) = &tag.size {
163                tag.size = EBMLSize::Known(size + diff);
164            }
165        }
166
167        Ok(())
168    }
169
170    ///
171    /// Consumes self and returns the underlying read stream.
172    /// 
173    /// Note that any leftover tags in the internal emission queue are lost, and any data read into [`TagIterator`]'s internal buffer is dropped. Therefore, constructing a new [`TagIterator`] using the returned stream may lead to data loss unless it is rewound.
174    /// 
175    pub fn into_inner(self) -> R {
176        self.source
177    }
178
179    ///
180    /// Gets a mutable reference to the underlying read stream.
181    /// 
182    /// It is inadvisable to directly read from the underlying stream.
183    /// 
184    pub fn get_mut(&mut self) -> &mut R {
185        &mut self.source
186    }
187
188    ///
189    /// Gets a reference to the underlying read stream.
190    /// 
191    /// It is inadvisable to directly read from the underlying stream.
192    /// 
193    pub fn get_ref(&self) -> &R {
194        &self.source
195    }
196
197    ///
198    /// Returns the byte offset of the last emitted tag.
199    /// 
200    /// This function returns a byte index specifying the start of the last emitted tag in the context of the [`TagIterator`]'s source read stream.  This value is *not guaranteed to always increase as the file is read*.  Whenever the iterator emits a [`Master::End`] variant, [`Self::last_emitted_tag_offset()`] will reflect the start index of the "Master" tag, which will be before previous values that were obtainable when any children of the master were emitted.
201    /// 
202    pub fn last_emitted_tag_offset(&self) -> usize {
203        self.last_emitted_tag_offset
204    }
205
206    ///
207    /// Control whether the iterator should emit closing tags when it reaches EOF.
208    /// 
209    /// By default, the iterator will emit [`Master::End`] items for all currently open tags when it reaches the end of the file.  You may override this behavior by passing `false` to this method.
210    /// 
211    /// This is recommended if you supply a [`std::io::Read`] source that can supply more data after reaching EOF, as in some streaming scenarios.
212    /// 
213    pub fn emit_master_end_when_eof(&mut self, emit: bool) {
214        self.emit_master_end_when_eof = emit;
215    }
216
217    #[inline(always)]
218    fn current_offset(&self) -> usize {
219        self.buffer_offset.unwrap_or(0) + self.internal_buffer_position
220    }
221
222    fn private_read(&mut self, internal_buffer_start: usize) -> Result<bool, TagIteratorError> {
223        let bytes_read = self.source.read(&mut self.buffer[internal_buffer_start..]).map_err(|source| TagIteratorError::ReadError { source })?;
224        if bytes_read == 0 {
225            Ok(false)
226        } else {
227            self.buffered_byte_length += bytes_read;
228            Ok(true)
229        }
230    }
231
232    fn ensure_capacity(&mut self, required_capacity: usize) {
233        if required_capacity > self.buffer.len() {
234            let mut new_buffer = Vec::from(&self.buffer[..]);
235            new_buffer.resize(required_capacity, 0);
236            self.buffer = new_buffer.into_boxed_slice();
237        }
238    }
239
240    fn ensure_data_read(&mut self, length: usize) -> Result<bool, TagIteratorError> {
241        if self.internal_buffer_position + length <= self.buffered_byte_length {
242            return Ok(true)
243        }
244
245        if self.buffer_offset.is_none() {
246            if !self.private_read(0)? {
247                return Ok(false);
248            }
249            self.buffer_offset = Some(0);
250            self.internal_buffer_position = 0;
251        } else {
252            while self.internal_buffer_position + length > self.buffered_byte_length {
253                self.buffer.copy_within(self.internal_buffer_position..self.buffered_byte_length, 0);
254                self.buffered_byte_length -= self.internal_buffer_position;
255                self.buffer_offset = Some(self.current_offset());
256                self.internal_buffer_position = 0;
257                if !self.private_read(self.buffered_byte_length)? {
258                    return Ok(false);
259                }
260            }
261        }
262        Ok(true)
263    }
264
265    #[inline(always)]
266    fn peek_tag_id(&mut self) -> Result<(u64, usize), TagIteratorError> {
267        self.ensure_data_read(8)?;
268        if self.buffer[self.internal_buffer_position] == 0 {
269            return Ok((0, 1));
270        }
271        let length = 8 - self.buffer[self.internal_buffer_position].ilog2() as usize;
272        let mut val = self.buffer[self.internal_buffer_position] as u64;
273        for i in 1..length {
274            val <<= 8;
275            val += self.buffer[self.internal_buffer_position+i] as u64;
276        }
277        Ok((val, length))
278    }
279
280    #[inline]
281    fn peek_valid_tag_header(&mut self) -> Result<(u64, Option<TagDataType>, EBMLSize, usize), TagIteratorError> {
282        self.ensure_data_read(16)?;
283        let (tag_id, id_len) = self.peek_tag_id()?;
284        let spec_tag_type = <TSpec>::get_tag_data_type(tag_id);
285        
286        let (size, size_len) = tools::read_vint(&self.buffer[(self.internal_buffer_position + id_len)..])
287        .or(Err(TagIteratorError::CorruptedFileData(CorruptedFileError::InvalidTagData{tag_id, position: self.current_offset() })))?
288        .ok_or(TagIteratorError::UnexpectedEOF { tag_start: self.current_offset(), tag_id: Some(tag_id), tag_size: None, partial_data: None })?;
289    
290        if self.buffered_byte_length <= id_len + size_len {
291            return Err(TagIteratorError::UnexpectedEOF { tag_start: self.current_offset(), tag_id: Some(tag_id), tag_size: None, partial_data: None });
292        }
293
294        if matches!(spec_tag_type, Some(TagDataType::UnsignedInt) | Some(TagDataType::Integer) | Some(TagDataType::Float)) && size > 8 {
295            return Err(TagIteratorError::CorruptedFileData(CorruptedFileError::InvalidTagData{tag_id, position: self.current_offset() }));
296        }
297
298        let size = EBMLSize::new(size, size_len);
299
300        let header_len = id_len + size_len;
301
302        if (self.allowed_errors & INVALID_TAG_ID_ERROR == 0) && spec_tag_type.is_none() {
303            return Err(TagIteratorError::CorruptedFileData(CorruptedFileError::InvalidTagId{tag_id, position: self.current_offset() }));
304        }
305        
306        if (self.allowed_errors & INVALID_HIERARCHY_ERROR == 0) && spec_tag_type.is_some() {
307            // Do not run check for raw tags    ^^^^^^^^^^^^^^^^^^^^^^^
308            if !self.has_determined_doc_path {
309                //Trust that the first tag in the stream is valid (like if the read stream was seeked to this location)
310                let path = <TSpec>::get_path_by_id(tag_id);
311                if path.iter().all(|p| matches!(p, PathPart::Id(_))) {
312                    //We only know the current path if we read a tag that is non-global
313                    self.tag_stack = path.iter().map(|id| {
314                        match id {
315                            PathPart::Id(id) => {
316                                ProcessingTag { 
317                                    tag: <TSpec>::get_master_tag(*id, Master::Start).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was in path, but could not get master tag!", id)),
318                                    size: EBMLSize::Unknown,
319                                    tag_start: 0,
320                                    data_start: 0,
321                                }
322                            },
323                            PathPart::Global(_) => unreachable!()
324                        }
325                    }).collect();
326                    self.has_determined_doc_path = true;
327                }
328            }
329            if self.has_determined_doc_path && !self.validate_tag_path(tag_id) {
330                return Err(TagIteratorError::CorruptedFileData(CorruptedFileError::HierarchyError { found_tag_id: tag_id, current_parent_id: self.tag_stack.last().map(|tag| tag.tag.get_id()) }));
331            }
332        }
333
334        if (self.allowed_errors & OVERSIZED_CHILD_ERROR == 0) && size.is_known() && self.is_invalid_tag_size(header_len + size.value()) {
335            return Err(TagIteratorError::CorruptedFileData(CorruptedFileError::OversizedChildElement{ position: self.current_offset(), tag_id, size: size.value()}));
336        }
337
338        if let Some(max_size) = self.max_allowed_tag_size {
339            if size.is_known() && size.value() > max_size {
340                return Err(TagIteratorError::CorruptedFileData(CorruptedFileError::InvalidTagSize { position: self.current_offset(), tag_id, size: size.value() }));
341            }
342        }
343
344        Ok((tag_id, spec_tag_type, size, header_len))
345    }
346
347    #[inline(always)]
348    fn read_valid_tag_header(&mut self) -> Result<(u64, Option<TagDataType>, EBMLSize), TagIteratorError> {
349        let (tag_id, spec_tag_type, size, header_len) = self.peek_valid_tag_header()?;
350            
351        self.internal_buffer_position += header_len;
352        Ok((tag_id, spec_tag_type, size))
353    }
354
355    fn read_tag_data(&mut self, size: usize) -> Result<Option<&[u8]>, TagIteratorError> {
356        self.ensure_capacity(size);
357        if !self.ensure_data_read(size)? {
358            return Ok(None);
359        }
360
361        self.internal_buffer_position += size;
362        Ok(Some(&self.buffer[(self.internal_buffer_position-size)..self.internal_buffer_position]))
363    }
364
365    fn read_tag(&mut self) -> Result<ProcessingTag<TSpec>, TagIteratorError> {
366        let tag_start = self.current_offset();
367
368        let (tag_id, spec_tag_type, size) = self.read_valid_tag_header()?;
369
370        let data_start = self.current_offset();
371        let raw_data = if matches!(spec_tag_type, Some(TagDataType::Master)) {
372            &[]
373        } else if let Known(size) = size {
374            if let Some(data) = self.read_tag_data(size)? {
375                data
376            } else {
377                return Err(TagIteratorError::UnexpectedEOF { tag_start, tag_id: Some(tag_id), tag_size: Some(size), partial_data: Some(self.buffer[self.internal_buffer_position..].to_vec()) });
378            }
379        } else {
380            return Err(TagIteratorError::CorruptedFileData(CorruptedFileError::InvalidTagData{ tag_id, position: tag_start }));
381        };
382
383        let tag = match spec_tag_type {
384            Some(TagDataType::Master) => {
385                TSpec::get_master_tag(tag_id, Master::Start).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was master, but could not get tag!", tag_id))
386            },
387            Some(TagDataType::UnsignedInt) => {
388                let val = tools::arr_to_u64(raw_data).map_err(|e| TagIteratorError::CorruptedTagData{ tag_id, problem: e })?;
389                TSpec::get_unsigned_int_tag(tag_id, val).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was unsigned int, but could not get tag!", tag_id))
390            },
391            Some(TagDataType::Integer) => {
392                let val = tools::arr_to_i64(raw_data).map_err(|e| TagIteratorError::CorruptedTagData{ tag_id, problem: e })?;
393                TSpec::get_signed_int_tag(tag_id, val).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was integer, but could not get tag!", tag_id))
394            },
395            Some(TagDataType::Utf8) => {
396                let val = String::from_utf8(raw_data.to_vec()).map_err(|e| TagIteratorError::CorruptedTagData{ tag_id, problem: ToolError::FromUtf8Error(raw_data.to_vec(), e) })?;
397                TSpec::get_utf8_tag(tag_id, val).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was utf8, but could not get tag!", tag_id))
398            },
399            Some(TagDataType::Binary) => {
400                TSpec::get_binary_tag(tag_id, raw_data).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was binary, but could not get tag!", tag_id))
401            },
402            Some(TagDataType::Float) => {
403                let val = tools::arr_to_f64(raw_data).map_err(|e| TagIteratorError::CorruptedTagData{ tag_id, problem: e })?;
404                TSpec::get_float_tag(tag_id, val).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was float, but could not get tag!", tag_id))
405            },
406            None => {
407                TSpec::get_raw_tag(tag_id, raw_data)
408            }
409        };
410
411        Ok(ProcessingTag { tag, size, tag_start, data_start })
412    }
413
414    fn read_tag_checked(&mut self) -> Option<Result<ProcessingTag<TSpec>, TagIteratorError>> {
415        if self.internal_buffer_position == self.buffered_byte_length {
416            //If we've already consumed the entire internal buffer
417            //ensure there is nothing else in the data source before returning `None`
418            let read_result = self.ensure_data_read(1);
419            match read_result {
420                Err(err) => return Some(Err(err)),
421                Ok(data_remaining) => {
422                    if !data_remaining {
423                        return None;
424                    }
425                 }
426            }
427        }
428
429        if self.internal_buffer_position > self.buffered_byte_length {
430            panic!("read position exceeded buffer length");
431        }
432
433        Some(self.read_tag())
434    }
435
436    fn read_next(&mut self) {
437        //If we have reached the known end of any open master tags, queue that tag and all children to emit ends
438        let ended_tag_index = self.tag_stack.iter().position(|tag| matches!(tag.size, Known(size) if self.current_offset() >= tag.data_start + size));
439        if let Some(index) = ended_tag_index {
440            self.emission_queue.extend(self.tag_stack.drain(index..).map(|t| Ok((t.tag, t.tag_start))).rev());
441        }
442
443        if let Some(next_read) = self.read_tag_checked() {
444            if let Ok(next_tag) = &next_read {
445                while matches!(self.tag_stack.last(), Some(open_tag) if open_tag.size == Unknown) {
446                    let open_tag = self.tag_stack.last().unwrap();
447                    let previous_tag_ended = open_tag.is_ended_by(next_tag.tag.get_id());
448        
449                    if previous_tag_ended {
450                        let t = self.tag_stack.pop().unwrap();
451                        self.emission_queue.push_back(Ok((t.tag, t.tag_start)));
452                    } else {
453                        break;
454                    }
455                }
456
457                if let Some(Master::Start) = next_tag.tag.as_master() {
458                    let tag_id = next_tag.tag.get_id();
459
460                    self.tag_stack.push(ProcessingTag {
461                        tag: TSpec::get_master_tag(tag_id, Master::End).unwrap(),
462                        size: next_tag.size,
463                        tag_start: next_tag.tag_start,
464                        data_start: next_tag.data_start,
465                    });
466
467                    if self.tag_ids_to_buffer.contains(&tag_id) {
468                        self.buffer_master(tag_id);
469                        return;
470                    }
471                }
472            }
473
474            self.emission_queue.push_back(next_read.map(|r| (r.tag, r.tag_start)));
475        } else if self.emit_master_end_when_eof {
476            while let Some(tag) = self.tag_stack.pop() {
477                self.emission_queue.push_back(Ok((tag.tag, tag.tag_start)));
478            }
479        }
480    }
481
482    fn buffer_master(&mut self, tag_id: u64) {
483        let tag_start = self.current_offset();
484        let pre_queue_len = self.emission_queue.len();
485
486        let mut position = pre_queue_len;
487        'endTagSearch: loop {
488            if position >= self.emission_queue.len() {
489                self.read_next();
490    
491                if position >= self.emission_queue.len() {
492                    self.emission_queue.push_back(Err(TagIteratorError::UnexpectedEOF{ tag_start, tag_id: Some(tag_id), tag_size: None, partial_data: None }));
493                    return;
494                }
495            }
496
497            while position < self.emission_queue.len() {
498                if let Some(r) = self.emission_queue.get(position) {
499                    match r {
500                        Err(_) => break 'endTagSearch,
501                        Ok(t) => {
502                            if t.0.get_id() == tag_id && matches!(t.0.as_master(), Some(Master::End)) {
503                                break 'endTagSearch;
504                            }
505                        }
506                    }
507                }
508                position += 1;
509            }
510        }
511
512        let mut children = self.emission_queue.split_off(pre_queue_len);
513        let split_to = position - pre_queue_len;
514        if children.get(split_to).unwrap().is_ok() {
515            let remaining = children.split_off(split_to).into_iter().skip(1);
516            let full_tag = Self::roll_up_children(tag_id, children.into_iter().map(|c| c.unwrap().0).collect());
517            self.emission_queue.push_back(Ok((full_tag, tag_start)));
518            self.emission_queue.extend(remaining);
519        } else {
520            self.emission_queue.extend(children.drain(split_to..).take(1));
521        }
522    }
523
524    fn roll_up_children(tag_id: u64, children: Vec<TSpec>) -> TSpec {
525        let mut rolled_children = Vec::new();
526
527        let mut iter = children.into_iter();
528        while let Some(child) = iter.next() {
529            if let Some(Master::Start) = child.as_master() {
530                let child_id = child.get_id();
531                let subchildren = iter.by_ref().take_while(|c| !matches!(c.as_master(), Some(Master::End)) || c.get_id() != child_id).collect();
532                rolled_children.push(Self::roll_up_children(child_id, subchildren));
533            } else {
534                rolled_children.push(child);
535            }
536        }
537
538        TSpec::get_master_tag(tag_id, Master::Full(rolled_children)).unwrap_or_else(|| panic!("Bad specification implementation: Tag id 0x{:x?} type was master, but could not get tag!", tag_id))
539    }
540
541    #[inline(always)]
542    fn validate_tag_path(&self, tag_id: u64) -> bool {
543        validate_tag_path::<TSpec>(tag_id, self.tag_stack.iter().map(|p| (p.tag.get_id(), p.size, 0)))
544    }
545
546    #[inline(always)]
547    fn is_invalid_tag_size(&self, size: usize) -> bool {
548        self.tag_stack.iter().filter(|p| p.size.is_known()).any(|t| {
549            (t.data_start + t.size.value()) < (self.current_offset() + size)
550        })
551    }
552}
553
554impl<R: Read, TSpec> Iterator for TagIterator<R, TSpec>
555    where TSpec: EbmlSpecification<TSpec> + EbmlTag<TSpec> + Clone
556{
557    type Item = Result<TSpec, TagIteratorError>;
558
559    fn next(&mut self) -> Option<Self::Item> {
560        if self.emission_queue.is_empty() {
561            self.read_next();
562        }
563        let next_item = self.emission_queue.pop_front();
564        if let Some(Ok(ref tuple)) = next_item {
565            self.last_emitted_tag_offset = tuple.1;
566        }
567        next_item.map(|r| r.map(|t| t.0))
568    }
569}
ebml_iterable/tag_iterator.rs

ebml_iterable/
tag_iterator.rs