dicom_parser/dataset/
lazy_read.rs

1//! This module contains a mid-level abstraction for reading DICOM content
2//! sequentially and in a lazy fashion.
3//! That is, unlike the reader in the [`read`](super::read) module,
4//! DICOM values can be skipped and most allocations can be avoided.
5//!
6//! At this level, headers and values are treated as tokens which can be used
7//! to form a syntax tree of a full data set.
8//! Whenever an element value or pixel sequence item is encountered,
9//! the given token does not consume the value from the reader,
10//! thus letting users decide whether to:
11//! - fully read the value and turn it into an in-memory representation;
12//! - skip the value altogether, by reading into a sink;
//! - copy the bytes of the value into another writer,
14//!   such as a previously allocated buffer.
15use crate::dataset::read::OddLengthStrategy;
16use crate::stateful::decode::{
17    CharacterSetOverride, DynStatefulDecoder, Error as DecoderError, StatefulDecode,
18};
19use crate::util::ReadSeek;
20use dicom_core::header::{DataElementHeader, Header, Length, SequenceItemHeader};
21use dicom_core::{Tag, VR};
22use dicom_encoding::text::SpecificCharacterSet;
23use dicom_encoding::transfer_syntax::TransferSyntax;
24use snafu::{Backtrace, OptionExt, ResultExt, Snafu};
25use std::cmp::Ordering;
26
27use super::{DataToken, LazyDataToken, SeqTokenType};
28
29#[derive(Debug, Snafu)]
30#[non_exhaustive]
31pub enum Error {
32    #[snafu(display("Could not create decoder"))]
33    CreateDecoder {
34        #[snafu(backtrace)]
35        source: DecoderError,
36    },
37    #[snafu(display("Could not read item header at {} bytes", bytes_read))]
38    ReadItemHeader {
39        bytes_read: u64,
40        #[snafu(backtrace)]
41        source: DecoderError,
42    },
43    #[snafu(display("Could not read element header at {} bytes", bytes_read))]
44    ReadHeader {
45        bytes_read: u64,
46        #[snafu(backtrace)]
47        source: DecoderError,
48    },
49    #[snafu(display("Could not read value"))]
50    ReadValue {
51        #[snafu(backtrace)]
52        source: DecoderError,
53    },
54    #[snafu(display("Failed to get reader position"))]
55    GetPosition {
56        source: std::io::Error,
57        backtrace: Backtrace,
58    },
59    #[snafu(display(
60        "Inconsistent sequence end: expected end at {} bytes but read {}",
61        end_of_sequence,
62        bytes_read
63    ))]
64    InconsistentSequenceEnd {
65        end_of_sequence: u64,
66        bytes_read: u64,
67        backtrace: Backtrace,
68    },
69    #[snafu(display("Unexpected item delimiter at {} bytes", bytes_read))]
70    UnexpectedItemDelimiter {
71        bytes_read: u64,
72        backtrace: Backtrace,
73    },
74    #[snafu(display("Unexpected undefined value length at {} bytes", bytes_read))]
75    UndefinedLength {
76        bytes_read: u64,
77        backtrace: Backtrace,
78    },
79
80    /// Invalid data element length {len:04X} of {tag} at {bytes_read:#x}
81    InvalidElementLength {
82        tag: Tag,
83        len: u32,
84        bytes_read: u64,
85        backtrace: Backtrace,
86    },
87
88    /// Invalid sequence item length {len:04X} at {bytes_read:#x}
89    InvalidItemLength {
90        len: u32,
91        bytes_read: u64,
92        backtrace: Backtrace,
93    },
94
95    #[snafu(display("Attempted to inspect a header at {} bytes", bytes_read))]
96    Peek {
97        bytes_read: u64,
98        backtrace: Backtrace,
99    },
100}
101
/// Result alias for this module,
/// with the error type defaulting to this module's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
103
/// A reader-specific token representing a sequence or item start.
///
/// Instances are kept in the reader's delimiter stack,
/// so that the reader knows which sequences and items are still open
/// and, when their length is explicit, where they are expected to end.
#[derive(Debug, Copy, Clone, PartialEq)]
struct SeqToken {
    /// Whether it is the start of a sequence or the start of an item.
    typ: SeqTokenType,
    /// The length of the value, as indicated by the starting element,
    /// can be unknown.
    len: Length,
    /// Whether this sequence token is part of an encapsulated pixel data.
    pixel_data: bool,
    /// The number of bytes the parser has read until it reached the
    /// beginning of the sequence or item value data.
    /// Together with a defined `len`,
    /// this gives the position at which the sequence or item ends.
    base_offset: u64,
}
118
/// An attached iterator for retrieving DICOM object element markers
/// from a random access data source.
///
/// This iterator produces data tokens without eagerly reading the bytes
/// of a value.
/// Value tokens hand out a mutable reference to the inner decoder instead,
/// leaving it to the consumer to read, skip, or copy the value.
#[derive(Debug)]
pub struct LazyDataSetReader<S> {
    /// the stateful decoder
    parser: S,
    /// data set reading options
    options: LazyDataSetReaderOptions,
    /// whether the reader is expecting an item next (or a sequence delimiter)
    in_sequence: bool,
    /// whether a check for a sequence or item delimitation is pending
    /// (only relevant for sequences and items of explicit length)
    delimiter_check_pending: bool,
    /// a stack of delimiters,
    /// one entry per sequence or item which is currently open
    seq_delimiters: Vec<SeqToken>,
    /// fuse the iteration process if true
    hard_break: bool,
    /// last decoded header,
    /// kept until the token for its value (or pixel data content) is produced
    last_header: Option<DataElementHeader>,
    /// if a peek was taken, this holds the token peeked
    peek: Option<DataToken>,
}
143
/// The set of options for the lazy data set reader.
///
/// The type is non-exhaustive:
/// outside of this crate, start from `Default::default()`
/// and assign the public fields as needed.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub struct LazyDataSetReaderOptions {
    /// The strategy for handling odd length data elements
    /// (also applied to the lengths of sequences and items)
    pub odd_length: OddLengthStrategy,

    /// Override for how text is decoded
    pub charset_override: CharacterSetOverride,
}
154
155impl<R> LazyDataSetReader<DynStatefulDecoder<R>> {
156    /// Create a new lazy data set reader
157    /// expecting the given transfer syntax
158    /// that reads from the given random access source.
159    #[inline]
160    pub fn new_with_ts(source: R, ts: &TransferSyntax) -> Result<Self>
161    where
162        R: ReadSeek,
163    {
164        Self::new_with_ts_cs(source, ts, SpecificCharacterSet::default())
165    }
166
167    /// Create a new lazy data set reader
168    /// with the given random access source and element dictionary,
169    /// while considering the given transfer syntax and specific character set.
170    #[inline]
171    pub fn new_with_ts_cs(source: R, ts: &TransferSyntax, cs: SpecificCharacterSet) -> Result<Self>
172    where
173        R: ReadSeek,
174    {
175        Self::new_with_ts_cs_options(source, ts, cs, Default::default())
176    }
177
178    /// Create a new lazy data set reader
179    /// expecting the given transfer syntax
180    /// that reads from the given random access source,
181    /// with extra parsing options.
182    #[inline]
183    pub fn new_with_ts_options(
184        source: R,
185        ts: &TransferSyntax,
186        options: LazyDataSetReaderOptions,
187    ) -> Result<Self>
188    where
189        R: ReadSeek,
190    {
191        Self::new_with_ts_cs_options(source, ts, SpecificCharacterSet::default(), options)
192    }
193
194    /// Create a new lazy data set reader
195    /// with the given random access source and element dictionary,
196    /// while considering the given transfer syntax and specific character set.
197    pub fn new_with_ts_cs_options(
198        mut source: R,
199        ts: &TransferSyntax,
200        cs: SpecificCharacterSet,
201        options: LazyDataSetReaderOptions,
202    ) -> Result<Self>
203    where
204        R: ReadSeek,
205    {
206        let position = source.stream_position().context(GetPositionSnafu)?;
207        let parser = DynStatefulDecoder::new_with_override(
208            source,
209            ts,
210            cs,
211            options.charset_override,
212            position,
213        )
214        .context(CreateDecoderSnafu)?;
215
216        Ok(LazyDataSetReader {
217            parser,
218            options,
219            seq_delimiters: Vec::new(),
220            delimiter_check_pending: false,
221            in_sequence: false,
222            hard_break: false,
223            last_header: None,
224            peek: None,
225        })
226    }
227}
228
229impl<S> LazyDataSetReader<S>
230where
231    S: StatefulDecode,
232{
233    /// Create a new iterator with the given stateful decoder.
234    #[inline]
235    pub fn new(parser: S) -> Self {
236        LazyDataSetReader::new_with_options(parser, Default::default())
237    }
238
239    /// Create a new lazy data set reader
240    /// using the given stateful decoder,
241    /// with extra parsing options.
242    pub fn new_with_options(parser: S, options: LazyDataSetReaderOptions) -> Self
243    where
244        S: StatefulDecode,
245    {
246        LazyDataSetReader {
247            parser,
248            options,
249            seq_delimiters: Vec::new(),
250            delimiter_check_pending: false,
251            in_sequence: false,
252            hard_break: false,
253            last_header: None,
254            peek: None,
255        }
256    }
257}
258
259impl<S> LazyDataSetReader<S>
260where
261    S: StatefulDecode,
262{
263    fn update_seq_delimiters<'b>(&mut self) -> Result<Option<LazyDataToken<&'b mut S>>> {
264        if let Some(sd) = self.seq_delimiters.last() {
265            if let Some(len) = sd.len.get() {
266                let end_of_sequence = sd.base_offset + len as u64;
267                let bytes_read = self.parser.position();
268                match end_of_sequence.cmp(&bytes_read) {
269                    Ordering::Equal => {
270                        // end of delimiter, as indicated by the element's length
271                        let token;
272                        match sd.typ {
273                            SeqTokenType::Sequence => {
274                                self.in_sequence = false;
275                                token = LazyDataToken::SequenceEnd;
276                            }
277                            SeqTokenType::Item => {
278                                self.in_sequence = true;
279                                token = LazyDataToken::ItemEnd;
280                            }
281                        }
282                        self.seq_delimiters.pop();
283                        return Ok(Some(token));
284                    }
285                    Ordering::Less => {
286                        return InconsistentSequenceEndSnafu {
287                            end_of_sequence,
288                            bytes_read,
289                        }
290                        .fail();
291                    }
292                    Ordering::Greater => {} // continue normally
293                }
294            }
295        }
296        self.delimiter_check_pending = false;
297        Ok(None)
298    }
299
300    #[inline]
301    fn push_sequence_token(&mut self, typ: SeqTokenType, len: Length, pixel_data: bool) {
302        self.seq_delimiters.push(SeqToken {
303            typ,
304            pixel_data,
305            len,
306            base_offset: self.parser.position(),
307        })
308    }
309
310    /// Retrieve the inner stateful decoder from this data set reader.
311    pub fn into_decoder(self) -> S {
312        self.parser
313    }
314
315    /// Advance and retrieve the next DICOM data token.
316    ///
317    /// **Note:** For the data set to be successfully parsed,
318    /// the resulting data tokens needs to be consumed
319    /// if they are of a value type.
320    pub fn advance(&mut self) -> Option<Result<LazyDataToken<&mut S>>> {
321        if self.hard_break {
322            return None;
323        }
324
325        // if there was a peek, consume peeked token
326        if let Some(peek) = self.peek.take() {
327            let token = match peek {
328                DataToken::ElementHeader(header) => LazyDataToken::ElementHeader(header),
329                DataToken::SequenceStart { tag, len } => LazyDataToken::SequenceStart { tag, len },
330                DataToken::ItemStart { len } => LazyDataToken::ItemStart { len },
331                DataToken::ItemEnd => LazyDataToken::ItemEnd,
332                DataToken::SequenceEnd => LazyDataToken::SequenceEnd,
333                _ => unreachable!("peeked token should not be a value token"),
334            };
335            return Some(Ok(token));
336        }
337
338        // record the reading position before any further reading
339        let bytes_read = self.parser.position();
340
341        // item or sequence delimitation logic for explicit lengths
342        if self.delimiter_check_pending {
343            match self.update_seq_delimiters() {
344                Err(e) => {
345                    self.hard_break = true;
346                    return Some(Err(e));
347                }
348                Ok(Some(token)) => return Some(Ok(token)),
349                Ok(None) => { /* no-op */ }
350            }
351        }
352
353        if self.in_sequence {
354            // at sequence level, expecting item header
355
356            match self.parser.decode_item_header() {
357                Ok(header) => {
358                    match header {
359                        SequenceItemHeader::Item { len } => {
360                            // sanitize length
361                            let Some(len) = self.sanitize_length(len) else {
362                                return Some(
363                                    InvalidItemLengthSnafu {
364                                        len: len.0,
365                                        bytes_read: self.parser.position(),
366                                    }
367                                    .fail(),
368                                );
369                            };
370
371                            // entered a new item
372                            self.in_sequence = false;
373                            self.push_sequence_token(
374                                SeqTokenType::Item,
375                                len,
376                                self.seq_delimiters.last()
377                                    .expect("item header should be read only inside an existing sequence")
378                                    .pixel_data);
379                            // items can be empty
380                            if len == Length(0) {
381                                self.delimiter_check_pending = true;
382                            }
383                            Some(Ok(LazyDataToken::ItemStart { len }))
384                        }
385                        SequenceItemHeader::ItemDelimiter => {
386                            // closed an item
387                            self.seq_delimiters.pop();
388                            self.in_sequence = true;
389                            // sequences can end after an item delimiter
390                            self.delimiter_check_pending = true;
391                            Some(Ok(LazyDataToken::ItemEnd))
392                        }
393                        SequenceItemHeader::SequenceDelimiter => {
394                            // closed a sequence
395                            self.seq_delimiters.pop();
396                            self.in_sequence = false;
397                            // items can end after a nested sequence ends
398                            self.delimiter_check_pending = true;
399                            Some(Ok(LazyDataToken::SequenceEnd))
400                        }
401                    }
402                }
403                Err(e) => {
404                    self.hard_break = true;
405                    Some(Err(e).context(ReadItemHeaderSnafu { bytes_read }))
406                }
407            }
408        } else if let Some(SeqToken {
409            typ: SeqTokenType::Item,
410            pixel_data: true,
411            len,
412            ..
413        }) = self.seq_delimiters.last()
414        {
415            // item value
416
417            let Some(len) = self.sanitize_length(*len) else {
418                return Some(
419                    InvalidItemLengthSnafu {
420                        len: len.0,
421                        bytes_read: self.parser.position(),
422                    }
423                    .fail(),
424                );
425            };
426
427            let len = match len
428                .get()
429                .with_context(|| UndefinedLengthSnafu { bytes_read })
430            {
431                Ok(len) => len,
432                Err(e) => return Some(Err(e)),
433            };
434
435            // need to pop item delimiter on the next iteration
436            self.delimiter_check_pending = true;
437            Some(Ok(LazyDataToken::LazyItemValue {
438                len,
439                decoder: &mut self.parser,
440            }))
441        } else if let Some(header) = self.last_header {
442            if header.is_encapsulated_pixeldata() {
443                self.push_sequence_token(SeqTokenType::Sequence, Length::UNDEFINED, true);
444                self.last_header = None;
445
446                // encapsulated pixel data, expecting offset table
447                match self.parser.decode_item_header() {
448                    Ok(header) => match header {
449                        SequenceItemHeader::Item { len } => {
450                            // sanitize length
451                            let Some(len) = self.sanitize_length(len) else {
452                                return Some(
453                                    InvalidItemLengthSnafu {
454                                        len: len.0,
455                                        bytes_read: self.parser.position(),
456                                    }
457                                    .fail(),
458                                );
459                            };
460
461                            // entered a new item
462                            self.in_sequence = false;
463                            self.push_sequence_token(SeqTokenType::Item, len, true);
464                            // items can be empty
465                            if len == Length(0) {
466                                self.delimiter_check_pending = true;
467                            }
468                            Some(Ok(LazyDataToken::ItemStart { len }))
469                        }
470                        SequenceItemHeader::SequenceDelimiter => {
471                            // empty pixel data
472                            self.seq_delimiters.pop();
473                            self.in_sequence = false;
474                            Some(Ok(LazyDataToken::SequenceEnd))
475                        }
476                        SequenceItemHeader::ItemDelimiter => {
477                            self.hard_break = true;
478                            Some(UnexpectedItemDelimiterSnafu { bytes_read }.fail())
479                        }
480                    },
481                    Err(e) => {
482                        self.hard_break = true;
483                        Some(Err(e).context(ReadItemHeaderSnafu { bytes_read }))
484                    }
485                }
486            } else {
487                // a plain element header was read, so an element value is expected
488                self.last_header = None;
489
490                // sequences can end after this token
491                self.delimiter_check_pending = true;
492
493                Some(Ok(LazyDataToken::LazyValue {
494                    header,
495                    decoder: &mut self.parser,
496                }))
497            }
498        } else {
499            // a data element header or item delimiter is expected
500            match self.parser.decode_header() {
501                Ok(DataElementHeader {
502                    tag,
503                    vr: VR::SQ,
504                    len,
505                }) => {
506                    let Some(len) = self.sanitize_length(len) else {
507                        return Some(
508                            InvalidElementLengthSnafu {
509                                tag,
510                                len: len.0,
511                                bytes_read: self.parser.position(),
512                            }
513                            .fail(),
514                        );
515                    };
516
517                    self.in_sequence = true;
518                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
519
520                    // sequences can end right after they start
521                    if len == Length(0) {
522                        self.delimiter_check_pending = true;
523                    }
524
525                    Some(Ok(LazyDataToken::SequenceStart { tag, len }))
526                }
527                Ok(DataElementHeader {
528                    tag: Tag(0xFFFE, 0xE00D),
529                    ..
530                }) => {
531                    self.in_sequence = true;
532                    // pop item delimiter
533                    self.seq_delimiters.pop();
534                    // sequences can end after this token
535                    self.delimiter_check_pending = true;
536                    Some(Ok(LazyDataToken::ItemEnd))
537                }
538                Ok(header) if header.is_encapsulated_pixeldata() => {
539                    // encapsulated pixel data conditions:
540                    // expect a sequence of pixel data fragments
541
542                    // save it for the next step
543                    self.last_header = Some(header);
544                    Some(Ok(LazyDataToken::PixelSequenceStart))
545                }
546                Ok(header) if header.len.is_undefined() => {
547                    // treat other undefined length elements
548                    // as data set sequences,
549                    // discarding the VR in the process
550                    self.in_sequence = true;
551
552                    let DataElementHeader { tag, len, .. } = header;
553                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
554
555                    Some(Ok(LazyDataToken::SequenceStart { tag, len }))
556                }
557                Ok(mut header) => {
558                    // sanitize length
559                    let Some(len) = self.sanitize_length(header.len) else {
560                        return Some(
561                            InvalidElementLengthSnafu {
562                                tag: header.tag,
563                                len: header.len.0,
564                                bytes_read: self.parser.position(),
565                            }
566                            .fail(),
567                        );
568                    };
569                    header.len = len;
570
571                    // save it for the next step
572                    self.last_header = Some(header);
573                    Some(Ok(LazyDataToken::ElementHeader(header)))
574                }
575                Err(DecoderError::DecodeElementHeader {
576                    source: dicom_encoding::decode::Error::ReadHeaderTag { source, .. },
577                    ..
578                }) if source.kind() == std::io::ErrorKind::UnexpectedEof => {
579                    // Note: if `UnexpectedEof` was reached while trying to read
580                    // an element tag, then we assume that
581                    // the end of a DICOM object was reached gracefully.
582                    // This approach is unlikely to consume trailing bytes,
583                    // but may ignore the current depth of the data set tree.
584                    self.hard_break = true;
585                    None
586                }
587                Err(e) => {
588                    self.hard_break = true;
589                    Some(Err(e).context(ReadHeaderSnafu { bytes_read }))
590                }
591            }
592        }
593    }
594
595    /// Peek the next token from the source by
596    /// reading a new token in the first call.
597    /// Subsequent calls to `peek` will return the same token
598    /// until another consumer method is called.
599    ///
600    /// Peeking only works in a data or item element boundary,
601    /// so the returned data token is either an element header or an item header.
602    /// At the moment, a failed peek will result in a hard break,
603    /// preventing further iteration.
604    pub fn peek(&mut self) -> Result<Option<&DataToken>> {
605        if self.peek.is_none() {
606            // try to read the next token
607            match self.advance() {
608                None => return Ok(None),
609                Some(Err(e)) => return Err(e),
610                Some(Ok(token)) => match token {
611                    LazyDataToken::ElementHeader(header) => {
612                        self.peek = Some(DataToken::ElementHeader(header));
613                    }
614                    LazyDataToken::SequenceStart { tag, len } => {
615                        self.peek = Some(DataToken::SequenceStart { tag, len });
616                    }
617                    LazyDataToken::ItemStart { len } => {
618                        self.peek = Some(DataToken::ItemStart { len });
619                    }
620                    LazyDataToken::ItemEnd => {
621                        self.peek = Some(DataToken::ItemEnd);
622                    }
623                    LazyDataToken::SequenceEnd => {
624                        self.peek = Some(DataToken::SequenceEnd);
625                    }
626                    _ => {
627                        self.hard_break = true;
628                        return PeekSnafu {
629                            bytes_read: self.parser.position(),
630                        }
631                        .fail();
632                    }
633                },
634            }
635        }
636        Ok(self.peek.as_ref())
637    }
638
639    /// Check for a non-compliant length
640    /// and handle it according to the current strategy.
641    /// Returns `None` if the length cannot or should not be resolved.
642    fn sanitize_length(&self, length: Length) -> Option<Length> {
643        if length.is_defined() && length.0 & 1 != 0 {
644            match self.options.odd_length {
645                OddLengthStrategy::Accept => Some(length),
646                OddLengthStrategy::NextEven => Some(length + 1),
647                OddLengthStrategy::Fail => None,
648            }
649        } else {
650            Some(length)
651        }
652    }
653}
654
655#[cfg(test)]
656mod tests {
657    use super::{LazyDataSetReader, StatefulDecode};
658    use crate::{
659        dataset::{
660            lazy_read::LazyDataSetReaderOptions, read::OddLengthStrategy, DataToken, LazyDataToken,
661        },
662        StatefulDecoder,
663    };
664    use dicom_core::value::PrimitiveValue;
665    use dicom_core::{
666        dicom_value,
667        header::{DataElementHeader, Length},
668    };
669    use dicom_core::{Tag, VR};
670    use dicom_encoding::decode::{
671        explicit_le::ExplicitVRLittleEndianDecoder, implicit_le::ImplicitVRLittleEndianDecoder,
672    };
673    use dicom_encoding::{decode::basic::LittleEndianBasicDecoder, text::SpecificCharacterSet};
674
675    fn validate_dataset_reader_implicit_vr<I>(data: &[u8], ground_truth: I)
676    where
677        I: IntoIterator<Item = DataToken>,
678    {
679        let mut cursor = data;
680        let parser = StatefulDecoder::new(
681            &mut cursor,
682            ImplicitVRLittleEndianDecoder::default(),
683            LittleEndianBasicDecoder,
684            SpecificCharacterSet::default(),
685        );
686
687        validate_dataset_reader(data, parser, ground_truth)
688    }
689
690    fn validate_dataset_reader_explicit_vr<I>(data: &[u8], ground_truth: I)
691    where
692        I: IntoIterator<Item = DataToken>,
693    {
694        let mut cursor = data;
695        let parser = StatefulDecoder::new(
696            &mut cursor,
697            ExplicitVRLittleEndianDecoder::default(),
698            LittleEndianBasicDecoder,
699            SpecificCharacterSet::default(),
700        );
701
702        validate_dataset_reader(data, parser, ground_truth)
703    }
704
705    fn validate_dataset_reader<I, D>(data: &[u8], parser: D, ground_truth: I)
706    where
707        I: IntoIterator<Item = DataToken>,
708        D: StatefulDecode,
709    {
710        let mut dset_reader = LazyDataSetReader::new(parser);
711
712        let mut gt_iter = ground_truth.into_iter();
713        while let Some(res) = dset_reader.advance() {
714            let gt_token = gt_iter.next().expect("ground truth is shorter");
715            let token = res.expect("should parse without an error");
716            let token = token.into_owned().unwrap();
717            assert_eq!(token, gt_token);
718        }
719
720        assert_eq!(
721            gt_iter.count(), // consume til the end
722            0,               // we have already read all of them
723            "unexpected number of tokens remaining"
724        );
725        assert_eq!(dset_reader.parser.position(), data.len() as u64);
726    }
727
    /// Read a sequence of explicit length holding two items of explicit length,
    /// followed by a plain text element, in Explicit VR Little Endian.
    /// The item and sequence end tokens are expected
    /// even though the data has no delimiter elements.
    #[test]
    fn lazy_read_sequence_explicit() {
        #[rustfmt::skip]
        static DATA: &[u8] = &[
            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
            b'S', b'Q', // VR
            0x00, 0x00, // reserved
            0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (#= 2)
            // -- 12 --
            0xfe, 0xff, 0x00, 0xe0, // item start tag
            0x14, 0x00, 0x00, 0x00, // item length: 20 (#= 2)
            // -- 20 --
            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018,6012) RegionSpatialFormat, len = 2, value = 1
            // -- 30 --
            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018,6014) RegionDataType, len = 2, value = 2
            // -- 40 --
            0xfe, 0xff, 0x00, 0xe0, // item start tag
            0x0a, 0x00, 0x00, 0x00, // item length: 10 (#= 1)
            // -- 48 --
            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018,6012) RegionSpatialFormat, len = 2, value = 4
            // -- 58 --
            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
            b'T', b'E', b'S', b'T', // value = "TEST"
        ];

        let ground_truth = vec![
            DataToken::SequenceStart {
                tag: Tag(0x0018, 0x6011),
                len: Length(46),
            },
            DataToken::ItemStart { len: Length(20) },
            DataToken::ElementHeader(DataElementHeader {
                tag: Tag(0x0018, 0x6012),
                vr: VR::US,
                len: Length(2),
            }),
            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
            DataToken::ElementHeader(DataElementHeader {
                tag: Tag(0x0018, 0x6014),
                vr: VR::US,
                len: Length(2),
            }),
            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
            // the first item ends at its explicit length (no delimiter in the data)
            DataToken::ItemEnd,
            DataToken::ItemStart { len: Length(10) },
            DataToken::ElementHeader(DataElementHeader {
                tag: Tag(0x0018, 0x6012),
                vr: VR::US,
                len: Length(2),
            }),
            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
            // the second item and the sequence end at the same position
            DataToken::ItemEnd,
            DataToken::SequenceEnd,
            DataToken::ElementHeader(DataElementHeader {
                tag: Tag(0x0020, 0x4000),
                vr: VR::LT,
                len: Length(4),
            }),
            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
        ];

        validate_dataset_reader_explicit_vr(DATA, ground_truth);
    }
791
/// Explicit VR data set made of a defined-length sequence with a single
/// defined-length item (three text elements), followed by an empty sequence
/// and one trailing text element.
///
/// The expected token stream is handed to
/// `validate_dataset_reader_explicit_vr` together with the encoded bytes.
#[test]
fn lazy_read_sequence_explicit_2() {
    #[rustfmt::skip]
    static ENCODED: &[u8] = &[
        // (0008,2218) SQ, reserved, sequence length = 54
        0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
        // -- 12 -- item start, item length = 46
        0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
        // -- 20 -- (0008,0100) CodeValue, SH, len = 8
        0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00,
        0x54, 0x2d, 0x44, 0x31, 0x32, 0x31, 0x33, b' ', // "T-D1213 "
        // -- 36 -- (0008,0102) CodingSchemeDesignator, SH, len = 4
        0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00,
        0x53, 0x52, 0x54, b' ', // "SRT "
        // -- 48 -- (0008,0104) CodeMeaning, LO, len = 10
        0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00,
        0x4a, 0x61, 0x77, b' ', 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, // "Jaw region"
        // -- 66 -- (0040,0555) AcquisitionContextSequence, SQ, len = 0
        0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // -- 78 -- (2050,0020) PresentationLUTShape, CS, len = 8
        0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00,
        b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
    ];

    // shorthand constructors for the token shapes repeated below
    let header = |tag: Tag, vr: VR, len: Length| {
        DataToken::ElementHeader(DataElementHeader::new(tag, vr, len))
    };
    let text = |s: &str| {
        DataToken::PrimitiveValue(PrimitiveValue::Strs([s.to_owned()].as_ref().into()))
    };

    let expected = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0008, 0x2218),
            len: Length(54),
        },
        DataToken::ItemStart { len: Length(46) },
        header(Tag(0x0008, 0x0100), VR::SH, Length(8)),
        text("T-D1213 "),
        header(Tag(0x0008, 0x0102), VR::SH, Length(4)),
        text("SRT "),
        header(Tag(0x0008, 0x0104), VR::LO, Length(10)),
        text("Jaw region"),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        // the empty sequence still yields a start/end pair
        DataToken::SequenceStart {
            tag: Tag(0x0040, 0x0555),
            len: Length(0),
        },
        DataToken::SequenceEnd,
        header(Tag(0x2050, 0x0020), VR::CS, Length(8)),
        text("IDENTITY"),
    ];

    validate_dataset_reader_explicit_vr(ENCODED, expected);
}
867
/// Sequence and items of *undefined* (implicit) length, closed by explicit
/// item/sequence delimiters, followed by one text element.
///
/// Note that the bytes are encoded with explicit VR (each element carries
/// its VR code) and are checked through
/// `validate_dataset_reader_explicit_vr`; "implicit" in the test name
/// refers to the lengths.
#[test]
fn lazy_read_sequence_implicit() {
    #[rustfmt::skip]
    static ENCODED: &[u8] = &[
        0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
        b'S', b'Q', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0xff, 0xff, 0xff, 0xff, // item length: undefined
        // -- 20 --
        0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018,6012) RegionSpatialformat, len = 2, value = 1
        // -- 30 --
        0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018,6014) RegionDataType, len = 2, value = 2
        // -- 40 --
        0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
        // -- 48 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0xff, 0xff, 0xff, 0xff, // item length: undefined
        // -- 56 --
        0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018,6012) RegionSpatialformat, len = 2, value = 4
        // -- 66 --
        0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
        // -- 74 --
        0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
        // -- 82 --
        0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
        b'T', b'E', b'S', b'T', // value = "TEST"
    ];

    // shorthand constructors for the token shapes repeated below
    let header = |tag: Tag, vr: VR, len: Length| {
        DataToken::ElementHeader(DataElementHeader::new(tag, vr, len))
    };
    let unsigned_short =
        |value: u16| DataToken::PrimitiveValue(PrimitiveValue::U16([value].as_ref().into()));
    let open_item = || DataToken::ItemStart {
        len: Length::UNDEFINED,
    };

    let expected = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0018, 0x6011),
            len: Length::UNDEFINED,
        },
        // first item: two US elements
        open_item(),
        header(Tag(0x0018, 0x6012), VR::US, Length(2)),
        unsigned_short(1),
        header(Tag(0x0018, 0x6014), VR::US, Length(2)),
        unsigned_short(2),
        DataToken::ItemEnd,
        // second item: one US element
        open_item(),
        header(Tag(0x0018, 0x6012), VR::US, Length(2)),
        unsigned_short(4),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        // element after the sequence
        header(Tag(0x0020, 0x4000), VR::LT, Length(4)),
        DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
    ];

    validate_dataset_reader_explicit_vr(ENCODED, expected);
}
941
/// Implicit VR data with three levels of nested sequences.
///
/// The outermost item declares an explicit length (114 bytes) while every
/// other sequence and item uses undefined length with delimiters.
/// The expected stream therefore contains an `ItemEnd` for the outermost
/// item even though no item delimiter is encoded for it.
#[test]
fn lazy_read_dataset_in_dataset() {
    #[rustfmt::skip]
    const ENCODED: &[u8; 138] = &[
        // -- 0 -- (2001,9000) private sequence, length: undefined
        0x01, 0x20, 0x00, 0x90,
        0xFF, 0xFF, 0xFF, 0xFF,
        // -- 8 -- item start, explicit item length (114 bytes)
        0xFE, 0xFF, 0x00, 0xE0,
        0x72, 0x00, 0x00, 0x00,
        // -- 16 -- (0008,1115) ReferencedSeriesSequence, length: undefined
        0x08, 0x00, 0x15, 0x11,
        0xFF, 0xFF, 0xFF, 0xFF,
        // -- 24 -- item start, item length: undefined
        0xFE, 0xFF, 0x00, 0xE0,
        0xFF, 0xFF, 0xFF, 0xFF,
        // -- 32 -- (0008,1140) ReferencedImageSequence, length: undefined
        0x08, 0x00, 0x40, 0x11,
        0xFF, 0xFF, 0xFF, 0xFF,
        // -- 40 -- item start, item length: undefined
        0xFE, 0xFF, 0x00, 0xE0,
        0xFF, 0xFF, 0xFF, 0xFF,
        // -- 48 -- (0008,1150) ReferencedSOPClassUID, length: 26
        0x08, 0x00, 0x50, 0x11,
        0x1a, 0x00, 0x00, 0x00,
        // -- 56 -- value: "1.2.840.10008.5.1.4.1.1.7\0" (SecondaryCaptureImageStorage)
        b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
        b'5', b'.', b'1', b'.', b'4', b'.', b'1', b'.', b'1', b'.', b'7', b'\0',
        // -- 82 -- item end (ReferencedImageSequence)
        0xFE, 0xFF, 0x0D, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // -- 90 -- sequence end (ReferencedImageSequence)
        0xFE, 0xFF, 0xDD, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // -- 98 -- item end (ReferencedSeriesSequence)
        0xFE, 0xFF, 0x0D, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // -- 106 -- sequence end (ReferencedSeriesSequence)
        0xFE, 0xFF, 0xDD, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // -- 114 -- (2050,0020) PresentationLUTShape (CS), length: 8
        0x50, 0x20, 0x20, 0x00,
        0x08, 0x00, 0x00, 0x00,
        b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
        // -- 130 -- sequence end (private sequence)
        0xFE, 0xFF, 0xDD, 0xE0,
        0x00, 0x00, 0x00, 0x00,
    ];

    // shorthand constructors for the undefined-length openers
    let open_sequence = |tag: Tag| DataToken::SequenceStart {
        tag,
        len: Length::UNDEFINED,
    };
    let open_item = || DataToken::ItemStart {
        len: Length::UNDEFINED,
    };

    let expected = vec![
        open_sequence(Tag(0x2001, 0x9000)),
        DataToken::ItemStart { len: Length(114) },
        open_sequence(Tag(0x0008, 0x1115)),
        open_item(),
        open_sequence(Tag(0x0008, 0x1140)),
        open_item(),
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0008, 0x1150),
            VR::UI,
            Length(26),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.10008.5.1.4.1.1.7\0")),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x2050, 0x0020),
            VR::CS,
            Length(8),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::from("IDENTITY")),
        // no delimiter is encoded for the explicit-length item:
        // this token is inserted automatically
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
    ];

    validate_dataset_reader_implicit_vr(ENCODED, expected);
}
1041
/// Implicit VR data holding a private, unknown attribute of undefined
/// length which contains a single empty item of undefined length.
///
/// The expected stream treats the attribute as a sequence:
/// sequence start, item start, item end, sequence end.
#[test]
fn lazy_read_implicit_len_sequence_implicit_vr_unknown() {
    #[rustfmt::skip]
    static ENCODED: &[u8] = &[
        // -- 0 -- (5533,5533) «private, unknown attribute», length: undefined
        0x33, 0x55, 0x33, 0x55,
        0xff, 0xff, 0xff, 0xff,
        // -- 8 -- item begin, length: undefined
        0xfe, 0xff, 0x00, 0xe0,
        0xff, 0xff, 0xff, 0xff,
        // -- 16 -- item end (length is always zero)
        0xfe, 0xff, 0x0d, 0xe0,
        0x00, 0x00, 0x00, 0x00,
        // -- 24 -- sequence end
        0xfe, 0xff, 0xdd, 0xe0,
        0x00, 0x00, 0x00, 0x00,
        // -- 32 --
    ];

    let undefined = Length::UNDEFINED;
    let expected = vec![
        DataToken::SequenceStart {
            tag: Tag(0x5533, 0x5533),
            len: undefined,
        },
        DataToken::ItemStart { len: undefined },
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
    ];

    validate_dataset_reader_implicit_vr(ENCODED, expected);
}
1074
/// Encapsulated pixel data with an empty basic offset table and a single
/// 32-byte fragment, followed by a trailing padding element.
///
/// The empty offset table item is expected to produce an item start/end
/// pair without an item value token in between.
#[test]
fn read_encapsulated_pixeldata() {
    #[rustfmt::skip]
    static ENCODED: &[u8] = &[
        0xe0, 0x7f, 0x10, 0x00, // (7FE0,0010) PixelData
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 -- basic offset table
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x00, 0x00, 0x00, 0x00, // item length: 0
        // -- 20 -- first fragment of pixel data
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x20, 0x00, 0x00, 0x00, // item length: 32
        // -- 28 -- compressed fragment
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        // -- 60 -- end of pixel data
        0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
        0x00, 0x00, 0x00, 0x00,
        // -- 68 -- padding
        0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0x08, 0x00, 0x00, 0x00, // length: 8
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    let padding_header = DataElementHeader::new(Tag(0xfffc, 0xfffc), VR::OB, Length(8));

    let expected = vec![
        DataToken::PixelSequenceStart,
        // basic offset table (empty)
        DataToken::ItemStart { len: Length(0) },
        DataToken::ItemEnd,
        // the only fragment
        DataToken::ItemStart { len: Length(32) },
        DataToken::ItemValue(vec![0x99; 32]),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        // trailing padding element
        DataToken::ElementHeader(padding_header),
        DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
    ];

    validate_dataset_reader_explicit_vr(ENCODED, expected);
}
1123
/// Encapsulated pixel data whose basic offset table holds one 4-byte entry,
/// followed by a single 32-byte fragment and a trailing padding element.
///
/// Unlike the empty-table case, the offset table item is expected to
/// produce an item value token with its raw bytes.
#[test]
fn lazy_read_encapsulated_pixeldata_with_offset_table() {
    #[rustfmt::skip]
    static ENCODED: &[u8] = &[
        0xe0, 0x7f, 0x10, 0x00, // (7FE0,0010) PixelData
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 -- basic offset table
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x04, 0x00, 0x00, 0x00, // item length: 4
        // -- 20 -- item value
        0x10, 0x00, 0x00, 0x00, // 16
        // -- 24 -- first fragment of pixel data
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x20, 0x00, 0x00, 0x00, // item length: 32
        // -- 32 -- compressed fragment
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        // -- 64 -- end of pixel data
        0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
        0x00, 0x00, 0x00, 0x00,
        // -- 72 -- padding
        0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0x08, 0x00, 0x00, 0x00, // length: 8
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    let padding_header = DataElementHeader::new(Tag(0xfffc, 0xfffc), VR::OB, Length(8));

    let expected = vec![
        DataToken::PixelSequenceStart,
        // basic offset table with a single entry
        DataToken::ItemStart { len: Length(4) },
        DataToken::ItemValue(vec![0x10, 0x00, 0x00, 0x00]),
        DataToken::ItemEnd,
        // the only fragment
        DataToken::ItemStart { len: Length(32) },
        DataToken::ItemValue(vec![0x99; 32]),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        // trailing padding element
        DataToken::ElementHeader(padding_header),
        DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
    ];

    validate_dataset_reader_explicit_vr(ENCODED, expected);
}
1175
/// Same data set as `lazy_read_sequence_explicit_2`, but every lazy value
/// token is skipped instead of being read into memory.
///
/// Structural tokens are still compared against the expected stream;
/// the expected entry matching a skipped value is consumed but not compared.
/// At the end the reader must have walked over every encoded byte.
#[test]
fn lazy_read_sequence_explicit_2_skip_values() {
    #[rustfmt::skip]
    static ENCODED: &[u8] = &[
        // (0008,2218) SQ, reserved, sequence length = 54
        0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
        // -- 12 -- item start, item length = 46
        0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
        // -- 20 -- (0008,0100) CodeValue, SH, len = 8
        0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00,
        0x54, 0x2d, 0x44, 0x31, 0x32, 0x31, 0x33, b' ', // "T-D1213 "
        // -- 36 -- (0008,0102) CodingSchemeDesignator, SH, len = 4
        0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00,
        0x53, 0x52, 0x54, b' ', // "SRT "
        // -- 48 -- (0008,0104) CodeMeaning, LO, len = 10
        0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00,
        0x4a, 0x61, 0x77, b' ', 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, // "Jaw region"
        // -- 66 -- (0040,0555) AcquisitionContextSequence, SQ, len = 0
        0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // -- 78 -- (2050,0020) PresentationLUTShape, CS, len = 8
        0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00,
        b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
    ];

    // shorthand constructors for the token shapes repeated below
    let header = |tag: Tag, vr: VR, len: Length| {
        DataToken::ElementHeader(DataElementHeader::new(tag, vr, len))
    };
    let text = |s: &str| {
        DataToken::PrimitiveValue(PrimitiveValue::Strs([s.to_owned()].as_ref().into()))
    };

    let expected = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0008, 0x2218),
            len: Length(54),
        },
        DataToken::ItemStart { len: Length(46) },
        header(Tag(0x0008, 0x0100), VR::SH, Length(8)),
        text("T-D1213 "),
        header(Tag(0x0008, 0x0102), VR::SH, Length(4)),
        text("SRT "),
        header(Tag(0x0008, 0x0104), VR::LO, Length(10)),
        text("Jaw region"),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::SequenceStart {
            tag: Tag(0x0040, 0x0555),
            len: Length(0),
        },
        DataToken::SequenceEnd,
        header(Tag(0x2050, 0x0020), VR::CS, Length(8)),
        text("IDENTITY"),
    ];

    let mut source = ENCODED;
    let decoder = StatefulDecoder::new(
        &mut source,
        ExplicitVRLittleEndianDecoder::default(),
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut reader = LazyDataSetReader::new(decoder);

    let mut remaining = expected.into_iter();
    while let Some(outcome) = reader.advance() {
        let token = outcome.expect("should parse without an error");
        // always step the expected stream, even for tokens which get skipped
        let wanted = remaining.next().expect("ground truth is shorter");

        let holds_lazy_value = matches!(
            token,
            LazyDataToken::LazyValue { .. } | LazyDataToken::LazyItemValue { .. }
        );
        if holds_lazy_value {
            // discard the value bytes without decoding them
            token.skip().unwrap();
        } else {
            let owned = token.into_owned().unwrap();
            assert_eq!(owned, wanted);
        }
    }

    // nothing may be left over in the expected stream
    assert_eq!(
        remaining.count(),
        0,
        "unexpected number of tokens remaining"
    );
    // skipping must still advance past the value bytes
    assert_eq!(reader.parser.position(), ENCODED.len() as u64);
}
1281
/// Reads two UI elements, turning each lazy value token into an in-memory
/// value through `into_value`.
///
/// For each element, the header carried by the lazy value token must equal
/// the header token emitted right before it.
#[test]
fn lazy_read_value_via_into_value() {
    // manually crafted DICOM data elements
    #[rustfmt::skip]
    const RAW: &[u8; 62] = &[
        // (0002,0002) Media Storage SOP Class UID, VR: UI, length: 26
        0x02, 0x00, 0x02, 0x00, 0x55, 0x49, 0x1a, 0x00,
        // value: "1.2.840.10008.5.1.4.1.1.1\0"
        0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e, 0x31, 0x30, 0x30, 0x30, 0x38, 0x2e,
        0x35, 0x2e, 0x31, 0x2e, 0x34, 0x2e, 0x31, 0x2e, 0x31, 0x2e, 0x31, 0x00,
        // (0002,0010) Transfer Syntax UID, VR: UI, length: 20
        0x02, 0x00, 0x10, 0x00, 0x55, 0x49, 0x14, 0x00,
        // value: "1.2.840.10008.1.2.1\0" == ExplicitVRLittleEndian
        0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e, 0x31, 0x30, 0x30, 0x30, 0x38, 0x2e,
        0x31, 0x2e, 0x32, 0x2e, 0x31, 0x00,
    ];

    let mut source = &RAW[..];
    let decoder = StatefulDecoder::new(
        &mut source,
        ExplicitVRLittleEndianDecoder::default(),
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut reader = LazyDataSetReader::new(decoder);

    // -- first element: header token --
    let first_header = match reader
        .advance()
        .expect("Expected token 1")
        .expect("Failed to read token 1")
    {
        LazyDataToken::ElementHeader(header) => header,
        _ => panic!("Unexpected token type (1)"),
    };

    // -- first element: lazy value token --
    let first_value = reader
        .advance()
        .expect("Expected token 2")
        .expect("Failed to read token 2");

    if let LazyDataToken::LazyValue { header, decoder: _ } = first_value {
        assert_eq!(first_header, header);
    } else {
        panic!("Unexpected token type (2)");
    }

    // consume via into_value
    assert_eq!(
        first_value.into_value().unwrap(),
        dicom_value!(Strs, ["1.2.840.10008.5.1.4.1.1.1\0"]),
    );

    // -- second element: header token --
    let second_header = match reader
        .advance()
        .expect("Expected token 3")
        .expect("Failed to read token 3")
    {
        LazyDataToken::ElementHeader(header) => header,
        _ => panic!("Unexpected token type (3)"),
    };

    // -- second element: lazy value token --
    let second_value = reader
        .advance()
        .expect("Expected token 4")
        .expect("Failed to read token 4");

    if let LazyDataToken::LazyValue { header, decoder: _ } = second_value {
        assert_eq!(second_header, header);
    } else {
        panic!("Unexpected token type (4)");
    }

    // consume via into_value
    assert_eq!(
        second_value.into_value().unwrap(),
        dicom_value!(Strs, ["1.2.840.10008.1.2.1\0"]),
    );

    // both elements consumed: the reader must be exhausted
    assert!(
        reader.advance().is_none(),
        "unexpected number of tokens remaining"
    );
}
1381
/// Interleaves `peek` and `advance` over an empty undefined-length sequence
/// followed by one text element.
///
/// Checks that peeking is repeatable, that a following `advance` yields the
/// peeked token without reading further, and that `peek` returns `None`
/// once the data is exhausted. The parser position is asserted along the
/// way to pin down how many bytes each step consumes.
#[test]
fn peek_data_elements() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
        b'S', b'Q', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 --
        0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
        // -- 20 --
        0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
        // -- 28 --
        b'T', b'E', b'S', b'T', // value = "TEST"
        // -- 32 --
    ];

    let ground_truth = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0018, 0x6011),
            len: Length::UNDEFINED,
        },
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0020, 0x4000),
            vr: VR::LT,
            len: Length(4),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
    ];

    let mut cursor = DATA;
    let parser = StatefulDecoder::new(
        &mut cursor,
        ExplicitVRLittleEndianDecoder::default(),
        // unit struct value, as in the sibling tests
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut dset_reader = LazyDataSetReader::new(parser);

    // peek at first token
    let token = dset_reader.peek().expect("should peek first token OK");
    assert_eq!(token, Some(&ground_truth[0]));

    // peeking reads the sequence header (12 bytes)
    assert_eq!(dset_reader.parser.position(), 12);

    // peeking multiple times gives the same result
    let token = dset_reader
        .peek()
        .expect("should peek first token again OK");
    assert_eq!(token, Some(&ground_truth[0]));

    // ... and does not read any further
    assert_eq!(dset_reader.parser.position(), 12);

    // using `advance` gives us the same token
    let token = dset_reader
        .advance()
        .expect("expected token")
        .expect("should read token peeked OK");
    assert_eq!(&token.into_owned().unwrap(), &ground_truth[0]);

    // the peeked token was reused: position is unchanged
    assert_eq!(dset_reader.parser.position(), 12);

    // sequence end
    let token = dset_reader
        .advance()
        .expect("expected token")
        .expect("should read token OK");
    assert_eq!(&token.into_owned().unwrap(), &ground_truth[1]);

    assert_eq!(dset_reader.parser.position(), 20);

    // peek data element header
    let token = dset_reader
        .peek()
        .expect("should peek element header OK");
    assert_eq!(token, Some(&ground_truth[2]));

    // only the 8-byte element header was read, not the value
    assert_eq!(dset_reader.parser.position(), 28);

    // read data element header
    let token = dset_reader
        .advance()
        .expect("expected token")
        .expect("should read token OK");
    assert_eq!(&token.into_owned().unwrap(), &ground_truth[2]);

    // should not have read anything else
    assert_eq!(dset_reader.parser.position(), 28);

    // read string value
    let token = dset_reader
        .advance()
        .expect("expected token")
        .expect("should read token OK");
    assert_eq!(&token.into_owned().unwrap(), &ground_truth[3]);

    // finished reading, peek should return None
    assert!(dset_reader.peek().unwrap().is_none());
}
1480
/// Exercises both strategies for an element declaring an odd length (11).
///
/// - `OddLengthStrategy::NextEven`: the header is reported with the length
///   rounded up to 12, and the value read afterwards includes the trailing
///   padding byte.
/// - `OddLengthStrategy::Fail`: the first `advance` reports an
///   `InvalidElementLength` error carrying the tag, the declared length and
///   the number of bytes read so far (the 8-byte header).
#[test]
fn read_odd_length_element() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0x08, 0x00, 0x16, 0x00, // (0008,0016) SOPClassUID
        b'U', b'I', // VR
        0x0b, 0x00, // len = 11
        b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0',
        0x00, // padding
    ];

    let ground_truth = vec![
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0008, 0x0016),
            vr: VR::UI,
            len: Length(12),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.100\0")),
    ];

    // strategy: assume next even

    let mut cursor = DATA;
    let parser = StatefulDecoder::new(
        &mut cursor,
        ExplicitVRLittleEndianDecoder::default(),
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut dset_reader = LazyDataSetReader::new_with_options(
        parser,
        LazyDataSetReaderOptions {
            odd_length: OddLengthStrategy::NextEven,
            ..Default::default()
        },
    );

    // read the header: the odd length is reported as the next even number
    let token = dset_reader
        .advance()
        .expect("expected token")
        .expect("should read token OK");

    assert_eq!(&token.into_owned().unwrap(), &ground_truth[0]);

    // read the value: with the length rounded up to 12,
    // the padding byte is part of the value
    let token = dset_reader
        .advance()
        .expect("expected value token")
        .expect("should read value token OK");

    assert_eq!(&token.into_owned().unwrap(), &ground_truth[1]);

    // strategy: fail

    let mut cursor = DATA;
    let parser = StatefulDecoder::new(
        &mut cursor,
        ExplicitVRLittleEndianDecoder::default(),
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut dset_reader = LazyDataSetReader::new_with_options(
        parser,
        LazyDataSetReaderOptions {
            odd_length: OddLengthStrategy::Fail,
            ..Default::default()
        },
    );

    let token = dset_reader.advance();

    // the error must identify the element, its declared length,
    // and the position right after the element header
    assert!(
        matches!(
            token,
            Some(Err(super::Error::InvalidElementLength {
                tag: Tag(0x0008, 0x0016),
                len: 11,
                bytes_read: 8,
                ..
            })),
        ),
        "got: {:?}",
        token
    );
}
1559}