dicom_parser/dataset/
read.rs

1//! This module contains a mid-level abstraction for reading DICOM content
2//! sequentially.
3//!
4//! The rest of the crate is used to obtain DICOM element headers and values.
5//! At this level, headers and values are treated as tokens which can be used
6//! to form a syntax tree of a full data set.
7use crate::stateful::decode::{
8    CharacterSetOverride, DynStatefulDecoder, Error as DecoderError, StatefulDecode,
9};
10use dicom_core::header::{DataElementHeader, Header, Length, SequenceItemHeader};
11use dicom_core::{PrimitiveValue, Tag, VR};
12use dicom_encoding::text::SpecificCharacterSet;
13use dicom_encoding::transfer_syntax::TransferSyntax;
14use snafu::{Backtrace, ResultExt, Snafu};
15use std::cmp::Ordering;
16use std::io::Read;
17
18use super::{DataToken, SeqTokenType};
19
20fn is_stateful_decode<T>(_: &T)
21where
22    T: StatefulDecode,
23{
24}
25
/// The errors which may be raised by the data set reader in this module.
///
/// Variants carrying a `source` wrap an error from the stateful decoder;
/// the remaining variants describe inconsistencies found in the token stream.
#[derive(Debug, Snafu)]
#[non_exhaustive]
pub enum Error {
    #[snafu(display("Could not create decoder"))]
    CreateDecoder {
        #[snafu(backtrace)]
        source: DecoderError,
    },
    #[snafu(display("Could not read item header"))]
    ReadItemHeader {
        #[snafu(backtrace)]
        source: DecoderError,
    },
    #[snafu(display("Could not read element header"))]
    ReadHeader {
        #[snafu(backtrace)]
        source: DecoderError,
    },
    #[snafu(display("Could not read {} value bytes for element tagged {}", len, tag))]
    ReadValue {
        len: u32,
        tag: Tag,
        #[snafu(backtrace)]
        source: DecoderError,
    },
    #[snafu(display("Could not read {} bytes for item value", len))]
    ReadItemValue {
        len: u32,
        #[snafu(backtrace)]
        source: DecoderError,
    },
    #[snafu(display(
        "Inconsistent sequence end: expected end at {} bytes but read {}",
        end_of_sequence,
        bytes_read
    ))]
    InconsistentSequenceEnd {
        end_of_sequence: u64,
        bytes_read: u64,
        backtrace: Backtrace,
    },
    #[snafu(display("Unexpected item tag {} while reading element header", tag))]
    UnexpectedItemTag { tag: Tag, backtrace: Backtrace },
    #[snafu(display(
        "Unexpected item header outside a dataset sequence at {:#x}",
        bytes_read
    ))]
    UnexpectedItemHeader {
        bytes_read: u64,
        backtrace: Backtrace,
    },
    // NOTE(review): the variants below have no `#[snafu(display(...))]`;
    // snafu appears to derive their display message from the doc comment,
    // so the doc text is effectively runtime output and should not be
    // reworded casually -- confirm against the snafu version in use.
    /// Undefined pixel data item length
    UndefinedItemLength,
    /// Invalid data element length {len:04X} of {tag} at {bytes_read:#x}
    InvalidElementLength { tag: Tag, len: u32, bytes_read: u64 },
    /// Invalid sequence item length {len:04X} at {bytes_read:#x}
    InvalidItemLength { len: u32, bytes_read: u64 },
}
84
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
86
/// A reader-specific token representing a sequence or item start.
///
/// The reader keeps a stack of these tokens
/// to know which sequences and items are currently open
/// and, for those with an explicit length, where they end.
#[derive(Debug, Copy, Clone, PartialEq)]
struct SeqToken {
    /// Whether it is the start of a sequence or the start of an item.
    typ: SeqTokenType,
    /// The length of the value, as indicated by the starting element,
    /// can be unknown.
    len: Length,
    /// Whether this sequence token is part of an encapsulated pixel data.
    pixel_data: bool,
    /// The number of bytes the parser has read until it reached the
    /// beginning of the sequence or item value data.
    /// Together with `len`, this gives the position
    /// at which an explicit-length sequence or item ends.
    base_offset: u64,
}
101
/// The value reading strategy for the data set reader.
///
/// It defines how the `PrimitiveValue`s in value tokens are constructed.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
pub enum ValueReadStrategy {
    /// Textual values will be decoded according to their value representation.
    ///
    /// Word-sized binary values are read according to
    /// the expected byte order.
    /// Dates, times, and date-times (DA, DT, TM) are parsed
    /// into their more specific variants,
    /// leading to parser failure if they are not valid DICOM.
    /// String numbers (IS, DS) are also converted into binary representations.
    /// For the case of floats, this may introduce precision errors.
    Interpreted,
    /// Values will be stored without decoding dates or textual numbers.
    ///
    /// Word-sized binary values are read according to
    /// the expected byte order.
    /// Date-time values and numbers are kept in their original string
    /// representation as string objects.
    /// All text is still decoded into Rust string values,
    /// in accordance to the standard,
    /// unless its value representation is unknown to the decoder.
    ///
    /// This is the default strategy.
    #[default]
    Preserved,
    /// All primitive values are fetched as raw byte buffers,
    /// without any form of decoding or interpretation.
    /// Not even byte order conversions are made.
    ///
    /// This strategy is not recommended,
    /// as it makes the retrieval of important textual data more difficult.
    Raw,
}
138
/// A strategy for when the parser finds a data element with an odd number
/// in the _length_ header field.
///
/// The strategy only applies to defined lengths;
/// undefined lengths are passed through unchanged.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum OddLengthStrategy {
    /// Accept elements with an odd length as is,
    /// continuing data set reading normally.
    ///
    /// This is the default strategy.
    #[default]
    Accept,
    /// Assume that the real length is `length + 1`,
    /// as in the next even number.
    NextEven,
    /// Raise an error instead of accepting the odd length.
    Fail,
}
154
/// The set of options for the data set reader.
///
/// The default set of options uses the default value of each field.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub struct DataSetReaderOptions {
    /// The value reading strategy
    pub value_read: ValueReadStrategy,

    /// Whether to assume a different character set
    /// depending on certain conditions
    pub charset_override: CharacterSetOverride,

    /// The strategy for handling odd length data elements
    pub odd_length: OddLengthStrategy,

    /// The position of the reader as received at building time in bytes.
    /// Defaults to 0.
    pub base_offset: u64,
}
172
173impl DataSetReaderOptions {
174    /// Replace the value reading strategy of the options.
175    pub fn value_read(mut self, value_read: ValueReadStrategy) -> Self {
176        self.value_read = value_read;
177        self
178    }
179    /// Replace the base reader offset of the options.
180    pub fn base_offset(mut self, base_offset: u64) -> Self {
181        self.base_offset = base_offset;
182        self
183    }
184}
185
186/// A higher-level reader for retrieving structure in a DICOM data set from an
187/// arbitrary data source.
// `S` is the type of the stateful decoder from which tokens are read.
#[derive(Debug)]
pub struct DataSetReader<S> {
    /// the stateful decoder
    parser: S,
    /// the options of this reader
    options: DataSetReaderOptions,
    /// whether the reader is expecting an item header next (or a sequence delimiter)
    in_sequence: bool,
    /// whether the reader is expecting the first item value of a pixel sequence next
    /// (offset table)
    offset_table_next: bool,
    /// whether a check for a sequence or item delimitation is pending
    /// (only relevant for sequences and items with an explicit length)
    delimiter_check_pending: bool,
    /// a stack of delimiters,
    /// one for each sequence or item which is currently open
    seq_delimiters: Vec<SeqToken>,
    /// fuse the iteration process if true
    hard_break: bool,
    /// last decoded header,
    /// kept until the respective value is read
    last_header: Option<DataElementHeader>,
    /// if a peek was taken, this holds the token peeked
    peek: Option<DataToken>,
}
210
211impl<R> DataSetReader<DynStatefulDecoder<R>> {
212    /// Create a new data set token reader with the given byte source,
213    /// while considering the given transfer syntax specifier.
214    #[inline]
215    pub fn new_with_ts(source: R, ts: &TransferSyntax) -> Result<Self>
216    where
217        R: Read,
218    {
219        Self::new_with_ts_cs_options(source, ts, Default::default(), Default::default())
220    }
221
222    /// Create a new iterator with the given transfer syntax and options.
223    #[inline]
224    pub fn new_with_ts_options(
225        source: R,
226        ts: &TransferSyntax,
227        options: DataSetReaderOptions,
228    ) -> Result<Self>
229    where
230        R: Read,
231    {
232        Self::new_with_ts_cs_options(source, ts, SpecificCharacterSet::default(), options)
233    }
234
235    /// Create a new data set token reader with the given byte source,
236    /// while considering the given transfer syntax specifier
237    /// and the specific character set to assume by default.
238    ///
239    /// Note that the data set being read
240    /// can override the character set with the presence of a
241    /// _Specific Character Set_ data element.
242    #[inline]
243    pub fn new_with_ts_cs(source: R, ts: &TransferSyntax, cs: SpecificCharacterSet) -> Result<Self>
244    where
245        R: Read,
246    {
247        Self::new_with_ts_cs_options(source, ts, cs, Default::default())
248    }
249
250    /// Create a new iterator with the given stateful decoder and options.
251    pub fn new_with_ts_cs_options(
252        source: R,
253        ts: &TransferSyntax,
254        cs: SpecificCharacterSet,
255        options: DataSetReaderOptions,
256    ) -> Result<Self>
257    where
258        R: Read,
259    {
260        let parser =
261            DynStatefulDecoder::new_with_override(source, ts, cs, options.charset_override, 0)
262                .context(CreateDecoderSnafu)?;
263
264        is_stateful_decode(&parser);
265
266        Ok(DataSetReader {
267            parser,
268            options,
269            seq_delimiters: Vec::new(),
270            delimiter_check_pending: false,
271            offset_table_next: false,
272            in_sequence: false,
273            hard_break: false,
274            last_header: None,
275            peek: None,
276        })
277    }
278}
279
280impl<S> DataSetReader<S> {
281    /// Create a new iterator with the given stateful decoder and options.
282    pub fn new(decoder: S, options: DataSetReaderOptions) -> Self {
283        DataSetReader {
284            parser: decoder,
285            options,
286            seq_delimiters: Vec::new(),
287            delimiter_check_pending: false,
288            offset_table_next: false,
289            in_sequence: false,
290            hard_break: false,
291            last_header: None,
292            peek: None,
293        }
294    }
295}
296
297impl<S> Iterator for DataSetReader<S>
298where
299    S: StatefulDecode,
300{
301    type Item = Result<DataToken>;
302
303    fn next(&mut self) -> Option<Self::Item> {
304        if self.hard_break {
305            return None;
306        }
307        // if there was a peek, consume peeked token
308        if let Some(token) = self.peek.take() {
309            return Some(Ok(token));
310        }
311
312        // item or sequence delimitation logic for explicit lengths
313        if self.delimiter_check_pending {
314            match self.update_seq_delimiters() {
315                Err(e) => {
316                    self.hard_break = true;
317                    return Some(Err(e));
318                }
319                Ok(Some(token)) => return Some(Ok(token)),
320                Ok(None) => { /* no-op */ }
321            }
322        }
323
324        if self.in_sequence {
325            // at sequence level, expecting item header
326
327            match self.parser.decode_item_header() {
328                Ok(header) => {
329                    match header {
330                        SequenceItemHeader::Item { len } => {
331                            let len = match self.sanitize_length(len) {
332                                Some(len) => len,
333                                None => {
334                                    return Some(
335                                        InvalidItemLengthSnafu {
336                                            bytes_read: self.parser.position(),
337                                            len: len.0,
338                                        }
339                                        .fail(),
340                                    )
341                                }
342                            };
343                            // entered a new item
344                            self.in_sequence = false;
345
346                            let last_delimiter = match self.seq_delimiters.last() {
347                                Some(d) => d,
348                                None => {
349                                    return Some(
350                                        UnexpectedItemHeaderSnafu {
351                                            bytes_read: self.parser.position(),
352                                        }
353                                        .fail(),
354                                    )
355                                }
356                            };
357                            self.push_sequence_token(
358                                SeqTokenType::Item,
359                                len,
360                                last_delimiter.pixel_data,
361                            );
362                            // items can be empty
363                            if len == Length(0) {
364                                self.delimiter_check_pending = true;
365                            }
366                            Some(Ok(DataToken::ItemStart { len }))
367                        }
368                        SequenceItemHeader::ItemDelimiter => {
369                            // closed an item
370                            self.seq_delimiters.pop();
371                            self.in_sequence = true;
372                            // sequences can end after an item delimiter
373                            self.delimiter_check_pending = true;
374                            Some(Ok(DataToken::ItemEnd))
375                        }
376                        SequenceItemHeader::SequenceDelimiter => {
377                            // closed a sequence
378                            self.seq_delimiters.pop();
379                            self.in_sequence = false;
380                            // items can end after a nested sequence ends
381                            self.delimiter_check_pending = true;
382                            Some(Ok(DataToken::SequenceEnd))
383                        }
384                    }
385                }
386                Err(DecoderError::DecodeItemHeader {
387                    source: dicom_encoding::decode::Error::ReadItemHeader { source, .. },
388                    ..
389                }) if source.kind() == std::io::ErrorKind::UnexpectedEof
390                    && self.seq_delimiters.pop().is_some_and(|t| t.pixel_data) =>
391                {
392                    // Note: if `UnexpectedEof` was reached while inside a
393                    // PixelData Sequence, then we assume that
394                    // the end of a DICOM object was reached gracefully.
395                    self.hard_break = true;
396                    None
397                }
398                Err(e) => {
399                    self.hard_break = true;
400                    Some(Err(e).context(ReadItemHeaderSnafu))
401                }
402            }
403        } else if let Some(SeqToken {
404            typ: SeqTokenType::Item,
405            pixel_data: true,
406            len,
407            ..
408        }) = self.seq_delimiters.last()
409        {
410            let len = match len.get() {
411                Some(len) => len as usize,
412                None => return Some(UndefinedItemLengthSnafu.fail()),
413            };
414
415            if self.offset_table_next {
416                // offset table
417                let mut offset_table = Vec::with_capacity(len);
418
419                self.offset_table_next = false;
420
421                // need to pop item delimiter on the next iteration
422                self.delimiter_check_pending = true;
423
424                Some(
425                    match self.parser.read_u32_to_vec(len as u32, &mut offset_table) {
426                        Ok(()) => Ok(DataToken::OffsetTable(offset_table)),
427                        Err(e) => Err(e).context(ReadItemValueSnafu { len: len as u32 }),
428                    },
429                )
430            } else {
431                // item value
432                let mut value = Vec::with_capacity(len);
433
434                // need to pop item delimiter on the next iteration
435                self.delimiter_check_pending = true;
436                Some(
437                    self.parser
438                        .read_to_vec(len as u32, &mut value)
439                        .map(|_| Ok(DataToken::ItemValue(value)))
440                        .unwrap_or_else(|e| Err(e).context(ReadItemValueSnafu { len: len as u32 })),
441                )
442            }
443        } else if let Some(header) = self.last_header {
444            if header.is_encapsulated_pixeldata() {
445                self.push_sequence_token(SeqTokenType::Sequence, Length::UNDEFINED, true);
446                self.last_header = None;
447
448                // encapsulated pixel data, expecting offset table
449                match self.parser.decode_item_header() {
450                    Ok(header) => match header {
451                        SequenceItemHeader::Item { len } => {
452                            let len = match self.sanitize_length(len) {
453                                Some(len) => len,
454                                None => {
455                                    return Some(
456                                        InvalidItemLengthSnafu {
457                                            bytes_read: self.parser.position(),
458                                            len: len.0,
459                                        }
460                                        .fail(),
461                                    )
462                                }
463                            };
464
465                            // entered a new item
466                            self.in_sequence = false;
467                            self.push_sequence_token(SeqTokenType::Item, len, true);
468                            // items can be empty
469                            if len == Length(0) {
470                                self.delimiter_check_pending = true;
471                            } else {
472                                self.offset_table_next = true;
473                            }
474                            Some(Ok(DataToken::ItemStart { len }))
475                        }
476                        SequenceItemHeader::SequenceDelimiter => {
477                            // empty pixel data
478                            self.seq_delimiters.pop();
479                            self.in_sequence = false;
480                            Some(Ok(DataToken::SequenceEnd))
481                        }
482                        item => {
483                            self.hard_break = true;
484                            Some(UnexpectedItemTagSnafu { tag: item.tag() }.fail())
485                        }
486                    },
487                    Err(e) => {
488                        self.hard_break = true;
489                        Some(Err(e).context(ReadItemHeaderSnafu))
490                    }
491                }
492            } else {
493                // a plain element header was read, so a value is expected
494                let value = match self.read_value(&header) {
495                    Ok(v) => v,
496                    Err(e) => {
497                        self.hard_break = true;
498                        self.last_header = None;
499                        return Some(Err(e));
500                    }
501                };
502
503                self.last_header = None;
504
505                // sequences can end after this token
506                self.delimiter_check_pending = true;
507
508                Some(Ok(DataToken::PrimitiveValue(value)))
509            }
510        } else {
511            // a data element header or item delimiter is expected
512            match self.parser.decode_header() {
513                Ok(DataElementHeader {
514                    tag,
515                    vr: VR::SQ,
516                    len,
517                }) => {
518                    let len = match self.sanitize_length(len) {
519                        Some(len) => len,
520                        None => {
521                            return Some(
522                                InvalidElementLengthSnafu {
523                                    tag,
524                                    len: len.0,
525                                    bytes_read: self.parser.position(),
526                                }
527                                .fail(),
528                            )
529                        }
530                    };
531
532                    self.in_sequence = true;
533                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
534
535                    // sequences can end right after they start
536                    if len == Length(0) {
537                        self.delimiter_check_pending = true;
538                    }
539
540                    Some(Ok(DataToken::SequenceStart { tag, len }))
541                }
542                Ok(DataElementHeader {
543                    tag: Tag(0xFFFE, 0xE00D),
544                    ..
545                }) if self.seq_delimiters.is_empty() => {
546                    // ignore delimiter, we are not in a sequence
547                    tracing::warn!(
548                        "Item delimitation item outside of a sequence in position {}",
549                        self.parser.position()
550                    );
551                    // return a new token by calling the method again
552                    self.next()
553                }
554                Ok(DataElementHeader {
555                    tag: Tag(0xFFFE, 0xE00D),
556                    ..
557                }) => {
558                    self.in_sequence = true;
559                    // pop item delimiter
560                    self.seq_delimiters.pop();
561                    // sequences can end after this token
562                    self.delimiter_check_pending = true;
563                    Some(Ok(DataToken::ItemEnd))
564                }
565                Ok(header) if header.is_encapsulated_pixeldata() => {
566                    // encapsulated pixel data conditions:
567                    // expect a sequence of pixel data fragments
568
569                    // save it for the next step
570                    self.last_header = Some(header);
571                    Some(Ok(DataToken::PixelSequenceStart))
572                }
573                Ok(header) if header.len.is_undefined() => {
574                    // treat other undefined length elements
575                    // as data set sequences,
576                    // discarding the VR in the process
577                    self.in_sequence = true;
578
579                    let DataElementHeader { tag, len, .. } = header;
580                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
581
582                    Some(Ok(DataToken::SequenceStart { tag, len }))
583                }
584                Ok(mut header) => {
585                    match self.sanitize_length(header.len) {
586                        Some(len) => header.len = len,
587                        None => {
588                            return Some(
589                                InvalidElementLengthSnafu {
590                                    tag: header.tag,
591                                    len: header.len.0,
592                                    bytes_read: self.parser.position(),
593                                }
594                                .fail(),
595                            )
596                        }
597                    };
598
599                    // save it for the next step
600                    self.last_header = Some(header);
601                    Some(Ok(DataToken::ElementHeader(header)))
602                }
603                Err(DecoderError::DecodeElementHeader {
604                    source: dicom_encoding::decode::Error::ReadHeaderTag { source, .. },
605                    ..
606                }) if source.kind() == std::io::ErrorKind::UnexpectedEof => {
607                    // Note: if `UnexpectedEof` was reached while trying to read
608                    // an element tag, then we assume that
609                    // the end of a DICOM object was reached gracefully.
610                    // This approach is unlikely to consume trailing bytes,
611                    // but may ignore the current depth of the data set tree.
612                    self.hard_break = true;
613                    None
614                }
615                Err(e) => {
616                    self.hard_break = true;
617                    Some(Err(e).context(ReadHeaderSnafu))
618                }
619            }
620        }
621    }
622}
623
624impl<S> DataSetReader<S>
625where
626    S: StatefulDecode,
627{
628    /// Peek the next token from the source by
629    /// reading a new token in the first call.
630    /// Subsequent calls to `peek` will return the same token
631    /// until another consumer method (such as `Iterator::next`)
632    /// is called.
633    pub fn peek(&mut self) -> Result<Option<&DataToken>> {
634        if self.peek.is_none() {
635            // try to read the next token
636            match self.next() {
637                None => return Ok(None),
638                Some(Err(e)) => return Err(e),
639                Some(Ok(token)) => {
640                    self.peek = Some(token);
641                }
642            }
643        }
644        Ok(self.peek.as_ref())
645    }
646
647    fn update_seq_delimiters(&mut self) -> Result<Option<DataToken>> {
648        if let Some(sd) = self.seq_delimiters.last() {
649            if let Some(len) = sd.len.get() {
650                let end_of_sequence = sd.base_offset + len as u64;
651                let bytes_read = self.parser.position();
652                match end_of_sequence.cmp(&bytes_read) {
653                    Ordering::Equal => {
654                        // end of delimiter, as indicated by the element's length
655                        let token;
656                        match sd.typ {
657                            SeqTokenType::Sequence => {
658                                self.in_sequence = false;
659                                token = DataToken::SequenceEnd;
660                            }
661                            SeqTokenType::Item => {
662                                self.in_sequence = true;
663                                token = DataToken::ItemEnd;
664                            }
665                        }
666                        self.seq_delimiters.pop();
667                        return Ok(Some(token));
668                    }
669                    Ordering::Less => {
670                        return InconsistentSequenceEndSnafu {
671                            end_of_sequence,
672                            bytes_read,
673                        }
674                        .fail();
675                    }
676                    Ordering::Greater => {} // continue normally
677                }
678            }
679        }
680        self.delimiter_check_pending = false;
681        Ok(None)
682    }
683
684    #[inline]
685    fn push_sequence_token(&mut self, typ: SeqTokenType, len: Length, pixel_data: bool) {
686        self.seq_delimiters.push(SeqToken {
687            typ,
688            pixel_data,
689            len,
690            base_offset: self.parser.position(),
691        })
692    }
693
694    fn read_value(&mut self, header: &DataElementHeader) -> Result<PrimitiveValue> {
695        match self.options.value_read {
696            ValueReadStrategy::Interpreted => self.parser.read_value(header),
697            ValueReadStrategy::Preserved => self.parser.read_value_preserved(header),
698            ValueReadStrategy::Raw => self.parser.read_value_bytes(header),
699        }
700        .context(ReadValueSnafu {
701            len: header.len.0,
702            tag: header.tag,
703        })
704    }
705
706    /// Check for a non-compliant length
707    /// and handle it according to the current strategy.
708    /// Returns `None` if the length cannot or should not be resolved.
709    fn sanitize_length(&self, length: Length) -> Option<Length> {
710        if length.is_defined() && length.0 & 1 != 0 {
711            match self.options.odd_length {
712                OddLengthStrategy::Accept => Some(length),
713                OddLengthStrategy::NextEven => Some(length + 1),
714                OddLengthStrategy::Fail => None,
715            }
716        } else {
717            Some(length)
718        }
719    }
720}
721
722#[cfg(test)]
723mod tests {
724    use super::{DataSetReader, DataToken, StatefulDecode};
725    use crate::dataset::read::{DataSetReaderOptions, OddLengthStrategy};
726    use crate::stateful::decode::StatefulDecoder;
727    use dicom_core::header::{DataElementHeader, Length};
728    use dicom_core::value::PrimitiveValue;
729    use dicom_core::{Tag, VR};
730    use dicom_encoding::decode::basic::LittleEndianBasicDecoder;
731    use dicom_encoding::decode::{
732        explicit_le::ExplicitVRLittleEndianDecoder, implicit_le::ImplicitVRLittleEndianDecoder,
733    };
734    use dicom_encoding::text::SpecificCharacterSet;
735
736    fn validate_read_data_implicit_vr<I>(data: &[u8], ground_truth: I)
737    where
738        I: IntoIterator<Item = DataToken>,
739    {
740        let mut cursor = data;
741        let parser = StatefulDecoder::new(
742            &mut cursor,
743            ImplicitVRLittleEndianDecoder::default(),
744            LittleEndianBasicDecoder,
745            SpecificCharacterSet::default(),
746        );
747
748        validate_read_data(data, parser, ground_truth)
749    }
750
751    fn validate_read_data_explicit_vr<I>(data: &[u8], ground_truth: I)
752    where
753        I: IntoIterator<Item = DataToken>,
754    {
755        let mut cursor = data;
756        let parser = StatefulDecoder::new(
757            &mut cursor,
758            ExplicitVRLittleEndianDecoder::default(),
759            LittleEndianBasicDecoder,
760            SpecificCharacterSet::default(),
761        );
762
763        validate_read_data(data, parser, ground_truth)
764    }
765
766    fn validate_read_data<I, D>(data: &[u8], parser: D, ground_truth: I)
767    where
768        I: IntoIterator<Item = DataToken>,
769        D: StatefulDecode,
770    {
771        let dset_reader = DataSetReader::new(parser, Default::default());
772        validate_data_set_reader(data, dset_reader, ground_truth);
773    }
774
775    fn validate_data_set_reader<S, I>(
776        data: &[u8],
777        mut dset_reader: DataSetReader<S>,
778        ground_truth: I,
779    ) where
780        S: StatefulDecode,
781        I: IntoIterator<Item = DataToken>,
782    {
783        let iter = &mut dset_reader;
784        let ground_truth = ground_truth.into_iter();
785
786        for gt_token in ground_truth {
787            let token = iter
788                .next()
789                .expect("expecting more tokens from reader")
790                .expect("should fetch the next token without an error");
791            eprintln!("Next token: {token:2?} ; Expected: {gt_token:2?}");
792            assert_eq!(
793                token, gt_token,
794                "Got token {token:2?} ; but expected {gt_token:2?}"
795            );
796        }
797
798        let extra: Vec<_> = iter.collect();
799        assert_eq!(
800            extra.len(), // we have already read all of them
801            0,
802            "extraneous tokens remaining: {extra:?}",
803        );
804        assert_eq!(
805            dset_reader.parser.position(),
806            data.len() as u64,
807            "Decoder position did not match end of data",
808        );
809    }
810
/// Read a sequence of defined length holding two items of defined length,
/// followed by a text element at the root of the data set
/// (Explicit VR Little Endian).
#[test]
fn read_sequence_explicit() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
        b'S', b'Q', // VR
        0x00, 0x00, // reserved
        0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (2 items)
        // -- 12 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x14, 0x00, 0x00, 0x00, // item length: 20 (2 elements)
        // -- 20 --
        0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018,6012) RegionSpatialFormat, len = 2, value = 1
        // -- 30 --
        0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018,6014) RegionDataType, len = 2, value = 2
        // -- 40 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x0a, 0x00, 0x00, 0x00, // item length: 10 (1 element)
        // -- 48 --
        0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018,6012) RegionSpatialFormat, len = 2, value = 4
        // -- 58 --
        0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
        b'T', b'E', b'S', b'T', // value = "TEST"
    ];

    let ground_truth = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0018, 0x6011),
            len: Length(46),
        },
        DataToken::ItemStart { len: Length(20) },
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0018, 0x6012),
            vr: VR::US,
            len: Length(2),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0018, 0x6014),
            vr: VR::US,
            len: Length(2),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
        // item and sequence ends are emitted even though
        // the data has no delimiters for them (lengths are defined)
        DataToken::ItemEnd,
        DataToken::ItemStart { len: Length(10) },
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0018, 0x6012),
            vr: VR::US,
            len: Length(2),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0020, 0x4000),
            vr: VR::LT,
            len: Length(4),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
    ];

    validate_read_data_explicit_vr(DATA, ground_truth);
}
874
/// Read a defined-length sequence with a single item of three text elements,
/// then an empty sequence, then a code string element
/// (Explicit VR Little Endian).
#[test]
fn read_sequence_explicit_2() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        // SequenceStart: (0008,2218) ; len = 54 (#=3)
        0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
        // -- 12 --
        // ItemStart: len = 46
        0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
        // -- 20 --
        // ElementHeader: (0008,0100) CodeValue; len = 8
        0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00,
        // PrimitiveValue: "T-D1213 "
        b'T', b'-', b'D', b'1', b'2', b'1', b'3', b' ',
        // -- 36 --
        // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4
        0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00,
        // PrimitiveValue: "SRT "
        b'S', b'R', b'T', b' ',
        // -- 48 --
        // ElementHeader: (0008,0104) CodeMeaning; len = 10
        0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00,
        // PrimitiveValue: "Jaw region"
        b'J', b'a', b'w', b' ', b'r', b'e', b'g', b'i', b'o', b'n',
        // -- 66 --
        // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0
        0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        // -- 78 --
        // ElementHeader: (2050,0020) PresentationLUTShape; len = 8
        0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00,
        // PrimitiveValue: "IDENTITY"
        b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
    ];

    let expected = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0008, 0x2218),
            len: Length(54),
        },
        DataToken::ItemStart { len: Length(46) },
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0008, 0x0100),
            VR::SH,
            Length(8),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::Strs(
            [String::from("T-D1213 ")].as_ref().into(),
        )),
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0008, 0x0102),
            VR::SH,
            Length(4),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::Strs(
            [String::from("SRT ")].as_ref().into(),
        )),
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0008, 0x0104),
            VR::LO,
            Length(10),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::Strs(
            [String::from("Jaw region")].as_ref().into(),
        )),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        // the empty sequence still produces a start/end pair
        DataToken::SequenceStart {
            tag: Tag(0x0040, 0x0555),
            len: Length(0),
        },
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x2050, 0x0020),
            VR::CS,
            Length(8),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::Strs(
            [String::from("IDENTITY")].as_ref().into(),
        )),
    ];

    validate_read_data_explicit_vr(DATA, expected);
}
950
/// Read a sequence with a defined length of 0 as the only element:
/// it must yield a sequence start immediately followed by a sequence end.
#[test]
fn read_empty_sequence_explicit() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        // SequenceStart: (0008,2218) AnatomicRegionSequence ; len = 0
        0x08, 0x00, 0x18, 0x22, // tag
        b'S', b'Q', // VR: SQ
        0x00, 0x00, // reserved
        0x00, 0x00, 0x00, 0x00, // length: 0
    ];

    let ground_truth = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0008, 0x2218),
            len: Length(0),
        },
        DataToken::SequenceEnd,
    ];

    validate_read_data_explicit_vr(DATA, ground_truth);
}
971
/// A stray item delimiter after the last root element
/// must be consumed without producing any token for it.
#[test]
fn ignore_trailing_item_delimitation_item() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        // (0020,4000) ImageComments, len = 4
        0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00,
        // value = "TEST"
        b'T', b'E', b'S', b'T',
        // item end
        0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00,
    ];

    // only the element is expected: no token for the item end
    let expected = vec![
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0020, 0x4000),
            VR::LT,
            Length(4),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
    ];

    validate_read_data_explicit_vr(DATA, expected);
}
994
/// Read a sequence of undefined length holding two items of undefined length,
/// followed by a text element at the root of the data set.
///
/// "Implicit" in the test name refers to the undefined lengths
/// (sequence and items are terminated by delimiters);
/// the data itself is encoded in Explicit VR Little Endian.
#[test]
fn read_sequence_implicit() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
        b'S', b'Q', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0xff, 0xff, 0xff, 0xff, // item length: undefined
        // -- 20 --
        0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018,6012) RegionSpatialFormat, len = 2, value = 1
        // -- 30 --
        0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018,6014) RegionDataType, len = 2, value = 2
        // -- 40 --
        0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
        // -- 48 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0xff, 0xff, 0xff, 0xff, // item length: undefined
        // -- 56 --
        0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018,6012) RegionSpatialFormat, len = 2, value = 4
        // -- 66 --
        0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
        // -- 74 --
        0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
        // -- 82 --
        0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
        b'T', b'E', b'S', b'T', // value = "TEST"
    ];

    let ground_truth = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0018, 0x6011),
            len: Length::UNDEFINED,
        },
        DataToken::ItemStart {
            len: Length::UNDEFINED,
        },
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0018, 0x6012),
            vr: VR::US,
            len: Length(2),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0018, 0x6014),
            vr: VR::US,
            len: Length(2),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
        DataToken::ItemEnd,
        DataToken::ItemStart {
            len: Length::UNDEFINED,
        },
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0018, 0x6012),
            vr: VR::US,
            len: Length(2),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0020, 0x4000),
            vr: VR::LT,
            len: Length(4),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
    ];

    validate_read_data_explicit_vr(DATA, ground_truth);
}
1068
/// In Implicit VR Little Endian, an element with an unknown private tag
/// and undefined length must be read as a sequence
/// (here holding a single empty item of undefined length).
#[test]
fn read_implicit_len_sequence_implicit_vr_unknown() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        // sequence tag: (5533,5533) «private, unknown attribute»
        0x33, 0x55, 0x33, 0x55,
        // length: undefined
        0xff, 0xff, 0xff, 0xff,
        // -- 8 --
        // item begin
        0xfe, 0xff, 0x00, 0xe0,
        // length: undefined
        0xff, 0xff, 0xff, 0xff,
        // -- 16 --
        // item end
        0xfe, 0xff, 0x0d, 0xe0,
        // length is always zero
        0x00, 0x00, 0x00, 0x00,
        // -- 24 --
        // sequence end
        0xfe, 0xff, 0xdd, 0xe0,
        0x00, 0x00, 0x00, 0x00,
        // -- 32 --
    ];

    let undefined = Length::UNDEFINED;
    let expected = vec![
        DataToken::SequenceStart {
            tag: Tag(0x5533, 0x5533),
            len: undefined,
        },
        DataToken::ItemStart { len: undefined },
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
    ];

    validate_read_data_implicit_vr(DATA, expected);
}
1101
/// Read encapsulated pixel data with an empty basic offset table
/// and a single fragment of 32 bytes, followed by trailing padding.
#[test]
fn read_encapsulated_pixeldata() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        // (7FE0,0010) PixelData
        0xe0, 0x7f, 0x10, 0x00,
        // VR
        b'O', b'B',
        // reserved
        0x00, 0x00,
        // length: undefined
        0xff, 0xff, 0xff, 0xff,
        // -- 12 -- Basic offset table
        // item start tag
        0xfe, 0xff, 0x00, 0xe0,
        // item length: 0
        0x00, 0x00, 0x00, 0x00,
        // -- 20 -- First fragment of pixel data
        // item start tag
        0xfe, 0xff, 0x00, 0xe0,
        // item length: 32
        0x20, 0x00, 0x00, 0x00,
        // -- 28 -- Compressed Fragment
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        // -- 60 -- End of pixel data
        // sequence end tag
        0xfe, 0xff, 0xdd, 0xe0,
        0x00, 0x00, 0x00, 0x00,
        // -- 68 -- padding
        // (fffc,fffc) DataSetTrailingPadding
        0xfc, 0xff, 0xfc, 0xff,
        // VR
        b'O', b'B',
        // reserved
        0x00, 0x00,
        // length: 8
        0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    let expected = vec![
        DataToken::PixelSequenceStart,
        // empty offset table: item start and end with nothing in between
        DataToken::ItemStart { len: Length(0) },
        DataToken::ItemEnd,
        DataToken::ItemStart { len: Length(32) },
        DataToken::ItemValue(vec![0x99; 32]),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0xfffc, 0xfffc),
            vr: VR::OB,
            len: Length(8),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
    ];

    validate_read_data_explicit_vr(DATA, expected);
}
1150
/// Read encapsulated pixel data with a basic offset table of one entry
/// and a single fragment of 32 bytes, followed by trailing padding.
#[test]
fn read_encapsulated_pixeldata_with_offset_table() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 -- Basic offset table
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x04, 0x00, 0x00, 0x00, // item length: 4
        // -- 20 -- item value
        0x10, 0x00, 0x00, 0x00, // 16
        // -- 24 -- First fragment of pixel data
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x20, 0x00, 0x00, 0x00, // item length: 32
        // -- 32 -- Compressed Fragment
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
        // -- 64 -- End of pixel data
        0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
        0x00, 0x00, 0x00, 0x00,
        // -- 72 -- padding
        0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0x08, 0x00, 0x00, 0x00, // length: 8
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    let ground_truth = vec![
        DataToken::PixelSequenceStart,
        DataToken::ItemStart { len: Length(4) },
        // the first item of the pixel sequence is reported
        // as an offset table rather than as a plain item value
        DataToken::OffsetTable(vec![16]),
        DataToken::ItemEnd,
        DataToken::ItemStart { len: Length(32) },
        DataToken::ItemValue(vec![0x99; 32]),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0xfffc, 0xfffc),
            VR::OB,
            Length(8),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
    ];

    validate_read_data_explicit_vr(DATA, ground_truth);
}
1202
/// Read nested sequences in Implicit VR Little Endian where an item
/// of defined length (114 bytes) contains sequences of undefined length.
///
/// The end of the defined-length item has no delimiter in the data,
/// so the corresponding `ItemEnd` token comes from the reader itself.
#[test]
fn read_dataset_in_dataset() {
    #[rustfmt::skip]
    const DATA: &[u8; 138] = &[
        // 0: (2001, 9000) private sequence
        0x01, 0x20, 0x00, 0x90,
        // length: undefined
        0xFF, 0xFF, 0xFF, 0xFF,
        // 8: Item start
        0xFE, 0xFF, 0x00, 0xE0,
        // Item length explicit (114 bytes)
        0x72, 0x00, 0x00, 0x00,
        // 16: (0008,1115) ReferencedSeriesSequence
        0x08, 0x00, 0x15, 0x11,
        // length: undefined
        0xFF, 0xFF, 0xFF, 0xFF,
        // 24: Item start
        0xFE, 0xFF, 0x00, 0xE0,
        // Item length undefined
        0xFF, 0xFF, 0xFF, 0xFF,
        // 32: (0008,1140) ReferencedImageSequence
        0x08, 0x00, 0x40, 0x11,
        // length: undefined
        0xFF, 0xFF, 0xFF, 0xFF,
        // 40: Item start
        0xFE, 0xFF, 0x00, 0xE0,
        // Item length undefined
        0xFF, 0xFF, 0xFF, 0xFF,
        // 48: (0008,1150) ReferencedSOPClassUID
        0x08, 0x00, 0x50, 0x11,
        // length: 26
        0x1a, 0x00, 0x00, 0x00,
        // Value: "1.2.840.10008.5.1.4.1.1.7\0" (SecondaryCaptureImageStorage)
        b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
        b'5', b'.', b'1', b'.', b'4', b'.', b'1', b'.', b'1', b'.', b'7', b'\0',
        // 82: Item End (ReferencedImageSequence)
        0xFE, 0xFF, 0x0D, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // 90: Sequence End (ReferencedImageSequence)
        0xFE, 0xFF, 0xDD, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // 98: Item End (ReferencedSeriesSequence)
        0xFE, 0xFF, 0x0D, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // 106: Sequence End (ReferencedSeriesSequence)
        0xFE, 0xFF, 0xDD, 0xE0,
        0x00, 0x00, 0x00, 0x00,
        // 114: (2050,0020) PresentationLUTShape (CS)
        0x50, 0x20, 0x20, 0x00,
        // length: 8
        0x08, 0x00, 0x00, 0x00,
        b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
        // 130: Sequence end
        0xFE, 0xFF, 0xDD, 0xE0,
        0x00, 0x00, 0x00, 0x00,
    ];

    let undefined = Length::UNDEFINED;
    let expected = vec![
        // private sequence and its defined-length item
        DataToken::SequenceStart {
            tag: Tag(0x2001, 0x9000),
            len: undefined,
        },
        DataToken::ItemStart { len: Length(114) },
        // ReferencedSeriesSequence
        DataToken::SequenceStart {
            tag: Tag(0x0008, 0x1115),
            len: undefined,
        },
        DataToken::ItemStart { len: undefined },
        // ReferencedImageSequence
        DataToken::SequenceStart {
            tag: Tag(0x0008, 0x1140),
            len: undefined,
        },
        DataToken::ItemStart { len: undefined },
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0008, 0x1150),
            VR::UI,
            Length(26),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.10008.5.1.4.1.1.7\0")),
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x2050, 0x0020),
            VR::CS,
            Length(8),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::from("IDENTITY")),
        // inserted automatically
        DataToken::ItemEnd,
        DataToken::SequenceEnd,
    ];

    validate_read_data_implicit_vr(DATA, expected);
}
1302
/// Check that `peek` returns the upcoming token without consuming it:
/// repeated peeks give the same token, the following `next` returns
/// that same token, and peeking past the last token gives `None`.
#[test]
fn peek_data_elements() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
        b'S', b'Q', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 --
        0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
        // -- 20 --
        0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
        b'T', b'E', b'S', b'T', // value = "TEST"
    ];

    let ground_truth = vec![
        DataToken::SequenceStart {
            tag: Tag(0x0018, 0x6011),
            len: Length::UNDEFINED,
        },
        DataToken::SequenceEnd,
        DataToken::ElementHeader(DataElementHeader {
            tag: Tag(0x0020, 0x4000),
            vr: VR::LT,
            len: Length(4),
        }),
        DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
    ];

    let mut cursor = DATA;
    let parser = StatefulDecoder::new(
        &mut cursor,
        ExplicitVRLittleEndianDecoder::default(),
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut dset_reader = DataSetReader::new(parser, Default::default());

    let iter = &mut dset_reader;

    // peek at first token
    let token = iter.peek().expect("should peek first token OK");
    assert_eq!(token, Some(&ground_truth[0]));

    // peeking multiple times gives the same result
    let token = iter.peek().expect("should peek first token again OK");
    assert_eq!(token, Some(&ground_truth[0]));

    // Using `next` gives us the same token
    let token = iter
        .next()
        .expect("expected token")
        .expect("should read token peeked OK");
    assert_eq!(&token, &ground_truth[0]);

    // read some more tokens

    // sequence end
    let token = iter.next().unwrap().unwrap();
    assert_eq!(&token, &ground_truth[1]);
    // data element header
    let token = iter.next().unwrap().unwrap();
    assert_eq!(&token, &ground_truth[2]);

    // peek string value
    let token = iter.peek().unwrap();
    assert_eq!(token, Some(&ground_truth[3]));
    // peek it again
    let token = iter.peek().unwrap();
    assert_eq!(token, Some(&ground_truth[3]));
    // then read it
    let token = iter.next().unwrap().unwrap();
    assert_eq!(&token, &ground_truth[3]);

    // finished reading, peek should return None
    assert!(iter.peek().unwrap().is_none());
}
1380
/// A pixel data sequence with a superfluous item delimiter
/// must make the reader yield an error instead of a full list of tokens.
#[test]
fn read_pixel_sequence_bad_item_end() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
        b'O', b'B', // VR
        0x00, 0x00, // reserved
        0xff, 0xff, 0xff, 0xff, // length: undefined
        // -- 12 --
        0xfe, 0xff, 0x00, 0xe0, // item start tag
        0x00, 0x00, 0x00, 0x00, // item length: 0
        // -- 20 --
        0xfe, 0xff, 0x0d, 0xe0, // item end
        0x00, 0x00, 0x00, 0x00, // length is always zero
        // -- 28 --
        0xfe, 0xff, 0x0d, 0xe0, // another item end (bad)
        0x00, 0x00, 0x00, 0x00, //
        // -- 36 --
        0xfe, 0xff, 0x00, 0xe0, // another item start
        0x00, 0x00, 0x00, 0x00, // item length: 0
    ];

    let mut cursor = DATA;
    let parser = StatefulDecoder::new(
        &mut cursor,
        ExplicitVRLittleEndianDecoder::default(),
        LittleEndianBasicDecoder,
        SpecificCharacterSet::default(),
    );
    let mut dset_reader = DataSetReader::new(parser, Default::default());

    // collecting into a `Result` stops at the first error found
    let token_res = (&mut dset_reader).collect::<Result<Vec<_>, _>>();
    // report what was read in the failure message
    // instead of unconditionally printing it
    assert!(
        token_res.is_err(),
        "expected an error reading the pixel sequence, got: {token_res:?}"
    );
}
1416
/// Read an element declaring an odd length (11) under two strategies:
/// `NextEven` reads it as an element of 12 bytes (padding included),
/// whereas `Fail` reports an `InvalidElementLength` error.
#[test]
fn read_odd_length_element() {
    #[rustfmt::skip]
    static DATA: &[u8] = &[
        // (0008,0016) SOPClassUID
        0x08, 0x00, 0x16, 0x00,
        // VR
        b'U', b'I',
        // len = 11
        0x0b, 0x00,
        b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0',
        // padding
        0x00,
    ];

    // --- strategy: assume next even ---

    let expected = vec![
        DataToken::ElementHeader(DataElementHeader::new(
            Tag(0x0008, 0x0016),
            VR::UI,
            Length(12),
        )),
        DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.100\0")),
    ];

    let mut cursor = DATA;
    let lenient_reader = DataSetReader::new(
        StatefulDecoder::new(
            &mut cursor,
            ExplicitVRLittleEndianDecoder::default(),
            LittleEndianBasicDecoder,
            SpecificCharacterSet::default(),
        ),
        DataSetReaderOptions {
            odd_length: OddLengthStrategy::NextEven,
            ..Default::default()
        },
    );

    validate_data_set_reader(DATA, lenient_reader, expected);

    // --- strategy: fail ---

    let mut cursor = DATA;
    let mut strict_reader = DataSetReader::new(
        StatefulDecoder::new(
            &mut cursor,
            ExplicitVRLittleEndianDecoder::default(),
            LittleEndianBasicDecoder,
            SpecificCharacterSet::default(),
        ),
        DataSetReaderOptions {
            odd_length: OddLengthStrategy::Fail,
            ..Default::default()
        },
    );

    // the very first token must already be the error
    let token = strict_reader.next();

    assert!(
        matches!(
            token,
            Some(Err(super::Error::InvalidElementLength {
                tag: Tag(0x0008, 0x0016),
                len: 11,
                bytes_read: 8,
            })),
        ),
        "got: {:?}",
        token
    );
}
1489}