Skip to main content

dicom_parser/dataset/
read.rs

1//! This module contains a mid-level abstraction for reading DICOM content
2//! sequentially.
3//!
4//! The rest of the crate is used to obtain DICOM element headers and values.
5//! At this level, headers and values are treated as tokens which can be used
6//! to form a syntax tree of a full data set.
7use crate::stateful::decode::{
8    CharacterSetOverride, DynStatefulDecoder, Error as DecoderError, StatefulDecode,
9    StatefulDecoder,
10};
11use dicom_core::header::{DataElementHeader, Header, Length, SequenceItemHeader};
12use dicom_core::{PrimitiveValue, Tag, VR};
13use dicom_encoding::decode::adaptive_le::StandardAdaptiveVRLittleEndianDecoder;
14use dicom_encoding::text::SpecificCharacterSet;
15use dicom_encoding::transfer_syntax::{DynDecoder, TransferSyntax};
16use snafu::{Backtrace, ResultExt, Snafu};
17use std::cmp::Ordering;
18use std::io::Read;
19
20use super::{DataToken, SeqTokenType};
21
22fn is_stateful_decode<T>(_: &T)
23where
24    T: StatefulDecode,
25{
26}
27
/// The errors which may be raised while reading a data set token by token.
///
/// Variants holding a `source` wrap an error of the underlying
/// stateful decoder ([`DecoderError`]);
/// the other variants describe structural problems
/// detected by the data set reader itself.
#[derive(Debug, Snafu)]
#[non_exhaustive]
pub enum Error {
    // raised when the stateful decoder cannot be built
    // for the requested transfer syntax
    #[snafu(display("Could not create decoder"))]
    CreateDecoder {
        #[snafu(backtrace)]
        source: DecoderError,
    },
    // raised when decoding a sequence item header
    // (item, item delimiter or sequence delimiter) fails
    #[snafu(display("Could not read item header"))]
    ReadItemHeader {
        #[snafu(backtrace)]
        source: DecoderError,
    },
    // raised when decoding a data element header fails
    #[snafu(display("Could not read element header"))]
    ReadHeader {
        #[snafu(backtrace)]
        source: DecoderError,
    },
    // raised when the value of a plain data element cannot be read;
    // `len` and `tag` are taken from the element's header
    #[snafu(display("Could not read {} value bytes for element tagged {}", len, tag))]
    ReadValue {
        len: u32,
        tag: Tag,
        #[snafu(backtrace)]
        source: DecoderError,
    },
    // raised when the value of a pixel data item
    // (offset table or fragment) cannot be read
    #[snafu(display("Could not read {} bytes for item value", len))]
    ReadItemValue {
        len: u32,
        #[snafu(backtrace)]
        source: DecoderError,
    },
    // raised when the reader is already past the position at which
    // a sequence or item of explicit length was expected to end
    #[snafu(display(
        "Inconsistent sequence end: expected end at {} bytes but read {}",
        end_of_sequence,
        bytes_read
    ))]
    InconsistentSequenceEnd {
        end_of_sequence: u64,
        bytes_read: u64,
        backtrace: Backtrace,
    },
    // raised when something other than an item or a sequence delimiter
    // is found right after the start of encapsulated pixel data
    #[snafu(display("Unexpected item tag {} while reading element header", tag))]
    UnexpectedItemTag { tag: Tag, backtrace: Backtrace },
    // raised when an item header is read
    // while no sequence is open in the reader
    #[snafu(display(
        "Unexpected item header outside a dataset sequence at {:#x}",
        bytes_read
    ))]
    UnexpectedItemHeader {
        bytes_read: u64,
        backtrace: Backtrace,
    },
    // NOTE(review): the three variants below have no `display` attribute,
    // so their doc comments presumably double as the error messages
    // (including the `{...}` placeholders) -- confirm against snafu
    // before rewording them.
    /// Undefined pixel data item length
    UndefinedItemLength,
    /// Invalid data element length {len:04X} of {tag} at {bytes_read:#x}
    InvalidElementLength { tag: Tag, len: u32, bytes_read: u64 },
    /// Invalid sequence item length {len:04X} at {bytes_read:#x}
    InvalidItemLength { len: u32, bytes_read: u64 },
}
86
/// The result type of data set reading operations,
/// with this module's [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
88
/// A reader-specific token representing a sequence or item start.
///
/// The reader keeps these tokens in a stack of open sequences and items.
/// When the length is defined,
/// the sequence or item is closed once the parser position reaches
/// `base_offset + len`.
#[derive(Debug, Copy, Clone, PartialEq)]
struct SeqToken {
    /// Whether it is the start of a sequence or the start of an item.
    typ: SeqTokenType,
    /// The length of the value, as indicated by the starting element.
    /// It can be undefined (unknown).
    len: Length,
    /// Whether this sequence token is part of an encapsulated pixel data.
    pixel_data: bool,
    /// The number of bytes the parser has read until it reached the
    /// beginning of the sequence or item value data.
    base_offset: u64,
}
103
/// The value reading strategy for the data set reader.
///
/// It defines how the `PrimitiveValue`s in value tokens are constructed.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
pub enum ValueReadStrategy {
    /// Textual values will be decoded according to their value representation.
    ///
    /// Word-sized binary values are read according to
    /// the expected byte order.
    /// Dates, times, and date-times (DA, DT, TM) are parsed
    /// into their more specific variants,
    /// leading to parser failure if they are not valid DICOM.
    /// String numbers (IS, DS) are also converted into binary representations.
    /// For the case of floats, this may introduce precision errors.
    Interpreted,
    /// Values will be stored without decoding dates or textual numbers.
    ///
    /// Word-sized binary values are read according to
    /// the expected byte order.
    /// Date-time values and numbers are kept in their original string
    /// representation as string objects.
    /// All text is still decoded into Rust string values,
    /// in accordance with the standard,
    /// unless its value representation is unknown to the decoder.
    ///
    /// This is the default strategy.
    #[default]
    Preserved,
    /// All primitive values are fetched as raw byte buffers,
    /// without any form of decoding or interpretation.
    /// Not even byte order conversions are made.
    ///
    /// This strategy is not recommended,
    /// as it makes the retrieval of important textual data more difficult.
    Raw,
}
140
/// A strategy for when the parser finds a data element with an odd number
/// in the _length_ header field.
///
/// Undefined lengths are never subjected to this strategy.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum OddLengthStrategy {
    /// Accept elements with an odd length as is,
    /// continuing data set reading normally.
    ///
    /// This is the default strategy.
    #[default]
    Accept,
    /// Assume that the real length is `length + 1`,
    /// as in the next even number.
    NextEven,
    /// Raise an error instead
    /// (an invalid element length or invalid item length error).
    Fail,
}
156
/// The set of options for the data set reader.
///
/// The type is non-exhaustive:
/// start from `DataSetReaderOptions::default()`
/// and adjust the options through the builder-style methods
/// or by assigning to the public fields.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub struct DataSetReaderOptions {
    /// The value reading strategy
    pub value_read: ValueReadStrategy,

    /// Whether to assume a different character set
    /// depending on certain conditions
    pub charset_override: CharacterSetOverride,

    /// The strategy for handling odd length data elements
    pub odd_length: OddLengthStrategy,
    /// The position of the reader as received at building time in bytes.
    /// Defaults to 0.
    pub base_offset: u64,

    /// When enabled, the decoder will probe the first non-meta element
    /// to determine whether the dataset actually uses explicit or implicit VR,
    /// regardless of what the transfer syntax declares.
    ///
    /// This handles non-conformant files that declare Explicit VR Little Endian
    /// but actually encode the dataset in Implicit VR.
    /// When building a reader from a transfer syntax specifier,
    /// the probing decoder is only selected for little endian transfer syntaxes.
    /// Defaults to `false`.
    pub flexible_decoding: bool,
}
183
184impl DataSetReaderOptions {
185    /// Replace the value reading strategy of the options.
186    pub fn value_read(mut self, value_read: ValueReadStrategy) -> Self {
187        self.value_read = value_read;
188        self
189    }
190    /// Replace the base reader offset of the options.
191    pub fn base_offset(mut self, base_offset: u64) -> Self {
192        self.base_offset = base_offset;
193        self
194    }
195    /// Enable or disable flexible VR decoding.
196    pub fn flexible_decoding(mut self, flexible_decoding: bool) -> Self {
197        self.flexible_decoding = flexible_decoding;
198        self
199    }
200}
201
/// A higher-level reader for retrieving structure in a DICOM data set from an
/// arbitrary data source.
///
/// The reader is an iterator of data tokens
/// when `S` is a stateful decoder.
/// The fields below make up the state machine
/// which decides what is expected next from the source.
#[derive(Debug)]
pub struct DataSetReader<S> {
    /// the stateful decoder
    parser: S,
    /// the options of this reader
    options: DataSetReaderOptions,
    /// whether the reader is expecting an item header next (or a sequence delimiter)
    in_sequence: bool,
    /// whether the reader is expecting the first item value of a pixel sequence next
    /// (offset table)
    offset_table_next: bool,
    /// whether a check for a sequence or item delimitation is pending
    /// (only relevant for sequences and items of explicit length)
    delimiter_check_pending: bool,
    /// a stack of delimiters, one per sequence or item currently open,
    /// innermost last
    seq_delimiters: Vec<SeqToken>,
    /// fuse the iteration process if true
    hard_break: bool,
    /// last decoded header, kept until its value
    /// (or pixel data sequence) is read in the next step
    last_header: Option<DataElementHeader>,
    /// if a peek was taken, this holds the token peeked
    peek: Option<DataToken>,
}
226
227impl<R> DataSetReader<DynStatefulDecoder<R>> {
228    /// Create a new data set token reader with the given byte source,
229    /// while considering the given transfer syntax specifier.
230    #[inline]
231    pub fn new_with_ts(source: R, ts: &TransferSyntax) -> Result<Self>
232    where
233        R: Read,
234    {
235        Self::new_with_ts_cs_options(source, ts, Default::default(), Default::default())
236    }
237
238    /// Create a new iterator with the given transfer syntax and options.
239    #[inline]
240    pub fn new_with_ts_options(
241        source: R,
242        ts: &TransferSyntax,
243        options: DataSetReaderOptions,
244    ) -> Result<Self>
245    where
246        R: Read,
247    {
248        Self::new_with_ts_cs_options(source, ts, SpecificCharacterSet::default(), options)
249    }
250
251    /// Create a new data set token reader with the given byte source,
252    /// while considering the given transfer syntax specifier
253    /// and the specific character set to assume by default.
254    ///
255    /// Note that the data set being read
256    /// can override the character set with the presence of a
257    /// _Specific Character Set_ data element.
258    #[inline]
259    pub fn new_with_ts_cs(source: R, ts: &TransferSyntax, cs: SpecificCharacterSet) -> Result<Self>
260    where
261        R: Read,
262    {
263        Self::new_with_ts_cs_options(source, ts, cs, Default::default())
264    }
265
266    /// Create a new iterator with the given stateful decoder and options.
267    pub fn new_with_ts_cs_options(
268        source: R,
269        ts: &TransferSyntax,
270        cs: SpecificCharacterSet,
271        options: DataSetReaderOptions,
272    ) -> Result<Self>
273    where
274        R: Read,
275    {
276        let parser = if options.flexible_decoding
277            && ts.endianness() == dicom_encoding::Endianness::Little
278        {
279            let basic = ts.basic_decoder();
280            let decoder: DynDecoder<R> = Box::<StandardAdaptiveVRLittleEndianDecoder>::default();
281            StatefulDecoder::new_with_all_options(
282                source,
283                decoder,
284                basic,
285                cs,
286                options.charset_override,
287                0,
288            )
289        } else {
290            DynStatefulDecoder::new_with_override(source, ts, cs, options.charset_override, 0)
291                .context(CreateDecoderSnafu)?
292        };
293
294        is_stateful_decode(&parser);
295
296        Ok(DataSetReader {
297            parser,
298            options,
299            seq_delimiters: Vec::new(),
300            delimiter_check_pending: false,
301            offset_table_next: false,
302            in_sequence: false,
303            hard_break: false,
304            last_header: None,
305            peek: None,
306        })
307    }
308}
309
310impl<S> DataSetReader<S> {
311    /// Create a new iterator with the given stateful decoder and options.
312    pub fn new(decoder: S, options: DataSetReaderOptions) -> Self {
313        DataSetReader {
314            parser: decoder,
315            options,
316            seq_delimiters: Vec::new(),
317            delimiter_check_pending: false,
318            offset_table_next: false,
319            in_sequence: false,
320            hard_break: false,
321            last_header: None,
322            peek: None,
323        }
324    }
325}
326
327impl<S> Iterator for DataSetReader<S>
328where
329    S: StatefulDecode,
330{
331    type Item = Result<DataToken>;
332
333    fn next(&mut self) -> Option<Self::Item> {
334        loop {
335            if self.hard_break {
336                return None;
337            }
338            // if there was a peek, consume peeked token
339            if let Some(token) = self.peek.take() {
340                return Some(Ok(token));
341            }
342
343            // item or sequence delimitation logic for explicit lengths
344            if self.delimiter_check_pending {
345                match self.update_seq_delimiters() {
346                    Err(e) => {
347                        self.hard_break = true;
348                        return Some(Err(e));
349                    }
350                    Ok(Some(token)) => return Some(Ok(token)),
351                    Ok(None) => { /* no-op */ }
352                }
353            }
354
355            // This will always return unless we do a `continue`.
356            return if self.in_sequence {
357                // at sequence level, expecting item header
358
359                match self.parser.decode_item_header() {
360                    Ok(header) => {
361                        match header {
362                            SequenceItemHeader::Item { len } => {
363                                let len = match self.sanitize_length(len) {
364                                    Some(len) => len,
365                                    None => {
366                                        return Some(
367                                            InvalidItemLengthSnafu {
368                                                bytes_read: self.parser.position(),
369                                                len: len.0,
370                                            }
371                                            .fail(),
372                                        );
373                                    }
374                                };
375                                // entered a new item
376                                self.in_sequence = false;
377
378                                let last_delimiter = match self.seq_delimiters.last() {
379                                    Some(d) => d,
380                                    None => {
381                                        return Some(
382                                            UnexpectedItemHeaderSnafu {
383                                                bytes_read: self.parser.position(),
384                                            }
385                                            .fail(),
386                                        );
387                                    }
388                                };
389                                self.push_sequence_token(
390                                    SeqTokenType::Item,
391                                    len,
392                                    last_delimiter.pixel_data,
393                                );
394                                // items can be empty
395                                if len == Length(0) {
396                                    self.delimiter_check_pending = true;
397                                }
398                                Some(Ok(DataToken::ItemStart { len }))
399                            }
400                            SequenceItemHeader::ItemDelimiter => {
401                                // closed an item
402                                self.seq_delimiters.pop();
403                                self.in_sequence = true;
404                                // sequences can end after an item delimiter
405                                self.delimiter_check_pending = true;
406                                Some(Ok(DataToken::ItemEnd))
407                            }
408                            SequenceItemHeader::SequenceDelimiter => {
409                                // closed a sequence
410                                self.seq_delimiters.pop();
411                                self.in_sequence = false;
412                                // items can end after a nested sequence ends
413                                self.delimiter_check_pending = true;
414                                Some(Ok(DataToken::SequenceEnd))
415                            }
416                        }
417                    }
418                    Err(DecoderError::DecodeItemHeader {
419                        source: dicom_encoding::decode::Error::ReadItemHeader { source, .. },
420                        ..
421                    }) if source.kind() == std::io::ErrorKind::UnexpectedEof
422                        && self.seq_delimiters.pop().is_some_and(|t| t.pixel_data) =>
423                    {
424                        // Note: if `UnexpectedEof` was reached while inside a
425                        // PixelData Sequence, then we assume that
426                        // the end of a DICOM object was reached gracefully.
427                        self.hard_break = true;
428                        None
429                    }
430                    Err(e) => {
431                        self.hard_break = true;
432                        Some(Err(e).context(ReadItemHeaderSnafu))
433                    }
434                }
435            } else if let Some(SeqToken {
436                typ: SeqTokenType::Item,
437                pixel_data: true,
438                len,
439                ..
440            }) = self.seq_delimiters.last()
441            {
442                let len = match len.get() {
443                    Some(len) => len as usize,
444                    None => return Some(UndefinedItemLengthSnafu.fail()),
445                };
446
447                if self.offset_table_next {
448                    // offset table
449                    let mut offset_table = Vec::with_capacity(len);
450
451                    self.offset_table_next = false;
452
453                    // need to pop item delimiter on the next iteration
454                    self.delimiter_check_pending = true;
455
456                    Some(
457                        match self.parser.read_u32_to_vec(len as u32, &mut offset_table) {
458                            Ok(()) => Ok(DataToken::OffsetTable(offset_table)),
459                            Err(e) => Err(e).context(ReadItemValueSnafu { len: len as u32 }),
460                        },
461                    )
462                } else {
463                    // item value
464                    let mut value = Vec::with_capacity(len);
465
466                    // need to pop item delimiter on the next iteration
467                    self.delimiter_check_pending = true;
468                    Some(
469                        self.parser
470                            .read_to_vec(len as u32, &mut value)
471                            .map(|_| Ok(DataToken::ItemValue(value)))
472                            .unwrap_or_else(|e| {
473                                Err(e).context(ReadItemValueSnafu { len: len as u32 })
474                            }),
475                    )
476                }
477            } else if let Some(header) = self.last_header {
478                if header.is_encapsulated_pixeldata() {
479                    self.push_sequence_token(SeqTokenType::Sequence, Length::UNDEFINED, true);
480                    self.last_header = None;
481
482                    // encapsulated pixel data, expecting offset table
483                    match self.parser.decode_item_header() {
484                        Ok(header) => match header {
485                            SequenceItemHeader::Item { len } => {
486                                let len = match self.sanitize_length(len) {
487                                    Some(len) => len,
488                                    None => {
489                                        return Some(
490                                            InvalidItemLengthSnafu {
491                                                bytes_read: self.parser.position(),
492                                                len: len.0,
493                                            }
494                                            .fail(),
495                                        );
496                                    }
497                                };
498
499                                // entered a new item
500                                self.in_sequence = false;
501                                self.push_sequence_token(SeqTokenType::Item, len, true);
502                                // items can be empty
503                                if len == Length(0) {
504                                    self.delimiter_check_pending = true;
505                                } else {
506                                    self.offset_table_next = true;
507                                }
508                                Some(Ok(DataToken::ItemStart { len }))
509                            }
510                            SequenceItemHeader::SequenceDelimiter => {
511                                // empty pixel data
512                                self.seq_delimiters.pop();
513                                self.in_sequence = false;
514                                Some(Ok(DataToken::SequenceEnd))
515                            }
516                            item => {
517                                self.hard_break = true;
518                                Some(UnexpectedItemTagSnafu { tag: item.tag() }.fail())
519                            }
520                        },
521                        Err(e) => {
522                            self.hard_break = true;
523                            Some(Err(e).context(ReadItemHeaderSnafu))
524                        }
525                    }
526                } else {
527                    // a plain element header was read, so a value is expected
528                    let value = match self.read_value(&header) {
529                        Ok(v) => v,
530                        Err(e) => {
531                            self.hard_break = true;
532                            self.last_header = None;
533                            return Some(Err(e));
534                        }
535                    };
536
537                    self.last_header = None;
538
539                    // sequences can end after this token
540                    self.delimiter_check_pending = true;
541
542                    Some(Ok(DataToken::PrimitiveValue(value)))
543                }
544            } else {
545                // a data element header or item delimiter is expected
546                match self.parser.decode_header() {
547                    Ok(DataElementHeader {
548                        tag,
549                        vr: VR::SQ,
550                        len,
551                    }) => {
552                        let len = match self.sanitize_length(len) {
553                            Some(len) => len,
554                            None => {
555                                return Some(
556                                    InvalidElementLengthSnafu {
557                                        tag,
558                                        len: len.0,
559                                        bytes_read: self.parser.position(),
560                                    }
561                                    .fail(),
562                                );
563                            }
564                        };
565
566                        self.in_sequence = true;
567                        self.push_sequence_token(SeqTokenType::Sequence, len, false);
568
569                        // sequences can end right after they start
570                        if len == Length(0) {
571                            self.delimiter_check_pending = true;
572                        }
573
574                        Some(Ok(DataToken::SequenceStart { tag, len }))
575                    }
576                    Ok(DataElementHeader {
577                        tag: Tag(0xFFFE, 0xE00D),
578                        ..
579                    }) if self.seq_delimiters.is_empty() => {
580                        // ignore delimiter, we are not in a sequence
581                        tracing::warn!(
582                            "Item delimitation item outside of a sequence in position {}",
583                            self.parser.position()
584                        );
585                        // return a new token by repeating the method again
586                        continue;
587                    }
588                    Ok(DataElementHeader {
589                        tag: Tag(0xFFFE, 0xE00D),
590                        ..
591                    }) => {
592                        self.in_sequence = true;
593                        // pop item delimiter
594                        self.seq_delimiters.pop();
595                        // sequences can end after this token
596                        self.delimiter_check_pending = true;
597                        Some(Ok(DataToken::ItemEnd))
598                    }
599                    Ok(header) if header.is_encapsulated_pixeldata() => {
600                        // encapsulated pixel data conditions:
601                        // expect a sequence of pixel data fragments
602
603                        // save it for the next step
604                        self.last_header = Some(header);
605                        Some(Ok(DataToken::PixelSequenceStart))
606                    }
607                    Ok(header) if header.len.is_undefined() => {
608                        // treat other undefined length elements
609                        // as data set sequences,
610                        // discarding the VR in the process
611                        self.in_sequence = true;
612
613                        let DataElementHeader { tag, len, .. } = header;
614                        self.push_sequence_token(SeqTokenType::Sequence, len, false);
615
616                        Some(Ok(DataToken::SequenceStart { tag, len }))
617                    }
618                    Ok(mut header) => {
619                        match self.sanitize_length(header.len) {
620                            Some(len) => header.len = len,
621                            None => {
622                                return Some(
623                                    InvalidElementLengthSnafu {
624                                        tag: header.tag,
625                                        len: header.len.0,
626                                        bytes_read: self.parser.position(),
627                                    }
628                                    .fail(),
629                                );
630                            }
631                        };
632
633                        // save it for the next step
634                        self.last_header = Some(header);
635                        Some(Ok(DataToken::ElementHeader(header)))
636                    }
637                    Err(DecoderError::DecodeElementHeader {
638                        source: dicom_encoding::decode::Error::ReadHeaderTag { source, .. },
639                        ..
640                    }) if source.kind() == std::io::ErrorKind::UnexpectedEof => {
641                        // Note: if `UnexpectedEof` was reached while trying to read
642                        // an element tag, then we assume that
643                        // the end of a DICOM object was reached gracefully.
644                        // This approach is unlikely to consume trailing bytes,
645                        // but may ignore the current depth of the data set tree.
646                        self.hard_break = true;
647                        None
648                    }
649                    Err(e) => {
650                        self.hard_break = true;
651                        Some(Err(e).context(ReadHeaderSnafu))
652                    }
653                }
654            };
655        }
656    }
657}
658
659impl<S> DataSetReader<S>
660where
661    S: StatefulDecode,
662{
663    /// Peek the next token from the source by
664    /// reading a new token in the first call.
665    /// Subsequent calls to `peek` will return the same token
666    /// until another consumer method (such as `Iterator::next`)
667    /// is called.
668    pub fn peek(&mut self) -> Result<Option<&DataToken>> {
669        if self.peek.is_none() {
670            // try to read the next token
671            match self.next() {
672                None => return Ok(None),
673                Some(Err(e)) => return Err(e),
674                Some(Ok(token)) => {
675                    self.peek = Some(token);
676                }
677            }
678        }
679        Ok(self.peek.as_ref())
680    }
681
682    fn update_seq_delimiters(&mut self) -> Result<Option<DataToken>> {
683        if let Some(sd) = self.seq_delimiters.last() {
684            if let Some(len) = sd.len.get() {
685                let end_of_sequence = sd.base_offset + len as u64;
686                let bytes_read = self.parser.position();
687                match end_of_sequence.cmp(&bytes_read) {
688                    Ordering::Equal => {
689                        // end of delimiter, as indicated by the element's length
690                        let token;
691                        match sd.typ {
692                            SeqTokenType::Sequence => {
693                                self.in_sequence = false;
694                                token = DataToken::SequenceEnd;
695                            }
696                            SeqTokenType::Item => {
697                                self.in_sequence = true;
698                                token = DataToken::ItemEnd;
699                            }
700                        }
701                        self.seq_delimiters.pop();
702                        return Ok(Some(token));
703                    }
704                    Ordering::Less => {
705                        return InconsistentSequenceEndSnafu {
706                            end_of_sequence,
707                            bytes_read,
708                        }
709                        .fail();
710                    }
711                    Ordering::Greater => {} // continue normally
712                }
713            }
714        }
715        self.delimiter_check_pending = false;
716        Ok(None)
717    }
718
719    #[inline]
720    fn push_sequence_token(&mut self, typ: SeqTokenType, len: Length, pixel_data: bool) {
721        self.seq_delimiters.push(SeqToken {
722            typ,
723            pixel_data,
724            len,
725            base_offset: self.parser.position(),
726        })
727    }
728
729    fn read_value(&mut self, header: &DataElementHeader) -> Result<PrimitiveValue> {
730        match self.options.value_read {
731            ValueReadStrategy::Interpreted => self.parser.read_value(header),
732            ValueReadStrategy::Preserved => self.parser.read_value_preserved(header),
733            ValueReadStrategy::Raw => self.parser.read_value_bytes(header),
734        }
735        .context(ReadValueSnafu {
736            len: header.len.0,
737            tag: header.tag,
738        })
739    }
740
741    /// Check for a non-compliant length
742    /// and handle it according to the current strategy.
743    /// Returns `None` if the length cannot or should not be resolved.
744    fn sanitize_length(&self, length: Length) -> Option<Length> {
745        if length.is_defined() && length.0 & 1 != 0 {
746            match self.options.odd_length {
747                OddLengthStrategy::Accept => Some(length),
748                OddLengthStrategy::NextEven => Some(length + 1),
749                OddLengthStrategy::Fail => None,
750            }
751        } else {
752            Some(length)
753        }
754    }
755}
756
757#[cfg(test)]
758mod tests {
759    use super::{DataSetReader, DataToken, StatefulDecode};
760    use crate::dataset::read::{DataSetReaderOptions, OddLengthStrategy};
761    use crate::stateful::decode::StatefulDecoder;
762    use dicom_core::header::{DataElementHeader, Length};
763    use dicom_core::value::PrimitiveValue;
764    use dicom_core::{Tag, VR};
765    use dicom_encoding::decode::basic::LittleEndianBasicDecoder;
766    use dicom_encoding::decode::{
767        explicit_le::ExplicitVRLittleEndianDecoder, implicit_le::ImplicitVRLittleEndianDecoder,
768    };
769    use dicom_encoding::text::SpecificCharacterSet;
770
771    fn validate_read_data_implicit_vr<I>(data: &[u8], ground_truth: I)
772    where
773        I: IntoIterator<Item = DataToken>,
774    {
775        let mut cursor = data;
776        let parser = StatefulDecoder::new(
777            &mut cursor,
778            ImplicitVRLittleEndianDecoder::default(),
779            LittleEndianBasicDecoder,
780            SpecificCharacterSet::default(),
781        );
782
783        validate_read_data(data, parser, ground_truth)
784    }
785
786    fn validate_read_data_explicit_vr<I>(data: &[u8], ground_truth: I)
787    where
788        I: IntoIterator<Item = DataToken>,
789    {
790        let mut cursor = data;
791        let parser = StatefulDecoder::new(
792            &mut cursor,
793            ExplicitVRLittleEndianDecoder::default(),
794            LittleEndianBasicDecoder,
795            SpecificCharacterSet::default(),
796        );
797
798        validate_read_data(data, parser, ground_truth)
799    }
800
801    fn validate_read_data<I, D>(data: &[u8], parser: D, ground_truth: I)
802    where
803        I: IntoIterator<Item = DataToken>,
804        D: StatefulDecode,
805    {
806        let dset_reader = DataSetReader::new(parser, Default::default());
807        validate_data_set_reader(data, dset_reader, ground_truth);
808    }
809
810    fn validate_data_set_reader<S, I>(
811        data: &[u8],
812        mut dset_reader: DataSetReader<S>,
813        ground_truth: I,
814    ) where
815        S: StatefulDecode,
816        I: IntoIterator<Item = DataToken>,
817    {
818        let iter = &mut dset_reader;
819        let ground_truth = ground_truth.into_iter();
820
821        for gt_token in ground_truth {
822            let token = iter
823                .next()
824                .expect("expecting more tokens from reader")
825                .expect("should fetch the next token without an error");
826            eprintln!("Next token: {token:2?} ; Expected: {gt_token:2?}");
827            assert_eq!(
828                token, gt_token,
829                "Got token {token:2?} ; but expected {gt_token:2?}"
830            );
831        }
832
833        let extra: Vec<_> = iter.collect();
834        assert_eq!(
835            extra.len(), // we have already read all of them
836            0,
837            "extraneous tokens remaining: {extra:?}",
838        );
839        assert_eq!(
840            dset_reader.parser.position(),
841            data.len() as u64,
842            "Decoder position did not match end of data",
843        );
844    }
845
846    #[test]
847    fn read_sequence_explicit() {
848        #[rustfmt::skip]
849        static DATA: &[u8] = &[
850            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
851            b'S', b'Q', // VR
852            0x00, 0x00, // reserved
853            0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (#= 2)
854            // -- 12 --
855            0xfe, 0xff, 0x00, 0xe0, // item start tag
856            0x14, 0x00, 0x00, 0x00, // item length: 20 (#= 2)
857            // -- 20 --
858            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1
859            // -- 30 --
860            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2
861            // -- 40 --
862            0xfe, 0xff, 0x00, 0xe0, // item start tag
863            0x0a, 0x00, 0x00, 0x00, // item length: 10 (#= 1)
864            // -- 48 --
865            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4
866            // -- 58 --
867            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
868            b'T', b'E', b'S', b'T', // value = "TEST"
869        ];
870
871        let ground_truth = vec![
872            DataToken::SequenceStart {
873                tag: Tag(0x0018, 0x6011),
874                len: Length(46),
875            },
876            DataToken::ItemStart { len: Length(20) },
877            DataToken::ElementHeader(DataElementHeader {
878                tag: Tag(0x0018, 0x6012),
879                vr: VR::US,
880                len: Length(2),
881            }),
882            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
883            DataToken::ElementHeader(DataElementHeader {
884                tag: Tag(0x0018, 0x6014),
885                vr: VR::US,
886                len: Length(2),
887            }),
888            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
889            DataToken::ItemEnd,
890            DataToken::ItemStart { len: Length(10) },
891            DataToken::ElementHeader(DataElementHeader {
892                tag: Tag(0x0018, 0x6012),
893                vr: VR::US,
894                len: Length(2),
895            }),
896            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
897            DataToken::ItemEnd,
898            DataToken::SequenceEnd,
899            DataToken::ElementHeader(DataElementHeader {
900                tag: Tag(0x0020, 0x4000),
901                vr: VR::LT,
902                len: Length(4),
903            }),
904            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
905        ];
906
907        validate_read_data_explicit_vr(DATA, ground_truth);
908    }
909
910    #[test]
911    fn read_sequence_explicit_2() {
912        static DATA: &[u8] = &[
913            // SequenceStart: (0008,2218) ; len = 54 (#=3)
914            0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
915            // -- 12, --
916            // ItemStart: len = 46
917            0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
918            // -- 20, --
919            // ElementHeader: (0008,0100) CodeValue; len = 8
920            0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00, // PrimitiveValue
921            0x54, 0x2d, 0x44, 0x31, 0x32, 0x31, 0x33, b' ',
922            // -- 36, --
923            // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4
924            0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00, // PrimitiveValue
925            0x53, 0x52, 0x54, b' ',
926            // -- 48, --
927            // (0008,0104) CodeMeaning; len = 10
928            0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00, // PrimitiveValue
929            0x4a, 0x61, 0x77, b' ', 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e,
930            // -- 66 --
931            // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0
932            0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
933            // ElementHeader: (2050,0020) PresentationLUTShape; len = 8
934            0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00, // PrimitiveValue
935            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
936        ];
937
938        let ground_truth = vec![
939            DataToken::SequenceStart {
940                tag: Tag(0x0008, 0x2218),
941                len: Length(54),
942            },
943            DataToken::ItemStart { len: Length(46) },
944            DataToken::ElementHeader(DataElementHeader {
945                tag: Tag(0x0008, 0x0100),
946                vr: VR::SH,
947                len: Length(8),
948            }),
949            DataToken::PrimitiveValue(PrimitiveValue::Strs(
950                ["T-D1213 ".to_owned()].as_ref().into(),
951            )),
952            DataToken::ElementHeader(DataElementHeader {
953                tag: Tag(0x0008, 0x0102),
954                vr: VR::SH,
955                len: Length(4),
956            }),
957            DataToken::PrimitiveValue(PrimitiveValue::Strs(["SRT ".to_owned()].as_ref().into())),
958            DataToken::ElementHeader(DataElementHeader {
959                tag: Tag(0x0008, 0x0104),
960                vr: VR::LO,
961                len: Length(10),
962            }),
963            DataToken::PrimitiveValue(PrimitiveValue::Strs(
964                ["Jaw region".to_owned()].as_ref().into(),
965            )),
966            DataToken::ItemEnd,
967            DataToken::SequenceEnd,
968            DataToken::SequenceStart {
969                tag: Tag(0x0040, 0x0555),
970                len: Length(0),
971            },
972            DataToken::SequenceEnd,
973            DataToken::ElementHeader(DataElementHeader {
974                tag: Tag(0x2050, 0x0020),
975                vr: VR::CS,
976                len: Length(8),
977            }),
978            DataToken::PrimitiveValue(PrimitiveValue::Strs(
979                ["IDENTITY".to_owned()].as_ref().into(),
980            )),
981        ];
982
983        validate_read_data_explicit_vr(DATA, ground_truth);
984    }
985
986    #[test]
987    fn read_empty_sequence_explicit() {
988        static DATA: &[u8] = &[
989            // SequenceStart: (0008,1032) ProcedureCodeSequence ; len = 0
990            0x08, 0x00, 0x18, 0x22, // VR: SQ
991            b'S', b'Q', // Reserved
992            0x00, 0x00, // Length: 0
993            0x00, 0x00, 0x00, 0x00,
994        ];
995
996        let ground_truth = vec![
997            DataToken::SequenceStart {
998                tag: Tag(0x0008, 0x2218),
999                len: Length(0),
1000            },
1001            DataToken::SequenceEnd,
1002        ];
1003
1004        validate_read_data_explicit_vr(DATA, ground_truth);
1005    }
1006
1007    /// Gracefully ignore a stray item end tag in the data set.
1008    #[test]
1009    fn ignore_trailing_item_delimitation_item() {
1010        static DATA: &[u8] = &[
1011            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04,
1012            0x00, // (0020,4000) ImageComments, len = 4
1013            b'T', b'E', b'S', b'T', // value = "TEST"
1014            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
1015        ];
1016
1017        let ground_truth = vec![
1018            DataToken::ElementHeader(DataElementHeader {
1019                tag: Tag(0x0020, 0x4000),
1020                vr: VR::LT,
1021                len: Length(4),
1022            }),
1023            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
1024            // no item end
1025        ];
1026
1027        validate_read_data_explicit_vr(DATA, ground_truth);
1028    }
1029
1030    #[test]
1031    fn read_sequence_implicit() {
1032        #[rustfmt::skip]
1033        static DATA: &[u8] = &[
1034            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
1035            b'S', b'Q', // VR
1036            0x00, 0x00, // reserved
1037            0xff, 0xff, 0xff, 0xff, // length: undefined
1038            // -- 12 --
1039            0xfe, 0xff, 0x00, 0xe0, // item start tag
1040            0xff, 0xff, 0xff, 0xff, // item length: undefined
1041            // -- 20 --
1042            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1
1043            // -- 30 --
1044            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2
1045            // -- 40 --
1046            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
1047            // -- 48 --
1048            0xfe, 0xff, 0x00, 0xe0, // item start tag
1049            0xff, 0xff, 0xff, 0xff, // item length: undefined
1050            // -- 56 --
1051            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4
1052            // -- 66 --
1053            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
1054            // -- 74 --
1055            0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
1056            // -- 82 --
1057            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
1058            b'T', b'E', b'S', b'T', // value = "TEST"
1059        ];
1060
1061        let ground_truth = vec![
1062            DataToken::SequenceStart {
1063                tag: Tag(0x0018, 0x6011),
1064                len: Length::UNDEFINED,
1065            },
1066            DataToken::ItemStart {
1067                len: Length::UNDEFINED,
1068            },
1069            DataToken::ElementHeader(DataElementHeader {
1070                tag: Tag(0x0018, 0x6012),
1071                vr: VR::US,
1072                len: Length(2),
1073            }),
1074            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
1075            DataToken::ElementHeader(DataElementHeader {
1076                tag: Tag(0x0018, 0x6014),
1077                vr: VR::US,
1078                len: Length(2),
1079            }),
1080            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
1081            DataToken::ItemEnd,
1082            DataToken::ItemStart {
1083                len: Length::UNDEFINED,
1084            },
1085            DataToken::ElementHeader(DataElementHeader {
1086                tag: Tag(0x0018, 0x6012),
1087                vr: VR::US,
1088                len: Length(2),
1089            }),
1090            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
1091            DataToken::ItemEnd,
1092            DataToken::SequenceEnd,
1093            DataToken::ElementHeader(DataElementHeader {
1094                tag: Tag(0x0020, 0x4000),
1095                vr: VR::LT,
1096                len: Length(4),
1097            }),
1098            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
1099        ];
1100
1101        validate_read_data_explicit_vr(DATA, ground_truth);
1102    }
1103
1104    #[test]
1105    fn read_implicit_len_sequence_implicit_vr_unknown() {
1106        #[rustfmt::skip]
1107        static DATA: &[u8] = &[
1108            0x33, 0x55, 0x33, 0x55, // sequence tag: (5533,5533) «private, unknown attribute»
1109            0xff, 0xff, 0xff, 0xff, // length: undefined
1110            // -- 8 --
1111            0xfe, 0xff, 0x00, 0xe0, // item begin
1112            0xff, 0xff, 0xff, 0xff, // length: undefined
1113            // -- 16 --
1114            0xfe, 0xff, 0x0d, 0xe0, // item end
1115            0x00, 0x00, 0x00, 0x00, // length is always zero
1116            // -- 24 --
1117            0xfe, 0xff, 0xdd, 0xe0,
1118            0x00, 0x00, 0x00, 0x00, // sequence end
1119            // -- 32 --
1120        ];
1121
1122        let ground_truth = vec![
1123            DataToken::SequenceStart {
1124                tag: Tag(0x5533, 0x5533),
1125                len: Length::UNDEFINED,
1126            },
1127            DataToken::ItemStart {
1128                len: Length::UNDEFINED,
1129            },
1130            DataToken::ItemEnd,
1131            DataToken::SequenceEnd,
1132        ];
1133
1134        validate_read_data_implicit_vr(DATA, ground_truth);
1135    }
1136
1137    #[test]
1138    fn read_encapsulated_pixeldata() {
1139        #[rustfmt::skip]
1140        static DATA: &[u8] = &[
1141            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
1142            b'O', b'B', // VR 
1143            0x00, 0x00, // reserved
1144            0xff, 0xff, 0xff, 0xff, // length: undefined
1145            // -- 12 -- Basic offset table
1146            0xfe, 0xff, 0x00, 0xe0, // item start tag
1147            0x00, 0x00, 0x00, 0x00, // item length: 0
1148            // -- 20 -- First fragment of pixel data
1149            0xfe, 0xff, 0x00, 0xe0, // item start tag
1150            0x20, 0x00, 0x00, 0x00, // item length: 32
1151            // -- 28 -- Compressed Fragment
1152            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1153            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1154            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1155            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1156            // -- 60 -- End of pixel data
1157            0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
1158            0x00, 0x00, 0x00, 0x00,
1159            // -- 68 -- padding
1160            0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
1161            b'O', b'B', // VR
1162            0x00, 0x00, // reserved
1163            0x08, 0x00, 0x00, 0x00, // length: 8
1164            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1165        ];
1166
1167        let ground_truth = vec![
1168            DataToken::PixelSequenceStart,
1169            DataToken::ItemStart { len: Length(0) },
1170            DataToken::ItemEnd,
1171            DataToken::ItemStart { len: Length(32) },
1172            DataToken::ItemValue(vec![0x99; 32]),
1173            DataToken::ItemEnd,
1174            DataToken::SequenceEnd,
1175            DataToken::ElementHeader(DataElementHeader::new(
1176                Tag(0xfffc, 0xfffc),
1177                VR::OB,
1178                Length(8),
1179            )),
1180            DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
1181        ];
1182
1183        validate_read_data_explicit_vr(DATA, ground_truth);
1184    }
1185
1186    #[test]
1187    fn read_encapsulated_pixeldata_with_offset_table() {
1188        #[rustfmt::skip]
1189        static DATA: &[u8] = &[
1190            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
1191            b'O', b'B', // VR 
1192            0x00, 0x00, // reserved
1193            0xff, 0xff, 0xff, 0xff, // length: undefined
1194            // -- 12 -- Basic offset table
1195            0xfe, 0xff, 0x00, 0xe0, // item start tag
1196            0x04, 0x00, 0x00, 0x00, // item length: 4
1197            // -- 20 -- item value
1198            0x10, 0x00, 0x00, 0x00, // 16
1199            // -- 24 -- First fragment of pixel data
1200            0xfe, 0xff, 0x00, 0xe0, // item start tag
1201            0x20, 0x00, 0x00, 0x00, // item length: 32
1202            // -- 32 -- Compressed Fragment
1203            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1204            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1205            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1206            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1207            // -- 60 -- End of pixel data
1208            0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
1209            0x00, 0x00, 0x00, 0x00,
1210            // -- 68 -- padding
1211            0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
1212            b'O', b'B', // VR
1213            0x00, 0x00, // reserved
1214            0x08, 0x00, 0x00, 0x00, // length: 8
1215            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1216        ];
1217
1218        let ground_truth = vec![
1219            DataToken::PixelSequenceStart,
1220            DataToken::ItemStart { len: Length(4) },
1221            DataToken::OffsetTable(vec![16]),
1222            DataToken::ItemEnd,
1223            DataToken::ItemStart { len: Length(32) },
1224            DataToken::ItemValue(vec![0x99; 32]),
1225            DataToken::ItemEnd,
1226            DataToken::SequenceEnd,
1227            DataToken::ElementHeader(DataElementHeader::new(
1228                Tag(0xfffc, 0xfffc),
1229                VR::OB,
1230                Length(8),
1231            )),
1232            DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
1233        ];
1234
1235        validate_read_data_explicit_vr(DATA, ground_truth);
1236    }
1237
1238    #[test]
1239    fn read_dataset_in_dataset() {
1240        #[rustfmt::skip]
1241        const DATA: &[u8; 138] = &[
1242            // 0: (2001, 9000) private sequence
1243            0x01, 0x20, 0x00, 0x90, //
1244            // length: undefined
1245            0xFF, 0xFF, 0xFF, 0xFF, //
1246            // 8: Item start
1247            0xFE, 0xFF, 0x00, 0xE0, //
1248            // Item length explicit (114 bytes)
1249            0x72, 0x00, 0x00, 0x00, //
1250            // 16: (0008,1115) ReferencedSeriesSequence
1251            0x08, 0x00, 0x15, 0x11, //
1252            // length: undefined
1253            0xFF, 0xFF, 0xFF, 0xFF, //
1254            // 24: Item start
1255            0xFE, 0xFF, 0x00, 0xE0, //
1256            // Item length undefined
1257            0xFF, 0xFF, 0xFF, 0xFF, //
1258            // 32: (0008,1140) ReferencedImageSequence
1259            0x08, 0x00, 0x40, 0x11, //
1260            // length: undefined
1261            0xFF, 0xFF, 0xFF, 0xFF, //
1262            // 40: Item start
1263            0xFE, 0xFF, 0x00, 0xE0, //
1264            // Item length undefined
1265            0xFF, 0xFF, 0xFF, 0xFF, //
1266            // 48: (0008,1150) ReferencedSOPClassUID
1267            0x08, 0x00, 0x50, 0x11, //
1268            // length: 26
1269            0x1a, 0x00, 0x00, 0x00, //
1270            // Value: "1.2.840.10008.5.1.4.1.1.7\0" (SecondaryCaptureImageStorage)
1271            b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
1272            b'5', b'.', b'1', b'.', b'4', b'.', b'1', b'.', b'1', b'.', b'7', b'\0',
1273            // 82: Item End (ReferencedImageSequence)
1274            0xFE, 0xFF, 0x0D, 0xE0, //
1275            0x00, 0x00, 0x00, 0x00, //
1276            // 90: Sequence End (ReferencedImageSequence)
1277            0xFE, 0xFF, 0xDD, 0xE0, //
1278            0x00, 0x00, 0x00, 0x00, //
1279            // 98: Item End (ReferencedSeriesSequence)
1280            0xFE, 0xFF, 0x0D, 0xE0, //
1281            0x00, 0x00, 0x00, 0x00, //
1282            // 106: Sequence End (ReferencedSeriesSequence)
1283            0xFE, 0xFF, 0xDD, 0xE0, //
1284            0x00, 0x00, 0x00, 0x00, //
1285            // 114: (2050,0020) PresentationLUTShape (CS)
1286            0x50, 0x20, 0x20, 0x00, //
1287            // length: 8
1288            0x08, 0x00, 0x00, 0x00, //
1289            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y', //
1290            // 130: Sequence end
1291            0xFE, 0xFF, 0xDD, 0xE0, //
1292            0x00, 0x00, 0x00, 0x00, //
1293        ];
1294
1295        let ground_truth = vec![
1296            DataToken::SequenceStart {
1297                tag: Tag(0x2001, 0x9000),
1298                len: Length::UNDEFINED,
1299            },
1300            DataToken::ItemStart { len: Length(114) },
1301            DataToken::SequenceStart {
1302                tag: Tag(0x0008, 0x1115),
1303                len: Length::UNDEFINED,
1304            },
1305            DataToken::ItemStart {
1306                len: Length::UNDEFINED,
1307            },
1308            DataToken::SequenceStart {
1309                tag: Tag(0x0008, 0x1140),
1310                len: Length::UNDEFINED,
1311            },
1312            DataToken::ItemStart {
1313                len: Length::UNDEFINED,
1314            },
1315            DataToken::ElementHeader(DataElementHeader {
1316                tag: Tag(0x0008, 0x1150),
1317                vr: VR::UI,
1318                len: Length(26),
1319            }),
1320            DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.10008.5.1.4.1.1.7\0")),
1321            DataToken::ItemEnd,
1322            DataToken::SequenceEnd,
1323            DataToken::ItemEnd,
1324            DataToken::SequenceEnd,
1325            DataToken::ElementHeader(DataElementHeader {
1326                tag: Tag(0x2050, 0x0020),
1327                vr: VR::CS,
1328                len: Length(8),
1329            }),
1330            DataToken::PrimitiveValue(PrimitiveValue::from("IDENTITY")),
1331            DataToken::ItemEnd, // inserted automatically
1332            DataToken::SequenceEnd,
1333        ];
1334
1335        validate_read_data_implicit_vr(DATA, ground_truth);
1336    }
1337
1338    #[test]
1339    fn peek_data_elements() {
1340        #[rustfmt::skip]
1341        static DATA: &[u8] = &[
1342            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
1343            b'S', b'Q', // VR
1344            0x00, 0x00, // reserved
1345            0xff, 0xff, 0xff, 0xff, // length: undefined
1346            // -- 12 --
1347            0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
1348            // -- 82 --
1349            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
1350            b'T', b'E', b'S', b'T', // value = "TEST"
1351        ];
1352
1353        let ground_truth = vec![
1354            DataToken::SequenceStart {
1355                tag: Tag(0x0018, 0x6011),
1356                len: Length::UNDEFINED,
1357            },
1358            DataToken::SequenceEnd,
1359            DataToken::ElementHeader(DataElementHeader {
1360                tag: Tag(0x0020, 0x4000),
1361                vr: VR::LT,
1362                len: Length(4),
1363            }),
1364            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
1365        ];
1366
1367        let mut cursor = DATA;
1368        let parser = StatefulDecoder::new(
1369            &mut cursor,
1370            ExplicitVRLittleEndianDecoder::default(),
1371            LittleEndianBasicDecoder,
1372            SpecificCharacterSet::default(),
1373        );
1374        let mut dset_reader = DataSetReader::new(parser, Default::default());
1375
1376        let iter = &mut dset_reader;
1377
1378        // peek at first token
1379        let token = iter.peek().expect("should peek first token OK");
1380        assert_eq!(token, Some(&ground_truth[0]));
1381
1382        // peeking multiple times gives the same result
1383        let token = iter.peek().expect("should peek first token again OK");
1384        assert_eq!(token, Some(&ground_truth[0]));
1385
1386        // Using `next` give us the same token
1387        let token = iter
1388            .next()
1389            .expect("expected token")
1390            .expect("should read token peeked OK");
1391        assert_eq!(&token, &ground_truth[0]);
1392
1393        // read some more tokens
1394
1395        // sequence end
1396        let token = iter.next().unwrap().unwrap();
1397        assert_eq!(&token, &ground_truth[1]);
1398        // data element header
1399        let token = iter.next().unwrap().unwrap();
1400        assert_eq!(&token, &ground_truth[2]);
1401
1402        // peek string value
1403        let token = iter.peek().unwrap();
1404        assert_eq!(token, Some(&ground_truth[3]));
1405        // peek it again
1406        let token = iter.peek().unwrap();
1407        assert_eq!(token, Some(&ground_truth[3]));
1408        // then read it
1409        let token = iter.next().unwrap().unwrap();
1410        assert_eq!(&token, &ground_truth[3]);
1411
1412        // finished reading, peek should return None
1413        assert!(iter.peek().unwrap().is_none());
1414    }
1415
1416    #[test]
1417    fn read_pixel_sequence_bad_item_end() {
1418        #[rustfmt::skip]
1419        static DATA: &[u8] = &[
1420            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
1421            b'O', b'B', // VR 
1422            0x00, 0x00, // reserved
1423            0xff, 0xff, 0xff, 0xff, // length: undefined
1424            // -- 12 --
1425            0xfe, 0xff, 0x00, 0xe0, // item start tag
1426            0x00, 0x00, 0x00, 0x00, // item length: 0
1427            // -- 20 --
1428            0xfe, 0xff, 0x0d, 0xe0, // item end
1429            0x00, 0x00, 0x00, 0x00, // length is always zero
1430            // -- 28 --
1431            0xfe, 0xff, 0x0d, 0xe0, // another item end (bad)
1432            0x00, 0x00, 0x00, 0x00, //
1433            // -- 36 --
1434            0xfe, 0xff, 0x00, 0xe0, // another item start
1435            0x00, 0x00, 0x00, 0x00, // item length: 0
1436        ];
1437
1438        let mut cursor = DATA;
1439        let parser = StatefulDecoder::new(
1440            &mut cursor,
1441            ExplicitVRLittleEndianDecoder::default(),
1442            LittleEndianBasicDecoder,
1443            SpecificCharacterSet::default(),
1444        );
1445        let mut dset_reader = DataSetReader::new(parser, Default::default());
1446
1447        let token_res = (&mut dset_reader).collect::<Result<Vec<_>, _>>();
1448        dbg!(&token_res);
1449        assert!(token_res.is_err());
1450    }
1451
1452    #[test]
1453    fn read_odd_length_element() {
1454        #[rustfmt::skip]
1455        static DATA: &[u8] = &[
1456            0x08, 0x00, 0x16, 0x00, // (0008,0016) SOPClassUID
1457            b'U', b'I', // VR
1458            0x0b, 0x00, // len = 11
1459            b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0',
1460            0x00, // padding
1461        ];
1462
1463        let ground_truth = vec![
1464            DataToken::ElementHeader(DataElementHeader {
1465                tag: Tag(0x0008, 0x0016),
1466                vr: VR::UI,
1467                len: Length(12),
1468            }),
1469            DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.100\0")),
1470        ];
1471
1472        // strategy: assume next even
1473
1474        let mut cursor = DATA;
1475        let parser = StatefulDecoder::new(
1476            &mut cursor,
1477            ExplicitVRLittleEndianDecoder::default(),
1478            LittleEndianBasicDecoder,
1479            SpecificCharacterSet::default(),
1480        );
1481        let dset_reader = DataSetReader::new(
1482            parser,
1483            DataSetReaderOptions {
1484                odd_length: OddLengthStrategy::NextEven,
1485                ..Default::default()
1486            },
1487        );
1488
1489        validate_data_set_reader(DATA, dset_reader, ground_truth);
1490
1491        // strategy: fail
1492
1493        let mut cursor = DATA;
1494        let parser = StatefulDecoder::new(
1495            &mut cursor,
1496            ExplicitVRLittleEndianDecoder::default(),
1497            LittleEndianBasicDecoder,
1498            SpecificCharacterSet::default(),
1499        );
1500        let dset_reader = DataSetReader::new(
1501            parser,
1502            DataSetReaderOptions {
1503                odd_length: OddLengthStrategy::Fail,
1504                ..Default::default()
1505            },
1506        );
1507
1508        let mut tokens = dset_reader.into_iter();
1509        let token = tokens.next();
1510
1511        assert!(
1512            matches!(
1513                token,
1514                Some(Err(super::Error::InvalidElementLength {
1515                    tag: Tag(0x0008, 0x0016),
1516                    len: 11,
1517                    bytes_read: 8,
1518                })),
1519            ),
1520            "got: {:?}",
1521            token
1522        );
1523    }
1524}