dicom_parser/dataset/
read.rs

1//! This module contains a mid-level abstraction for reading DICOM content
2//! sequentially.
3//!
4//! The rest of the crate is used to obtain DICOM element headers and values.
5//! At this level, headers and values are treated as tokens which can be used
6//! to form a syntax tree of a full data set.
7use crate::stateful::decode::{DynStatefulDecoder, Error as DecoderError, StatefulDecode};
8use dicom_core::header::{DataElementHeader, Header, Length, SequenceItemHeader};
9use dicom_core::{PrimitiveValue, Tag, VR};
10use dicom_encoding::text::SpecificCharacterSet;
11use dicom_encoding::transfer_syntax::TransferSyntax;
12use snafu::{Backtrace, ResultExt, Snafu};
13use std::cmp::Ordering;
14use std::io::Read;
15
16use super::{DataToken, SeqTokenType};
17
18fn is_stateful_decode<T>(_: &T)
19where
20    T: StatefulDecode,
21{
22}
23
24#[derive(Debug, Snafu)]
25#[non_exhaustive]
26pub enum Error {
27    #[snafu(display("Could not create decoder"))]
28    CreateDecoder {
29        #[snafu(backtrace)]
30        source: DecoderError,
31    },
32    #[snafu(display("Could not read item header"))]
33    ReadItemHeader {
34        #[snafu(backtrace)]
35        source: DecoderError,
36    },
37    #[snafu(display("Could not read element header"))]
38    ReadHeader {
39        #[snafu(backtrace)]
40        source: DecoderError,
41    },
42    #[snafu(display("Could not read {} value bytes for element tagged {}", len, tag))]
43    ReadValue {
44        len: u32,
45        tag: Tag,
46        #[snafu(backtrace)]
47        source: DecoderError,
48    },
49    #[snafu(display("Could not read {} bytes for item value", len))]
50    ReadItemValue {
51        len: u32,
52        #[snafu(backtrace)]
53        source: DecoderError,
54    },
55    #[snafu(display(
56        "Inconsistent sequence end: expected end at {} bytes but read {}",
57        end_of_sequence,
58        bytes_read
59    ))]
60    InconsistentSequenceEnd {
61        end_of_sequence: u64,
62        bytes_read: u64,
63        backtrace: Backtrace,
64    },
65    #[snafu(display("Unexpected item tag {} while reading element header", tag))]
66    UnexpectedItemTag { tag: Tag, backtrace: Backtrace },
67    #[snafu(display(
68        "Unexpected item header outside a dataset sequence at {:#x}",
69        bytes_read
70    ))]
71    UnexpectedItemHeader {
72        bytes_read: u64,
73        backtrace: Backtrace,
74    },
75    /// Undefined pixel data item length
76    UndefinedItemLength,
77    /// Invalid data element length {len:04X} of {tag} at {bytes_read:#x}
78    InvalidElementLength { tag: Tag, len: u32, bytes_read: u64 },
79    /// Invalid sequence item length {len:04X} at {bytes_read:#x}
80    InvalidItemLength { len: u32, bytes_read: u64 },
81}
82
83pub type Result<T> = std::result::Result<T, Error>;
84
85/// A reader-specific token representing a sequence or item start.
86#[derive(Debug, Copy, Clone, PartialEq)]
87struct SeqToken {
88    /// Whether it is the start of a sequence or the start of an item.
89    typ: SeqTokenType,
90    /// The length of the value, as indicated by the starting element,
91    /// can be unknown.
92    len: Length,
93    /// Whether this sequence token is part of an encapsulated pixel data.
94    pixel_data: bool,
95    /// The number of bytes the parser has read until it reached the
96    /// beginning of the sequence or item value data.
97    base_offset: u64,
98}
99
/// The value reading strategy for the data set reader.
///
/// It defines how the `PrimitiveValue`s in value tokens are constructed.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
pub enum ValueReadStrategy {
    /// Textual values will be decoded according to their value representation.
    ///
    /// Word-sized binary values are read according to
    /// the expected byte order.
    /// Dates, times, and date-times (DA, TM, DT) are parsed
    /// into their more specific variants,
    /// leading to parser failure if they are not valid DICOM.
    /// String numbers (IS, DS) are also converted into binary representations.
    /// For the case of floats, this may introduce precision errors.
    Interpreted,
    /// Values will be stored without decoding dates or textual numbers.
    ///
    /// Word-sized binary values are read according to
    /// the expected byte order.
    /// Date-time values and numbers are kept in their original string
    /// representation as string objects.
    /// All text is still decoded into Rust string values,
    /// in accordance with the standard,
    /// unless its value representation is unknown to the decoder.
    ///
    /// This is the default strategy.
    #[default]
    Preserved,
    /// All primitive values are fetched as raw byte buffers,
    /// without any form of decoding or interpretation.
    /// Not even byte order conversions are made.
    ///
    /// This strategy is not recommended,
    /// as it makes the retrieval of important textual data more difficult.
    Raw,
}
136
/// How the parser should react to a data element
/// whose _length_ header field holds an odd number.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum OddLengthStrategy {
    /// Take the odd length at face value
    /// and keep reading the data set as usual.
    ///
    /// This is the default strategy.
    #[default]
    Accept,
    /// Treat the real length as `length + 1`,
    /// i.e. the next even number.
    NextEven,
    /// Report an error instead of reading the element.
    Fail,
}
152
153/// The set of options for the data set reader.
154#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
155#[non_exhaustive]
156pub struct DataSetReaderOptions {
157    /// The value reading strategy
158    pub value_read: ValueReadStrategy,
159    /// The strategy for handling odd length data elements
160    pub odd_length: OddLengthStrategy,
161    /// The position of the reader as received at building time in bytes.
162    /// Defaults to 0.
163    pub base_offset: u64,
164}
165
166impl DataSetReaderOptions {
167    /// Replace the value reading strategy of the options.
168    pub fn value_read(mut self, value_read: ValueReadStrategy) -> Self {
169        self.value_read = value_read;
170        self
171    }
172    /// Replace the base reader offset of the options.
173    pub fn base_offset(mut self, base_offset: u64) -> Self {
174        self.base_offset = base_offset;
175        self
176    }
177}
178
179/// A higher-level reader for retrieving structure in a DICOM data set from an
180/// arbitrary data source.
181#[derive(Debug)]
182pub struct DataSetReader<S> {
183    /// the stateful decoder
184    parser: S,
185    /// the options of this reader
186    options: DataSetReaderOptions,
187    /// whether the reader is expecting an item header next (or a sequence delimiter)
188    in_sequence: bool,
189    /// whether the reader is expecting the first item value of a pixel sequence next
190    /// (offset table)
191    offset_table_next: bool,
192    /// whether a check for a sequence or item delimitation is pending
193    delimiter_check_pending: bool,
194    /// a stack of delimiters
195    seq_delimiters: Vec<SeqToken>,
196    /// fuse the iteration process if true
197    hard_break: bool,
198    /// last decoded header
199    last_header: Option<DataElementHeader>,
200    /// if a peek was taken, this holds the token peeked
201    peek: Option<DataToken>,
202}
203
204impl<R> DataSetReader<DynStatefulDecoder<R>> {
205    /// Create a new data set token reader with the given byte source,
206    /// while considering the given transfer syntax specifier.
207    #[inline]
208    pub fn new_with_ts(source: R, ts: &TransferSyntax) -> Result<Self>
209    where
210        R: Read,
211    {
212        Self::new_with_ts_cs_options(source, ts, Default::default(), Default::default())
213    }
214
215    /// Create a new iterator with the given transfer syntax and options.
216    #[inline]
217    pub fn new_with_ts_options(
218        source: R,
219        ts: &TransferSyntax,
220        options: DataSetReaderOptions,
221    ) -> Result<Self>
222    where
223        R: Read,
224    {
225        Self::new_with_ts_cs_options(source, ts, SpecificCharacterSet::default(), options)
226    }
227
228    /// Create a new data set token reader with the given byte source,
229    /// while considering the given transfer syntax specifier
230    /// and the specific character set to assume by default.
231    ///
232    /// Note that the data set being read
233    /// can override the character set with the presence of a
234    /// _Specific Character Set_ data element.
235    #[inline]
236    pub fn new_with_ts_cs(source: R, ts: &TransferSyntax, cs: SpecificCharacterSet) -> Result<Self>
237    where
238        R: Read,
239    {
240        Self::new_with_ts_cs_options(source, ts, cs, Default::default())
241    }
242
243    /// Create a new iterator with the given stateful decoder and options.
244    pub fn new_with_ts_cs_options(
245        source: R,
246        ts: &TransferSyntax,
247        cs: SpecificCharacterSet,
248        options: DataSetReaderOptions,
249    ) -> Result<Self>
250    where
251        R: Read,
252    {
253        let parser = DynStatefulDecoder::new_with(source, ts, cs, 0).context(CreateDecoderSnafu)?;
254
255        is_stateful_decode(&parser);
256
257        Ok(DataSetReader {
258            parser,
259            options,
260            seq_delimiters: Vec::new(),
261            delimiter_check_pending: false,
262            offset_table_next: false,
263            in_sequence: false,
264            hard_break: false,
265            last_header: None,
266            peek: None,
267        })
268    }
269}
270
271impl<S> DataSetReader<S> {
272    /// Create a new iterator with the given stateful decoder and options.
273    pub fn new(decoder: S, options: DataSetReaderOptions) -> Self {
274        DataSetReader {
275            parser: decoder,
276            options,
277            seq_delimiters: Vec::new(),
278            delimiter_check_pending: false,
279            offset_table_next: false,
280            in_sequence: false,
281            hard_break: false,
282            last_header: None,
283            peek: None,
284        }
285    }
286}
287
288impl<S> Iterator for DataSetReader<S>
289where
290    S: StatefulDecode,
291{
292    type Item = Result<DataToken>;
293
294    fn next(&mut self) -> Option<Self::Item> {
295        if self.hard_break {
296            return None;
297        }
298        // if there was a peek, consume peeked token
299        if let Some(token) = self.peek.take() {
300            return Some(Ok(token));
301        }
302
303        // item or sequence delimitation logic for explicit lengths
304        if self.delimiter_check_pending {
305            match self.update_seq_delimiters() {
306                Err(e) => {
307                    self.hard_break = true;
308                    return Some(Err(e));
309                }
310                Ok(Some(token)) => return Some(Ok(token)),
311                Ok(None) => { /* no-op */ }
312            }
313        }
314
315        if self.in_sequence {
316            // at sequence level, expecting item header
317
318            match self.parser.decode_item_header() {
319                Ok(header) => {
320                    match header {
321                        SequenceItemHeader::Item { len } => {
322                            let len = match self.sanitize_length(len) {
323                                Some(len) => len,
324                                None => {
325                                    return Some(
326                                        InvalidItemLengthSnafu {
327                                            bytes_read: self.parser.position(),
328                                            len: len.0,
329                                        }
330                                        .fail(),
331                                    )
332                                }
333                            };
334                            // entered a new item
335                            self.in_sequence = false;
336
337                            let last_delimiter = match self.seq_delimiters.last() {
338                                Some(d) => d,
339                                None => {
340                                    return Some(
341                                        UnexpectedItemHeaderSnafu {
342                                            bytes_read: self.parser.position(),
343                                        }
344                                        .fail(),
345                                    )
346                                }
347                            };
348                            self.push_sequence_token(
349                                SeqTokenType::Item,
350                                len,
351                                last_delimiter.pixel_data,
352                            );
353                            // items can be empty
354                            if len == Length(0) {
355                                self.delimiter_check_pending = true;
356                            }
357                            Some(Ok(DataToken::ItemStart { len }))
358                        }
359                        SequenceItemHeader::ItemDelimiter => {
360                            // closed an item
361                            self.seq_delimiters.pop();
362                            self.in_sequence = true;
363                            // sequences can end after an item delimiter
364                            self.delimiter_check_pending = true;
365                            Some(Ok(DataToken::ItemEnd))
366                        }
367                        SequenceItemHeader::SequenceDelimiter => {
368                            // closed a sequence
369                            self.seq_delimiters.pop();
370                            self.in_sequence = false;
371                            // items can end after a nested sequence ends
372                            self.delimiter_check_pending = true;
373                            Some(Ok(DataToken::SequenceEnd))
374                        }
375                    }
376                }
377                Err(DecoderError::DecodeItemHeader {
378                    source: dicom_encoding::decode::Error::ReadItemHeader { source, .. },
379                    ..
380                }) if source.kind() == std::io::ErrorKind::UnexpectedEof
381                   && self.seq_delimiters.pop().is_some_and(|t| t.pixel_data)
382                 => {
383                    // Note: if `UnexpectedEof` was reached while inside a 
384                    // PixelData Sequence, then we assume that
385                    // the end of a DICOM object was reached gracefully.
386                    self.hard_break = true;
387                    None
388                }
389                Err(e) => {
390                    self.hard_break = true;
391                    Some(Err(e).context(ReadItemHeaderSnafu))
392                }
393            }
394        } else if let Some(SeqToken {
395            typ: SeqTokenType::Item,
396            pixel_data: true,
397            len,
398            ..
399        }) = self.seq_delimiters.last()
400        {
401            let len = match len.get() {
402                Some(len) => len as usize,
403                None => return Some(UndefinedItemLengthSnafu.fail()),
404            };
405
406            if self.offset_table_next {
407                // offset table
408                let mut offset_table = Vec::with_capacity(len);
409
410                self.offset_table_next = false;
411
412                // need to pop item delimiter on the next iteration
413                self.delimiter_check_pending = true;
414
415                Some(
416                    match self.parser.read_u32_to_vec(len as u32, &mut offset_table) {
417                        Ok(()) => Ok(DataToken::OffsetTable(offset_table)),
418                        Err(e) => Err(e).context(ReadItemValueSnafu { len: len as u32 }),
419                    },
420                )
421            } else {
422                // item value
423                let mut value = Vec::with_capacity(len);
424
425                // need to pop item delimiter on the next iteration
426                self.delimiter_check_pending = true;
427                Some(
428                    self.parser
429                        .read_to_vec(len as u32, &mut value)
430                        .map(|_| Ok(DataToken::ItemValue(value)))
431                        .unwrap_or_else(|e| Err(e).context(ReadItemValueSnafu { len: len as u32 })),
432                )
433            }
434        } else if let Some(header) = self.last_header {
435            if header.is_encapsulated_pixeldata() {
436                self.push_sequence_token(SeqTokenType::Sequence, Length::UNDEFINED, true);
437                self.last_header = None;
438
439                // encapsulated pixel data, expecting offset table
440                match self.parser.decode_item_header() {
441                    Ok(header) => match header {
442                        SequenceItemHeader::Item { len } => {
443                            let len = match self.sanitize_length(len) {
444                                Some(len) => len,
445                                None => {
446                                    return Some(
447                                        InvalidItemLengthSnafu {
448                                            bytes_read: self.parser.position(),
449                                            len: len.0,
450                                        }
451                                        .fail(),
452                                    )
453                                }
454                            };
455
456                            // entered a new item
457                            self.in_sequence = false;
458                            self.push_sequence_token(SeqTokenType::Item, len, true);
459                            // items can be empty
460                            if len == Length(0) {
461                                self.delimiter_check_pending = true;
462                            } else {
463                                self.offset_table_next = true;
464                            }
465                            Some(Ok(DataToken::ItemStart { len }))
466                        }
467                        SequenceItemHeader::SequenceDelimiter => {
468                            // empty pixel data
469                            self.seq_delimiters.pop();
470                            self.in_sequence = false;
471                            Some(Ok(DataToken::SequenceEnd))
472                        }
473                        item => {
474                            self.hard_break = true;
475                            Some(UnexpectedItemTagSnafu { tag: item.tag() }.fail())
476                        }
477                    },
478                    Err(e) => {
479                        self.hard_break = true;
480                        Some(Err(e).context(ReadItemHeaderSnafu))
481                    }
482                }
483            } else {
484                // a plain element header was read, so a value is expected
485                let value = match self.read_value(&header) {
486                    Ok(v) => v,
487                    Err(e) => {
488                        self.hard_break = true;
489                        self.last_header = None;
490                        return Some(Err(e));
491                    }
492                };
493
494                self.last_header = None;
495
496                // sequences can end after this token
497                self.delimiter_check_pending = true;
498
499                Some(Ok(DataToken::PrimitiveValue(value)))
500            }
501        } else {
502            // a data element header or item delimiter is expected
503            match self.parser.decode_header() {
504                Ok(DataElementHeader {
505                    tag,
506                    vr: VR::SQ,
507                    len,
508                }) => {
509                    let len = match self.sanitize_length(len) {
510                        Some(len) => len,
511                        None => {
512                            return Some(
513                                InvalidElementLengthSnafu {
514                                    tag,
515                                    len: len.0,
516                                    bytes_read: self.parser.position(),
517                                }
518                                .fail(),
519                            )
520                        }
521                    };
522
523                    self.in_sequence = true;
524                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
525
526                    // sequences can end right after they start
527                    if len == Length(0) {
528                        self.delimiter_check_pending = true;
529                    }
530
531                    Some(Ok(DataToken::SequenceStart { tag, len }))
532                }
533                Ok(DataElementHeader {
534                    tag: Tag(0xFFFE, 0xE00D),
535                    ..
536                }) if self.seq_delimiters.is_empty() => {
537                    // ignore delimiter, we are not in a sequence
538                    tracing::warn!(
539                        "Item delimitation item outside of a sequence in position {}",
540                        self.parser.position()
541                    );
542                    // return a new token by calling the method again
543                    self.next()
544                }
545                Ok(DataElementHeader {
546                    tag: Tag(0xFFFE, 0xE00D),
547                    ..
548                }) => {
549                    self.in_sequence = true;
550                    // pop item delimiter
551                    self.seq_delimiters.pop();
552                    // sequences can end after this token
553                    self.delimiter_check_pending = true;
554                    Some(Ok(DataToken::ItemEnd))
555                }
556                Ok(header) if header.is_encapsulated_pixeldata() => {
557                    // encapsulated pixel data conditions:
558                    // expect a sequence of pixel data fragments
559
560                    // save it for the next step
561                    self.last_header = Some(header);
562                    Some(Ok(DataToken::PixelSequenceStart))
563                }
564                Ok(header) if header.len.is_undefined() => {
565                    // treat other undefined length elements
566                    // as data set sequences,
567                    // discarding the VR in the process
568                    self.in_sequence = true;
569
570                    let DataElementHeader { tag, len, .. } = header;
571                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
572
573                    Some(Ok(DataToken::SequenceStart { tag, len }))
574                }
575                Ok(mut header) => {
576                    match self.sanitize_length(header.len) {
577                        Some(len) => header.len = len,
578                        None => {
579                            return Some(
580                                InvalidElementLengthSnafu {
581                                    tag: header.tag,
582                                    len: header.len.0,
583                                    bytes_read: self.parser.position(),
584                                }
585                                .fail(),
586                            )
587                        }
588                    };
589
590                    // save it for the next step
591                    self.last_header = Some(header);
592                    Some(Ok(DataToken::ElementHeader(header)))
593                }
594                Err(DecoderError::DecodeElementHeader {
595                    source: dicom_encoding::decode::Error::ReadHeaderTag { source, .. },
596                    ..
597                }) if source.kind() == std::io::ErrorKind::UnexpectedEof => {
598                    // Note: if `UnexpectedEof` was reached while trying to read
599                    // an element tag, then we assume that
600                    // the end of a DICOM object was reached gracefully.
601                    // This approach is unlikely to consume trailing bytes,
602                    // but may ignore the current depth of the data set tree.
603                    self.hard_break = true;
604                    None
605                }
606                Err(e) => {
607                    self.hard_break = true;
608                    Some(Err(e).context(ReadHeaderSnafu))
609                }
610            }
611        }
612    }
613}
614
615impl<S> DataSetReader<S>
616where
617    S: StatefulDecode,
618{
619    /// Peek the next token from the source by
620    /// reading a new token in the first call.
621    /// Subsequent calls to `peek` will return the same token
622    /// until another consumer method (such as `Iterator::next`)
623    /// is called.
624    pub fn peek(&mut self) -> Result<Option<&DataToken>> {
625        if self.peek.is_none() {
626            // try to read the next token
627            match self.next() {
628                None => return Ok(None),
629                Some(Err(e)) => return Err(e),
630                Some(Ok(token)) => {
631                    self.peek = Some(token);
632                }
633            }
634        }
635        Ok(self.peek.as_ref())
636    }
637
638    fn update_seq_delimiters(&mut self) -> Result<Option<DataToken>> {
639        if let Some(sd) = self.seq_delimiters.last() {
640            if let Some(len) = sd.len.get() {
641                let end_of_sequence = sd.base_offset + len as u64;
642                let bytes_read = self.parser.position();
643                match end_of_sequence.cmp(&bytes_read) {
644                    Ordering::Equal => {
645                        // end of delimiter, as indicated by the element's length
646                        let token;
647                        match sd.typ {
648                            SeqTokenType::Sequence => {
649                                self.in_sequence = false;
650                                token = DataToken::SequenceEnd;
651                            }
652                            SeqTokenType::Item => {
653                                self.in_sequence = true;
654                                token = DataToken::ItemEnd;
655                            }
656                        }
657                        self.seq_delimiters.pop();
658                        return Ok(Some(token));
659                    }
660                    Ordering::Less => {
661                        return InconsistentSequenceEndSnafu {
662                            end_of_sequence,
663                            bytes_read,
664                        }
665                        .fail();
666                    }
667                    Ordering::Greater => {} // continue normally
668                }
669            }
670        }
671        self.delimiter_check_pending = false;
672        Ok(None)
673    }
674
675    #[inline]
676    fn push_sequence_token(&mut self, typ: SeqTokenType, len: Length, pixel_data: bool) {
677        self.seq_delimiters.push(SeqToken {
678            typ,
679            pixel_data,
680            len,
681            base_offset: self.parser.position(),
682        })
683    }
684
685    fn read_value(&mut self, header: &DataElementHeader) -> Result<PrimitiveValue> {
686        match self.options.value_read {
687            ValueReadStrategy::Interpreted => self.parser.read_value(header),
688            ValueReadStrategy::Preserved => self.parser.read_value_preserved(header),
689            ValueReadStrategy::Raw => self.parser.read_value_bytes(header),
690        }
691        .context(ReadValueSnafu {
692            len: header.len.0,
693            tag: header.tag,
694        })
695    }
696
697    /// Check for a non-compliant length
698    /// and handle it according to the current strategy.
699    /// Returns `None` if the length cannot or should not be resolved.
700    fn sanitize_length(&self, length: Length) -> Option<Length> {
701        if length.is_defined() && length.0 & 1 != 0 {
702            match self.options.odd_length {
703                OddLengthStrategy::Accept => Some(length),
704                OddLengthStrategy::NextEven => Some(length + 1),
705                OddLengthStrategy::Fail => None,
706            }
707        } else {
708            Some(length)
709        }
710    }
711}
712
713#[cfg(test)]
714mod tests {
715    use super::{DataSetReader, DataToken, StatefulDecode};
716    use crate::dataset::read::{DataSetReaderOptions, OddLengthStrategy};
717    use crate::stateful::decode::StatefulDecoder;
718    use dicom_core::header::{DataElementHeader, Length};
719    use dicom_core::value::PrimitiveValue;
720    use dicom_core::{Tag, VR};
721    use dicom_encoding::decode::basic::LittleEndianBasicDecoder;
722    use dicom_encoding::decode::{
723        explicit_le::ExplicitVRLittleEndianDecoder, implicit_le::ImplicitVRLittleEndianDecoder,
724    };
725    use dicom_encoding::text::SpecificCharacterSet;
726
727    fn validate_read_data_implicit_vr<I>(data: &[u8], ground_truth: I)
728    where
729        I: IntoIterator<Item = DataToken>,
730    {
731        let mut cursor = data;
732        let parser = StatefulDecoder::new(
733            &mut cursor,
734            ImplicitVRLittleEndianDecoder::default(),
735            LittleEndianBasicDecoder::default(),
736            SpecificCharacterSet::default(),
737        );
738
739        validate_read_data(data, parser, ground_truth)
740    }
741
742    fn validate_read_data_explicit_vr<I>(data: &[u8], ground_truth: I)
743    where
744        I: IntoIterator<Item = DataToken>,
745    {
746        let mut cursor = data;
747        let parser = StatefulDecoder::new(
748            &mut cursor,
749            ExplicitVRLittleEndianDecoder::default(),
750            LittleEndianBasicDecoder::default(),
751            SpecificCharacterSet::default(),
752        );
753
754        validate_read_data(&data, parser, ground_truth)
755    }
756
757    fn validate_read_data<I, D>(data: &[u8], parser: D, ground_truth: I)
758    where
759        I: IntoIterator<Item = DataToken>,
760        D: StatefulDecode,
761    {
762        let dset_reader = DataSetReader::new(parser, Default::default());
763        validate_data_set_reader(data, dset_reader, ground_truth);
764    }
765
766    fn validate_data_set_reader<S, I>(
767        data: &[u8],
768        mut dset_reader: DataSetReader<S>,
769        ground_truth: I,
770    ) where
771        S: StatefulDecode,
772        I: IntoIterator<Item = DataToken>,
773    {
774        let iter = (&mut dset_reader).into_iter();
775        let mut ground_truth = ground_truth.into_iter();
776
777        while let Some(gt_token) = ground_truth.next() {
778            let token = iter
779                .next()
780                .expect("expecting more tokens from reader")
781                .expect("should fetch the next token without an error");
782            eprintln!("Next token: {:2?} ; Expected: {:2?}", token, gt_token);
783            assert_eq!(
784                token, gt_token,
785                "Got token {:2?} ; but expected {:2?}",
786                token, gt_token
787            );
788        }
789
790        let extra: Vec<_> = iter.collect();
791        assert_eq!(
792            extra.len(), // we have already read all of them
793            0,
794            "extraneous tokens remaining: {:?}",
795            extra,
796        );
797        assert_eq!(
798            dset_reader.parser.position(),
799            data.len() as u64,
800            "Decoder position did not match end of data",
801        );
802    }
803
804    #[test]
805    fn read_sequence_explicit() {
806        #[rustfmt::skip]
807        static DATA: &[u8] = &[
808            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
809            b'S', b'Q', // VR
810            0x00, 0x00, // reserved
811            0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (#= 2)
812            // -- 12 --
813            0xfe, 0xff, 0x00, 0xe0, // item start tag
814            0x14, 0x00, 0x00, 0x00, // item length: 20 (#= 2)
815            // -- 20 --
816            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1
817            // -- 30 --
818            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2
819            // -- 40 --
820            0xfe, 0xff, 0x00, 0xe0, // item start tag
821            0x0a, 0x00, 0x00, 0x00, // item length: 10 (#= 1)
822            // -- 48 --
823            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4
824            // -- 58 --
825            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
826            b'T', b'E', b'S', b'T', // value = "TEST"
827        ];
828
829        let ground_truth = vec![
830            DataToken::SequenceStart {
831                tag: Tag(0x0018, 0x6011),
832                len: Length(46),
833            },
834            DataToken::ItemStart { len: Length(20) },
835            DataToken::ElementHeader(DataElementHeader {
836                tag: Tag(0x0018, 0x6012),
837                vr: VR::US,
838                len: Length(2),
839            }),
840            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
841            DataToken::ElementHeader(DataElementHeader {
842                tag: Tag(0x0018, 0x6014),
843                vr: VR::US,
844                len: Length(2),
845            }),
846            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
847            DataToken::ItemEnd,
848            DataToken::ItemStart { len: Length(10) },
849            DataToken::ElementHeader(DataElementHeader {
850                tag: Tag(0x0018, 0x6012),
851                vr: VR::US,
852                len: Length(2),
853            }),
854            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
855            DataToken::ItemEnd,
856            DataToken::SequenceEnd,
857            DataToken::ElementHeader(DataElementHeader {
858                tag: Tag(0x0020, 0x4000),
859                vr: VR::LT,
860                len: Length(4),
861            }),
862            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
863        ];
864
865        validate_read_data_explicit_vr(DATA, ground_truth);
866    }
867
868    #[test]
869    fn read_sequence_explicit_2() {
870        static DATA: &[u8] = &[
871            // SequenceStart: (0008,2218) ; len = 54 (#=3)
872            0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
873            // -- 12, --
874            // ItemStart: len = 46
875            0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
876            // -- 20, --
877            // ElementHeader: (0008,0100) CodeValue; len = 8
878            0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00, // PrimitiveValue
879            0x54, 0x2d, 0x44, 0x31, 0x32, 0x31, 0x33, b' ',
880            // -- 36, --
881            // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4
882            0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00, // PrimitiveValue
883            0x53, 0x52, 0x54, b' ',
884            // -- 48, --
885            // (0008,0104) CodeMeaning; len = 10
886            0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00, // PrimitiveValue
887            0x4a, 0x61, 0x77, b' ', 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e,
888            // -- 66 --
889            // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0
890            0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
891            // ElementHeader: (2050,0020) PresentationLUTShape; len = 8
892            0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00, // PrimitiveValue
893            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
894        ];
895
896        let ground_truth = vec![
897            DataToken::SequenceStart {
898                tag: Tag(0x0008, 0x2218),
899                len: Length(54),
900            },
901            DataToken::ItemStart { len: Length(46) },
902            DataToken::ElementHeader(DataElementHeader {
903                tag: Tag(0x0008, 0x0100),
904                vr: VR::SH,
905                len: Length(8),
906            }),
907            DataToken::PrimitiveValue(PrimitiveValue::Strs(
908                ["T-D1213 ".to_owned()].as_ref().into(),
909            )),
910            DataToken::ElementHeader(DataElementHeader {
911                tag: Tag(0x0008, 0x0102),
912                vr: VR::SH,
913                len: Length(4),
914            }),
915            DataToken::PrimitiveValue(PrimitiveValue::Strs(["SRT ".to_owned()].as_ref().into())),
916            DataToken::ElementHeader(DataElementHeader {
917                tag: Tag(0x0008, 0x0104),
918                vr: VR::LO,
919                len: Length(10),
920            }),
921            DataToken::PrimitiveValue(PrimitiveValue::Strs(
922                ["Jaw region".to_owned()].as_ref().into(),
923            )),
924            DataToken::ItemEnd,
925            DataToken::SequenceEnd,
926            DataToken::SequenceStart {
927                tag: Tag(0x0040, 0x0555),
928                len: Length(0),
929            },
930            DataToken::SequenceEnd,
931            DataToken::ElementHeader(DataElementHeader {
932                tag: Tag(0x2050, 0x0020),
933                vr: VR::CS,
934                len: Length(8),
935            }),
936            DataToken::PrimitiveValue(PrimitiveValue::Strs(
937                ["IDENTITY".to_owned()].as_ref().into(),
938            )),
939        ];
940
941        validate_read_data_explicit_vr(DATA, ground_truth);
942    }
943
944    #[test]
945    fn read_empty_sequence_explicit() {
946        static DATA: &[u8] = &[
947            // SequenceStart: (0008,1032) ProcedureCodeSequence ; len = 0
948            0x08, 0x00, 0x18, 0x22, // VR: SQ
949            b'S', b'Q', // Reserved
950            0x00, 0x00, // Length: 0
951            0x00, 0x00, 0x00, 0x00,
952        ];
953
954        let ground_truth = vec![
955            DataToken::SequenceStart {
956                tag: Tag(0x0008, 0x2218),
957                len: Length(0),
958            },
959            DataToken::SequenceEnd,
960        ];
961
962        validate_read_data_explicit_vr(DATA, ground_truth);
963    }
964
965    /// Gracefully ignore a stray item end tag in the data set.
966    #[test]
967    fn ignore_trailing_item_delimitation_item() {
968        static DATA: &[u8] = &[
969            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04,
970            0x00, // (0020,4000) ImageComments, len = 4
971            b'T', b'E', b'S', b'T', // value = "TEST"
972            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
973        ];
974
975        let ground_truth = vec![
976            DataToken::ElementHeader(DataElementHeader {
977                tag: Tag(0x0020, 0x4000),
978                vr: VR::LT,
979                len: Length(4),
980            }),
981            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
982            // no item end
983        ];
984
985        validate_read_data_explicit_vr(DATA, ground_truth);
986    }
987
988    #[test]
989    fn read_sequence_implicit() {
990        #[rustfmt::skip]
991        static DATA: &[u8] = &[
992            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
993            b'S', b'Q', // VR
994            0x00, 0x00, // reserved
995            0xff, 0xff, 0xff, 0xff, // length: undefined
996            // -- 12 --
997            0xfe, 0xff, 0x00, 0xe0, // item start tag
998            0xff, 0xff, 0xff, 0xff, // item length: undefined
999            // -- 20 --
1000            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1
1001            // -- 30 --
1002            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2
1003            // -- 40 --
1004            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
1005            // -- 48 --
1006            0xfe, 0xff, 0x00, 0xe0, // item start tag
1007            0xff, 0xff, 0xff, 0xff, // item length: undefined
1008            // -- 56 --
1009            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4
1010            // -- 66 --
1011            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
1012            // -- 74 --
1013            0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
1014            // -- 82 --
1015            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
1016            b'T', b'E', b'S', b'T', // value = "TEST"
1017        ];
1018
1019        let ground_truth = vec![
1020            DataToken::SequenceStart {
1021                tag: Tag(0x0018, 0x6011),
1022                len: Length::UNDEFINED,
1023            },
1024            DataToken::ItemStart {
1025                len: Length::UNDEFINED,
1026            },
1027            DataToken::ElementHeader(DataElementHeader {
1028                tag: Tag(0x0018, 0x6012),
1029                vr: VR::US,
1030                len: Length(2),
1031            }),
1032            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
1033            DataToken::ElementHeader(DataElementHeader {
1034                tag: Tag(0x0018, 0x6014),
1035                vr: VR::US,
1036                len: Length(2),
1037            }),
1038            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
1039            DataToken::ItemEnd,
1040            DataToken::ItemStart {
1041                len: Length::UNDEFINED,
1042            },
1043            DataToken::ElementHeader(DataElementHeader {
1044                tag: Tag(0x0018, 0x6012),
1045                vr: VR::US,
1046                len: Length(2),
1047            }),
1048            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
1049            DataToken::ItemEnd,
1050            DataToken::SequenceEnd,
1051            DataToken::ElementHeader(DataElementHeader {
1052                tag: Tag(0x0020, 0x4000),
1053                vr: VR::LT,
1054                len: Length(4),
1055            }),
1056            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
1057        ];
1058
1059        validate_read_data_explicit_vr(DATA, ground_truth);
1060    }
1061
1062    #[test]
1063    fn read_implicit_len_sequence_implicit_vr_unknown() {
1064        #[rustfmt::skip]
1065        static DATA: &[u8] = &[
1066            0x33, 0x55, 0x33, 0x55, // sequence tag: (5533,5533) «private, unknown attribute»
1067            0xff, 0xff, 0xff, 0xff, // length: undefined
1068            // -- 8 --
1069            0xfe, 0xff, 0x00, 0xe0, // item begin
1070            0xff, 0xff, 0xff, 0xff, // length: undefined
1071            // -- 16 --
1072            0xfe, 0xff, 0x0d, 0xe0, // item end
1073            0x00, 0x00, 0x00, 0x00, // length is always zero
1074            // -- 24 --
1075            0xfe, 0xff, 0xdd, 0xe0,
1076            0x00, 0x00, 0x00, 0x00, // sequence end
1077            // -- 32 --
1078        ];
1079
1080        let ground_truth = vec![
1081            DataToken::SequenceStart {
1082                tag: Tag(0x5533, 0x5533),
1083                len: Length::UNDEFINED,
1084            },
1085            DataToken::ItemStart {
1086                len: Length::UNDEFINED,
1087            },
1088            DataToken::ItemEnd,
1089            DataToken::SequenceEnd,
1090        ];
1091
1092        validate_read_data_implicit_vr(DATA, ground_truth);
1093    }
1094
1095    #[test]
1096    fn read_encapsulated_pixeldata() {
1097        #[rustfmt::skip]
1098        static DATA: &[u8] = &[
1099            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
1100            b'O', b'B', // VR 
1101            0x00, 0x00, // reserved
1102            0xff, 0xff, 0xff, 0xff, // length: undefined
1103            // -- 12 -- Basic offset table
1104            0xfe, 0xff, 0x00, 0xe0, // item start tag
1105            0x00, 0x00, 0x00, 0x00, // item length: 0
1106            // -- 20 -- First fragment of pixel data
1107            0xfe, 0xff, 0x00, 0xe0, // item start tag
1108            0x20, 0x00, 0x00, 0x00, // item length: 32
1109            // -- 28 -- Compressed Fragment
1110            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1111            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1112            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1113            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1114            // -- 60 -- End of pixel data
1115            0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
1116            0x00, 0x00, 0x00, 0x00,
1117            // -- 68 -- padding
1118            0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
1119            b'O', b'B', // VR
1120            0x00, 0x00, // reserved
1121            0x08, 0x00, 0x00, 0x00, // length: 8
1122            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1123        ];
1124
1125        let ground_truth = vec![
1126            DataToken::PixelSequenceStart,
1127            DataToken::ItemStart { len: Length(0) },
1128            DataToken::ItemEnd,
1129            DataToken::ItemStart { len: Length(32) },
1130            DataToken::ItemValue(vec![0x99; 32]),
1131            DataToken::ItemEnd,
1132            DataToken::SequenceEnd,
1133            DataToken::ElementHeader(DataElementHeader::new(
1134                Tag(0xfffc, 0xfffc),
1135                VR::OB,
1136                Length(8),
1137            )),
1138            DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
1139        ];
1140
1141        validate_read_data_explicit_vr(DATA, ground_truth);
1142    }
1143
1144    #[test]
1145    fn read_encapsulated_pixeldata_with_offset_table() {
1146        #[rustfmt::skip]
1147        static DATA: &[u8] = &[
1148            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
1149            b'O', b'B', // VR 
1150            0x00, 0x00, // reserved
1151            0xff, 0xff, 0xff, 0xff, // length: undefined
1152            // -- 12 -- Basic offset table
1153            0xfe, 0xff, 0x00, 0xe0, // item start tag
1154            0x04, 0x00, 0x00, 0x00, // item length: 4
1155            // -- 20 -- item value
1156            0x10, 0x00, 0x00, 0x00, // 16
1157            // -- 24 -- First fragment of pixel data
1158            0xfe, 0xff, 0x00, 0xe0, // item start tag
1159            0x20, 0x00, 0x00, 0x00, // item length: 32
1160            // -- 32 -- Compressed Fragment
1161            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1162            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1163            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1164            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
1165            // -- 60 -- End of pixel data
1166            0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
1167            0x00, 0x00, 0x00, 0x00,
1168            // -- 68 -- padding
1169            0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
1170            b'O', b'B', // VR
1171            0x00, 0x00, // reserved
1172            0x08, 0x00, 0x00, 0x00, // length: 8
1173            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1174        ];
1175
1176        let ground_truth = vec![
1177            DataToken::PixelSequenceStart,
1178            DataToken::ItemStart { len: Length(4) },
1179            DataToken::OffsetTable(vec![16]),
1180            DataToken::ItemEnd,
1181            DataToken::ItemStart { len: Length(32) },
1182            DataToken::ItemValue(vec![0x99; 32]),
1183            DataToken::ItemEnd,
1184            DataToken::SequenceEnd,
1185            DataToken::ElementHeader(DataElementHeader::new(
1186                Tag(0xfffc, 0xfffc),
1187                VR::OB,
1188                Length(8),
1189            )),
1190            DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
1191        ];
1192
1193        validate_read_data_explicit_vr(DATA, ground_truth);
1194    }
1195
1196    #[test]
1197    fn read_dataset_in_dataset() {
1198        #[rustfmt::skip]
1199        const DATA: &'static [u8; 138] = &[
1200            // 0: (2001, 9000) private sequence
1201            0x01, 0x20, 0x00, 0x90, //
1202            // length: undefined
1203            0xFF, 0xFF, 0xFF, 0xFF, //
1204            // 8: Item start
1205            0xFE, 0xFF, 0x00, 0xE0, //
1206            // Item length explicit (114 bytes)
1207            0x72, 0x00, 0x00, 0x00, //
1208            // 16: (0008,1115) ReferencedSeriesSequence
1209            0x08, 0x00, 0x15, 0x11, //
1210            // length: undefined
1211            0xFF, 0xFF, 0xFF, 0xFF, //
1212            // 24: Item start
1213            0xFE, 0xFF, 0x00, 0xE0, //
1214            // Item length undefined
1215            0xFF, 0xFF, 0xFF, 0xFF, //
1216            // 32: (0008,1140) ReferencedImageSequence
1217            0x08, 0x00, 0x40, 0x11, //
1218            // length: undefined
1219            0xFF, 0xFF, 0xFF, 0xFF, //
1220            // 40: Item start
1221            0xFE, 0xFF, 0x00, 0xE0, //
1222            // Item length undefined
1223            0xFF, 0xFF, 0xFF, 0xFF, //
1224            // 48: (0008,1150) ReferencedSOPClassUID
1225            0x08, 0x00, 0x50, 0x11, //
1226            // length: 26
1227            0x1a, 0x00, 0x00, 0x00, //
1228            // Value: "1.2.840.10008.5.1.4.1.1.7\0" (SecondaryCaptureImageStorage)
1229            b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
1230            b'5', b'.', b'1', b'.', b'4', b'.', b'1', b'.', b'1', b'.', b'7', b'\0',
1231            // 82: Item End (ReferencedImageSequence)
1232            0xFE, 0xFF, 0x0D, 0xE0, //
1233            0x00, 0x00, 0x00, 0x00, //
1234            // 90: Sequence End (ReferencedImageSequence)
1235            0xFE, 0xFF, 0xDD, 0xE0, //
1236            0x00, 0x00, 0x00, 0x00, //
1237            // 98: Item End (ReferencedSeriesSequence)
1238            0xFE, 0xFF, 0x0D, 0xE0, //
1239            0x00, 0x00, 0x00, 0x00, //
1240            // 106: Sequence End (ReferencedSeriesSequence)
1241            0xFE, 0xFF, 0xDD, 0xE0, //
1242            0x00, 0x00, 0x00, 0x00, //
1243            // 114: (2050,0020) PresentationLUTShape (CS)
1244            0x50, 0x20, 0x20, 0x00, //
1245            // length: 8
1246            0x08, 0x00, 0x00, 0x00, //
1247            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y', //
1248            // 130: Sequence end
1249            0xFE, 0xFF, 0xDD, 0xE0, //
1250            0x00, 0x00, 0x00, 0x00, //
1251        ];
1252
1253        let ground_truth = vec![
1254            DataToken::SequenceStart {
1255                tag: Tag(0x2001, 0x9000),
1256                len: Length::UNDEFINED,
1257            },
1258            DataToken::ItemStart { len: Length(114) },
1259            DataToken::SequenceStart {
1260                tag: Tag(0x0008, 0x1115),
1261                len: Length::UNDEFINED,
1262            },
1263            DataToken::ItemStart {
1264                len: Length::UNDEFINED,
1265            },
1266            DataToken::SequenceStart {
1267                tag: Tag(0x0008, 0x1140),
1268                len: Length::UNDEFINED,
1269            },
1270            DataToken::ItemStart {
1271                len: Length::UNDEFINED,
1272            },
1273            DataToken::ElementHeader(DataElementHeader {
1274                tag: Tag(0x0008, 0x1150),
1275                vr: VR::UI,
1276                len: Length(26),
1277            }),
1278            DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.10008.5.1.4.1.1.7\0")),
1279            DataToken::ItemEnd,
1280            DataToken::SequenceEnd,
1281            DataToken::ItemEnd,
1282            DataToken::SequenceEnd,
1283            DataToken::ElementHeader(DataElementHeader {
1284                tag: Tag(0x2050, 0x0020),
1285                vr: VR::CS,
1286                len: Length(8),
1287            }),
1288            DataToken::PrimitiveValue(PrimitiveValue::from("IDENTITY")),
1289            DataToken::ItemEnd, // inserted automatically
1290            DataToken::SequenceEnd,
1291        ];
1292
1293        validate_read_data_implicit_vr(DATA, ground_truth);
1294    }
1295
1296    #[test]
1297    fn peek_data_elements_implicit() {
1298        #[rustfmt::skip]
1299        static DATA: &[u8] = &[
1300            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
1301            b'S', b'Q', // VR
1302            0x00, 0x00, // reserved
1303            0xff, 0xff, 0xff, 0xff, // length: undefined
1304            // -- 12 --
1305            0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
1306            // -- 82 --
1307            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
1308            b'T', b'E', b'S', b'T', // value = "TEST"
1309        ];
1310
1311        let ground_truth = vec![
1312            DataToken::SequenceStart {
1313                tag: Tag(0x0018, 0x6011),
1314                len: Length::UNDEFINED,
1315            },
1316            DataToken::SequenceEnd,
1317            DataToken::ElementHeader(DataElementHeader {
1318                tag: Tag(0x0020, 0x4000),
1319                vr: VR::LT,
1320                len: Length(4),
1321            }),
1322            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
1323        ];
1324
1325        let mut cursor = DATA;
1326        let parser = StatefulDecoder::new(
1327            &mut cursor,
1328            ExplicitVRLittleEndianDecoder::default(),
1329            LittleEndianBasicDecoder::default(),
1330            SpecificCharacterSet::default(),
1331        );
1332        let mut dset_reader = DataSetReader::new(parser, Default::default());
1333
1334        let iter = (&mut dset_reader).into_iter();
1335
1336        // peek at first token
1337        let token = iter.peek().expect("should peek first token OK");
1338        assert_eq!(token, Some(&ground_truth[0]));
1339
1340        // peeking multiple times gives the same result
1341        let token = iter.peek().expect("should peek first token again OK");
1342        assert_eq!(token, Some(&ground_truth[0]));
1343
1344        // Using `next` give us the same token
1345        let token = iter
1346            .next()
1347            .expect("expected token")
1348            .expect("should read token peeked OK");
1349        assert_eq!(&token, &ground_truth[0]);
1350
1351        // read some more tokens
1352
1353        // sequence end
1354        let token = iter.next().unwrap().unwrap();
1355        assert_eq!(&token, &ground_truth[1]);
1356        // data element header
1357        let token = iter.next().unwrap().unwrap();
1358        assert_eq!(&token, &ground_truth[2]);
1359
1360        // peek string value
1361        let token = iter.peek().unwrap();
1362        assert_eq!(token, Some(&ground_truth[3]));
1363        // peek it again
1364        let token = iter.peek().unwrap();
1365        assert_eq!(token, Some(&ground_truth[3]));
1366        // then read it
1367        let token = iter.next().unwrap().unwrap();
1368        assert_eq!(&token, &ground_truth[3]);
1369
1370        // finished reading, peek should return None
1371        assert!(iter.peek().unwrap().is_none());
1372    }
1373
1374    #[test]
1375    fn read_pixel_sequence_bad_item_end() {
1376        #[rustfmt::skip]
1377        static DATA: &[u8] = &[
1378            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
1379            b'O', b'B', // VR 
1380            0x00, 0x00, // reserved
1381            0xff, 0xff, 0xff, 0xff, // length: undefined
1382            // -- 12 --
1383            0xfe, 0xff, 0x00, 0xe0, // item start tag
1384            0x00, 0x00, 0x00, 0x00, // item length: 0
1385            // -- 20 --
1386            0xfe, 0xff, 0x0d, 0xe0, // item end
1387            0x00, 0x00, 0x00, 0x00, // length is always zero
1388            // -- 28 --
1389            0xfe, 0xff, 0x0d, 0xe0, // another item end (bad)
1390            0x00, 0x00, 0x00, 0x00, //
1391            // -- 36 --
1392            0xfe, 0xff, 0x00, 0xe0, // another item start
1393            0x00, 0x00, 0x00, 0x00, // item length: 0
1394        ];
1395
1396        let mut cursor = DATA;
1397        let parser = StatefulDecoder::new(
1398            &mut cursor,
1399            ExplicitVRLittleEndianDecoder::default(),
1400            LittleEndianBasicDecoder::default(),
1401            SpecificCharacterSet::default(),
1402        );
1403        let mut dset_reader = DataSetReader::new(parser, Default::default());
1404
1405        let token_res = (&mut dset_reader)
1406            .into_iter()
1407            .collect::<Result<Vec<_>, _>>();
1408        dbg!(&token_res);
1409        assert!(token_res.is_err());
1410    }
1411
1412    #[test]
1413    fn read_odd_length_element() {
1414        #[rustfmt::skip]
1415        static DATA: &[u8] = &[
1416            0x08, 0x00, 0x16, 0x00, // (0008,0016) SOPClassUID
1417            b'U', b'I', // VR
1418            0x0b, 0x00, // len = 11
1419            b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0',
1420            0x00, // padding
1421        ];
1422
1423        let ground_truth = vec![
1424            DataToken::ElementHeader(DataElementHeader {
1425                tag: Tag(0x0008, 0x0016),
1426                vr: VR::UI,
1427                len: Length(12),
1428            }),
1429            DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.100\0")),
1430        ];
1431
1432        // strategy: assume next even
1433
1434        let mut cursor = DATA;
1435        let parser = StatefulDecoder::new(
1436            &mut cursor,
1437            ExplicitVRLittleEndianDecoder::default(),
1438            LittleEndianBasicDecoder::default(),
1439            SpecificCharacterSet::default(),
1440        );
1441        let dset_reader = DataSetReader::new(
1442            parser,
1443            DataSetReaderOptions {
1444                odd_length: OddLengthStrategy::NextEven,
1445                ..Default::default()
1446            },
1447        );
1448
1449        validate_data_set_reader(DATA, dset_reader, ground_truth);
1450
1451        // strategy: fail
1452
1453        let mut cursor = DATA;
1454        let parser = StatefulDecoder::new(
1455            &mut cursor,
1456            ExplicitVRLittleEndianDecoder::default(),
1457            LittleEndianBasicDecoder::default(),
1458            SpecificCharacterSet::default(),
1459        );
1460        let dset_reader = DataSetReader::new(
1461            parser,
1462            DataSetReaderOptions {
1463                odd_length: OddLengthStrategy::Fail,
1464                ..Default::default()
1465            },
1466        );
1467
1468        let mut tokens = dset_reader.into_iter();
1469        let token = tokens.next();
1470
1471        assert!(matches!(
1472            token,
1473            Some(Err(super::Error::InvalidElementLength {
1474                tag: Tag(0x0008, 0x0016),
1475                len: 11,
1476                bytes_read: 8,
1477            })),
1478        ), "got: {:?}", token);
1479    }
1480}