dicom_parser/dataset/
lazy_read.rs

//! This module contains a mid-level abstraction for reading DICOM content
//! sequentially and in a lazy fashion.
//! That is, unlike the reader in the [`read`](super::read) module,
//! DICOM values can be skipped and most allocations can be avoided.
//!
//! At this level, headers and values are treated as tokens which can be used
//! to form a syntax tree of a full data set.
//! Whenever an element value or pixel sequence item is encountered,
//! the given token does not consume the value from the reader,
//! thus letting users decide whether to:
//! - fully read the value and turn it into an in-memory representation;
//! - skip the value altogether, by reading into a sink;
//! - copy the bytes of the value into another writer,
//!   such as a previously allocated buffer.
15use crate::stateful::decode::{DynStatefulDecoder, Error as DecoderError, StatefulDecode};
16use crate::util::ReadSeek;
17use dicom_core::header::{DataElementHeader, Header, Length, SequenceItemHeader};
18use dicom_core::{Tag, VR};
19use dicom_encoding::text::SpecificCharacterSet;
20use dicom_encoding::transfer_syntax::TransferSyntax;
21use snafu::{Backtrace, OptionExt, ResultExt, Snafu};
22use std::cmp::Ordering;
23
24use super::{LazyDataToken, SeqTokenType};
25
26#[derive(Debug, Snafu)]
27#[non_exhaustive]
28pub enum Error {
29    #[snafu(display("Could not create decoder"))]
30    CreateDecoder {
31        #[snafu(backtrace)]
32        source: DecoderError,
33    },
34    #[snafu(display("Could not read item header at {} bytes", bytes_read))]
35    ReadItemHeader {
36        bytes_read: u64,
37        #[snafu(backtrace)]
38        source: DecoderError,
39    },
40    #[snafu(display("Could not read element header at {} bytes", bytes_read))]
41    ReadHeader {
42        bytes_read: u64,
43        #[snafu(backtrace)]
44        source: DecoderError,
45    },
46    #[snafu(display("Could not read value"))]
47    ReadValue {
48        #[snafu(backtrace)]
49        source: DecoderError,
50    },
51    #[snafu(display("Failed to get reader position"))]
52    GetPosition {
53        source: std::io::Error,
54        backtrace: Backtrace,
55    },
56    #[snafu(display(
57        "Inconsistent sequence end: expected end at {} bytes but read {}",
58        end_of_sequence,
59        bytes_read
60    ))]
61    InconsistentSequenceEnd {
62        end_of_sequence: u64,
63        bytes_read: u64,
64        backtrace: Backtrace,
65    },
66    #[snafu(display("Unexpected item delimiter at {} bytes", bytes_read))]
67    UnexpectedItemDelimiter {
68        bytes_read: u64,
69        backtrace: Backtrace,
70    },
71    #[snafu(display("Unexpected undefined value length at {} bytes", bytes_read))]
72    UndefinedLength {
73        bytes_read: u64,
74        backtrace: Backtrace,
75    },
76}
77
78pub type Result<T, E = Error> = std::result::Result<T, E>;
79
80/// A reader-specific token representing a sequence or item start.
81#[derive(Debug, Copy, Clone, PartialEq)]
82struct SeqToken {
83    /// Whether it is the start of a sequence or the start of an item.
84    typ: SeqTokenType,
85    /// The length of the value, as indicated by the starting element,
86    /// can be unknown.
87    len: Length,
88    /// Whether this sequence token is part of an encapsulated pixel data.
89    pixel_data: bool,
90    /// The number of bytes the parser has read until it reached the
91    /// beginning of the sequence or item value data.
92    base_offset: u64,
93}
94
95/// An attached iterator for retrieving DICOM object element markers
96/// from a random access data source.
97///
98/// This iterator produces data tokens without eagerly reading the bytes
99/// of a value.
100#[derive(Debug)]
101pub struct LazyDataSetReader<S> {
102    /// the stateful decoder
103    parser: S,
104    /// whether the reader is expecting an item next (or a sequence delimiter)
105    in_sequence: bool,
106    /// whether a check for a sequence or item delimitation is pending
107    delimiter_check_pending: bool,
108    /// a stack of delimiters
109    seq_delimiters: Vec<SeqToken>,
110    /// fuse the iteration process if true
111    hard_break: bool,
112    /// last decoded header
113    last_header: Option<DataElementHeader>,
114}
115
116impl<R> LazyDataSetReader<DynStatefulDecoder<R>> {
117    /// Create a new lazy data set reader
118    /// with the given random access source and element dictionary,
119    /// while considering the given transfer syntax and specific character set.
120    pub fn new_with_ts_cs(
121        mut source: R,
122        ts: &TransferSyntax,
123        cs: SpecificCharacterSet,
124    ) -> Result<Self>
125    where
126        R: ReadSeek,
127    {
128        let position = source.stream_position().context(GetPositionSnafu)?;
129        let parser =
130            DynStatefulDecoder::new_with(source, ts, cs, position).context(CreateDecoderSnafu)?;
131
132        Ok(LazyDataSetReader {
133            parser,
134            seq_delimiters: Vec::new(),
135            delimiter_check_pending: false,
136            in_sequence: false,
137            hard_break: false,
138            last_header: None,
139        })
140    }
141}
142
143impl<S> LazyDataSetReader<S>
144where
145    S: StatefulDecode,
146{
147    /// Create a new iterator with the given stateful decoder.
148    pub fn new(parser: S) -> Self {
149        LazyDataSetReader {
150            parser,
151            seq_delimiters: Vec::new(),
152            delimiter_check_pending: false,
153            in_sequence: false,
154            hard_break: false,
155            last_header: None,
156        }
157    }
158}
159
160impl<S> LazyDataSetReader<S>
161where
162    S: StatefulDecode,
163{
164    fn update_seq_delimiters<'b>(&mut self) -> Result<Option<LazyDataToken<&'b mut S>>> {
165        if let Some(sd) = self.seq_delimiters.last() {
166            if let Some(len) = sd.len.get() {
167                let end_of_sequence = sd.base_offset + len as u64;
168                let bytes_read = self.parser.position();
169                match end_of_sequence.cmp(&bytes_read) {
170                    Ordering::Equal => {
171                        // end of delimiter, as indicated by the element's length
172                        let token;
173                        match sd.typ {
174                            SeqTokenType::Sequence => {
175                                self.in_sequence = false;
176                                token = LazyDataToken::SequenceEnd;
177                            }
178                            SeqTokenType::Item => {
179                                self.in_sequence = true;
180                                token = LazyDataToken::ItemEnd;
181                            }
182                        }
183                        self.seq_delimiters.pop();
184                        return Ok(Some(token));
185                    }
186                    Ordering::Less => {
187                        return InconsistentSequenceEndSnafu {
188                            end_of_sequence,
189                            bytes_read,
190                        }
191                        .fail();
192                    }
193                    Ordering::Greater => {} // continue normally
194                }
195            }
196        }
197        self.delimiter_check_pending = false;
198        Ok(None)
199    }
200
201    #[inline]
202    fn push_sequence_token(&mut self, typ: SeqTokenType, len: Length, pixel_data: bool) {
203        self.seq_delimiters.push(SeqToken {
204            typ,
205            pixel_data,
206            len,
207            base_offset: self.parser.position(),
208        })
209    }
210
211    /// Retrieve the inner stateful decoder from this data set reader.
212    pub fn into_decoder(self) -> S {
213        self.parser
214    }
215
216    /// Advance and retrieve the next DICOM data token.
217    ///
218    /// **Note:** For the data set to be successfully parsed,
219    /// the resulting data tokens needs to be consumed
220    /// if they are of a value type.
221    pub fn advance(&mut self) -> Option<Result<LazyDataToken<&mut S>>> {
222        if self.hard_break {
223            return None;
224        }
225        // record the reading position before any further reading
226        let bytes_read = self.parser.position();
227
228        // item or sequence delimitation logic for explicit lengths
229        if self.delimiter_check_pending {
230            match self.update_seq_delimiters() {
231                Err(e) => {
232                    self.hard_break = true;
233                    return Some(Err(e));
234                }
235                Ok(Some(token)) => return Some(Ok(token)),
236                Ok(None) => { /* no-op */ }
237            }
238        }
239
240        if self.in_sequence {
241            // at sequence level, expecting item header
242
243            match self.parser.decode_item_header() {
244                Ok(header) => {
245                    match header {
246                        SequenceItemHeader::Item { len } => {
247                            // entered a new item
248                            self.in_sequence = false;
249                            self.push_sequence_token(
250                                SeqTokenType::Item,
251                                len,
252                                self.seq_delimiters.last()
253                                    .expect("item header should be read only inside an existing sequence")
254                                    .pixel_data);
255                            // items can be empty
256                            if len == Length(0) {
257                                self.delimiter_check_pending = true;
258                            }
259                            Some(Ok(LazyDataToken::ItemStart { len }))
260                        }
261                        SequenceItemHeader::ItemDelimiter => {
262                            // closed an item
263                            self.seq_delimiters.pop();
264                            self.in_sequence = true;
265                            // sequences can end after an item delimiter
266                            self.delimiter_check_pending = true;
267                            Some(Ok(LazyDataToken::ItemEnd))
268                        }
269                        SequenceItemHeader::SequenceDelimiter => {
270                            // closed a sequence
271                            self.seq_delimiters.pop();
272                            self.in_sequence = false;
273                            // items can end after a nested sequence ends
274                            self.delimiter_check_pending = true;
275                            Some(Ok(LazyDataToken::SequenceEnd))
276                        }
277                    }
278                }
279                Err(e) => {
280                    self.hard_break = true;
281                    Some(Err(e).context(ReadItemHeaderSnafu { bytes_read }))
282                }
283            }
284        } else if let Some(SeqToken {
285            typ: SeqTokenType::Item,
286            pixel_data: true,
287            len,
288            ..
289        }) = self.seq_delimiters.last()
290        {
291            // item value
292
293            let len = match len
294                .get()
295                .with_context(|| UndefinedLengthSnafu { bytes_read })
296            {
297                Ok(len) => len,
298                Err(e) => return Some(Err(e)),
299            };
300
301            // need to pop item delimiter on the next iteration
302            self.delimiter_check_pending = true;
303            Some(Ok(LazyDataToken::LazyItemValue {
304                len,
305                decoder: &mut self.parser,
306            }))
307        } else if let Some(header) = self.last_header {
308            if header.is_encapsulated_pixeldata() {
309                self.push_sequence_token(SeqTokenType::Sequence, Length::UNDEFINED, true);
310                self.last_header = None;
311
312                // encapsulated pixel data, expecting offset table
313                match self.parser.decode_item_header() {
314                    Ok(header) => match header {
315                        SequenceItemHeader::Item { len } => {
316                            // entered a new item
317                            self.in_sequence = false;
318                            self.push_sequence_token(SeqTokenType::Item, len, true);
319                            // items can be empty
320                            if len == Length(0) {
321                                self.delimiter_check_pending = true;
322                            }
323                            Some(Ok(LazyDataToken::ItemStart { len }))
324                        }
325                        SequenceItemHeader::SequenceDelimiter => {
326                            // empty pixel data
327                            self.seq_delimiters.pop();
328                            self.in_sequence = false;
329                            Some(Ok(LazyDataToken::SequenceEnd))
330                        }
331                        SequenceItemHeader::ItemDelimiter => {
332                            self.hard_break = true;
333                            Some(UnexpectedItemDelimiterSnafu { bytes_read }.fail())
334                        }
335                    },
336                    Err(e) => {
337                        self.hard_break = true;
338                        Some(Err(e).context(ReadItemHeaderSnafu { bytes_read }))
339                    }
340                }
341            } else {
342                // a plain element header was read, so an element value is expected
343                self.last_header = None;
344
345                // sequences can end after this token
346                self.delimiter_check_pending = true;
347
348                Some(Ok(LazyDataToken::LazyValue {
349                    header,
350                    decoder: &mut self.parser,
351                }))
352            }
353        } else {
354            // a data element header or item delimiter is expected
355            match self.parser.decode_header() {
356                Ok(DataElementHeader {
357                    tag,
358                    vr: VR::SQ,
359                    len,
360                }) => {
361                    self.in_sequence = true;
362                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
363
364                    // sequences can end right after they start
365                    if len == Length(0) {
366                        self.delimiter_check_pending = true;
367                    }
368
369                    Some(Ok(LazyDataToken::SequenceStart { tag, len }))
370                }
371                Ok(DataElementHeader {
372                    tag: Tag(0xFFFE, 0xE00D),
373                    ..
374                }) => {
375                    self.in_sequence = true;
376                    // pop item delimiter
377                    self.seq_delimiters.pop();
378                    // sequences can end after this token
379                    self.delimiter_check_pending = true;
380                    Some(Ok(LazyDataToken::ItemEnd))
381                }
382                Ok(header) if header.is_encapsulated_pixeldata() => {
383                    // encapsulated pixel data conditions:
384                    // expect a sequence of pixel data fragments
385
386                    // save it for the next step
387                    self.last_header = Some(header);
388                    Some(Ok(LazyDataToken::PixelSequenceStart))
389                }
390                Ok(header) if header.len.is_undefined() => {
391                    // treat other undefined length elements
392                    // as data set sequences,
393                    // discarding the VR in the process
394                    self.in_sequence = true;
395
396                    let DataElementHeader { tag, len, .. } = header;
397                    self.push_sequence_token(SeqTokenType::Sequence, len, false);
398
399                    Some(Ok(LazyDataToken::SequenceStart { tag, len }))
400                }
401                Ok(header) => {
402                    // save it for the next step
403                    self.last_header = Some(header);
404                    Some(Ok(LazyDataToken::ElementHeader(header)))
405                }
406                Err(DecoderError::DecodeElementHeader {
407                    source: dicom_encoding::decode::Error::ReadHeaderTag { source, .. },
408                    ..
409                }) if source.kind() == std::io::ErrorKind::UnexpectedEof => {
410                    // Note: if `UnexpectedEof` was reached while trying to read
411                    // an element tag, then we assume that
412                    // the end of a DICOM object was reached gracefully.
413                    // This approach is unlikely to consume trailing bytes,
414                    // but may ignore the current depth of the data set tree.
415                    self.hard_break = true;
416                    None
417                }
418                Err(e) => {
419                    self.hard_break = true;
420                    Some(Err(e).context(ReadHeaderSnafu { bytes_read }))
421                }
422            }
423        }
424    }
425}
426
427#[cfg(test)]
428mod tests {
429    use super::{LazyDataSetReader, StatefulDecode};
430    use crate::{
431        dataset::{DataToken, LazyDataToken},
432        StatefulDecoder,
433    };
434    use dicom_core::value::PrimitiveValue;
435    use dicom_core::{
436        dicom_value,
437        header::{DataElementHeader, Length},
438    };
439    use dicom_core::{Tag, VR};
440    use dicom_encoding::decode::{
441        explicit_le::ExplicitVRLittleEndianDecoder, implicit_le::ImplicitVRLittleEndianDecoder,
442    };
443    use dicom_encoding::{decode::basic::LittleEndianBasicDecoder, text::SpecificCharacterSet};
444
445    fn validate_dataset_reader_implicit_vr<I>(data: &[u8], ground_truth: I)
446    where
447        I: IntoIterator<Item = DataToken>,
448    {
449        let mut cursor = data;
450        let parser = StatefulDecoder::new(
451            &mut cursor,
452            ImplicitVRLittleEndianDecoder::default(),
453            LittleEndianBasicDecoder::default(),
454            SpecificCharacterSet::default(),
455        );
456
457        validate_dataset_reader(data, parser, ground_truth)
458    }
459
460    fn validate_dataset_reader_explicit_vr<I>(data: &[u8], ground_truth: I)
461    where
462        I: IntoIterator<Item = DataToken>,
463    {
464        let mut cursor = data;
465        let parser = StatefulDecoder::new(
466            &mut cursor,
467            ExplicitVRLittleEndianDecoder::default(),
468            LittleEndianBasicDecoder::default(),
469            SpecificCharacterSet::default(),
470        );
471
472        validate_dataset_reader(&data, parser, ground_truth)
473    }
474
475    fn validate_dataset_reader<I, D>(data: &[u8], parser: D, ground_truth: I)
476    where
477        I: IntoIterator<Item = DataToken>,
478        D: StatefulDecode,
479    {
480        let mut dset_reader = LazyDataSetReader::new(parser);
481
482        let mut gt_iter = ground_truth.into_iter();
483        while let Some(res) = dset_reader.advance() {
484            let gt_token = gt_iter.next().expect("ground truth is shorter");
485            let token = res.expect("should parse without an error");
486            let token = token.into_owned().unwrap();
487            assert_eq!(token, gt_token);
488        }
489
490        assert_eq!(
491            gt_iter.count(), // consume til the end
492            0,               // we have already read all of them
493            "unexpected number of tokens remaining"
494        );
495        assert_eq!(dset_reader.parser.position(), data.len() as u64);
496    }
497
498    #[test]
499    fn lazy_read_sequence_explicit() {
500        #[rustfmt::skip]
501        static DATA: &[u8] = &[
502            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
503            b'S', b'Q', // VR
504            0x00, 0x00, // reserved
505            0x2e, 0x00, 0x00, 0x00, // length: 28 + 18 = 46 (#= 2)
506            // -- 12 --
507            0xfe, 0xff, 0x00, 0xe0, // item start tag
508            0x14, 0x00, 0x00, 0x00, // item length: 20 (#= 2)
509            // -- 20 --
510            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1
511            // -- 30 --
512            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2
513            // -- 40 --
514            0xfe, 0xff, 0x00, 0xe0, // item start tag
515            0x0a, 0x00, 0x00, 0x00, // item length: 10 (#= 1)
516            // -- 48 --
517            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4
518            // -- 58 --
519            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
520            b'T', b'E', b'S', b'T', // value = "TEST"
521        ];
522
523        let ground_truth = vec![
524            DataToken::SequenceStart {
525                tag: Tag(0x0018, 0x6011),
526                len: Length(46),
527            },
528            DataToken::ItemStart { len: Length(20) },
529            DataToken::ElementHeader(DataElementHeader {
530                tag: Tag(0x0018, 0x6012),
531                vr: VR::US,
532                len: Length(2),
533            }),
534            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
535            DataToken::ElementHeader(DataElementHeader {
536                tag: Tag(0x0018, 0x6014),
537                vr: VR::US,
538                len: Length(2),
539            }),
540            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
541            DataToken::ItemEnd,
542            DataToken::ItemStart { len: Length(10) },
543            DataToken::ElementHeader(DataElementHeader {
544                tag: Tag(0x0018, 0x6012),
545                vr: VR::US,
546                len: Length(2),
547            }),
548            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
549            DataToken::ItemEnd,
550            DataToken::SequenceEnd,
551            DataToken::ElementHeader(DataElementHeader {
552                tag: Tag(0x0020, 0x4000),
553                vr: VR::LT,
554                len: Length(4),
555            }),
556            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
557        ];
558
559        validate_dataset_reader_explicit_vr(DATA, ground_truth);
560    }
561
562    #[test]
563    fn lazy_read_sequence_explicit_2() {
564        static DATA: &[u8] = &[
565            // SequenceStart: (0008,2218) ; len = 54 (#=3)
566            0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
567            // -- 12, --
568            // ItemStart: len = 46
569            0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
570            // -- 20, --
571            // ElementHeader: (0008,0100) CodeValue; len = 8
572            0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00, // PrimitiveValue
573            0x54, 0x2d, 0x44, 0x31, 0x32, 0x31, 0x33, b' ',
574            // -- 36, --
575            // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4
576            0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00, // PrimitiveValue
577            0x53, 0x52, 0x54, b' ',
578            // -- 48, --
579            // (0008,0104) CodeMeaning; len = 10
580            0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00, // PrimitiveValue
581            0x4a, 0x61, 0x77, b' ', 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e,
582            // -- 66 --
583            // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0
584            0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
585            // ElementHeader: (2050,0020) PresentationLUTShape; len = 8
586            0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00, // PrimitiveValue
587            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
588        ];
589
590        let ground_truth = vec![
591            DataToken::SequenceStart {
592                tag: Tag(0x0008, 0x2218),
593                len: Length(54),
594            },
595            DataToken::ItemStart { len: Length(46) },
596            DataToken::ElementHeader(DataElementHeader {
597                tag: Tag(0x0008, 0x0100),
598                vr: VR::SH,
599                len: Length(8),
600            }),
601            DataToken::PrimitiveValue(PrimitiveValue::Strs(
602                ["T-D1213 ".to_owned()].as_ref().into(),
603            )),
604            DataToken::ElementHeader(DataElementHeader {
605                tag: Tag(0x0008, 0x0102),
606                vr: VR::SH,
607                len: Length(4),
608            }),
609            DataToken::PrimitiveValue(PrimitiveValue::Strs(["SRT ".to_owned()].as_ref().into())),
610            DataToken::ElementHeader(DataElementHeader {
611                tag: Tag(0x0008, 0x0104),
612                vr: VR::LO,
613                len: Length(10),
614            }),
615            DataToken::PrimitiveValue(PrimitiveValue::Strs(
616                ["Jaw region".to_owned()].as_ref().into(),
617            )),
618            DataToken::ItemEnd,
619            DataToken::SequenceEnd,
620            DataToken::SequenceStart {
621                tag: Tag(0x0040, 0x0555),
622                len: Length(0),
623            },
624            DataToken::SequenceEnd,
625            DataToken::ElementHeader(DataElementHeader {
626                tag: Tag(0x2050, 0x0020),
627                vr: VR::CS,
628                len: Length(8),
629            }),
630            DataToken::PrimitiveValue(PrimitiveValue::Strs(
631                ["IDENTITY".to_owned()].as_ref().into(),
632            )),
633        ];
634
635        validate_dataset_reader_explicit_vr(DATA, ground_truth);
636    }
637
638    #[test]
639    fn lazy_read_sequence_implicit() {
640        #[rustfmt::skip]
641        static DATA: &[u8] = &[
642            0x18, 0x00, 0x11, 0x60, // sequence tag: (0018,6011) SequenceOfUltrasoundRegions
643            b'S', b'Q', // VR
644            0x00, 0x00, // reserved
645            0xff, 0xff, 0xff, 0xff, // length: undefined
646            // -- 12 --
647            0xfe, 0xff, 0x00, 0xe0, // item start tag
648            0xff, 0xff, 0xff, 0xff, // item length: undefined
649            // -- 20 --
650            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x01, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 1
651            // -- 30 --
652            0x18, 0x00, 0x14, 0x60, b'U', b'S', 0x02, 0x00, 0x02, 0x00, // (0018, 6012) RegionDataType, len = 2, value = 2
653            // -- 40 --
654            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
655            // -- 48 --
656            0xfe, 0xff, 0x00, 0xe0, // item start tag
657            0xff, 0xff, 0xff, 0xff, // item length: undefined
658            // -- 56 --
659            0x18, 0x00, 0x12, 0x60, b'U', b'S', 0x02, 0x00, 0x04, 0x00, // (0018, 6012) RegionSpatialformat, len = 2, value = 4
660            // -- 66 --
661            0xfe, 0xff, 0x0d, 0xe0, 0x00, 0x00, 0x00, 0x00, // item end
662            // -- 74 --
663            0xfe, 0xff, 0xdd, 0xe0, 0x00, 0x00, 0x00, 0x00, // sequence end
664            // -- 82 --
665            0x20, 0x00, 0x00, 0x40, b'L', b'T', 0x04, 0x00, // (0020,4000) ImageComments, len = 4
666            b'T', b'E', b'S', b'T', // value = "TEST"
667        ];
668
669        let ground_truth = vec![
670            DataToken::SequenceStart {
671                tag: Tag(0x0018, 0x6011),
672                len: Length::UNDEFINED,
673            },
674            DataToken::ItemStart {
675                len: Length::UNDEFINED,
676            },
677            DataToken::ElementHeader(DataElementHeader {
678                tag: Tag(0x0018, 0x6012),
679                vr: VR::US,
680                len: Length(2),
681            }),
682            DataToken::PrimitiveValue(PrimitiveValue::U16([1].as_ref().into())),
683            DataToken::ElementHeader(DataElementHeader {
684                tag: Tag(0x0018, 0x6014),
685                vr: VR::US,
686                len: Length(2),
687            }),
688            DataToken::PrimitiveValue(PrimitiveValue::U16([2].as_ref().into())),
689            DataToken::ItemEnd,
690            DataToken::ItemStart {
691                len: Length::UNDEFINED,
692            },
693            DataToken::ElementHeader(DataElementHeader {
694                tag: Tag(0x0018, 0x6012),
695                vr: VR::US,
696                len: Length(2),
697            }),
698            DataToken::PrimitiveValue(PrimitiveValue::U16([4].as_ref().into())),
699            DataToken::ItemEnd,
700            DataToken::SequenceEnd,
701            DataToken::ElementHeader(DataElementHeader {
702                tag: Tag(0x0020, 0x4000),
703                vr: VR::LT,
704                len: Length(4),
705            }),
706            DataToken::PrimitiveValue(PrimitiveValue::Str("TEST".into())),
707        ];
708
709        validate_dataset_reader_explicit_vr(DATA, ground_truth);
710    }
711
712    #[test]
713    fn lazy_read_dataset_in_dataset() {
714        #[rustfmt::skip]
715        const DATA: &'static [u8; 138] = &[
716            // 0: (2001, 9000) private sequence
717            0x01, 0x20, 0x00, 0x90, //
718            // length: undefined
719            0xFF, 0xFF, 0xFF, 0xFF, //
720            // 8: Item start
721            0xFE, 0xFF, 0x00, 0xE0, //
722            // Item length explicit (114 bytes)
723            0x72, 0x00, 0x00, 0x00, //
724            // 16: (0008,1115) ReferencedSeriesSequence
725            0x08, 0x00, 0x15, 0x11, //
726            // length: undefined
727            0xFF, 0xFF, 0xFF, 0xFF, //
728            // 24: Item start
729            0xFE, 0xFF, 0x00, 0xE0, //
730            // Item length undefined
731            0xFF, 0xFF, 0xFF, 0xFF, //
732            // 32: (0008,1140) ReferencedImageSequence
733            0x08, 0x00, 0x40, 0x11, //
734            // length: undefined
735            0xFF, 0xFF, 0xFF, 0xFF, //
736            // 40: Item start
737            0xFE, 0xFF, 0x00, 0xE0, //
738            // Item length undefined
739            0xFF, 0xFF, 0xFF, 0xFF, //
740            // 48: (0008,1150) ReferencedSOPClassUID
741            0x08, 0x00, 0x50, 0x11, //
742            // length: 26
743            0x1a, 0x00, 0x00, 0x00, //
744            // Value: "1.2.840.10008.5.1.4.1.1.7\0" (SecondaryCaptureImageStorage)
745            b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
746            b'5', b'.', b'1', b'.', b'4', b'.', b'1', b'.', b'1', b'.', b'7', b'\0',
747            // 82: Item End (ReferencedImageSequence)
748            0xFE, 0xFF, 0x0D, 0xE0, //
749            0x00, 0x00, 0x00, 0x00, //
750            // 90: Sequence End (ReferencedImageSequence)
751            0xFE, 0xFF, 0xDD, 0xE0, //
752            0x00, 0x00, 0x00, 0x00, //
753            // 98: Item End (ReferencedSeriesSequence)
754            0xFE, 0xFF, 0x0D, 0xE0, //
755            0x00, 0x00, 0x00, 0x00, //
756            // 106: Sequence End (ReferencedSeriesSequence)
757            0xFE, 0xFF, 0xDD, 0xE0, //
758            0x00, 0x00, 0x00, 0x00, //
759            // 114: (2050,0020) PresentationLUTShape (CS)
760            0x50, 0x20, 0x20, 0x00, //
761            // length: 8
762            0x08, 0x00, 0x00, 0x00, //
763            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y', //
764            // 130: Sequence end
765            0xFE, 0xFF, 0xDD, 0xE0, //
766            0x00, 0x00, 0x00, 0x00, //
767        ];
768
769        let ground_truth = vec![
770            DataToken::SequenceStart {
771                tag: Tag(0x2001, 0x9000),
772                len: Length::UNDEFINED,
773            },
774            DataToken::ItemStart { len: Length(114) },
775            DataToken::SequenceStart {
776                tag: Tag(0x0008, 0x1115),
777                len: Length::UNDEFINED,
778            },
779            DataToken::ItemStart {
780                len: Length::UNDEFINED,
781            },
782            DataToken::SequenceStart {
783                tag: Tag(0x0008, 0x1140),
784                len: Length::UNDEFINED,
785            },
786            DataToken::ItemStart {
787                len: Length::UNDEFINED,
788            },
789            DataToken::ElementHeader(DataElementHeader {
790                tag: Tag(0x0008, 0x1150),
791                vr: VR::UI,
792                len: Length(26),
793            }),
794            DataToken::PrimitiveValue(PrimitiveValue::from("1.2.840.10008.5.1.4.1.1.7\0")),
795            DataToken::ItemEnd,
796            DataToken::SequenceEnd,
797            DataToken::ItemEnd,
798            DataToken::SequenceEnd,
799            DataToken::ElementHeader(DataElementHeader {
800                tag: Tag(0x2050, 0x0020),
801                vr: VR::CS,
802                len: Length(8),
803            }),
804            DataToken::PrimitiveValue(PrimitiveValue::from("IDENTITY")),
805            DataToken::ItemEnd, // inserted automatically
806            DataToken::SequenceEnd,
807        ];
808
809        validate_dataset_reader_implicit_vr(DATA, ground_truth);
810    }
811
812    #[test]
813    fn lazy_read_implicit_len_sequence_implicit_vr_unknown() {
814        #[rustfmt::skip]
815        static DATA: &[u8] = &[
816            0x33, 0x55, 0x33, 0x55, // sequence tag: (5533,5533) «private, unknown attribute»
817            0xff, 0xff, 0xff, 0xff, // length: undefined
818            // -- 8 --
819            0xfe, 0xff, 0x00, 0xe0, // item begin
820            0xff, 0xff, 0xff, 0xff, // length: undefined
821            // -- 16 --
822            0xfe, 0xff, 0x0d, 0xe0, // item end
823            0x00, 0x00, 0x00, 0x00, // length is always zero
824            // -- 24 --
825            0xfe, 0xff, 0xdd, 0xe0,
826            0x00, 0x00, 0x00, 0x00, // sequence end
827            // -- 32 --
828        ];
829
830        let ground_truth = vec![
831            DataToken::SequenceStart {
832                tag: Tag(0x5533, 0x5533),
833                len: Length::UNDEFINED,
834            },
835            DataToken::ItemStart {
836                len: Length::UNDEFINED,
837            },
838            DataToken::ItemEnd,
839            DataToken::SequenceEnd,
840        ];
841
842        validate_dataset_reader_implicit_vr(DATA, ground_truth);
843    }
844
845    #[test]
846    fn read_encapsulated_pixeldata() {
847        #[rustfmt::skip]
848        static DATA: &[u8] = &[
849            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
850            b'O', b'B', // VR 
851            0x00, 0x00, // reserved
852            0xff, 0xff, 0xff, 0xff, // length: undefined
853            // -- 12 -- Basic offset table
854            0xfe, 0xff, 0x00, 0xe0, // item start tag
855            0x00, 0x00, 0x00, 0x00, // item length: 0
856            // -- 20 -- First fragment of pixel data
857            0xfe, 0xff, 0x00, 0xe0, // item start tag
858            0x20, 0x00, 0x00, 0x00, // item length: 32
859            // -- 28 -- Compressed Fragment
860            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
861            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
862            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
863            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
864            // -- 60 -- End of pixel data
865            0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
866            0x00, 0x00, 0x00, 0x00,
867            // -- 68 -- padding
868            0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
869            b'O', b'B', // VR
870            0x00, 0x00, // reserved
871            0x08, 0x00, 0x00, 0x00, // length: 8
872            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
873        ];
874
875        let ground_truth = vec![
876            DataToken::PixelSequenceStart,
877            DataToken::ItemStart { len: Length(0) },
878            DataToken::ItemEnd,
879            DataToken::ItemStart { len: Length(32) },
880            DataToken::ItemValue(vec![0x99; 32]),
881            DataToken::ItemEnd,
882            DataToken::SequenceEnd,
883            DataToken::ElementHeader(DataElementHeader::new(
884                Tag(0xfffc, 0xfffc),
885                VR::OB,
886                Length(8),
887            )),
888            DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
889        ];
890
891        validate_dataset_reader_explicit_vr(DATA, ground_truth);
892    }
893
894    #[test]
895    fn lazy_read_encapsulated_pixeldata_with_offset_table() {
896        #[rustfmt::skip]
897        static DATA: &[u8] = &[
898            0xe0, 0x7f, 0x10, 0x00, // (7FE0, 0010) PixelData
899            b'O', b'B', // VR 
900            0x00, 0x00, // reserved
901            0xff, 0xff, 0xff, 0xff, // length: undefined
902            // -- 12 -- Basic offset table
903            0xfe, 0xff, 0x00, 0xe0, // item start tag
904            0x04, 0x00, 0x00, 0x00, // item length: 4
905            // -- 20 -- item value
906            0x10, 0x00, 0x00, 0x00, // 16
907            // -- 24 -- First fragment of pixel data
908            0xfe, 0xff, 0x00, 0xe0, // item start tag
909            0x20, 0x00, 0x00, 0x00, // item length: 32
910            // -- 32 -- Compressed Fragment
911            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
912            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
913            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
914            0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99,
915            // -- 60 -- End of pixel data
916            0xfe, 0xff, 0xdd, 0xe0, // sequence end tag
917            0x00, 0x00, 0x00, 0x00,
918            // -- 68 -- padding
919            0xfc, 0xff, 0xfc, 0xff, // (fffc,fffc) DataSetTrailingPadding
920            b'O', b'B', // VR
921            0x00, 0x00, // reserved
922            0x08, 0x00, 0x00, 0x00, // length: 8
923            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
924        ];
925
926        let ground_truth = vec![
927            DataToken::PixelSequenceStart,
928            DataToken::ItemStart { len: Length(4) },
929            DataToken::ItemValue(vec![0x10, 0x00, 0x00, 0x00]),
930            DataToken::ItemEnd,
931            DataToken::ItemStart { len: Length(32) },
932            DataToken::ItemValue(vec![0x99; 32]),
933            DataToken::ItemEnd,
934            DataToken::SequenceEnd,
935            DataToken::ElementHeader(DataElementHeader::new(
936                Tag(0xfffc, 0xfffc),
937                VR::OB,
938                Length(8),
939            )),
940            DataToken::PrimitiveValue(PrimitiveValue::U8([0x00; 8].as_ref().into())),
941        ];
942
943        validate_dataset_reader_explicit_vr(DATA, ground_truth);
944    }
945
946    #[test]
947    fn lazy_read_sequence_explicit_2_skip_values() {
948        static DATA: &[u8] = &[
949            // SequenceStart: (0008,2218) ; len = 54 (#=3)
950            0x08, 0x00, 0x18, 0x22, b'S', b'Q', 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
951            // -- 12, --
952            // ItemStart: len = 46
953            0xfe, 0xff, 0x00, 0xe0, 0x2e, 0x00, 0x00, 0x00,
954            // -- 20, --
955            // ElementHeader: (0008,0100) CodeValue; len = 8
956            0x08, 0x00, 0x00, 0x01, b'S', b'H', 0x08, 0x00, // PrimitiveValue
957            0x54, 0x2d, 0x44, 0x31, 0x32, 0x31, 0x33, b' ',
958            // -- 36, --
959            // ElementHeader: (0008,0102) CodingSchemeDesignator; len = 4
960            0x08, 0x00, 0x02, 0x01, b'S', b'H', 0x04, 0x00, // PrimitiveValue
961            0x53, 0x52, 0x54, b' ',
962            // -- 48, --
963            // (0008,0104) CodeMeaning; len = 10
964            0x08, 0x00, 0x04, 0x01, b'L', b'O', 0x0a, 0x00, // PrimitiveValue
965            0x4a, 0x61, 0x77, b' ', 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e,
966            // -- 66 --
967            // SequenceStart: (0040,0555) AcquisitionContextSequence; len = 0
968            0x40, 0x00, 0x55, 0x05, b'S', b'Q', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
969            // ElementHeader: (2050,0020) PresentationLUTShape; len = 8
970            0x50, 0x20, 0x20, 0x00, b'C', b'S', 0x08, 0x00, // PrimitiveValue
971            b'I', b'D', b'E', b'N', b'T', b'I', b'T', b'Y',
972        ];
973
974        let ground_truth = vec![
975            DataToken::SequenceStart {
976                tag: Tag(0x0008, 0x2218),
977                len: Length(54),
978            },
979            DataToken::ItemStart { len: Length(46) },
980            DataToken::ElementHeader(DataElementHeader {
981                tag: Tag(0x0008, 0x0100),
982                vr: VR::SH,
983                len: Length(8),
984            }),
985            DataToken::PrimitiveValue(PrimitiveValue::Strs(
986                ["T-D1213 ".to_owned()].as_ref().into(),
987            )),
988            DataToken::ElementHeader(DataElementHeader {
989                tag: Tag(0x0008, 0x0102),
990                vr: VR::SH,
991                len: Length(4),
992            }),
993            DataToken::PrimitiveValue(PrimitiveValue::Strs(["SRT ".to_owned()].as_ref().into())),
994            DataToken::ElementHeader(DataElementHeader {
995                tag: Tag(0x0008, 0x0104),
996                vr: VR::LO,
997                len: Length(10),
998            }),
999            DataToken::PrimitiveValue(PrimitiveValue::Strs(
1000                ["Jaw region".to_owned()].as_ref().into(),
1001            )),
1002            DataToken::ItemEnd,
1003            DataToken::SequenceEnd,
1004            DataToken::SequenceStart {
1005                tag: Tag(0x0040, 0x0555),
1006                len: Length(0),
1007            },
1008            DataToken::SequenceEnd,
1009            DataToken::ElementHeader(DataElementHeader {
1010                tag: Tag(0x2050, 0x0020),
1011                vr: VR::CS,
1012                len: Length(8),
1013            }),
1014            DataToken::PrimitiveValue(PrimitiveValue::Strs(
1015                ["IDENTITY".to_owned()].as_ref().into(),
1016            )),
1017        ];
1018
1019        let mut cursor = DATA;
1020        let parser = StatefulDecoder::new(
1021            &mut cursor,
1022            ExplicitVRLittleEndianDecoder::default(),
1023            LittleEndianBasicDecoder::default(),
1024            SpecificCharacterSet::default(),
1025        );
1026
1027        let mut dset_reader = LazyDataSetReader::new(parser);
1028
1029        let mut gt_iter = ground_truth.into_iter();
1030        while let Some(res) = dset_reader.advance() {
1031            let token = res.expect("should parse without an error");
1032            let gt_token = gt_iter.next().expect("ground truth is shorter");
1033            match token {
1034                LazyDataToken::LazyValue { .. } | LazyDataToken::LazyItemValue { .. } => {
1035                    token.skip().unwrap();
1036                }
1037                token => {
1038                    let token = token.into_owned().unwrap();
1039                    assert_eq!(token, gt_token);
1040                }
1041            }
1042        }
1043
1044        assert_eq!(
1045            gt_iter.count(), // consume til the end
1046            0,               // we have already read all of them
1047            "unexpected number of tokens remaining"
1048        );
1049        assert_eq!(dset_reader.parser.position(), DATA.len() as u64);
1050    }
1051
1052    #[test]
1053    fn lazy_read_value_via_into_value() {
1054        // manually crafted DICOM data elements
1055        //  Tag: (0002,0002) Media Storage SOP Class UID
1056        //  VR: UI
1057        //  Length: 26
1058        //  Value: "1.2.840.10008.5.1.4.1.1.1\0"
1059        // --
1060        //  Tag: (0002,0010) Transfer Syntax UID
1061        //  VR: UI
1062        //  Length: 20
1063        //  Value: "1.2.840.10008.1.2.1\0" == ExplicitVRLittleEndian
1064        // --
1065        const RAW: &'static [u8; 62] = &[
1066            0x02, 0x00, 0x02, 0x00, 0x55, 0x49, 0x1a, 0x00, 0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34,
1067            0x30, 0x2e, 0x31, 0x30, 0x30, 0x30, 0x38, 0x2e, 0x35, 0x2e, 0x31, 0x2e, 0x34, 0x2e,
1068            0x31, 0x2e, 0x31, 0x2e, 0x31, 0x00, 0x02, 0x00, 0x10, 0x00, 0x55, 0x49, 0x14, 0x00,
1069            0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e, 0x31, 0x30, 0x30, 0x30, 0x38, 0x2e,
1070            0x31, 0x2e, 0x32, 0x2e, 0x31, 0x00,
1071        ];
1072        let mut cursor = &RAW[..];
1073        let parser = StatefulDecoder::new(
1074            &mut cursor,
1075            ExplicitVRLittleEndianDecoder::default(),
1076            LittleEndianBasicDecoder::default(),
1077            SpecificCharacterSet::default(),
1078        );
1079
1080        let mut dset_reader = LazyDataSetReader::new(parser);
1081
1082        let token = dset_reader
1083            .advance()
1084            .expect("Expected token 1")
1085            .expect("Failed to read token 1");
1086
1087        let header_token1 = match token {
1088            LazyDataToken::ElementHeader(header) => header,
1089            _ => {
1090                panic!("Unexpected token type (1)");
1091            }
1092        };
1093
1094        let token = dset_reader
1095            .advance()
1096            .expect("Expected token 2")
1097            .expect("Failed to read token 2");
1098
1099        match token {
1100            LazyDataToken::LazyValue { header, decoder: _ } => {
1101                assert_eq!(header_token1, header);
1102            }
1103            _ => {
1104                panic!("Unexpected token type (2)");
1105            }
1106        }
1107
1108        // consume via into_value
1109        assert_eq!(
1110            token.into_value().unwrap(),
1111            dicom_value!(Strs, ["1.2.840.10008.5.1.4.1.1.1\0"]),
1112        );
1113
1114        let token = dset_reader
1115            .advance()
1116            .expect("Expected token 3")
1117            .expect("Failed to read token 3");
1118
1119        let header_token3 = match token {
1120            LazyDataToken::ElementHeader(header) => header,
1121            _ => {
1122                panic!("Unexpected token type (3)");
1123            }
1124        };
1125
1126        let token = dset_reader
1127            .advance()
1128            .expect("Expected token 4")
1129            .expect("Failed to read token 4");
1130
1131        match token {
1132            LazyDataToken::LazyValue { header, decoder: _ } => {
1133                assert_eq!(header_token3, header);
1134            }
1135            _ => {
1136                panic!("Unexpected token type (4)");
1137            }
1138        }
1139
1140        // consume via into_value
1141        assert_eq!(
1142            token.into_value().unwrap(),
1143            dicom_value!(Strs, ["1.2.840.10008.1.2.1\0"]),
1144        );
1145
1146        assert!(
1147            dset_reader.advance().is_none(),
1148            "unexpected number of tokens remaining"
1149        );
1150    }
1151}