dicom_parser/dataset/
mod.rs

1//! Interpretation of DICOM data sets as streams of tokens.
2use crate::stateful::decode;
3use dicom_core::header::{DataElementHeader, HasLength, Length, VR};
4use dicom_core::value::{DicomValueType, PrimitiveValue};
5use dicom_core::{value::Value, DataElement, Tag};
6use snafu::{OptionExt, ResultExt, Snafu};
7use std::default::Default;
8use std::fmt;
9
10pub mod lazy_read;
11pub mod read;
12pub mod write;
13
14pub use self::read::DataSetReader;
15use self::read::ValueReadStrategy;
16pub use self::write::DataSetWriter;
17
/// Errors which may occur when reading, skipping or materializing
/// the values behind data set tokens.
// NOTE(review): with snafu, the doc comment of each variant presumably
// doubles as its display message; confirm before rewording any of them.
#[derive(Debug, Snafu)]
pub enum Error {
    /// Could not read item value
    ReadItemValue { source: decode::Error },
    /// Could not read element value
    ReadElementValue { source: decode::Error },
    /// Could not skip the bytes of a value
    SkipValue { source: decode::Error },
    /// Unexpected token type for operation
    UnexpectedTokenType,
    /// Unexpected undefined value length
    UndefinedLength,
}
31
/// Result alias for the operations of this module,
/// with this module's [`Error`] as the default error type.
pub type Result<T, E = Error> = std::result::Result<T, E>;
33
/// A token of a DICOM data set stream. This is part of the interpretation of a
/// data set as a stream of symbols, which may either represent data headers or
/// actual value data.
///
/// Equality between tokens is implemented by hand
/// so that undefined lengths compare as equal.
#[derive(Debug, Clone)]
pub enum DataToken {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element,
    /// carrying the tag and the declared length of the sequence.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence,
    /// carrying the declared length of the item.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// A primitive data element value.
    PrimitiveValue(PrimitiveValue),
    /// An owned piece of raw data representing an item's value.
    ///
    /// This variant is used to represent
    /// the value of an encoded fragment.
    /// It should not be used to represent nested data sets.
    ItemValue(Vec<u8>),
    /// An owned sequence of unsigned 32 bit integers
    /// representing a pixel data offset table.
    ///
    /// This variant is used to represent
    /// the byte offsets to the first byte of the Item tag of the first fragment
    /// for each frame in the sequence of items,
    /// as per PS 3.5, Section A.4.
    OffsetTable(Vec<u32>),
}
68
69impl fmt::Display for DataToken {
70    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
71        match self {
72            DataToken::PrimitiveValue(ref v) => write!(f, "PrimitiveValue({:?})", v.value_type()),
73            other => write!(f, "{:?}", other),
74        }
75    }
76}
77
78/// This implementation treats undefined lengths as equal.
79impl PartialEq<Self> for DataToken {
80    fn eq(&self, other: &Self) -> bool {
81        use DataToken::*;
82        match (self, other) {
83            (
84                ElementHeader(DataElementHeader {
85                    tag: tag1,
86                    vr: vr1,
87                    len: len1,
88                }),
89                ElementHeader(DataElementHeader {
90                    tag: tag2,
91                    vr: vr2,
92                    len: len2,
93                }),
94            ) => tag1 == tag2 && vr1 == vr2 && len1.inner_eq(*len2),
95            (
96                SequenceStart {
97                    tag: tag1,
98                    len: len1,
99                },
100                SequenceStart {
101                    tag: tag2,
102                    len: len2,
103                },
104            ) => tag1 == tag2 && len1.inner_eq(*len2),
105            (ItemStart { len: len1 }, ItemStart { len: len2 }) => len1.inner_eq(*len2),
106            (PrimitiveValue(v1), PrimitiveValue(v2)) => v1 == v2,
107            (ItemValue(v1), ItemValue(v2)) => v1 == v2,
108            (OffsetTable(v1), OffsetTable(v2)) => v1 == v2,
109            (ItemEnd, ItemEnd)
110            | (SequenceEnd, SequenceEnd)
111            | (PixelSequenceStart, PixelSequenceStart) => true,
112            _ => false,
113        }
114    }
115}
116
117impl From<DataElementHeader> for DataToken {
118    fn from(header: DataElementHeader) -> Self {
119        match (header.vr(), header.tag) {
120            (VR::OB, Tag(0x7fe0, 0x0010)) if header.len.is_undefined() => {
121                DataToken::PixelSequenceStart
122            }
123            (VR::SQ, _) => DataToken::SequenceStart {
124                tag: header.tag,
125                len: header.len,
126            },
127            _ => DataToken::ElementHeader(header),
128        }
129    }
130}
131
132impl DataToken {
133    /// Check whether this token represents the start of a sequence
134    /// of nested data sets.
135    pub fn is_sequence_start(&self) -> bool {
136        matches!(self, DataToken::SequenceStart { .. })
137    }
138
139    /// Check whether this token represents the end of a sequence
140    /// or the end of an encapsulated element.
141    pub fn is_sequence_end(&self) -> bool {
142        matches!(self, DataToken::SequenceEnd)
143    }
144}
145
/// A lazy data token for reading a data set
/// without requiring values to be fully read in memory.
/// This is part of the interpretation of a
/// data set as a stream of symbols,
/// which may either represent data headers
/// or actual value data.
///
/// The parameter type `D` represents
/// the original type of the stateful decoder,
/// and through which the values can be retrieved.
///
/// Tokens which do not hold a value mirror the variants of [`DataToken`],
/// whereas values are kept as a header or length
/// alongside the decoder to fetch them from.
#[derive(Debug)]
#[non_exhaustive]
pub enum LazyDataToken<D> {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element,
    /// carrying the tag and the declared length of the sequence.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence,
    /// carrying the declared length of the item.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// An element value yet to be fetched
    LazyValue {
        /// the header of the respective value
        header: DataElementHeader,
        /// the stateful decoder for fetching the bytes of the value
        decoder: D,
    },
    /// An item value yet to be fetched
    LazyItemValue {
        /// the full length of the value, always well defined
        len: u32,
        /// the stateful decoder for fetching the bytes of the value
        decoder: D,
    },
}
186
187impl<D> LazyDataToken<D> {
188    /// Check whether this token represents the start of a sequence
189    /// of nested data sets.
190    pub fn is_sequence_start(&self) -> bool {
191        matches!(self, LazyDataToken::SequenceStart { .. })
192    }
193
194    /// Check whether this token represents the end of a sequence
195    /// or the end of an encapsulated element.
196    pub fn is_sequence_end(&self) -> bool {
197        matches!(self, LazyDataToken::SequenceEnd)
198    }
199}
200
201impl<D> LazyDataToken<D>
202where
203    D: decode::StatefulDecode,
204{
205    pub fn skip(self) -> crate::stateful::decode::Result<()> {
206        match self {
207            LazyDataToken::LazyValue {
208                header,
209                mut decoder,
210            } => decoder.skip_bytes(header.len.0),
211            LazyDataToken::LazyItemValue { len, mut decoder } => decoder.skip_bytes(len),
212            _ => Ok(()), // do nothing
213        }
214    }
215    /// Construct the data token into memory,
216    /// consuming the reader if necessary.
217    ///
218    /// If the token represents a lazy element value,
219    /// the inner decoder is read with string preservation.
220    pub fn into_owned(self) -> Result<DataToken> {
221        self.into_owned_with_strategy(ValueReadStrategy::Preserved)
222    }
223
224    /// Construct the data token into memory,
225    /// consuming the reader if necessary.
226    ///
227    /// If the token represents a lazy element value,
228    /// the inner decoder is read
229    /// with the given value reading strategy.
230    pub fn into_owned_with_strategy(self, strategy: ValueReadStrategy) -> Result<DataToken> {
231        match self {
232            LazyDataToken::ElementHeader(header) => Ok(DataToken::ElementHeader(header)),
233            LazyDataToken::ItemEnd => Ok(DataToken::ItemEnd),
234            LazyDataToken::ItemStart { len } => Ok(DataToken::ItemStart { len }),
235            LazyDataToken::PixelSequenceStart => Ok(DataToken::PixelSequenceStart),
236            LazyDataToken::SequenceEnd => Ok(DataToken::SequenceEnd),
237            LazyDataToken::SequenceStart { tag, len } => Ok(DataToken::SequenceStart { tag, len }),
238            LazyDataToken::LazyValue {
239                header,
240                mut decoder,
241            } => {
242                // use the stateful decoder to eagerly read the value
243                let value = match strategy {
244                    ValueReadStrategy::Interpreted => {
245                        decoder.read_value(&header).context(ReadElementValueSnafu)?
246                    }
247                    ValueReadStrategy::Preserved => decoder
248                        .read_value_preserved(&header)
249                        .context(ReadElementValueSnafu)?,
250                    ValueReadStrategy::Raw => decoder
251                        .read_value_bytes(&header)
252                        .context(ReadElementValueSnafu)?,
253                };
254                Ok(DataToken::PrimitiveValue(value))
255            }
256            LazyDataToken::LazyItemValue { len, mut decoder } => {
257                let mut data = Vec::new();
258                decoder
259                    .read_to_vec(len, &mut data)
260                    .context(ReadItemValueSnafu)?;
261                Ok(DataToken::ItemValue(data))
262            }
263        }
264    }
265
266    /// Retrieve a primitive element value from the token,
267    /// consuming the reader with the given reading strategy.
268    ///
269    /// The operation fails if the token does not represent an element value.
270    pub fn into_value_with_strategy(self, strategy: ValueReadStrategy) -> Result<PrimitiveValue> {
271        match self {
272            LazyDataToken::LazyValue {
273                header,
274                mut decoder,
275            } => {
276                // use the stateful decoder to eagerly read the value
277                match strategy {
278                    ValueReadStrategy::Interpreted => {
279                        decoder.read_value(&header).context(ReadElementValueSnafu)
280                    }
281                    ValueReadStrategy::Preserved => decoder
282                        .read_value_preserved(&header)
283                        .context(ReadElementValueSnafu),
284                    ValueReadStrategy::Raw => decoder
285                        .read_value_bytes(&header)
286                        .context(ReadElementValueSnafu),
287                }
288            }
289            _ => UnexpectedTokenTypeSnafu.fail(),
290        }
291    }
292
293    /// Retrieve a primitive element value from the token,
294    /// consuming the reader with the default reading strategy.
295    ///
296    /// The operation fails if the token does not represent an element value.
297    pub fn into_value(self) -> Result<PrimitiveValue> {
298        self.into_value_with_strategy(ValueReadStrategy::Preserved)
299    }
300
301    /// Read the bytes of a value into the given writer,
302    /// consuming the reader.
303    ///
304    /// This operation will not interpret the value,
305    /// like in the `Bytes` value reading strategy.
306    /// It works for both data elements and non-dataset items.
307    ///
308    /// The operation fails if
309    /// the token does not represent an element or item value.
310    pub fn read_value_into<W>(self, out: W) -> Result<()>
311    where
312        W: std::io::Write,
313    {
314        match self {
315            LazyDataToken::LazyValue {
316                header,
317                mut decoder,
318            } => {
319                let len = header.len.get().context(UndefinedLengthSnafu)?;
320                decoder.read_to(len, out).context(ReadElementValueSnafu)?;
321            }
322            LazyDataToken::LazyItemValue { len, mut decoder } => {
323                decoder.read_to(len, out).context(ReadItemValueSnafu)?;
324            }
325            _other => return UnexpectedTokenTypeSnafu.fail(),
326        };
327        Ok(())
328    }
329
330    /// Convert this token into a structured representation,
331    /// for diagnostics and error reporting purposes.
332    pub fn into_repr(self) -> LazyDataTokenRepr {
333        LazyDataTokenRepr::from(self)
334    }
335
336    /// Create a structured representation of this token,
337    /// for diagnostics and error reporting purposes.
338    pub fn repr(&self) -> LazyDataTokenRepr {
339        LazyDataTokenRepr::from(self)
340    }
341}
342
343impl<D> From<LazyDataToken<D>> for LazyDataTokenRepr {
344    fn from(token: LazyDataToken<D>) -> Self {
345        match token {
346            LazyDataToken::ElementHeader(h) => LazyDataTokenRepr::ElementHeader(h),
347            LazyDataToken::SequenceStart { tag, len } => {
348                LazyDataTokenRepr::SequenceStart { tag, len }
349            }
350            LazyDataToken::PixelSequenceStart => LazyDataTokenRepr::PixelSequenceStart,
351            LazyDataToken::SequenceEnd => LazyDataTokenRepr::SequenceEnd,
352            LazyDataToken::ItemStart { len } => LazyDataTokenRepr::ItemStart { len },
353            LazyDataToken::ItemEnd => LazyDataTokenRepr::ItemEnd,
354            LazyDataToken::LazyValue { header, decoder: _ } => {
355                LazyDataTokenRepr::LazyValue { header }
356            }
357            LazyDataToken::LazyItemValue { len, decoder: _ } => {
358                LazyDataTokenRepr::LazyItemValue { len }
359            }
360        }
361    }
362}
363
364impl<D> From<&LazyDataToken<D>> for LazyDataTokenRepr {
365    fn from(token: &LazyDataToken<D>) -> Self {
366        match *token {
367            LazyDataToken::ElementHeader(h) => LazyDataTokenRepr::ElementHeader(h),
368            LazyDataToken::SequenceStart { tag, len } => {
369                LazyDataTokenRepr::SequenceStart { tag, len }
370            }
371            LazyDataToken::PixelSequenceStart => LazyDataTokenRepr::PixelSequenceStart,
372            LazyDataToken::SequenceEnd => LazyDataTokenRepr::SequenceEnd,
373            LazyDataToken::ItemStart { len } => LazyDataTokenRepr::ItemStart { len },
374            LazyDataToken::ItemEnd => LazyDataTokenRepr::ItemEnd,
375            LazyDataToken::LazyValue { header, decoder: _ } => {
376                LazyDataTokenRepr::LazyValue { header }
377            }
378            LazyDataToken::LazyItemValue { len, decoder: _ } => {
379                LazyDataTokenRepr::LazyItemValue { len }
380            }
381        }
382    }
383}
384
/// A structured description of a lazy data token,
/// for diagnostics and error reporting purposes.
///
/// It has the same variants as [`LazyDataToken`],
/// but without the decoder in the variants for lazy values.
#[derive(Debug, Clone, PartialEq)]
pub enum LazyDataTokenRepr {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element,
    /// carrying the tag and the declared length of the sequence.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence,
    /// carrying the declared length of the item.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// An element value yet to be fetched
    LazyValue {
        /// the header of the respective value
        header: DataElementHeader,
    },
    /// An item value yet to be fetched
    LazyItemValue {
        /// the full length of the value, always well defined
        len: u32,
    },
}
412
/// The type of delimiter: sequence or item.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SeqTokenType {
    /// The delimiter belongs to a sequence.
    Sequence,
    /// The delimiter belongs to an item.
    Item,
}
419
/// Options for token generation
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq)]
#[non_exhaustive]
pub struct IntoTokensOptions {
    /// Whether to ignore all sequence lengths in the DICOM data set,
    /// resulting in sequences with undefined length.
    ///
    /// Set this to `true` when the sequence lengths in bytes might no longer be valid,
    /// such as when changing the character set,
    /// and as such data set sequence lengths should be replaced with undefined.
    /// When set to `false`,
    /// whether to retain or replace these lengths
    /// is left at the implementation's discretion:
    /// they may be kept, recalculated, or marked as undefined.
    pub force_invalidate_sq_length: bool,
}
436
437impl IntoTokensOptions {
438    pub fn new(force_invalidate_sq_length: bool) -> Self {
439        IntoTokensOptions {
440            force_invalidate_sq_length,
441        }
442    }
443}
444
/// A trait for converting structured DICOM data into a stream of data tokens.
pub trait IntoTokens {
    /// The iterator type through which tokens are obtained.
    type Iter: Iterator<Item = DataToken>;

    /// Convert the value into a stream of data tokens.
    ///
    /// No options are given here;
    /// the implementations in this module
    /// resort to the default token generation options.
    fn into_tokens(self) -> Self::Iter;
    /// Convert the value into a stream of data tokens,
    /// generated according to the given options.
    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter;
}
453
// Token conversion for the empty object type.
// NOTE(review): presumably a value of `EmptyObject` can never exist,
// which would be why both methods are marked as unreachable;
// confirm against the definition in `dicom_core`.
impl IntoTokens for dicom_core::header::EmptyObject {
    type Iter = std::iter::Empty<DataToken>;

    // panics if ever called
    fn into_tokens(self) -> Self::Iter {
        unreachable!()
    }

    // panics if ever called, the options are not looked into
    fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter {
        unreachable!()
    }
}
465
/// Token generator from a DICOM data element.
///
/// Each variant is a state of the generator.
/// It begins at `Start` and finishes at `End`,
/// going through the states in between
/// depending on the kind of value in the element.
pub enum DataElementTokens<I, P>
where
    I: IntoTokens,
{
    /// initial state, at the beginning of the element
    Start(
        // Option is used for easy taking from a &mut,
        // should always be Some in practice
        Option<DataElement<I, P>>,
        // the options to generate the tokens with
        IntoTokensOptions,
    ),
    /// the header of a plain primitive element was read,
    /// its value comes next
    Header(
        // Option is used for easy taking from a &mut,
        // should always be Some in practice
        Option<DataElement<I, P>>,
    ),
    /// reading tokens from items,
    /// a sequence end token comes once they are exhausted
    Items(
        FlattenTokens<
            <dicom_core::value::C<AsItem<I>> as IntoIterator>::IntoIter,
            ItemTokens<I::Iter>,
        >,
    ),
    /// the header of encapsulated pixel data was read, will read
    /// the offset table next
    PixelData(
        /// Pixel fragments
        ///
        /// Option is used for easy taking from a &mut,
        /// should always be Some in practice
        Option<dicom_core::value::C<P>>,
        /// Frame offset table
        OffsetTableItemTokens<dicom_core::value::C<u32>>,
    ),
    /// the header and offset of encapsulated pixel data was read,
    /// fragments come next,
    /// and then a sequence end token once they are exhausted
    PixelDataFragments(
        FlattenTokens<
            <dicom_core::value::C<ItemValue<P>> as IntoIterator>::IntoIter,
            ItemValueTokens<P>,
        >,
    ),
    /// no more elements
    End,
}
513
514impl<I, P> Iterator for DataElementTokens<I, P>
515where
516    I: IntoTokens,
517    I: HasLength,
518    P: AsRef<[u8]>,
519{
520    type Item = DataToken;
521
522    fn next(&mut self) -> Option<Self::Item> {
523        let (out, next_state) = match self {
524            DataElementTokens::Start(elem, options) => {
525                let elem = elem.take().unwrap();
526                // data element header token
527
528                let mut header = *elem.header();
529                if options.force_invalidate_sq_length && elem.vr() == VR::SQ {
530                    header.len = Length::UNDEFINED;
531                }
532
533                let token = DataToken::from(header);
534                match token {
535                    DataToken::SequenceStart { tag, len } => {
536                        // retrieve sequence value, begin item sequence
537                        match elem.into_value() {
538                            v @ Value::Primitive(_) => {
539                                // this can only happen in malformed data (wrong VR),
540                                // but we try to handle it gracefully anyway:
541                                // return a header token instead and continue
542                                // as if it were a primitive value
543                                if len.is_defined() {
544                                    tracing::warn!("Unexpected primitive value after header {} with VR SQ", tag);
545                                    let adapted_elem =  DataElement::new_with_len(tag, VR::SQ, len, v);
546                                    (
547                                        Some(DataToken::ElementHeader(*adapted_elem.header())),
548                                        DataElementTokens::Header(Some(adapted_elem)),
549                                    )
550                                } else {
551                                    // without a defined length,
552                                    // it is too risky to provide any tokens
553                                    tracing::warn!("Unexpected primitive value after header {} with VR SQ, ignoring", tag);
554                                    (None, DataElementTokens::End)
555                                }
556                            },
557                            Value::PixelSequence { .. } => {
558                                // this is also invalid because
559                                // this is a data element sequence start,
560                                // not a pixel data fragment sequence start.
561                                // stop here and return nothing
562                                tracing::warn!("Unexpected pixel data fragments after header {} with VR SQ, ignored", tag);
563                                (None, DataElementTokens::End)
564                            },
565                            Value::Sequence(seq) => {
566                                let seq = if options.force_invalidate_sq_length {
567                                    seq.into_items().into_vec().into()
568                                } else {
569                                    seq
570                                };
571
572                                let items: dicom_core::value::C<_> = seq
573                                    .into_items()
574                                    .into_iter()
575                                    .map(|o| AsItem(o.length(), o))
576                                    .collect();
577                                (
578                                    Some(token),
579                                    DataElementTokens::Items(
580                                        items.into_tokens_with_options(*options),
581                                    ),
582                                )
583                            }
584                        }
585                    }
586                    DataToken::PixelSequenceStart => {
587                        match elem.into_value() {
588                            Value::PixelSequence(seq) => {
589                                let (offset_table, fragments) = seq.into_parts();
590                                (
591                                    // begin pixel sequence
592                                    Some(DataToken::PixelSequenceStart),
593                                    DataElementTokens::PixelData(
594                                        Some(fragments),
595                                        OffsetTableItem(offset_table)
596                                            .into_tokens_with_options(*options),
597                                    ),
598                                )
599                            }
600                            Value::Primitive(_) | Value::Sequence { .. } => unreachable!(),
601                        }
602                    }
603                    _ => (
604                        Some(DataToken::ElementHeader(*elem.header())),
605                        DataElementTokens::Header(Some(elem)),
606                    ),
607                }
608            }
609            DataElementTokens::Header(elem) => {
610                let elem = elem.take().unwrap();
611                match elem.into_value() {
612                    Value::Sequence { .. } | Value::PixelSequence { .. } => unreachable!(),
613                    Value::Primitive(value) => {
614                        // return primitive value, done
615                        let token = DataToken::PrimitiveValue(value);
616                        (Some(token), DataElementTokens::End)
617                    }
618                }
619            }
620            DataElementTokens::Items(tokens) => {
621                if let Some(token) = tokens.next() {
622                    // bypass manual state transition
623                    return Some(token);
624                } else {
625                    // sequence end token, end
626                    (Some(DataToken::SequenceEnd), DataElementTokens::End)
627                }
628            }
629            DataElementTokens::PixelData(fragments, tokens) => {
630                if let Some(token) = tokens.next() {
631                    // bypass manual state transition
632                    return Some(token);
633                }
634                // pixel data fragments next
635                let fragments = fragments.take().unwrap();
636                let tokens: dicom_core::value::C<_> =
637                    fragments.into_iter().map(ItemValue).collect();
638                *self = DataElementTokens::PixelDataFragments(tokens.into_tokens());
639                // recursive call to ensure the retrieval of a data token
640                return self.next();
641            }
642            DataElementTokens::PixelDataFragments(tokens) => {
643                if let Some(token) = tokens.next() {
644                    // bypass manual state transition
645                    return Some(token);
646                } else {
647                    // sequence end token, end
648                    (Some(DataToken::SequenceEnd), DataElementTokens::End)
649                }
650            }
651            DataElementTokens::End => return None,
652        };
653        *self = next_state;
654
655        out
656    }
657}
658
659impl<I, P> IntoTokens for DataElement<I, P>
660where
661    I: IntoTokens,
662    I: HasLength,
663    P: AsRef<[u8]>,
664{
665    type Iter = DataElementTokens<I, P>;
666
667    fn into_tokens(self) -> Self::Iter {
668        //Avoid
669        self.into_tokens_with_options(Default::default())
670    }
671
672    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
673        DataElementTokens::Start(Some(self), options)
674    }
675}
676
/// Flatten a sequence of elements into their respective
/// token sequence in order.
#[derive(Debug, PartialEq)]
pub struct FlattenTokens<O, K> {
    // the entries which were not yet converted into tokens
    seq: O,
    // the tokens of the entry currently being consumed, if any
    tokens: Option<K>,
    // the options with which each entry is converted into tokens
    into_token_options: IntoTokensOptions,
}
685
686impl<O, K> Iterator for FlattenTokens<O, K>
687where
688    O: Iterator,
689    O::Item: IntoTokens<Iter = K>,
690    K: Iterator<Item = DataToken>,
691{
692    type Item = DataToken;
693
694    fn next(&mut self) -> Option<Self::Item> {
695        // ensure a token sequence
696        if self.tokens.is_none() {
697            match self.seq.next() {
698                Some(entries) => {
699                    self.tokens = Some(entries.into_tokens_with_options(self.into_token_options));
700                }
701                None => return None,
702            }
703        }
704
705        // retrieve the next token
706        match self.tokens.as_mut().map(|s| s.next()) {
707            Some(Some(token)) => Some(token),
708            Some(None) => {
709                self.tokens = None;
710                self.next()
711            }
712            None => unreachable!(),
713        }
714    }
715}
716
717impl<T> IntoTokens for Vec<T>
718where
719    T: IntoTokens,
720{
721    type Iter = FlattenTokens<<Vec<T> as IntoIterator>::IntoIter, <T as IntoTokens>::Iter>;
722
723    fn into_tokens(self) -> Self::Iter {
724        self.into_tokens_with_options(Default::default())
725    }
726
727    fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
728        FlattenTokens {
729            seq: self.into_iter(),
730            tokens: None,
731            into_token_options,
732        }
733    }
734}
735
736impl<T> IntoTokens for dicom_core::value::C<T>
737where
738    T: IntoTokens,
739{
740    type Iter =
741        FlattenTokens<<dicom_core::value::C<T> as IntoIterator>::IntoIter, <T as IntoTokens>::Iter>;
742
743    fn into_tokens(self) -> Self::Iter {
744        self.into_tokens_with_options(Default::default())
745    }
746
747    fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
748        FlattenTokens {
749            seq: self.into_iter(),
750            tokens: None,
751            into_token_options,
752        }
753    }
754}
755
/// A stream of tokens from a DICOM item.
///
/// It yields an item start token,
/// then the tokens of the inner object,
/// and finally an item end token.
#[derive(Debug)]
pub enum ItemTokens<T> {
    /// Just started, an item header token will come next
    Start {
        // the length to report in the item start token
        len: Length,
        // Option is used for taking the tokens out from a &mut,
        // always Some when created through `new`
        object_tokens: Option<T>,
    },
    /// Will return tokens from the inner object, then an end of item token
    /// when it ends
    Object { object_tokens: T },
    /// Just ended, no more tokens
    End,
}
770
771impl<T> ItemTokens<T>
772where
773    T: Iterator<Item = DataToken>,
774{
775    pub fn new<O>(len: Length, object: O, options: IntoTokensOptions) -> Self
776    where
777        O: IntoTokens<Iter = T>,
778    {
779        let len = if len.0 != 0 && options.force_invalidate_sq_length {
780            Length::UNDEFINED
781        } else {
782            len
783        };
784        ItemTokens::Start {
785            len,
786            object_tokens: Some(object.into_tokens_with_options(options)),
787        }
788    }
789}
790
791impl<T> Iterator for ItemTokens<T>
792where
793    T: Iterator<Item = DataToken>,
794{
795    type Item = DataToken;
796
797    fn next(&mut self) -> Option<Self::Item> {
798        let (next_state, out) = match self {
799            ItemTokens::Start { len, object_tokens } => (
800                ItemTokens::Object {
801                    object_tokens: object_tokens.take().unwrap(),
802                },
803                Some(DataToken::ItemStart { len: *len }),
804            ),
805            ItemTokens::Object { object_tokens } => {
806                if let Some(token) = object_tokens.next() {
807                    return Some(token);
808                } else {
809                    (ItemTokens::End, Some(DataToken::ItemEnd))
810                }
811            }
812            ItemTokens::End => {
813                return None;
814            }
815        };
816
817        *self = next_state;
818        out
819    }
820}
821
/// A newtype for interpreting the given data as an item.
/// When converting a value of this type into tokens, the inner value's tokens
/// will be surrounded by an item start and an item delimiter.
///
/// The first field is the length of the item,
/// and the second field is the value to be seen as an item.
#[derive(Debug, Clone, PartialEq)]
pub struct AsItem<I>(Length, I);
827
828impl<I> IntoTokens for AsItem<I>
829where
830    I: IntoTokens,
831{
832    type Iter = ItemTokens<I::Iter>;
833
834    fn into_tokens(self) -> Self::Iter {
835        self.into_tokens_with_options(Default::default())
836    }
837
838    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
839        ItemTokens::new(self.0, self.1, options)
840    }
841}
842
843impl<I> HasLength for AsItem<I> {
844    fn length(&self) -> Length {
845        self.0
846    }
847}
848
/// Newtype which turns a chunk of raw bytes into an item.
/// Its token conversion yields an item start
/// (announcing the number of bytes as its length,
/// or an undefined length if length invalidation was requested
/// and the value is not empty),
/// then an item value token holding the bytes when there are any,
/// and finally an item delimiter.
#[derive(Debug, Clone, PartialEq)]
pub struct ItemValue<P>(P);
855
856impl<P> IntoTokens for ItemValue<P>
857where
858    P: AsRef<[u8]>,
859{
860    type Iter = ItemValueTokens<P>;
861
862    fn into_tokens(self) -> Self::Iter {
863        self.into_tokens_with_options(Default::default())
864    }
865
866    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
867        ItemValueTokens::new(self.0, options)
868    }
869}
870
#[derive(Debug)]
pub enum ItemValueTokens<P> {
    /// Initial state: the item header token is produced next.
    /// The flag tells whether the length of a non-empty item
    /// is to be announced as undefined instead of its byte count
    Start(Option<P>, bool),
    /// The token holding the value's data is produced next
    Value(P),
    /// The end of item token is produced next
    Done,
    /// Final state: every token has been produced
    End,
}
883
884impl<P> ItemValueTokens<P> {
885    #[inline]
886    pub fn new(value: P, into_tokens_options: IntoTokensOptions) -> Self {
887        ItemValueTokens::Start(Some(value), into_tokens_options.force_invalidate_sq_length)
888    }
889}
890
891impl<P> Iterator for ItemValueTokens<P>
892where
893    P: AsRef<[u8]>,
894{
895    type Item = DataToken;
896
897    fn next(&mut self) -> Option<Self::Item> {
898        let (out, next_state) = match self {
899            ItemValueTokens::Start(value, invalidate_len) => {
900                let value = value.take().unwrap();
901                let end_item = value.as_ref().is_empty();
902                let len = if *invalidate_len && !end_item {
903                    Length::UNDEFINED
904                } else {
905                    Length(value.as_ref().len() as u32)
906                };
907
908                (
909                    Some(DataToken::ItemStart { len }),
910                    if end_item {
911                        ItemValueTokens::Done
912                    } else {
913                        ItemValueTokens::Value(value)
914                    },
915                )
916            }
917            ItemValueTokens::Value(value) => (
918                Some(DataToken::ItemValue(value.as_ref().to_owned())),
919                ItemValueTokens::Done,
920            ),
921            ItemValueTokens::Done => (Some(DataToken::ItemEnd), ItemValueTokens::End),
922            ItemValueTokens::End => return None,
923        };
924
925        *self = next_state;
926        out
927    }
928}
929
/// Newtype which turns a sequence of `u32`s into an offset table item.
/// Its token conversion yields an item start with an explicit length
/// (four bytes per entry),
/// then an offset table token holding the entries when there are any,
/// and finally an item delimiter.
#[derive(Debug, Clone, PartialEq)]
pub struct OffsetTableItem<P>(P);
937
938impl<P> IntoTokens for OffsetTableItem<P>
939where
940    P: AsRef<[u32]>,
941{
942    type Iter = OffsetTableItemTokens<P>;
943
944    fn into_tokens(self) -> Self::Iter {
945        self.into_tokens_with_options(Default::default())
946    }
947
948    fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter {
949        //There are no sequences here that might need to be invalidated
950        OffsetTableItemTokens::new(self.0)
951    }
952}
953
#[derive(Debug)]
pub enum OffsetTableItemTokens<P> {
    /// Initial state: the item header token is produced next
    Start(Option<P>),
    /// The token holding the offset table itself is produced next
    Value(P),
    /// The end of item token is produced next
    Done,
    /// Final state: every token has been produced
    End,
}
965
966impl<P> OffsetTableItemTokens<P> {
967    #[inline]
968    pub fn new(value: P) -> Self {
969        OffsetTableItemTokens::Start(Some(value))
970    }
971}
972
973impl<P> Iterator for OffsetTableItemTokens<P>
974where
975    P: AsRef<[u32]>,
976{
977    type Item = DataToken;
978
979    fn next(&mut self) -> Option<Self::Item> {
980        let (out, next_state) = match self {
981            OffsetTableItemTokens::Start(value) => {
982                let value = value.take().unwrap();
983                let len = Length(value.as_ref().len() as u32 * 4);
984
985                (
986                    Some(DataToken::ItemStart { len }),
987                    if len == Length(0) {
988                        OffsetTableItemTokens::Done
989                    } else {
990                        OffsetTableItemTokens::Value(value)
991                    },
992                )
993            }
994            OffsetTableItemTokens::Value(value) => (
995                Some(DataToken::OffsetTable(value.as_ref().to_owned())),
996                OffsetTableItemTokens::Done,
997            ),
998            OffsetTableItemTokens::Done => (Some(DataToken::ItemEnd), OffsetTableItemTokens::End),
999            OffsetTableItemTokens::End => return None,
1000        };
1001
1002        *self = next_state;
1003        out
1004    }
1005}
1006
1007#[cfg(test)]
1008mod tests {
1009    use dicom_core::{
1010        dicom_value, header::HasLength, value::PixelFragmentSequence, DataElement, DataElementHeader, DicomValue, Length, PrimitiveValue, Tag, VR
1011    };
1012
1013    use super::{DataToken, IntoTokens, IntoTokensOptions, LazyDataToken};
1014    use smallvec::smallvec;
1015
1016    use dicom_encoding::{
1017        decode::{basic::LittleEndianBasicDecoder, explicit_le::ExplicitVRLittleEndianDecoder},
1018        text::SpecificCharacterSet,
1019    };
1020
1021    use crate::stateful::decode::StatefulDecode;
1022    use crate::stateful::decode::StatefulDecoder;
1023
1024    fn is_stateful_decode<D: StatefulDecode>(_: &D) {}
1025
1026    /// A simple object representing a DICOM data set,
1027    /// used merely for testing purposes.
1028    #[derive(Debug, Clone)]
1029    struct SimpleObject<T>(Length, dicom_core::value::C<T>);
1030
1031    impl<T> HasLength for SimpleObject<T> {
1032        fn length(&self) -> Length {
1033            self.0
1034        }
1035    }
1036
1037    impl<T> IntoTokens for SimpleObject<T>
1038    where
1039        T: IntoTokens,
1040        T: HasLength,
1041    {
1042        type Iter = super::FlattenTokens<
1043            <dicom_core::value::C<T> as IntoIterator>::IntoIter,
1044            <T as IntoTokens>::Iter,
1045        >;
1046
1047        fn into_tokens(self) -> Self::Iter {
1048            self.into_tokens_with_options(Default::default())
1049        }
1050
1051        fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
1052            super::FlattenTokens {
1053                seq: self.1.into_iter(),
1054                tokens: None,
1055                into_token_options,
1056            }
1057        }
1058    }
1059
1060    #[test]
1061    fn basic_element_into_tokens() {
1062        let element = DataElement::new(
1063            Tag(0x0010, 0x0010),
1064            VR::PN,
1065            DicomValue::new("Doe^John".into()),
1066        );
1067
1068        let tokens: Vec<_> = element.clone().into_tokens().collect();
1069
1070        assert_eq!(
1071            &tokens,
1072            &[
1073                DataToken::ElementHeader(*element.header()),
1074                DataToken::PrimitiveValue("Doe^John".into()),
1075            ],
1076        )
1077    }
1078
1079    #[test]
1080    fn sequence_implicit_len_into_tokens() {
1081        let element = DataElement::new(
1082            Tag(0x0008, 0x2218),
1083            VR::SQ,
1084            DicomValue::new_sequence(
1085                vec![SimpleObject(
1086                    Length::UNDEFINED,
1087                    smallvec![
1088                        DataElement::new(
1089                            Tag(0x0008, 0x0100),
1090                            VR::SH,
1091                            DicomValue::new(dicom_value!(Strs, ["T-D1213 "])),
1092                        ),
1093                        DataElement::new(
1094                            Tag(0x0008, 0x0102),
1095                            VR::SH,
1096                            DicomValue::new(dicom_value!(Strs, ["SRT "])),
1097                        ),
1098                        DataElement::new(
1099                            Tag(0x0008, 0x0104),
1100                            VR::LO,
1101                            DicomValue::new(dicom_value!(Strs, ["Jaw region"])),
1102                        ),
1103                    ],
1104                )],
1105                Length::UNDEFINED,
1106            ),
1107        );
1108
1109        let tokens: Vec<_> = element.clone().into_tokens().collect();
1110
1111        assert_eq!(
1112            &tokens,
1113            &[
1114                DataToken::SequenceStart {
1115                    tag: Tag(0x0008, 0x2218),
1116                    len: Length::UNDEFINED,
1117                },
1118                DataToken::ItemStart {
1119                    len: Length::UNDEFINED
1120                },
1121                DataToken::ElementHeader(DataElementHeader {
1122                    tag: Tag(0x0008, 0x0100),
1123                    vr: VR::SH,
1124                    len: Length(8),
1125                }),
1126                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1127                    ["T-D1213 ".to_owned()].as_ref().into(),
1128                )),
1129                DataToken::ElementHeader(DataElementHeader {
1130                    tag: Tag(0x0008, 0x0102),
1131                    vr: VR::SH,
1132                    len: Length(4),
1133                }),
1134                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1135                    ["SRT ".to_owned()].as_ref().into()
1136                )),
1137                DataToken::ElementHeader(DataElementHeader {
1138                    tag: Tag(0x0008, 0x0104),
1139                    vr: VR::LO,
1140                    len: Length(10),
1141                }),
1142                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1143                    ["Jaw region".to_owned()].as_ref().into(),
1144                )),
1145                DataToken::ItemEnd,
1146                DataToken::SequenceEnd,
1147            ],
1148        )
1149    }
1150
1151    #[test]
1152    fn sequence_explicit_len_into_tokens() {
1153        let element = DataElement::new(
1154            Tag(0x0008, 0x2218),
1155            VR::SQ,
1156            DicomValue::new_sequence(
1157                vec![SimpleObject(
1158                    Length(46),
1159                    smallvec![
1160                        DataElement::new(
1161                            Tag(0x0008, 0x0100),
1162                            VR::SH,
1163                            DicomValue::new(dicom_value!(Strs, ["T-D1213 "])),
1164                        ),
1165                        DataElement::new(
1166                            Tag(0x0008, 0x0102),
1167                            VR::SH,
1168                            DicomValue::new(dicom_value!(Strs, ["SRT "])),
1169                        ),
1170                        DataElement::new(
1171                            Tag(0x0008, 0x0104),
1172                            VR::LO,
1173                            DicomValue::new(dicom_value!(Strs, ["Jaw region"])),
1174                        ),
1175                    ],
1176                )],
1177                Length(54),
1178            ),
1179        );
1180
1181        let tokens: Vec<_> = element.clone().into_tokens().collect();
1182
1183        assert_eq!(
1184            &tokens,
1185            &[
1186                DataToken::SequenceStart {
1187                    tag: Tag(0x0008, 0x2218),
1188                    len: Length(54),
1189                },
1190                DataToken::ItemStart { len: Length(46) },
1191                DataToken::ElementHeader(DataElementHeader {
1192                    tag: Tag(0x0008, 0x0100),
1193                    vr: VR::SH,
1194                    len: Length(8),
1195                }),
1196                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1197                    ["T-D1213 ".to_owned()].as_ref().into(),
1198                )),
1199                DataToken::ElementHeader(DataElementHeader {
1200                    tag: Tag(0x0008, 0x0102),
1201                    vr: VR::SH,
1202                    len: Length(4),
1203                }),
1204                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1205                    ["SRT ".to_owned()].as_ref().into()
1206                )),
1207                DataToken::ElementHeader(DataElementHeader {
1208                    tag: Tag(0x0008, 0x0104),
1209                    vr: VR::LO,
1210                    len: Length(10),
1211                }),
1212                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1213                    ["Jaw region".to_owned()].as_ref().into(),
1214                )),
1215                DataToken::ItemEnd,
1216                DataToken::SequenceEnd,
1217            ],
1218        )
1219    }
1220
1221    #[test]
1222    fn lazy_dataset_token_value() {
1223        let data = b"1.234\0";
1224        let mut data = &data[..];
1225        let decoder = StatefulDecoder::new(
1226            &mut data,
1227            ExplicitVRLittleEndianDecoder::default(),
1228            LittleEndianBasicDecoder,
1229            SpecificCharacterSet::default(),
1230        );
1231
1232        is_stateful_decode(&decoder);
1233
1234        let token = LazyDataToken::LazyValue {
1235            header: DataElementHeader {
1236                tag: Tag(0x0020, 0x000D),
1237                vr: VR::UI,
1238                len: Length(6),
1239            },
1240            decoder,
1241        };
1242
1243        match token.into_owned().unwrap() {
1244            DataToken::PrimitiveValue(v) => {
1245                assert_eq!(v.to_raw_str(), "1.234\0",);
1246            }
1247            t => panic!("Unexpected type of token {:?}", t),
1248        }
1249    }
1250
1251    #[test]
1252    fn lazy_dataset_token_value_as_mut() {
1253        let data = b"1.234\0";
1254        let mut data = &data[..];
1255        let mut decoder = StatefulDecoder::new(
1256            &mut data,
1257            ExplicitVRLittleEndianDecoder::default(),
1258            LittleEndianBasicDecoder,
1259            SpecificCharacterSet::default(),
1260        );
1261
1262        is_stateful_decode(&decoder);
1263
1264        let token = LazyDataToken::LazyValue {
1265            header: DataElementHeader {
1266                tag: Tag(0x0020, 0x000D),
1267                vr: VR::UI,
1268                len: Length(6),
1269            },
1270            decoder: &mut decoder,
1271        };
1272
1273        match token.into_owned().unwrap() {
1274            DataToken::PrimitiveValue(v) => {
1275                assert_eq!(v.to_raw_str(), "1.234\0",);
1276            }
1277            t => panic!("Unexpected type of token {:?}", t),
1278        }
1279        assert_eq!(decoder.position(), 6);
1280    }
1281
1282    #[test]
1283    fn lazy_dataset_token_value_skip() {
1284        let data = b"1.234\0";
1285        let mut data = &data[..];
1286        let mut decoder = StatefulDecoder::new(
1287            &mut data,
1288            ExplicitVRLittleEndianDecoder::default(),
1289            LittleEndianBasicDecoder,
1290            SpecificCharacterSet::default(),
1291        );
1292
1293        is_stateful_decode(&decoder);
1294
1295        let token = LazyDataToken::LazyValue {
1296            header: DataElementHeader {
1297                tag: Tag(0x0020, 0x000D),
1298                vr: VR::UI,
1299                len: Length(6),
1300            },
1301            decoder: &mut decoder,
1302        };
1303
1304        token.skip().unwrap();
1305
1306        assert_eq!(decoder.position(), 6);
1307    }
1308
1309    /// A malformed data element (wrong VR) should not panic
1310    /// when converting it to tokens
1311    #[test]
1312    fn bad_element_to_tokens() {
1313        let e: DataElement = DataElement::new_with_len(
1314            Tag(0x0008, 0x0080),
1315            VR::SQ, // wrong VR
1316            Length(6),
1317            PrimitiveValue::from("Oops!"),
1318        );
1319
1320        // should not panic
1321        let tokens = e.into_tokens().collect::<Vec<_>>();
1322        // still expects 2 tokens (header + value)
1323        assert_eq!(tokens.len(), 2);
1324
1325        let e: DataElement = DataElement::new(
1326            Tag(0x7FE0, 0x0010),
1327            VR::SQ, // wrong VR
1328            PixelFragmentSequence::new_fragments(vec![
1329                // one fragment
1330                vec![0x55; 128]
1331            ]),
1332        );
1333
1334        // should not panic,
1335        // other than that there are no guarantees about the output
1336        let _ = e.into_tokens().collect::<Vec<_>>();
1337    }
1338}