dicom_parser/dataset/
mod.rs

1//! Interpretation of DICOM data sets as streams of tokens.
2use crate::stateful::decode;
3use dicom_core::header::{DataElementHeader, HasLength, Length, VR};
4use dicom_core::value::{DicomValueType, PrimitiveValue};
5use dicom_core::{value::Value, DataElement, Tag};
6use snafu::{OptionExt, ResultExt, Snafu};
7use std::default::Default;
8use std::fmt;
9
10pub mod lazy_read;
11pub mod read;
12pub mod write;
13
14pub use self::read::DataSetReader;
15use self::read::ValueReadStrategy;
16pub use self::write::DataSetWriter;
17
/// Errors raised by the operations on data tokens defined in this module.
///
/// NOTE(review): no explicit `#[snafu(display(...))]` attributes are given,
/// so the variant doc comments below presumably double as
/// the `Display` messages of the error; confirm before rewording them.
#[derive(Debug, Snafu)]
pub enum Error {
    /// Could not read item value
    ReadItemValue { source: decode::Error },
    /// Could not read element value
    ReadElementValue { source: decode::Error },
    /// Could not skip the bytes of a value
    SkipValue { source: decode::Error },
    /// Unexpected token type for operation
    UnexpectedTokenType,
    /// Unexpected undefined value length
    UndefinedLength,
}
31
/// Result type whose error type defaults to this module's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
33
/// A token of a DICOM data set stream. This is part of the interpretation of a
/// data set as a stream of symbols, which may either represent data headers or
/// actual value data.
///
/// All value data held by a token is owned.
/// When formatted through `Display`,
/// a primitive value is shown by its value type only,
/// whereas every other variant is shown as in its `Debug` output.
#[derive(Debug, Clone)]
pub enum DataToken {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element,
    /// identified by the tag and length found in its header.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence,
    /// with the length declared for that item.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// A primitive data element value.
    PrimitiveValue(PrimitiveValue),
    /// An owned piece of raw data representing an item's value.
    ///
    /// This variant is used to represent
    /// the value of an encoded fragment.
    /// It should not be used to represent nested data sets.
    ItemValue(Vec<u8>),
    /// An owned sequence of unsigned 32 bit integers
    /// representing a pixel data offset table.
    ///
    /// This variant is used to represent
    /// the byte offsets to the first byte of the Item tag of the first fragment
    /// for each frame in the sequence of items,
    /// as per PS 3.5, Section A.4.
    OffsetTable(Vec<u32>),
}
68
69impl fmt::Display for DataToken {
70    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
71        match self {
72            DataToken::PrimitiveValue(ref v) => write!(f, "PrimitiveValue({:?})", v.value_type()),
73            other => write!(f, "{other:?}"),
74        }
75    }
76}
77
78/// This implementation treats undefined lengths as equal.
79impl PartialEq<Self> for DataToken {
80    fn eq(&self, other: &Self) -> bool {
81        use DataToken::*;
82        match (self, other) {
83            (
84                ElementHeader(DataElementHeader {
85                    tag: tag1,
86                    vr: vr1,
87                    len: len1,
88                }),
89                ElementHeader(DataElementHeader {
90                    tag: tag2,
91                    vr: vr2,
92                    len: len2,
93                }),
94            ) => tag1 == tag2 && vr1 == vr2 && len1.inner_eq(*len2),
95            (
96                SequenceStart {
97                    tag: tag1,
98                    len: len1,
99                },
100                SequenceStart {
101                    tag: tag2,
102                    len: len2,
103                },
104            ) => tag1 == tag2 && len1.inner_eq(*len2),
105            (ItemStart { len: len1 }, ItemStart { len: len2 }) => len1.inner_eq(*len2),
106            (PrimitiveValue(v1), PrimitiveValue(v2)) => v1 == v2,
107            (ItemValue(v1), ItemValue(v2)) => v1 == v2,
108            (OffsetTable(v1), OffsetTable(v2)) => v1 == v2,
109            (ItemEnd, ItemEnd)
110            | (SequenceEnd, SequenceEnd)
111            | (PixelSequenceStart, PixelSequenceStart) => true,
112            _ => false,
113        }
114    }
115}
116
117impl From<DataElementHeader> for DataToken {
118    fn from(header: DataElementHeader) -> Self {
119        match (header.vr(), header.tag) {
120            (VR::OB, Tag(0x7fe0, 0x0010)) if header.len.is_undefined() => {
121                DataToken::PixelSequenceStart
122            }
123            (VR::SQ, _) => DataToken::SequenceStart {
124                tag: header.tag,
125                len: header.len,
126            },
127            _ => DataToken::ElementHeader(header),
128        }
129    }
130}
131
132impl DataToken {
133    /// Check whether this token represents the start of a sequence
134    /// of nested data sets.
135    pub fn is_sequence_start(&self) -> bool {
136        matches!(self, DataToken::SequenceStart { .. })
137    }
138
139    /// Check whether this token represents the end of a sequence
140    /// or the end of an encapsulated element.
141    pub fn is_sequence_end(&self) -> bool {
142        matches!(self, DataToken::SequenceEnd)
143    }
144}
145
/// A lazy data token for reading a data set
/// without requiring values to be fully read in memory.
/// This is part of the interpretation of a
/// data set as a stream of symbols,
/// which may either represent data headers
/// or actual value data.
///
/// The parameter type `D` represents
/// the original type of the stateful decoder,
/// and through which the values can be retrieved.
///
/// Only the two lazy variants carry a decoder;
/// the remaining variants mirror the ones in [`DataToken`].
#[non_exhaustive]
pub enum LazyDataToken<D> {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// An element value yet to be fetched
    LazyValue {
        /// the header of the respective value
        header: DataElementHeader,
        /// the stateful decoder for fetching the bytes of the value
        decoder: D,
    },
    /// An item value yet to be fetched
    LazyItemValue {
        /// the full length of the value, always well defined
        len: u32,
        /// the stateful decoder for fetching the bytes of the value
        decoder: D,
    },
}
185
186impl<D> fmt::Debug for LazyDataToken<D> {
187    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188        match self {
189            LazyDataToken::ElementHeader(header) => {
190                f.debug_tuple("ElementHeader").field(header).finish()
191            }
192            LazyDataToken::SequenceStart { tag, len } => f
193                .debug_struct("SequenceStart")
194                .field("tag", tag)
195                .field("len", len)
196                .finish(),
197            LazyDataToken::PixelSequenceStart => f.write_str("PixelSequenceStart"),
198            LazyDataToken::SequenceEnd => f.write_str("SequenceEnd"),
199            LazyDataToken::ItemStart { len } => {
200                f.debug_struct("ItemStart").field("len", len).finish()
201            }
202            LazyDataToken::ItemEnd => f.write_str("ItemEnd"),
203            LazyDataToken::LazyValue { header, decoder: _ } => {
204                f.debug_struct("LazyValue").field("header", header).finish()
205            }
206            LazyDataToken::LazyItemValue { len, decoder: _ } => {
207                f.debug_struct("LazyItemValue").field("len", len).finish()
208            }
209        }
210    }
211}
212
213impl<D> LazyDataToken<D> {
214    /// Check whether this token represents the start of a sequence
215    /// of nested data sets.
216    pub fn is_sequence_start(&self) -> bool {
217        matches!(self, LazyDataToken::SequenceStart { .. })
218    }
219
220    /// Check whether this token represents the end of a sequence
221    /// or the end of an encapsulated element.
222    pub fn is_sequence_end(&self) -> bool {
223        matches!(self, LazyDataToken::SequenceEnd)
224    }
225}
226
227impl<D> LazyDataToken<D>
228where
229    D: decode::StatefulDecode,
230{
231    /// Skip the value data referred by this token.
232    ///
233    /// This must be called when receiving a token
234    /// of variant [`LazyValue`](LazyDataToken::LazyValue)
235    /// or [`LazyItemValue`](LazyDataToken::LazyItemValue),
236    /// otherwise the data set reader may fail to read subsequent items.
237    ///
238    /// Does nothing for tokens of other variants.
239    pub fn skip(self) -> crate::stateful::decode::Result<()> {
240        match self {
241            LazyDataToken::LazyValue {
242                header,
243                mut decoder,
244            } => decoder.skip_bytes(header.len.0),
245            LazyDataToken::LazyItemValue { len, mut decoder } => decoder.skip_bytes(len),
246            _ => Ok(()), // do nothing
247        }
248    }
249    /// Construct the data token into memory,
250    /// consuming the reader if necessary.
251    ///
252    /// If the token represents a lazy element value,
253    /// the inner decoder is read with string preservation.
254    pub fn into_owned(self) -> Result<DataToken> {
255        self.into_owned_with_strategy(ValueReadStrategy::Preserved)
256    }
257
258    /// Construct the data token into memory,
259    /// consuming the reader if necessary.
260    ///
261    /// If the token represents a lazy element value,
262    /// the inner decoder is read
263    /// with the given value reading strategy.
264    pub fn into_owned_with_strategy(self, strategy: ValueReadStrategy) -> Result<DataToken> {
265        match self {
266            LazyDataToken::ElementHeader(header) => Ok(DataToken::ElementHeader(header)),
267            LazyDataToken::ItemEnd => Ok(DataToken::ItemEnd),
268            LazyDataToken::ItemStart { len } => Ok(DataToken::ItemStart { len }),
269            LazyDataToken::PixelSequenceStart => Ok(DataToken::PixelSequenceStart),
270            LazyDataToken::SequenceEnd => Ok(DataToken::SequenceEnd),
271            LazyDataToken::SequenceStart { tag, len } => Ok(DataToken::SequenceStart { tag, len }),
272            LazyDataToken::LazyValue {
273                header,
274                mut decoder,
275            } => {
276                // use the stateful decoder to eagerly read the value
277                let value = match strategy {
278                    ValueReadStrategy::Interpreted => {
279                        decoder.read_value(&header).context(ReadElementValueSnafu)?
280                    }
281                    ValueReadStrategy::Preserved => decoder
282                        .read_value_preserved(&header)
283                        .context(ReadElementValueSnafu)?,
284                    ValueReadStrategy::Raw => decoder
285                        .read_value_bytes(&header)
286                        .context(ReadElementValueSnafu)?,
287                };
288                Ok(DataToken::PrimitiveValue(value))
289            }
290            LazyDataToken::LazyItemValue { len, mut decoder } => {
291                let mut data = Vec::new();
292                decoder
293                    .read_to_vec(len, &mut data)
294                    .context(ReadItemValueSnafu)?;
295                Ok(DataToken::ItemValue(data))
296            }
297        }
298    }
299
300    /// Retrieve a primitive element value from the token,
301    /// consuming the reader with the given reading strategy.
302    ///
303    /// The operation fails if the token does not represent an element value.
304    pub fn into_value_with_strategy(self, strategy: ValueReadStrategy) -> Result<PrimitiveValue> {
305        match self {
306            LazyDataToken::LazyValue {
307                header,
308                mut decoder,
309            } => {
310                // use the stateful decoder to eagerly read the value
311                match strategy {
312                    ValueReadStrategy::Interpreted => {
313                        decoder.read_value(&header).context(ReadElementValueSnafu)
314                    }
315                    ValueReadStrategy::Preserved => decoder
316                        .read_value_preserved(&header)
317                        .context(ReadElementValueSnafu),
318                    ValueReadStrategy::Raw => decoder
319                        .read_value_bytes(&header)
320                        .context(ReadElementValueSnafu),
321                }
322            }
323            _ => UnexpectedTokenTypeSnafu.fail(),
324        }
325    }
326
327    /// Retrieve a primitive element value from the token,
328    /// consuming the reader with the default reading strategy.
329    ///
330    /// The operation fails if the token does not represent an element value.
331    pub fn into_value(self) -> Result<PrimitiveValue> {
332        self.into_value_with_strategy(ValueReadStrategy::Preserved)
333    }
334
335    /// Read the bytes of a value into the given writer,
336    /// consuming the reader.
337    ///
338    /// This operation will not interpret the value,
339    /// like in the `Bytes` value reading strategy.
340    /// It works for both data elements and non-dataset items.
341    ///
342    /// The operation fails if
343    /// the token does not represent an element or item value.
344    pub fn read_value_into<W>(self, out: W) -> Result<()>
345    where
346        W: std::io::Write,
347    {
348        match self {
349            LazyDataToken::LazyValue {
350                header,
351                mut decoder,
352            } => {
353                let len = header.len.get().context(UndefinedLengthSnafu)?;
354                decoder.read_to(len, out).context(ReadElementValueSnafu)?;
355            }
356            LazyDataToken::LazyItemValue { len, mut decoder } => {
357                decoder.read_to(len, out).context(ReadItemValueSnafu)?;
358            }
359            _other => return UnexpectedTokenTypeSnafu.fail(),
360        };
361        Ok(())
362    }
363
364    /// Convert this token into a structured representation,
365    /// for diagnostics and error reporting purposes.
366    pub fn into_repr(self) -> LazyDataTokenRepr {
367        LazyDataTokenRepr::from(self)
368    }
369
370    /// Create a structured representation of this token,
371    /// for diagnostics and error reporting purposes.
372    pub fn repr(&self) -> LazyDataTokenRepr {
373        LazyDataTokenRepr::from(self)
374    }
375}
376
377impl<D> From<LazyDataToken<D>> for LazyDataTokenRepr {
378    fn from(token: LazyDataToken<D>) -> Self {
379        match token {
380            LazyDataToken::ElementHeader(h) => LazyDataTokenRepr::ElementHeader(h),
381            LazyDataToken::SequenceStart { tag, len } => {
382                LazyDataTokenRepr::SequenceStart { tag, len }
383            }
384            LazyDataToken::PixelSequenceStart => LazyDataTokenRepr::PixelSequenceStart,
385            LazyDataToken::SequenceEnd => LazyDataTokenRepr::SequenceEnd,
386            LazyDataToken::ItemStart { len } => LazyDataTokenRepr::ItemStart { len },
387            LazyDataToken::ItemEnd => LazyDataTokenRepr::ItemEnd,
388            LazyDataToken::LazyValue { header, decoder: _ } => {
389                LazyDataTokenRepr::LazyValue { header }
390            }
391            LazyDataToken::LazyItemValue { len, decoder: _ } => {
392                LazyDataTokenRepr::LazyItemValue { len }
393            }
394        }
395    }
396}
397
398impl<D> From<&LazyDataToken<D>> for LazyDataTokenRepr {
399    fn from(token: &LazyDataToken<D>) -> Self {
400        match *token {
401            LazyDataToken::ElementHeader(h) => LazyDataTokenRepr::ElementHeader(h),
402            LazyDataToken::SequenceStart { tag, len } => {
403                LazyDataTokenRepr::SequenceStart { tag, len }
404            }
405            LazyDataToken::PixelSequenceStart => LazyDataTokenRepr::PixelSequenceStart,
406            LazyDataToken::SequenceEnd => LazyDataTokenRepr::SequenceEnd,
407            LazyDataToken::ItemStart { len } => LazyDataTokenRepr::ItemStart { len },
408            LazyDataToken::ItemEnd => LazyDataTokenRepr::ItemEnd,
409            LazyDataToken::LazyValue { header, decoder: _ } => {
410                LazyDataTokenRepr::LazyValue { header }
411            }
412            LazyDataToken::LazyItemValue { len, decoder: _ } => {
413                LazyDataTokenRepr::LazyItemValue { len }
414            }
415        }
416    }
417}
418
/// A structured description of a lazy data token,
/// for diagnostics and error reporting purposes.
///
/// It has the same variants as [`LazyDataToken`],
/// except that the lazy value variants do not hold a decoder.
#[derive(Debug, Clone, PartialEq)]
pub enum LazyDataTokenRepr {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// An element value yet to be fetched
    LazyValue {
        /// the header of the respective value
        header: DataElementHeader,
    },
    /// An item value yet to be fetched
    LazyItemValue {
        /// the full length of the value, always well defined
        len: u32,
    },
}
446
/// The type of delimiter: sequence or item.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SeqTokenType {
    /// The delimiter belongs to a sequence.
    Sequence,
    /// The delimiter belongs to an item.
    Item,
}
453
/// Options for token generation
///
/// The default value has all options disabled.
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq)]
#[non_exhaustive]
pub struct IntoTokensOptions {
    /// Whether to ignore all sequence lengths in the DICOM data set,
    /// resulting in sequences with undefined length.
    ///
    /// Set this to `true` when the sequence lengths in bytes might no longer be valid,
    /// such as when changing the character set,
    /// and as such data set sequence lengths should be replaced with undefined.
    /// When set to `false`,
    /// whether to retain or replace these lengths
    /// is left at the implementation's discretion.
    pub force_invalidate_sq_length: bool,
}

impl IntoTokensOptions {
    /// Create a new set of token generation options.
    ///
    /// `force_invalidate_sq_length` is stored as is in
    /// [`force_invalidate_sq_length`](IntoTokensOptions::force_invalidate_sq_length).
    pub fn new(force_invalidate_sq_length: bool) -> Self {
        Self {
            force_invalidate_sq_length,
        }
    }
}
478
/// A trait for converting structured DICOM data into a stream of data tokens.
pub trait IntoTokens {
    /// The iterator type through which tokens are obtained.
    type Iter: Iterator<Item = DataToken>;

    /// Convert the value into an iterator of data tokens.
    ///
    /// No options are given by the caller;
    /// the implementations in this module use the default options.
    fn into_tokens(self) -> Self::Iter;
    /// Convert the value into an iterator of data tokens,
    /// with token generation adjusted by the given options.
    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter;
}
487
/// Token conversion for the empty object type.
///
/// Both methods panic through `unreachable!()` when called.
/// NOTE(review): presumably values of this type are never converted
/// into tokens in practice; confirm against the callers.
impl IntoTokens for dicom_core::header::EmptyObject {
    type Iter = std::iter::Empty<DataToken>;

    fn into_tokens(self) -> Self::Iter {
        // not expected to ever be called
        unreachable!()
    }

    fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter {
        // not expected to ever be called
        unreachable!()
    }
}
499
/// Token generator from a DICOM data element.
///
/// Each variant is a state of the generator:
/// it begins at `Start`, moves through the states
/// which match the kind of value in the element,
/// and finishes at `End`.
pub enum DataElementTokens<I, P>
where
    I: IntoTokens,
{
    /// initial state, at the beginning of the element
    Start(
        // Option is used for easy taking from a &mut,
        // should always be Some in practice
        Option<DataElement<I, P>>,
        // the options to generate tokens with
        IntoTokensOptions,
    ),
    /// the header of a plain primitive element was read
    Header(
        // Option is used for easy taking from a &mut,
        // should always be Some in practice
        Option<DataElement<I, P>>,
    ),
    /// reading tokens from items
    Items(
        FlattenTokens<
            <dicom_core::value::C<AsItem<I>> as IntoIterator>::IntoIter,
            ItemTokens<I::Iter>,
        >,
    ),
    /// the header of encapsulated pixel data was read, will read
    /// the offset table next
    PixelData(
        /// Pixel fragments
        ///
        /// Option is used for easy taking from a &mut,
        /// should always be Some in practice
        Option<dicom_core::value::C<P>>,
        /// Frame offset table
        OffsetTableItemTokens<dicom_core::value::C<u32>>,
    ),
    /// the header and offset of encapsulated pixel data was read,
    /// fragments come next
    PixelDataFragments(
        FlattenTokens<
            <dicom_core::value::C<ItemValue<P>> as IntoIterator>::IntoIter,
            ItemValueTokens<P>,
        >,
    ),
    /// no more elements
    End,
}
547
/// Token generation as a state machine:
/// each call to `next` inspects the current state,
/// yields at most one token,
/// and moves to the next state when the current one is exhausted.
impl<I, P> Iterator for DataElementTokens<I, P>
where
    I: IntoTokens,
    I: HasLength,
    P: AsRef<[u8]>,
{
    type Item = DataToken;

    fn next(&mut self) -> Option<Self::Item> {
        // each arm evaluates to the token to return and the state to move to;
        // arms which remain in the same state return early instead
        let (out, next_state) = match self {
            DataElementTokens::Start(elem, options) => {
                let elem = elem.take().unwrap();
                // data element header token

                // work on a copy of the header,
                // so that a sequence length can be replaced with undefined
                // when the options ask for it
                let mut header = *elem.header();
                if options.force_invalidate_sq_length && elem.vr() == VR::SQ {
                    header.len = Length::UNDEFINED;
                }

                // the kind of token obtained from the header
                // decides which kind of value is expected next
                let token = DataToken::from(header);
                match token {
                    DataToken::SequenceStart { tag, len } => {
                        // retrieve sequence value, begin item sequence
                        match elem.into_value() {
                            v @ Value::Primitive(_) => {
                                // this can only happen in malformed data (wrong VR),
                                // but we try to handle it gracefully anyway:
                                // return a header token instead and continue
                                // as if it were a primitive value
                                if len.is_defined() {
                                    tracing::warn!(
                                        "Unexpected primitive value after header {} with VR SQ",
                                        tag
                                    );
                                    let adapted_elem =
                                        DataElement::new_with_len(tag, VR::SQ, len, v);
                                    (
                                        Some(DataToken::ElementHeader(*adapted_elem.header())),
                                        DataElementTokens::Header(Some(adapted_elem)),
                                    )
                                } else {
                                    // without a defined length,
                                    // it is too risky to provide any tokens
                                    tracing::warn!("Unexpected primitive value after header {} with VR SQ, ignoring", tag);
                                    (None, DataElementTokens::End)
                                }
                            }
                            Value::PixelSequence { .. } => {
                                // this is also invalid because
                                // this is a data element sequence start,
                                // not a pixel data fragment sequence start.
                                // stop here and return nothing
                                tracing::warn!("Unexpected pixel data fragments after header {} with VR SQ, ignored", tag);
                                (None, DataElementTokens::End)
                            }
                            Value::Sequence(seq) => {
                                // when lengths are to be invalidated,
                                // rebuild the sequence from its items alone
                                let seq = if options.force_invalidate_sq_length {
                                    seq.into_items().into_vec().into()
                                } else {
                                    seq
                                };

                                // pair each item with its length,
                                // so that it is emitted between item delimiters
                                let items: dicom_core::value::C<_> = seq
                                    .into_items()
                                    .into_iter()
                                    .map(|o| AsItem(o.length(), o))
                                    .collect();
                                (
                                    Some(token),
                                    DataElementTokens::Items(
                                        items.into_tokens_with_options(*options),
                                    ),
                                )
                            }
                        }
                    }
                    DataToken::PixelSequenceStart => {
                        match elem.into_value() {
                            Value::PixelSequence(seq) => {
                                let (offset_table, fragments) = seq.into_parts();
                                (
                                    // begin pixel sequence
                                    Some(DataToken::PixelSequenceStart),
                                    DataElementTokens::PixelData(
                                        Some(fragments),
                                        OffsetTableItem(offset_table)
                                            .into_tokens_with_options(*options),
                                    ),
                                )
                            }
                            // NOTE(review): this panics if an element
                            // with a pixel sequence header
                            // holds any other kind of value;
                            // confirm that such elements cannot be constructed
                            Value::Primitive(_) | Value::Sequence { .. } => unreachable!(),
                        }
                    }
                    // any other header: emit it,
                    // the primitive value comes on the next call
                    _ => (
                        Some(DataToken::ElementHeader(*elem.header())),
                        DataElementTokens::Header(Some(elem)),
                    ),
                }
            }
            DataElementTokens::Header(elem) => {
                let elem = elem.take().unwrap();
                match elem.into_value() {
                    // this state is only entered with primitive values
                    Value::Sequence { .. } | Value::PixelSequence { .. } => unreachable!(),
                    Value::Primitive(value) => {
                        // return primitive value, done
                        let token = DataToken::PrimitiveValue(value);
                        (Some(token), DataElementTokens::End)
                    }
                }
            }
            DataElementTokens::Items(tokens) => {
                if let Some(token) = tokens.next() {
                    // bypass manual state transition
                    return Some(token);
                } else {
                    // sequence end token, end
                    (Some(DataToken::SequenceEnd), DataElementTokens::End)
                }
            }
            DataElementTokens::PixelData(fragments, tokens) => {
                // tokens of the offset table item come first
                if let Some(token) = tokens.next() {
                    // bypass manual state transition
                    return Some(token);
                }
                // pixel data fragments next
                let fragments = fragments.take().unwrap();
                let tokens: dicom_core::value::C<_> =
                    fragments.into_iter().map(ItemValue).collect();
                // no options are passed on to the fragment tokens
                *self = DataElementTokens::PixelDataFragments(tokens.into_tokens());
                // recursive call to ensure the retrieval of a data token
                return self.next();
            }
            DataElementTokens::PixelDataFragments(tokens) => {
                if let Some(token) = tokens.next() {
                    // bypass manual state transition
                    return Some(token);
                } else {
                    // sequence end token, end
                    (Some(DataToken::SequenceEnd), DataElementTokens::End)
                }
            }
            DataElementTokens::End => return None,
        };
        *self = next_state;

        out
    }
}
696
697impl<I, P> IntoTokens for DataElement<I, P>
698where
699    I: IntoTokens,
700    I: HasLength,
701    P: AsRef<[u8]>,
702{
703    type Iter = DataElementTokens<I, P>;
704
705    fn into_tokens(self) -> Self::Iter {
706        //Avoid
707        self.into_tokens_with_options(Default::default())
708    }
709
710    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
711        DataElementTokens::Start(Some(self), options)
712    }
713}
714
/// Flatten a sequence of elements into their respective
/// token sequence in order.
///
/// `O` is the iterator of elements to convert,
/// and `K` is the token iterator obtained from each of them.
#[derive(Debug, PartialEq)]
pub struct FlattenTokens<O, K> {
    // the elements which are yet to be converted into tokens
    seq: O,
    // the tokens of the element currently being read,
    // `None` when a new element needs to be fetched from `seq`
    tokens: Option<K>,
    // the options with which each element is converted into tokens
    into_token_options: IntoTokensOptions,
}
723
724impl<O, K> Iterator for FlattenTokens<O, K>
725where
726    O: Iterator,
727    O::Item: IntoTokens<Iter = K>,
728    K: Iterator<Item = DataToken>,
729{
730    type Item = DataToken;
731
732    fn next(&mut self) -> Option<Self::Item> {
733        // ensure a token sequence
734        if self.tokens.is_none() {
735            match self.seq.next() {
736                Some(entries) => {
737                    self.tokens = Some(entries.into_tokens_with_options(self.into_token_options));
738                }
739                None => return None,
740            }
741        }
742
743        // retrieve the next token
744        match self.tokens.as_mut().map(|s| s.next()) {
745            Some(Some(token)) => Some(token),
746            Some(None) => {
747                self.tokens = None;
748                self.next()
749            }
750            None => unreachable!(),
751        }
752    }
753}
754
755impl<T> IntoTokens for Vec<T>
756where
757    T: IntoTokens,
758{
759    type Iter = FlattenTokens<<Vec<T> as IntoIterator>::IntoIter, <T as IntoTokens>::Iter>;
760
761    fn into_tokens(self) -> Self::Iter {
762        self.into_tokens_with_options(Default::default())
763    }
764
765    fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
766        FlattenTokens {
767            seq: self.into_iter(),
768            tokens: None,
769            into_token_options,
770        }
771    }
772}
773
774impl<T> IntoTokens for dicom_core::value::C<T>
775where
776    T: IntoTokens,
777{
778    type Iter =
779        FlattenTokens<<dicom_core::value::C<T> as IntoIterator>::IntoIter, <T as IntoTokens>::Iter>;
780
781    fn into_tokens(self) -> Self::Iter {
782        self.into_tokens_with_options(Default::default())
783    }
784
785    fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
786        FlattenTokens {
787            seq: self.into_iter(),
788            tokens: None,
789            into_token_options,
790        }
791    }
792}
793
/// A stream of tokens from a DICOM item.
///
/// Each variant is a state of the stream:
/// an item start token is produced first,
/// then the tokens of the inner object,
/// and lastly an item end token.
#[derive(Debug)]
pub enum ItemTokens<T> {
    /// Just started, an item header token will come next
    Start {
        // the length to declare in the item start token
        len: Length,
        // Option is used for taking the tokens out of a &mut,
        // set to Some when constructed via `new`
        object_tokens: Option<T>,
    },
    /// Will return tokens from the inner object, then an end of item token
    /// when it ends
    Object { object_tokens: T },
    /// Just ended, no more tokens
    End,
}
808
809impl<T> ItemTokens<T>
810where
811    T: Iterator<Item = DataToken>,
812{
813    pub fn new<O>(len: Length, object: O, options: IntoTokensOptions) -> Self
814    where
815        O: IntoTokens<Iter = T>,
816    {
817        let len = if len.0 != 0 && options.force_invalidate_sq_length {
818            Length::UNDEFINED
819        } else {
820            len
821        };
822        ItemTokens::Start {
823            len,
824            object_tokens: Some(object.into_tokens_with_options(options)),
825        }
826    }
827}
828
829impl<T> Iterator for ItemTokens<T>
830where
831    T: Iterator<Item = DataToken>,
832{
833    type Item = DataToken;
834
835    fn next(&mut self) -> Option<Self::Item> {
836        let (next_state, out) = match self {
837            ItemTokens::Start { len, object_tokens } => (
838                ItemTokens::Object {
839                    object_tokens: object_tokens.take().unwrap(),
840                },
841                Some(DataToken::ItemStart { len: *len }),
842            ),
843            ItemTokens::Object { object_tokens } => {
844                if let Some(token) = object_tokens.next() {
845                    return Some(token);
846                } else {
847                    (ItemTokens::End, Some(DataToken::ItemEnd))
848                }
849            }
850            ItemTokens::End => {
851                return None;
852            }
853        };
854
855        *self = next_state;
856        out
857    }
858}
859
860/// A newtype for interpreting the given data as an item.
861/// When converting a value of this type into tokens, the inner value's tokens
862/// will be surrounded by an item start and an item delimiter.
863#[derive(Debug, Clone, PartialEq)]
864pub struct AsItem<I>(Length, I);
865
866impl<I> IntoTokens for AsItem<I>
867where
868    I: IntoTokens,
869{
870    type Iter = ItemTokens<I::Iter>;
871
872    fn into_tokens(self) -> Self::Iter {
873        self.into_tokens_with_options(Default::default())
874    }
875
876    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
877        ItemTokens::new(self.0, self.1, options)
878    }
879}
880
881impl<I> HasLength for AsItem<I> {
882    fn length(&self) -> Length {
883        self.0
884    }
885}
886
/// A newtype for wrapping a piece of raw data into an item.
///
/// When converting a value of this type into tokens,
/// the algorithm will create an item start,
/// followed by an item value token holding a copy of the data,
/// then an item delimiter.
///
/// The item start carries the number of bytes of the data as its length,
/// unless the conversion options request sequence lengths to be invalidated
/// (`force_invalidate_sq_length`) and the data is not empty,
/// in which case the length is reported as undefined.
/// When the data is empty, no item value token is emitted:
/// the item start (with length 0) is directly followed by the item delimiter.
#[derive(Debug, Clone, PartialEq)]
pub struct ItemValue<P>(P);
893
894impl<P> IntoTokens for ItemValue<P>
895where
896    P: AsRef<[u8]>,
897{
898    type Iter = ItemValueTokens<P>;
899
900    fn into_tokens(self) -> Self::Iter {
901        self.into_tokens_with_options(Default::default())
902    }
903
904    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
905        ItemValueTokens::new(self.0, options)
906    }
907}
908
/// A stream of tokens for an item holding a piece of raw data.
///
/// The variants are the states of the stream,
/// visited in declaration order
/// (`Value` is skipped when the data is empty).
#[derive(Debug)]
pub enum ItemValueTokens<P> {
    /// Nothing emitted yet, the item header token comes next.
    /// The flag tells whether the item length should be reported as undefined
    /// rather than trusted to be valid.
    Start(Option<P>, bool),
    /// The item header was emitted, a token with the value comes next
    Value(P),
    /// The value was emitted (or skipped), the end of item token comes next
    Done,
    /// The end of item token was emitted, there are no more tokens
    End,
}
921
922impl<P> ItemValueTokens<P> {
923    #[inline]
924    pub fn new(value: P, into_tokens_options: IntoTokensOptions) -> Self {
925        ItemValueTokens::Start(Some(value), into_tokens_options.force_invalidate_sq_length)
926    }
927}
928
929impl<P> Iterator for ItemValueTokens<P>
930where
931    P: AsRef<[u8]>,
932{
933    type Item = DataToken;
934
935    fn next(&mut self) -> Option<Self::Item> {
936        let (out, next_state) = match self {
937            ItemValueTokens::Start(value, invalidate_len) => {
938                let value = value.take().unwrap();
939                let end_item = value.as_ref().is_empty();
940                let len = if *invalidate_len && !end_item {
941                    Length::UNDEFINED
942                } else {
943                    Length(value.as_ref().len() as u32)
944                };
945
946                (
947                    Some(DataToken::ItemStart { len }),
948                    if end_item {
949                        ItemValueTokens::Done
950                    } else {
951                        ItemValueTokens::Value(value)
952                    },
953                )
954            }
955            ItemValueTokens::Value(value) => (
956                Some(DataToken::ItemValue(value.as_ref().to_owned())),
957                ItemValueTokens::Done,
958            ),
959            ItemValueTokens::Done => (Some(DataToken::ItemEnd), ItemValueTokens::End),
960            ItemValueTokens::End => return None,
961        };
962
963        *self = next_state;
964        out
965    }
966}
967
/// A newtype for wrapping a sequence of `u32`s into an offset table item.
///
/// When converting a value of this type into tokens,
/// the algorithm will create an item start with an explicit length
/// (4 bytes per offset),
/// followed by an offset table token holding a copy of the offsets,
/// then an item delimiter.
/// When there are no offsets, the offset table token is omitted:
/// the item start (with length 0) is directly followed by the item delimiter.
#[derive(Debug, Clone, PartialEq)]
pub struct OffsetTableItem<P>(P);
975
976impl<P> IntoTokens for OffsetTableItem<P>
977where
978    P: AsRef<[u32]>,
979{
980    type Iter = OffsetTableItemTokens<P>;
981
982    fn into_tokens(self) -> Self::Iter {
983        self.into_tokens_with_options(Default::default())
984    }
985
986    fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter {
987        //There are no sequences here that might need to be invalidated
988        OffsetTableItemTokens::new(self.0)
989    }
990}
991
/// A stream of tokens for an item holding an offset table.
///
/// The variants are the states of the stream,
/// visited in declaration order
/// (`Value` is skipped when the table is empty).
#[derive(Debug)]
pub enum OffsetTableItemTokens<P> {
    /// Nothing emitted yet, the item header token comes next
    Start(Option<P>),
    /// The item header was emitted,
    /// a token with the actual offset table comes next
    Value(P),
    /// The table was emitted (or skipped), the end of item token comes next
    Done,
    /// The end of item token was emitted, there are no more tokens
    End,
}
1003
1004impl<P> OffsetTableItemTokens<P> {
1005    #[inline]
1006    pub fn new(value: P) -> Self {
1007        OffsetTableItemTokens::Start(Some(value))
1008    }
1009}
1010
1011impl<P> Iterator for OffsetTableItemTokens<P>
1012where
1013    P: AsRef<[u32]>,
1014{
1015    type Item = DataToken;
1016
1017    fn next(&mut self) -> Option<Self::Item> {
1018        let (out, next_state) = match self {
1019            OffsetTableItemTokens::Start(value) => {
1020                let value = value.take().unwrap();
1021                let len = Length(value.as_ref().len() as u32 * 4);
1022
1023                (
1024                    Some(DataToken::ItemStart { len }),
1025                    if len == Length(0) {
1026                        OffsetTableItemTokens::Done
1027                    } else {
1028                        OffsetTableItemTokens::Value(value)
1029                    },
1030                )
1031            }
1032            OffsetTableItemTokens::Value(value) => (
1033                Some(DataToken::OffsetTable(value.as_ref().to_owned())),
1034                OffsetTableItemTokens::Done,
1035            ),
1036            OffsetTableItemTokens::Done => (Some(DataToken::ItemEnd), OffsetTableItemTokens::End),
1037            OffsetTableItemTokens::End => return None,
1038        };
1039
1040        *self = next_state;
1041        out
1042    }
1043}
1044
#[cfg(test)]
mod tests {
    use dicom_core::{
        dicom_value, header::HasLength, value::PixelFragmentSequence, DataElement,
        DataElementHeader, DicomValue, Length, PrimitiveValue, Tag, VR,
    };

    use super::{DataToken, IntoTokens, IntoTokensOptions, LazyDataToken};
    use smallvec::smallvec;

    use dicom_encoding::{
        decode::{basic::LittleEndianBasicDecoder, explicit_le::ExplicitVRLittleEndianDecoder},
        text::SpecificCharacterSet,
    };

    use crate::stateful::decode::StatefulDecode;
    use crate::stateful::decode::StatefulDecoder;

    /// Compile-time check that the given value implements `StatefulDecode`.
    fn is_stateful_decode<D>(_: &D)
    where
        D: StatefulDecode,
    {
    }

    /// A simple object representing a DICOM data set
    /// (a declared length plus a collection of elements),
    /// used merely for testing purposes.
    #[derive(Debug, Clone)]
    struct SimpleObject<T>(Length, dicom_core::value::C<T>);

    impl<T> HasLength for SimpleObject<T> {
        fn length(&self) -> Length {
            let SimpleObject(declared_len, _) = self;
            *declared_len
        }
    }

    impl<T> IntoTokens for SimpleObject<T>
    where
        T: IntoTokens,
        T: HasLength,
    {
        type Iter = super::FlattenTokens<
            <dicom_core::value::C<T> as IntoIterator>::IntoIter,
            <T as IntoTokens>::Iter,
        >;

        fn into_tokens(self) -> Self::Iter {
            self.into_tokens_with_options(IntoTokensOptions::default())
        }

        fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
            // only the elements produce tokens, the declared length is dropped
            let SimpleObject(_, elements) = self;
            super::FlattenTokens {
                seq: elements.into_iter(),
                tokens: None,
                into_token_options,
            }
        }
    }

    /// The header token expected for an element
    /// with the given tag, VR and length in bytes.
    fn header_token(tag: Tag, vr: VR, len: u32) -> DataToken {
        DataToken::ElementHeader(DataElementHeader {
            tag,
            vr,
            len: Length(len),
        })
    }

    /// The value token expected for an element holding a single string.
    fn strs_token(text: &str) -> DataToken {
        DataToken::PrimitiveValue(PrimitiveValue::Strs([text.to_owned()].as_ref().into()))
    }

    /// The full token stream expected from the single-item code sequence
    /// built in the sequence tests,
    /// parameterized by the lengths of the sequence and of its item.
    fn expected_code_sequence(seq_len: Length, item_len: Length) -> Vec<DataToken> {
        vec![
            DataToken::SequenceStart {
                tag: Tag(0x0008, 0x2218),
                len: seq_len,
            },
            DataToken::ItemStart { len: item_len },
            header_token(Tag(0x0008, 0x0100), VR::SH, 8),
            strs_token("T-D1213 "),
            header_token(Tag(0x0008, 0x0102), VR::SH, 4),
            strs_token("SRT "),
            header_token(Tag(0x0008, 0x0104), VR::LO, 10),
            strs_token("Jaw region"),
            DataToken::ItemEnd,
            DataToken::SequenceEnd,
        ]
    }

    /// The header of the 6-byte UI element served by the lazy token tests.
    fn uid_header() -> DataElementHeader {
        DataElementHeader {
            tag: Tag(0x0020, 0x000D),
            vr: VR::UI,
            len: Length(6),
        }
    }

    /// Check that the token is a primitive value holding the raw test UID.
    fn assert_raw_uid(token: DataToken) {
        match token {
            DataToken::PrimitiveValue(v) => {
                assert_eq!(v.to_raw_str(), "1.234\0");
            }
            t => panic!("Unexpected type of token {:?}", t),
        }
    }

    /// A primitive element becomes a header token followed by a value token
    #[test]
    fn basic_element_into_tokens() {
        let element = DataElement::new(
            Tag(0x0010, 0x0010),
            VR::PN,
            DicomValue::new("Doe^John".into()),
        );

        let header = *element.header();
        let produced = element.into_tokens().collect::<Vec<_>>();

        assert_eq!(
            produced,
            [
                DataToken::ElementHeader(header),
                DataToken::PrimitiveValue("Doe^John".into()),
            ],
        )
    }

    /// A sequence and item of undefined length keep their undefined lengths
    #[test]
    fn sequence_implicit_len_into_tokens() {
        let element = DataElement::new(
            Tag(0x0008, 0x2218),
            VR::SQ,
            DicomValue::new_sequence(
                vec![SimpleObject(
                    Length::UNDEFINED,
                    smallvec![
                        DataElement::new(
                            Tag(0x0008, 0x0100),
                            VR::SH,
                            DicomValue::new(dicom_value!(Strs, ["T-D1213 "])),
                        ),
                        DataElement::new(
                            Tag(0x0008, 0x0102),
                            VR::SH,
                            DicomValue::new(dicom_value!(Strs, ["SRT "])),
                        ),
                        DataElement::new(
                            Tag(0x0008, 0x0104),
                            VR::LO,
                            DicomValue::new(dicom_value!(Strs, ["Jaw region"])),
                        ),
                    ],
                )],
                Length::UNDEFINED,
            ),
        );

        let produced = element.into_tokens().collect::<Vec<_>>();

        assert_eq!(
            produced,
            expected_code_sequence(Length::UNDEFINED, Length::UNDEFINED),
        )
    }

    /// A sequence and item of explicit length keep their explicit lengths
    #[test]
    fn sequence_explicit_len_into_tokens() {
        let element = DataElement::new(
            Tag(0x0008, 0x2218),
            VR::SQ,
            DicomValue::new_sequence(
                vec![SimpleObject(
                    Length(46),
                    smallvec![
                        DataElement::new(
                            Tag(0x0008, 0x0100),
                            VR::SH,
                            DicomValue::new(dicom_value!(Strs, ["T-D1213 "])),
                        ),
                        DataElement::new(
                            Tag(0x0008, 0x0102),
                            VR::SH,
                            DicomValue::new(dicom_value!(Strs, ["SRT "])),
                        ),
                        DataElement::new(
                            Tag(0x0008, 0x0104),
                            VR::LO,
                            DicomValue::new(dicom_value!(Strs, ["Jaw region"])),
                        ),
                    ],
                )],
                Length(54),
            ),
        );

        let produced = element.into_tokens().collect::<Vec<_>>();

        assert_eq!(produced, expected_code_sequence(Length(54), Length(46)))
    }

    /// A lazy value owning its decoder can be turned into an owned token
    #[test]
    fn lazy_dataset_token_value() {
        let mut source: &[u8] = b"1.234\0";
        let decoder = StatefulDecoder::new(
            &mut source,
            ExplicitVRLittleEndianDecoder::default(),
            LittleEndianBasicDecoder,
            SpecificCharacterSet::default(),
        );

        is_stateful_decode(&decoder);

        let token = LazyDataToken::LazyValue {
            header: uid_header(),
            decoder,
        };

        assert_raw_uid(token.into_owned().unwrap());
    }

    /// A lazy value borrowing its decoder can be turned into an owned token,
    /// and doing so advances the decoder past the value
    #[test]
    fn lazy_dataset_token_value_as_mut() {
        let mut source: &[u8] = b"1.234\0";
        let mut decoder = StatefulDecoder::new(
            &mut source,
            ExplicitVRLittleEndianDecoder::default(),
            LittleEndianBasicDecoder,
            SpecificCharacterSet::default(),
        );

        is_stateful_decode(&decoder);

        let token = LazyDataToken::LazyValue {
            header: uid_header(),
            decoder: &mut decoder,
        };

        assert_raw_uid(token.into_owned().unwrap());
        assert_eq!(decoder.position(), 6);
    }

    /// Skipping a lazy value advances the decoder past the value
    #[test]
    fn lazy_dataset_token_value_skip() {
        let mut source: &[u8] = b"1.234\0";
        let mut decoder = StatefulDecoder::new(
            &mut source,
            ExplicitVRLittleEndianDecoder::default(),
            LittleEndianBasicDecoder,
            SpecificCharacterSet::default(),
        );

        is_stateful_decode(&decoder);

        let token = LazyDataToken::LazyValue {
            header: uid_header(),
            decoder: &mut decoder,
        };

        token.skip().unwrap();

        assert_eq!(decoder.position(), 6);
    }

    /// A malformed data element (wrong VR) should not panic
    /// when converting it to tokens
    #[test]
    fn bad_element_to_tokens() {
        let primitive_as_sq: DataElement = DataElement::new_with_len(
            Tag(0x0008, 0x0080),
            VR::SQ, // wrong VR
            Length(6),
            PrimitiveValue::from("Oops!"),
        );

        // should not panic,
        // and still expects 2 tokens (header + value)
        let token_count = primitive_as_sq.into_tokens().count();
        assert_eq!(token_count, 2);

        let fragments_as_sq: DataElement = DataElement::new(
            Tag(0x7FE0, 0x0010),
            VR::SQ, // wrong VR
            PixelFragmentSequence::new_fragments(vec![
                // one fragment
                vec![0x55; 128],
            ]),
        );

        // should not panic,
        // other than that there are no guarantees about the output
        for _token in fragments_as_sq.into_tokens() {}
    }
}