Skip to main content

dicom_parser/dataset/
mod.rs

1//! Interpretation of DICOM data sets as streams of tokens.
2use crate::stateful::decode;
3use dicom_core::header::{DataElementHeader, HasLength, Length, VR};
4use dicom_core::value::{DicomValueType, PrimitiveValue};
5use dicom_core::{DataElement, Tag, value::Value};
6use snafu::{OptionExt, ResultExt, Snafu};
7use std::default::Default;
8use std::fmt;
9
10pub mod lazy_read;
11pub mod read;
12pub mod write;
13
14pub use self::read::DataSetReader;
15use self::read::ValueReadStrategy;
16pub use self::write::DataSetWriter;
17
// NOTE(review): no explicit `#[snafu(display(...))]` attributes are given here,
// so snafu presumably derives each variant's display message from its doc comment.
// Rewording the variant doc comments may therefore change the error text;
// confirm against the snafu documentation before editing them.
/// Errors which may occur when consuming or resolving data tokens.
#[derive(Debug, Snafu)]
pub enum Error {
    /// Could not read item value
    ReadItemValue { source: decode::Error },
    /// Could not read element value
    ReadElementValue { source: decode::Error },
    /// Could not skip the bytes of a value
    SkipValue { source: decode::Error },
    /// Unexpected token type for operation
    UnexpectedTokenType,
    /// Unexpected undefined value length
    UndefinedLength,
}
31
/// Result alias for this module,
/// with the error type defaulting to the module's own [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
33
/// A token of a DICOM data set stream. This is part of the interpretation of a
/// data set as a stream of symbols, which may either represent data headers or
/// actual value data.
///
/// Equality between tokens is implemented manually
/// so that undefined lengths compare as equal.
#[derive(Debug, Clone)]
pub enum DataToken {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element,
    /// carrying the tag and the length taken from the element header.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence,
    /// carrying the length of the item.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// A primitive data element value.
    PrimitiveValue(PrimitiveValue),
    /// An owned piece of raw data representing an item's value.
    ///
    /// This variant is used to represent
    /// the value of an encoded fragment.
    /// It should not be used to represent nested data sets.
    ItemValue(Vec<u8>),
    /// An owned sequence of unsigned 32 bit integers
    /// representing a pixel data offset table.
    ///
    /// This variant is used to represent
    /// the byte offsets to the first byte of the Item tag of the first fragment
    /// for each frame in the sequence of items,
    /// as per PS 3.5, Section A.4.
    OffsetTable(Vec<u32>),
}
68
69impl fmt::Display for DataToken {
70    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
71        match self {
72            DataToken::PrimitiveValue(v) => write!(f, "PrimitiveValue({:?})", v.value_type()),
73            other => write!(f, "{other:?}"),
74        }
75    }
76}
77
78/// This implementation treats undefined lengths as equal.
79impl PartialEq<Self> for DataToken {
80    fn eq(&self, other: &Self) -> bool {
81        use DataToken::*;
82        match (self, other) {
83            (
84                ElementHeader(DataElementHeader {
85                    tag: tag1,
86                    vr: vr1,
87                    len: len1,
88                }),
89                ElementHeader(DataElementHeader {
90                    tag: tag2,
91                    vr: vr2,
92                    len: len2,
93                }),
94            ) => tag1 == tag2 && vr1 == vr2 && len1.inner_eq(*len2),
95            (
96                SequenceStart {
97                    tag: tag1,
98                    len: len1,
99                },
100                SequenceStart {
101                    tag: tag2,
102                    len: len2,
103                },
104            ) => tag1 == tag2 && len1.inner_eq(*len2),
105            (ItemStart { len: len1 }, ItemStart { len: len2 }) => len1.inner_eq(*len2),
106            (PrimitiveValue(v1), PrimitiveValue(v2)) => v1 == v2,
107            (ItemValue(v1), ItemValue(v2)) => v1 == v2,
108            (OffsetTable(v1), OffsetTable(v2)) => v1 == v2,
109            (ItemEnd, ItemEnd)
110            | (SequenceEnd, SequenceEnd)
111            | (PixelSequenceStart, PixelSequenceStart) => true,
112            _ => false,
113        }
114    }
115}
116
117impl From<DataElementHeader> for DataToken {
118    fn from(header: DataElementHeader) -> Self {
119        match (header.vr(), header.tag) {
120            (VR::OB, Tag(0x7fe0, 0x0010)) if header.len.is_undefined() => {
121                DataToken::PixelSequenceStart
122            }
123            (VR::SQ, _) => DataToken::SequenceStart {
124                tag: header.tag,
125                len: header.len,
126            },
127            _ => DataToken::ElementHeader(header),
128        }
129    }
130}
131
132impl DataToken {
133    /// Check whether this token represents the start of a sequence
134    /// of nested data sets.
135    pub fn is_sequence_start(&self) -> bool {
136        matches!(self, DataToken::SequenceStart { .. })
137    }
138
139    /// Check whether this token represents the end of a sequence
140    /// or the end of an encapsulated element.
141    pub fn is_sequence_end(&self) -> bool {
142        matches!(self, DataToken::SequenceEnd)
143    }
144}
145
/// A lazy data token for reading a data set
/// without requiring values to be fully read in memory.
/// This is part of the interpretation of a
/// data set as a stream of symbols,
/// which may either represent data headers
/// or actual value data.
///
/// The parameter type `D` represents
/// the original type of the stateful decoder,
/// and through which the values can be retrieved.
///
/// The two lazy variants own a decoder of type `D`,
/// which is used to read or skip the pending value bytes.
#[non_exhaustive]
pub enum LazyDataToken<D> {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// An element value yet to be fetched
    LazyValue {
        /// the header of the respective value
        header: DataElementHeader,
        /// the stateful decoder for fetching the bytes of the value
        decoder: D,
    },
    /// An item value yet to be fetched
    LazyItemValue {
        /// the full length of the value, always well defined
        len: u32,
        /// the stateful decoder for fetching the bytes of the value
        decoder: D,
    },
}
185
186impl<D> fmt::Debug for LazyDataToken<D> {
187    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188        match self {
189            LazyDataToken::ElementHeader(header) => {
190                f.debug_tuple("ElementHeader").field(header).finish()
191            }
192            LazyDataToken::SequenceStart { tag, len } => f
193                .debug_struct("SequenceStart")
194                .field("tag", tag)
195                .field("len", len)
196                .finish(),
197            LazyDataToken::PixelSequenceStart => f.write_str("PixelSequenceStart"),
198            LazyDataToken::SequenceEnd => f.write_str("SequenceEnd"),
199            LazyDataToken::ItemStart { len } => {
200                f.debug_struct("ItemStart").field("len", len).finish()
201            }
202            LazyDataToken::ItemEnd => f.write_str("ItemEnd"),
203            LazyDataToken::LazyValue { header, decoder: _ } => {
204                f.debug_struct("LazyValue").field("header", header).finish()
205            }
206            LazyDataToken::LazyItemValue { len, decoder: _ } => {
207                f.debug_struct("LazyItemValue").field("len", len).finish()
208            }
209        }
210    }
211}
212
213impl<D> LazyDataToken<D> {
214    /// Check whether this token represents the start of a sequence
215    /// of nested data sets.
216    pub fn is_sequence_start(&self) -> bool {
217        matches!(self, LazyDataToken::SequenceStart { .. })
218    }
219
220    /// Check whether this token represents the end of a sequence
221    /// or the end of an encapsulated element.
222    pub fn is_sequence_end(&self) -> bool {
223        matches!(self, LazyDataToken::SequenceEnd)
224    }
225}
226
227impl<D> LazyDataToken<D>
228where
229    D: decode::StatefulDecode,
230{
231    /// Skip the value data referred by this token.
232    ///
233    /// This must be called when receiving a token
234    /// of variant [`LazyValue`](LazyDataToken::LazyValue)
235    /// or [`LazyItemValue`](LazyDataToken::LazyItemValue),
236    /// otherwise the data set reader may fail to read subsequent items.
237    ///
238    /// Does nothing for tokens of other variants.
239    pub fn skip(self) -> crate::stateful::decode::Result<()> {
240        match self {
241            LazyDataToken::LazyValue {
242                header,
243                mut decoder,
244            } => decoder.skip_bytes(header.len.0),
245            LazyDataToken::LazyItemValue { len, mut decoder } => decoder.skip_bytes(len),
246            _ => Ok(()), // do nothing
247        }
248    }
249    /// Construct the data token into memory,
250    /// consuming the reader if necessary.
251    ///
252    /// If the token represents a lazy element value,
253    /// the inner decoder is read with string preservation.
254    pub fn into_owned(self) -> Result<DataToken> {
255        self.into_owned_with_strategy(ValueReadStrategy::Preserved)
256    }
257
258    /// Construct the data token into memory,
259    /// consuming the reader if necessary.
260    ///
261    /// If the token represents a lazy element value,
262    /// the inner decoder is read
263    /// with the given value reading strategy.
264    pub fn into_owned_with_strategy(self, strategy: ValueReadStrategy) -> Result<DataToken> {
265        match self {
266            LazyDataToken::ElementHeader(header) => Ok(DataToken::ElementHeader(header)),
267            LazyDataToken::ItemEnd => Ok(DataToken::ItemEnd),
268            LazyDataToken::ItemStart { len } => Ok(DataToken::ItemStart { len }),
269            LazyDataToken::PixelSequenceStart => Ok(DataToken::PixelSequenceStart),
270            LazyDataToken::SequenceEnd => Ok(DataToken::SequenceEnd),
271            LazyDataToken::SequenceStart { tag, len } => Ok(DataToken::SequenceStart { tag, len }),
272            LazyDataToken::LazyValue {
273                header,
274                mut decoder,
275            } => {
276                // use the stateful decoder to eagerly read the value
277                let value = match strategy {
278                    ValueReadStrategy::Interpreted => {
279                        decoder.read_value(&header).context(ReadElementValueSnafu)?
280                    }
281                    ValueReadStrategy::Preserved => decoder
282                        .read_value_preserved(&header)
283                        .context(ReadElementValueSnafu)?,
284                    ValueReadStrategy::Raw => decoder
285                        .read_value_bytes(&header)
286                        .context(ReadElementValueSnafu)?,
287                };
288                Ok(DataToken::PrimitiveValue(value))
289            }
290            LazyDataToken::LazyItemValue { len, mut decoder } => {
291                let mut data = Vec::new();
292                decoder
293                    .read_to_vec(len, &mut data)
294                    .context(ReadItemValueSnafu)?;
295                Ok(DataToken::ItemValue(data))
296            }
297        }
298    }
299
300    /// Retrieve a primitive element value from the token,
301    /// consuming the reader with the given reading strategy.
302    ///
303    /// The operation fails if the token does not represent an element value.
304    pub fn into_value_with_strategy(self, strategy: ValueReadStrategy) -> Result<PrimitiveValue> {
305        match self {
306            LazyDataToken::LazyValue {
307                header,
308                mut decoder,
309            } => {
310                // use the stateful decoder to eagerly read the value
311                match strategy {
312                    ValueReadStrategy::Interpreted => {
313                        decoder.read_value(&header).context(ReadElementValueSnafu)
314                    }
315                    ValueReadStrategy::Preserved => decoder
316                        .read_value_preserved(&header)
317                        .context(ReadElementValueSnafu),
318                    ValueReadStrategy::Raw => decoder
319                        .read_value_bytes(&header)
320                        .context(ReadElementValueSnafu),
321                }
322            }
323            _ => UnexpectedTokenTypeSnafu.fail(),
324        }
325    }
326
327    /// Retrieve a primitive element value from the token,
328    /// consuming the reader with the default reading strategy.
329    ///
330    /// The operation fails if the token does not represent an element value.
331    pub fn into_value(self) -> Result<PrimitiveValue> {
332        self.into_value_with_strategy(ValueReadStrategy::Preserved)
333    }
334
335    /// Read the bytes of a value into the given writer,
336    /// consuming the reader.
337    ///
338    /// This operation will not interpret the value,
339    /// like in the `Bytes` value reading strategy.
340    /// It works for both data elements and non-dataset items.
341    ///
342    /// The operation fails if
343    /// the token does not represent an element or item value.
344    pub fn read_value_into<W>(self, out: W) -> Result<()>
345    where
346        W: std::io::Write,
347    {
348        match self {
349            LazyDataToken::LazyValue {
350                header,
351                mut decoder,
352            } => {
353                let len = header.len.get().context(UndefinedLengthSnafu)?;
354                decoder.read_to(len, out).context(ReadElementValueSnafu)?;
355            }
356            LazyDataToken::LazyItemValue { len, mut decoder } => {
357                decoder.read_to(len, out).context(ReadItemValueSnafu)?;
358            }
359            _other => return UnexpectedTokenTypeSnafu.fail(),
360        };
361        Ok(())
362    }
363
364    /// Convert this token into a structured representation,
365    /// for diagnostics and error reporting purposes.
366    pub fn into_repr(self) -> LazyDataTokenRepr {
367        LazyDataTokenRepr::from(self)
368    }
369
370    /// Create a structured representation of this token,
371    /// for diagnostics and error reporting purposes.
372    pub fn repr(&self) -> LazyDataTokenRepr {
373        LazyDataTokenRepr::from(self)
374    }
375}
376
377impl<D> From<LazyDataToken<D>> for LazyDataTokenRepr {
378    fn from(token: LazyDataToken<D>) -> Self {
379        match token {
380            LazyDataToken::ElementHeader(h) => LazyDataTokenRepr::ElementHeader(h),
381            LazyDataToken::SequenceStart { tag, len } => {
382                LazyDataTokenRepr::SequenceStart { tag, len }
383            }
384            LazyDataToken::PixelSequenceStart => LazyDataTokenRepr::PixelSequenceStart,
385            LazyDataToken::SequenceEnd => LazyDataTokenRepr::SequenceEnd,
386            LazyDataToken::ItemStart { len } => LazyDataTokenRepr::ItemStart { len },
387            LazyDataToken::ItemEnd => LazyDataTokenRepr::ItemEnd,
388            LazyDataToken::LazyValue { header, decoder: _ } => {
389                LazyDataTokenRepr::LazyValue { header }
390            }
391            LazyDataToken::LazyItemValue { len, decoder: _ } => {
392                LazyDataTokenRepr::LazyItemValue { len }
393            }
394        }
395    }
396}
397
398impl<D> From<&LazyDataToken<D>> for LazyDataTokenRepr {
399    fn from(token: &LazyDataToken<D>) -> Self {
400        match *token {
401            LazyDataToken::ElementHeader(h) => LazyDataTokenRepr::ElementHeader(h),
402            LazyDataToken::SequenceStart { tag, len } => {
403                LazyDataTokenRepr::SequenceStart { tag, len }
404            }
405            LazyDataToken::PixelSequenceStart => LazyDataTokenRepr::PixelSequenceStart,
406            LazyDataToken::SequenceEnd => LazyDataTokenRepr::SequenceEnd,
407            LazyDataToken::ItemStart { len } => LazyDataTokenRepr::ItemStart { len },
408            LazyDataToken::ItemEnd => LazyDataTokenRepr::ItemEnd,
409            LazyDataToken::LazyValue { header, decoder: _ } => {
410                LazyDataTokenRepr::LazyValue { header }
411            }
412            LazyDataToken::LazyItemValue { len, decoder: _ } => {
413                LazyDataTokenRepr::LazyItemValue { len }
414            }
415        }
416    }
417}
418
/// A structured description of a lazy data token,
/// for diagnostics and error reporting purposes.
///
/// It mirrors the variants of [`LazyDataToken`],
/// except that the lazy variants hold no decoder.
#[derive(Debug, Clone, PartialEq)]
pub enum LazyDataTokenRepr {
    /// A data header of a primitive value.
    ElementHeader(DataElementHeader),
    /// The beginning of a sequence element.
    SequenceStart { tag: Tag, len: Length },
    /// The beginning of an encapsulated pixel data element.
    PixelSequenceStart,
    /// The ending delimiter of a sequence or encapsulated pixel data.
    SequenceEnd,
    /// The beginning of a new item in the sequence.
    ItemStart { len: Length },
    /// The ending delimiter of an item.
    ItemEnd,
    /// An element value yet to be fetched
    LazyValue {
        /// the header of the respective value
        header: DataElementHeader,
    },
    /// An item value yet to be fetched
    LazyItemValue {
        /// the full length of the value, always well defined
        len: u32,
    },
}
446
/// The type of delimiter: sequence or item.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum SeqTokenType {
    /// The delimiter belongs to a sequence.
    Sequence,
    /// The delimiter belongs to an item.
    Item,
}
453
/// Options for token generation
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq)]
#[non_exhaustive]
pub struct IntoTokensOptions {
    /// Whether to ignore all sequence lengths in the DICOM data set,
    /// resulting in sequences with undefined length.
    ///
    /// Set this to `true` when the sequence lengths in bytes might no longer be valid,
    /// such as when changing the character set,
    /// and as such data set sequence lengths should be replaced with undefined.
    /// When set to `false`,
    /// whether to retain or replace these lengths
    /// is left at the implementation's discretion:
    /// they may be retained, recalculated, or marked as undefined.
    pub force_invalidate_sq_length: bool,
}
470
471impl IntoTokensOptions {
472    pub fn new(force_invalidate_sq_length: bool) -> Self {
473        IntoTokensOptions {
474            force_invalidate_sq_length,
475        }
476    }
477}
478
/// A trait for converting structured DICOM data into a stream of data tokens.
pub trait IntoTokens {
    /// The iterator type through which tokens are obtained.
    type Iter: Iterator<Item = DataToken>;

    /// Convert the value into a stream of data tokens.
    ///
    /// The implementations in this module delegate to
    /// [`into_tokens_with_options`](IntoTokens::into_tokens_with_options)
    /// with the default options.
    fn into_tokens(self) -> Self::Iter;

    /// Convert the value into a stream of data tokens,
    /// generated according to the given options.
    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter;
}
487
// NOTE(review): both methods panic through `unreachable!()` when called;
// presumably a value of `EmptyObject` can never exist — confirm in `dicom_core`.
impl IntoTokens for dicom_core::header::EmptyObject {
    type Iter = std::iter::Empty<DataToken>;

    /// Never expected to be called.
    fn into_tokens(self) -> Self::Iter {
        unreachable!()
    }

    /// Never expected to be called.
    fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter {
        unreachable!()
    }
}
499
/// Token generator from a DICOM data element.
///
/// Each variant is a state of the generator;
/// the `Iterator` implementation moves from one state to the next
/// as tokens are produced.
pub enum DataElementTokens<I, P>
where
    I: IntoTokens,
{
    /// initial state, at the beginning of the element
    Start(
        // Option is used for easy taking from a &mut,
        // should always be Some in practice
        Option<DataElement<I, P>>,
        IntoTokensOptions,
    ),
    /// the header of a plain primitive element was read
    Header(
        // Option is used for easy taking from a &mut,
        // should always be Some in practice
        Option<DataElement<I, P>>,
    ),
    /// reading tokens from items
    Items(
        FlattenTokens<
            <dicom_core::value::C<AsItem<I>> as IntoIterator>::IntoIter,
            ItemTokens<I::Iter>,
        >,
    ),
    /// the header of encapsulated pixel data was read, will read
    /// the offset table next
    PixelData(
        /// Pixel fragments
        ///
        /// Option is used for easy taking from a &mut,
        /// should always be Some in practice
        Option<dicom_core::value::C<P>>,
        /// Frame offset table
        OffsetTableItemTokens<dicom_core::value::C<u32>>,
    ),
    /// the header and offset of encapsulated pixel data was read,
    /// fragments come next
    PixelDataFragments(
        FlattenTokens<
            <dicom_core::value::C<ItemValue<P>> as IntoIterator>::IntoIter,
            ItemValueTokens<P>,
        >,
    ),
    /// no more elements
    End,
}
547
548impl<I, P> Iterator for DataElementTokens<I, P>
549where
550    I: IntoTokens,
551    I: HasLength,
552    P: AsRef<[u8]>,
553{
554    type Item = DataToken;
555
556    fn next(&mut self) -> Option<Self::Item> {
557        let (out, next_state) = match self {
558            DataElementTokens::Start(elem, options) => {
559                let elem = elem.take().unwrap();
560                // data element header token
561
562                let mut header = *elem.header();
563                if options.force_invalidate_sq_length && elem.vr() == VR::SQ {
564                    header.len = Length::UNDEFINED;
565                }
566
567                let token = DataToken::from(header);
568                match token {
569                    DataToken::SequenceStart { tag, len } => {
570                        // retrieve sequence value, begin item sequence
571                        match elem.into_value() {
572                            v @ Value::Primitive(_) => {
573                                // this can only happen in malformed data (wrong VR),
574                                // but we try to handle it gracefully anyway:
575                                // return a header token instead and continue
576                                // as if it were a primitive value
577                                if len.is_defined() {
578                                    tracing::warn!(
579                                        "Unexpected primitive value after header {} with VR SQ",
580                                        tag
581                                    );
582                                    let adapted_elem =
583                                        DataElement::new_with_len(tag, VR::SQ, len, v);
584                                    (
585                                        Some(DataToken::ElementHeader(*adapted_elem.header())),
586                                        DataElementTokens::Header(Some(adapted_elem)),
587                                    )
588                                } else {
589                                    // without a defined length,
590                                    // it is too risky to provide any tokens
591                                    tracing::warn!(
592                                        "Unexpected primitive value after header {} with VR SQ, ignoring",
593                                        tag
594                                    );
595                                    (None, DataElementTokens::End)
596                                }
597                            }
598                            Value::PixelSequence { .. } => {
599                                // this is also invalid because
600                                // this is a data element sequence start,
601                                // not a pixel data fragment sequence start.
602                                // stop here and return nothing
603                                tracing::warn!(
604                                    "Unexpected pixel data fragments after header {} with VR SQ, ignored",
605                                    tag
606                                );
607                                (None, DataElementTokens::End)
608                            }
609                            Value::Sequence(seq) => {
610                                let seq = if options.force_invalidate_sq_length {
611                                    seq.into_items().into_vec().into()
612                                } else {
613                                    seq
614                                };
615
616                                let items: dicom_core::value::C<_> = seq
617                                    .into_items()
618                                    .into_iter()
619                                    .map(|o| AsItem(o.length(), o))
620                                    .collect();
621                                (
622                                    Some(token),
623                                    DataElementTokens::Items(
624                                        items.into_tokens_with_options(*options),
625                                    ),
626                                )
627                            }
628                        }
629                    }
630                    DataToken::PixelSequenceStart => {
631                        match elem.into_value() {
632                            Value::PixelSequence(seq) => {
633                                let (offset_table, fragments) = seq.into_parts();
634                                (
635                                    // begin pixel sequence
636                                    Some(DataToken::PixelSequenceStart),
637                                    DataElementTokens::PixelData(
638                                        Some(fragments),
639                                        OffsetTableItem(offset_table)
640                                            .into_tokens_with_options(*options),
641                                    ),
642                                )
643                            }
644                            Value::Primitive(_) | Value::Sequence { .. } => unreachable!(),
645                        }
646                    }
647                    _ => (
648                        Some(DataToken::ElementHeader(*elem.header())),
649                        DataElementTokens::Header(Some(elem)),
650                    ),
651                }
652            }
653            DataElementTokens::Header(elem) => {
654                let elem = elem.take().unwrap();
655                match elem.into_value() {
656                    Value::Sequence { .. } | Value::PixelSequence { .. } => unreachable!(),
657                    Value::Primitive(value) => {
658                        // return primitive value, done
659                        let token = DataToken::PrimitiveValue(value);
660                        (Some(token), DataElementTokens::End)
661                    }
662                }
663            }
664            DataElementTokens::Items(tokens) => {
665                if let Some(token) = tokens.next() {
666                    // bypass manual state transition
667                    return Some(token);
668                } else {
669                    // sequence end token, end
670                    (Some(DataToken::SequenceEnd), DataElementTokens::End)
671                }
672            }
673            DataElementTokens::PixelData(fragments, tokens) => {
674                if let Some(token) = tokens.next() {
675                    // bypass manual state transition
676                    return Some(token);
677                }
678                // pixel data fragments next
679                let fragments = fragments.take().unwrap();
680                let tokens: dicom_core::value::C<_> =
681                    fragments.into_iter().map(ItemValue).collect();
682                *self = DataElementTokens::PixelDataFragments(tokens.into_tokens());
683                // recursive call to ensure the retrieval of a data token
684                return self.next();
685            }
686            DataElementTokens::PixelDataFragments(tokens) => {
687                if let Some(token) = tokens.next() {
688                    // bypass manual state transition
689                    return Some(token);
690                } else {
691                    // sequence end token, end
692                    (Some(DataToken::SequenceEnd), DataElementTokens::End)
693                }
694            }
695            DataElementTokens::End => return None,
696        };
697        *self = next_state;
698
699        out
700    }
701}
702
703impl<I, P> IntoTokens for DataElement<I, P>
704where
705    I: IntoTokens,
706    I: HasLength,
707    P: AsRef<[u8]>,
708{
709    type Iter = DataElementTokens<I, P>;
710
711    fn into_tokens(self) -> Self::Iter {
712        //Avoid
713        self.into_tokens_with_options(Default::default())
714    }
715
716    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
717        DataElementTokens::Start(Some(self), options)
718    }
719}
720
/// Flatten a sequence of elements into their respective
/// token sequence in order.
#[derive(Debug, PartialEq)]
pub struct FlattenTokens<O, K> {
    // the entries still to be converted into tokens
    seq: O,
    // the token stream of the entry currently being consumed, if any
    tokens: Option<K>,
    // the options passed on to each entry when it is converted into tokens
    into_token_options: IntoTokensOptions,
}
729
730impl<O, K> Iterator for FlattenTokens<O, K>
731where
732    O: Iterator,
733    O::Item: IntoTokens<Iter = K>,
734    K: Iterator<Item = DataToken>,
735{
736    type Item = DataToken;
737
738    fn next(&mut self) -> Option<Self::Item> {
739        // ensure a token sequence
740        if self.tokens.is_none() {
741            match self.seq.next() {
742                Some(entries) => {
743                    self.tokens = Some(entries.into_tokens_with_options(self.into_token_options));
744                }
745                None => return None,
746            }
747        }
748
749        // retrieve the next token
750        match self.tokens.as_mut().map(|s| s.next()) {
751            Some(Some(token)) => Some(token),
752            Some(None) => {
753                self.tokens = None;
754                self.next()
755            }
756            None => unreachable!(),
757        }
758    }
759}
760
761impl<T> IntoTokens for Vec<T>
762where
763    T: IntoTokens,
764{
765    type Iter = FlattenTokens<<Vec<T> as IntoIterator>::IntoIter, <T as IntoTokens>::Iter>;
766
767    fn into_tokens(self) -> Self::Iter {
768        self.into_tokens_with_options(Default::default())
769    }
770
771    fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
772        FlattenTokens {
773            seq: self.into_iter(),
774            tokens: None,
775            into_token_options,
776        }
777    }
778}
779
780impl<T> IntoTokens for dicom_core::value::C<T>
781where
782    T: IntoTokens,
783{
784    type Iter =
785        FlattenTokens<<dicom_core::value::C<T> as IntoIterator>::IntoIter, <T as IntoTokens>::Iter>;
786
787    fn into_tokens(self) -> Self::Iter {
788        self.into_tokens_with_options(Default::default())
789    }
790
791    fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
792        FlattenTokens {
793            seq: self.into_iter(),
794            tokens: None,
795            into_token_options,
796        }
797    }
798}
799
/// A stream of tokens from a DICOM item.
///
/// Each variant is a state of the stream:
/// it goes from `Start` to `Object` and finally to `End`.
#[derive(Debug)]
pub enum ItemTokens<T> {
    /// Just started, an item header token will come next
    Start {
        // the length reported in the item start token
        len: Length,
        // Option so that the tokens can be taken out through a &mut
        // when moving on to the `Object` state
        object_tokens: Option<T>,
    },
    /// Will return tokens from the inner object, then an end of item token
    /// when it ends
    Object { object_tokens: T },
    /// Just ended, no more tokens
    End,
}
814
815impl<T> ItemTokens<T>
816where
817    T: Iterator<Item = DataToken>,
818{
819    pub fn new<O>(len: Length, object: O, options: IntoTokensOptions) -> Self
820    where
821        O: IntoTokens<Iter = T>,
822    {
823        let len = if len.0 != 0 && options.force_invalidate_sq_length {
824            Length::UNDEFINED
825        } else {
826            len
827        };
828        ItemTokens::Start {
829            len,
830            object_tokens: Some(object.into_tokens_with_options(options)),
831        }
832    }
833}
834
835impl<T> Iterator for ItemTokens<T>
836where
837    T: Iterator<Item = DataToken>,
838{
839    type Item = DataToken;
840
841    fn next(&mut self) -> Option<Self::Item> {
842        let (next_state, out) = match self {
843            ItemTokens::Start { len, object_tokens } => (
844                ItemTokens::Object {
845                    object_tokens: object_tokens.take().unwrap(),
846                },
847                Some(DataToken::ItemStart { len: *len }),
848            ),
849            ItemTokens::Object { object_tokens } => {
850                if let Some(token) = object_tokens.next() {
851                    return Some(token);
852                } else {
853                    (ItemTokens::End, Some(DataToken::ItemEnd))
854                }
855            }
856            ItemTokens::End => {
857                return None;
858            }
859        };
860
861        *self = next_state;
862        out
863    }
864}
865
866/// A newtype for interpreting the given data as an item.
867/// When converting a value of this type into tokens, the inner value's tokens
868/// will be surrounded by an item start and an item delimiter.
869#[derive(Debug, Clone, PartialEq)]
870pub struct AsItem<I>(Length, I);
871
872impl<I> IntoTokens for AsItem<I>
873where
874    I: IntoTokens,
875{
876    type Iter = ItemTokens<I::Iter>;
877
878    fn into_tokens(self) -> Self::Iter {
879        self.into_tokens_with_options(Default::default())
880    }
881
882    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
883        ItemTokens::new(self.0, self.1, options)
884    }
885}
886
887impl<I> HasLength for AsItem<I> {
888    fn length(&self) -> Length {
889        self.0
890    }
891}
892
893/// A newtype for wrapping a piece of raw data into an item.
894/// When converting a value of this type into tokens, the algorithm
895/// will create an item start with an explicit length, followed by
896/// an item value token, then an item delimiter.
897#[derive(Debug, Clone, PartialEq)]
898pub struct ItemValue<P>(P);
899
900impl<P> IntoTokens for ItemValue<P>
901where
902    P: AsRef<[u8]>,
903{
904    type Iter = ItemValueTokens<P>;
905
906    fn into_tokens(self) -> Self::Iter {
907        self.into_tokens_with_options(Default::default())
908    }
909
910    fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter {
911        ItemValueTokens::new(self.0, options)
912    }
913}
914
915#[derive(Debug)]
916pub enum ItemValueTokens<P> {
917    /// Just started, an item header token will come next. Takes a bool to configure if inner
918    /// lengths can be trusted to be valid
919    Start(Option<P>, bool),
920    /// Will return a token of the value
921    Value(P),
922    /// Will return an end of item token
923    Done,
924    /// Just ended, no more tokens
925    End,
926}
927
928impl<P> ItemValueTokens<P> {
929    #[inline]
930    pub fn new(value: P, into_tokens_options: IntoTokensOptions) -> Self {
931        ItemValueTokens::Start(Some(value), into_tokens_options.force_invalidate_sq_length)
932    }
933}
934
935impl<P> Iterator for ItemValueTokens<P>
936where
937    P: AsRef<[u8]>,
938{
939    type Item = DataToken;
940
941    fn next(&mut self) -> Option<Self::Item> {
942        let (out, next_state) = match self {
943            ItemValueTokens::Start(value, invalidate_len) => {
944                let value = value.take().unwrap();
945                let end_item = value.as_ref().is_empty();
946                let len = if *invalidate_len && !end_item {
947                    Length::UNDEFINED
948                } else {
949                    Length(value.as_ref().len() as u32)
950                };
951
952                (
953                    Some(DataToken::ItemStart { len }),
954                    if end_item {
955                        ItemValueTokens::Done
956                    } else {
957                        ItemValueTokens::Value(value)
958                    },
959                )
960            }
961            ItemValueTokens::Value(value) => (
962                Some(DataToken::ItemValue(value.as_ref().to_owned())),
963                ItemValueTokens::Done,
964            ),
965            ItemValueTokens::Done => (Some(DataToken::ItemEnd), ItemValueTokens::End),
966            ItemValueTokens::End => return None,
967        };
968
969        *self = next_state;
970        out
971    }
972}
973
974/// A newtype for wrapping a sequence of `u32`s into an offset table item.
975/// When converting a value of this type into tokens,
976/// the algorithm will create an item start with an explicit length,
977/// followed by an item value token,
978/// then an item delimiter.
979#[derive(Debug, Clone, PartialEq)]
980pub struct OffsetTableItem<P>(P);
981
982impl<P> IntoTokens for OffsetTableItem<P>
983where
984    P: AsRef<[u32]>,
985{
986    type Iter = OffsetTableItemTokens<P>;
987
988    fn into_tokens(self) -> Self::Iter {
989        self.into_tokens_with_options(Default::default())
990    }
991
992    fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter {
993        //There are no sequences here that might need to be invalidated
994        OffsetTableItemTokens::new(self.0)
995    }
996}
997
/// A stream of tokens from an item holding an offset table.
#[derive(Debug)]
pub enum OffsetTableItemTokens<P> {
    /// Initial state: the item header token is emitted next.
    Start(Option<P>),
    /// The token holding the actual offset table is emitted next.
    Value(P),
    /// The end of item token is emitted next.
    Done,
    /// Final state: there are no more tokens.
    End,
}

impl<P> OffsetTableItemTokens<P> {
    /// Creates the token stream for the given offset table.
    #[inline]
    pub fn new(value: P) -> Self {
        let offsets = Some(value);
        OffsetTableItemTokens::Start(offsets)
    }
}
1016
1017impl<P> Iterator for OffsetTableItemTokens<P>
1018where
1019    P: AsRef<[u32]>,
1020{
1021    type Item = DataToken;
1022
1023    fn next(&mut self) -> Option<Self::Item> {
1024        let (out, next_state) = match self {
1025            OffsetTableItemTokens::Start(value) => {
1026                let value = value.take().unwrap();
1027                let len = Length(value.as_ref().len() as u32 * 4);
1028
1029                (
1030                    Some(DataToken::ItemStart { len }),
1031                    if len == Length(0) {
1032                        OffsetTableItemTokens::Done
1033                    } else {
1034                        OffsetTableItemTokens::Value(value)
1035                    },
1036                )
1037            }
1038            OffsetTableItemTokens::Value(value) => (
1039                Some(DataToken::OffsetTable(value.as_ref().to_owned())),
1040                OffsetTableItemTokens::Done,
1041            ),
1042            OffsetTableItemTokens::Done => (Some(DataToken::ItemEnd), OffsetTableItemTokens::End),
1043            OffsetTableItemTokens::End => return None,
1044        };
1045
1046        *self = next_state;
1047        out
1048    }
1049}
1050
1051#[cfg(test)]
1052mod tests {
1053    use dicom_core::{
1054        DataElement, DataElementHeader, DicomValue, Length, PrimitiveValue, Tag, VR, dicom_value,
1055        header::HasLength, value::PixelFragmentSequence,
1056    };
1057
1058    use super::{DataToken, IntoTokens, IntoTokensOptions, LazyDataToken};
1059    use smallvec::smallvec;
1060
1061    use dicom_encoding::{
1062        decode::{basic::LittleEndianBasicDecoder, explicit_le::ExplicitVRLittleEndianDecoder},
1063        text::SpecificCharacterSet,
1064    };
1065
1066    use crate::stateful::decode::StatefulDecode;
1067    use crate::stateful::decode::StatefulDecoder;
1068
1069    fn is_stateful_decode<D: StatefulDecode>(_: &D) {}
1070
1071    /// A simple object representing a DICOM data set,
1072    /// used merely for testing purposes.
1073    #[derive(Debug, Clone)]
1074    struct SimpleObject<T>(Length, dicom_core::value::C<T>);
1075
1076    impl<T> HasLength for SimpleObject<T> {
1077        fn length(&self) -> Length {
1078            self.0
1079        }
1080    }
1081
1082    impl<T> IntoTokens for SimpleObject<T>
1083    where
1084        T: IntoTokens,
1085        T: HasLength,
1086    {
1087        type Iter = super::FlattenTokens<
1088            <dicom_core::value::C<T> as IntoIterator>::IntoIter,
1089            <T as IntoTokens>::Iter,
1090        >;
1091
1092        fn into_tokens(self) -> Self::Iter {
1093            self.into_tokens_with_options(Default::default())
1094        }
1095
1096        fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter {
1097            super::FlattenTokens {
1098                seq: self.1.into_iter(),
1099                tokens: None,
1100                into_token_options,
1101            }
1102        }
1103    }
1104
1105    #[test]
1106    fn basic_element_into_tokens() {
1107        let element = DataElement::new(
1108            Tag(0x0010, 0x0010),
1109            VR::PN,
1110            DicomValue::new("Doe^John".into()),
1111        );
1112
1113        let tokens: Vec<_> = element.clone().into_tokens().collect();
1114
1115        assert_eq!(
1116            &tokens,
1117            &[
1118                DataToken::ElementHeader(*element.header()),
1119                DataToken::PrimitiveValue("Doe^John".into()),
1120            ],
1121        )
1122    }
1123
1124    #[test]
1125    fn sequence_implicit_len_into_tokens() {
1126        let element = DataElement::new(
1127            Tag(0x0008, 0x2218),
1128            VR::SQ,
1129            DicomValue::new_sequence(
1130                vec![SimpleObject(
1131                    Length::UNDEFINED,
1132                    smallvec![
1133                        DataElement::new(
1134                            Tag(0x0008, 0x0100),
1135                            VR::SH,
1136                            DicomValue::new(dicom_value!(Strs, ["T-D1213 "])),
1137                        ),
1138                        DataElement::new(
1139                            Tag(0x0008, 0x0102),
1140                            VR::SH,
1141                            DicomValue::new(dicom_value!(Strs, ["SRT "])),
1142                        ),
1143                        DataElement::new(
1144                            Tag(0x0008, 0x0104),
1145                            VR::LO,
1146                            DicomValue::new(dicom_value!(Strs, ["Jaw region"])),
1147                        ),
1148                    ],
1149                )],
1150                Length::UNDEFINED,
1151            ),
1152        );
1153
1154        let tokens: Vec<_> = element.clone().into_tokens().collect();
1155
1156        assert_eq!(
1157            &tokens,
1158            &[
1159                DataToken::SequenceStart {
1160                    tag: Tag(0x0008, 0x2218),
1161                    len: Length::UNDEFINED,
1162                },
1163                DataToken::ItemStart {
1164                    len: Length::UNDEFINED
1165                },
1166                DataToken::ElementHeader(DataElementHeader {
1167                    tag: Tag(0x0008, 0x0100),
1168                    vr: VR::SH,
1169                    len: Length(8),
1170                }),
1171                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1172                    ["T-D1213 ".to_owned()].as_ref().into(),
1173                )),
1174                DataToken::ElementHeader(DataElementHeader {
1175                    tag: Tag(0x0008, 0x0102),
1176                    vr: VR::SH,
1177                    len: Length(4),
1178                }),
1179                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1180                    ["SRT ".to_owned()].as_ref().into()
1181                )),
1182                DataToken::ElementHeader(DataElementHeader {
1183                    tag: Tag(0x0008, 0x0104),
1184                    vr: VR::LO,
1185                    len: Length(10),
1186                }),
1187                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1188                    ["Jaw region".to_owned()].as_ref().into(),
1189                )),
1190                DataToken::ItemEnd,
1191                DataToken::SequenceEnd,
1192            ],
1193        )
1194    }
1195
1196    #[test]
1197    fn sequence_explicit_len_into_tokens() {
1198        let element = DataElement::new(
1199            Tag(0x0008, 0x2218),
1200            VR::SQ,
1201            DicomValue::new_sequence(
1202                vec![SimpleObject(
1203                    Length(46),
1204                    smallvec![
1205                        DataElement::new(
1206                            Tag(0x0008, 0x0100),
1207                            VR::SH,
1208                            DicomValue::new(dicom_value!(Strs, ["T-D1213 "])),
1209                        ),
1210                        DataElement::new(
1211                            Tag(0x0008, 0x0102),
1212                            VR::SH,
1213                            DicomValue::new(dicom_value!(Strs, ["SRT "])),
1214                        ),
1215                        DataElement::new(
1216                            Tag(0x0008, 0x0104),
1217                            VR::LO,
1218                            DicomValue::new(dicom_value!(Strs, ["Jaw region"])),
1219                        ),
1220                    ],
1221                )],
1222                Length(54),
1223            ),
1224        );
1225
1226        let tokens: Vec<_> = element.clone().into_tokens().collect();
1227
1228        assert_eq!(
1229            &tokens,
1230            &[
1231                DataToken::SequenceStart {
1232                    tag: Tag(0x0008, 0x2218),
1233                    len: Length(54),
1234                },
1235                DataToken::ItemStart { len: Length(46) },
1236                DataToken::ElementHeader(DataElementHeader {
1237                    tag: Tag(0x0008, 0x0100),
1238                    vr: VR::SH,
1239                    len: Length(8),
1240                }),
1241                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1242                    ["T-D1213 ".to_owned()].as_ref().into(),
1243                )),
1244                DataToken::ElementHeader(DataElementHeader {
1245                    tag: Tag(0x0008, 0x0102),
1246                    vr: VR::SH,
1247                    len: Length(4),
1248                }),
1249                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1250                    ["SRT ".to_owned()].as_ref().into()
1251                )),
1252                DataToken::ElementHeader(DataElementHeader {
1253                    tag: Tag(0x0008, 0x0104),
1254                    vr: VR::LO,
1255                    len: Length(10),
1256                }),
1257                DataToken::PrimitiveValue(PrimitiveValue::Strs(
1258                    ["Jaw region".to_owned()].as_ref().into(),
1259                )),
1260                DataToken::ItemEnd,
1261                DataToken::SequenceEnd,
1262            ],
1263        )
1264    }
1265
1266    #[test]
1267    fn lazy_dataset_token_value() {
1268        let data = b"1.234\0";
1269        let mut data = &data[..];
1270        let decoder = StatefulDecoder::new(
1271            &mut data,
1272            ExplicitVRLittleEndianDecoder::default(),
1273            LittleEndianBasicDecoder,
1274            SpecificCharacterSet::default(),
1275        );
1276
1277        is_stateful_decode(&decoder);
1278
1279        let token = LazyDataToken::LazyValue {
1280            header: DataElementHeader {
1281                tag: Tag(0x0020, 0x000D),
1282                vr: VR::UI,
1283                len: Length(6),
1284            },
1285            decoder,
1286        };
1287
1288        match token.into_owned().unwrap() {
1289            DataToken::PrimitiveValue(v) => {
1290                assert_eq!(v.to_raw_str(), "1.234\0",);
1291            }
1292            t => panic!("Unexpected type of token {:?}", t),
1293        }
1294    }
1295
1296    #[test]
1297    fn lazy_dataset_token_value_as_mut() {
1298        let data = b"1.234\0";
1299        let mut data = &data[..];
1300        let mut decoder = StatefulDecoder::new(
1301            &mut data,
1302            ExplicitVRLittleEndianDecoder::default(),
1303            LittleEndianBasicDecoder,
1304            SpecificCharacterSet::default(),
1305        );
1306
1307        is_stateful_decode(&decoder);
1308
1309        let token = LazyDataToken::LazyValue {
1310            header: DataElementHeader {
1311                tag: Tag(0x0020, 0x000D),
1312                vr: VR::UI,
1313                len: Length(6),
1314            },
1315            decoder: &mut decoder,
1316        };
1317
1318        match token.into_owned().unwrap() {
1319            DataToken::PrimitiveValue(v) => {
1320                assert_eq!(v.to_raw_str(), "1.234\0",);
1321            }
1322            t => panic!("Unexpected type of token {:?}", t),
1323        }
1324        assert_eq!(decoder.position(), 6);
1325    }
1326
1327    #[test]
1328    fn lazy_dataset_token_value_skip() {
1329        let data = b"1.234\0";
1330        let mut data = &data[..];
1331        let mut decoder = StatefulDecoder::new(
1332            &mut data,
1333            ExplicitVRLittleEndianDecoder::default(),
1334            LittleEndianBasicDecoder,
1335            SpecificCharacterSet::default(),
1336        );
1337
1338        is_stateful_decode(&decoder);
1339
1340        let token = LazyDataToken::LazyValue {
1341            header: DataElementHeader {
1342                tag: Tag(0x0020, 0x000D),
1343                vr: VR::UI,
1344                len: Length(6),
1345            },
1346            decoder: &mut decoder,
1347        };
1348
1349        token.skip().unwrap();
1350
1351        assert_eq!(decoder.position(), 6);
1352    }
1353
1354    /// A malformed data element (wrong VR) should not panic
1355    /// when converting it to tokens
1356    #[test]
1357    fn bad_element_to_tokens() {
1358        let e: DataElement = DataElement::new_with_len(
1359            Tag(0x0008, 0x0080),
1360            VR::SQ, // wrong VR
1361            Length(6),
1362            PrimitiveValue::from("Oops!"),
1363        );
1364
1365        // should not panic
1366        let tokens = e.into_tokens().collect::<Vec<_>>();
1367        // still expects 2 tokens (header + value)
1368        assert_eq!(tokens.len(), 2);
1369
1370        let e: DataElement = DataElement::new(
1371            Tag(0x7FE0, 0x0010),
1372            VR::SQ, // wrong VR
1373            PixelFragmentSequence::new_fragments(vec![
1374                // one fragment
1375                vec![0x55; 128],
1376            ]),
1377        );
1378
1379        // should not panic,
1380        // other than that there are no guarantees about the output
1381        let _ = e.into_tokens().collect::<Vec<_>>();
1382    }
1383}