dicom_object/
collector.rs

1//! DICOM collector API:
2//! high-level construct for reading DICOM data sets in controlled chunks.
3//!
4//! The DICOM collector API
5//! can be used for reading DICOM objects in cohesive portions.
6//! Unlike [`open_file`](crate::open_file) or [`OpenFileOptions`](crate::OpenFileOptions),
7//! this API makes it possible to read and process pieces of meta-data
8//! without gathering the entire data set in memory,
9//! making it appealing when working with data sets which are known to be large,
10//! such as multi-frame images.
11//!
12//! # Examples
13//!
14//! It is possible to open a DICOM file and collect its file meta information
15//! and main dataset.
16//!
17//! ```no_run
18//! # use dicom_object::InMemDicomObject;
19//! # use dicom_object::collector::DicomCollector;
20//! # use dicom_object::meta::FileMetaTable;
21//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! let mut collector = DicomCollector::open_file("file.dcm")?;
23//!
24//! let fmi: &FileMetaTable = collector.read_file_meta()?;
25//! let mut dset = InMemDicomObject::new_empty();
26//! collector.read_dataset_to_end(&mut dset)?; // populate `dset` with all elements
27//! # Ok(())
28//! # }
29//! ```
30//!
31//! But at the moment,
32//! this example will be no different from using the regular file opening API.
33//! To benefit from the collector,
34//! read smaller portions of the dataset at a time.
35//! For instance, you can first read patient/study attributes,
36//! place image pixel attributes in a separate object,
37//! and only then fetch the pixel data.
38//!
39//! ```no_run
40//! # use dicom_object::InMemDicomObject;
41//! # use dicom_object::collector::DicomCollector;
42//! # use dicom_object::meta::FileMetaTable;
43//! use dicom_core::Tag;
44//!
45//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
46//! let mut collector = DicomCollector::open_file("file.dcm")?;
47//!
48//! let fmi: &FileMetaTable = collector.read_file_meta()?;
49//! // read everything before the image pixel group
50//! let mut dset = InMemDicomObject::new_empty();
51//! collector.read_dataset_up_to(Tag(0x0028, 0x0000), &mut dset)?;
52//!
53//! // read from image pixel group and stop before the pixel data
54//! let mut pixel_image_dset = InMemDicomObject::new_empty();
55//! collector.read_dataset_up_to_pixeldata(&mut pixel_image_dset)?;
56//!
57//! // ... pixel data would be next
58//! # Ok(())
59//! # }
60//! ```
61//!
62//! Moreover, this API has methods to retrieve
63//! each pixel data fragment independently,
64//! which is a significant memory saver in multi-frame scenarios.
65//!
66//! ```no_run
67//! # use dicom_object::collector::DicomCollector;
68//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
69//! # let mut collector = DicomCollector::open_file("file.dcm")?;
70//! // save the basic offset table separately
71//! let mut offset_table = Vec::<u32>::new();
72//! collector.read_basic_offset_table(&mut offset_table)?;
73//!
74//! let mut buf = Vec::new();
75//! while let Some(len) = collector.read_next_fragment(&mut buf)? {
76//!    // should now have the entire fragment data
77//!    assert_eq!(buf.len() as u32, len);
78//!    // process fragment (e.g. accumulate to a frame buffer and save to a file),
79//!    // and clear the buffer when done
80//!    buf.clear();
81//! }
82//! # Ok(())
83//! # }
84//! ```
85//!
86//! More options for DICOM collecting are available
87//! by using [`DicomCollectorOptions`].
88//!
89//! ```no_run
90//! use dicom_dictionary_std::uids;
91//!
92//! # use dicom_object::{DicomCollectorOptions, InMemDicomObject};
93//! let mut collector = DicomCollectorOptions::new()
94//!     .read_preamble(dicom_object::file::ReadPreamble::Never)
95//!     .odd_length_strategy(dicom_object::file::OddLengthStrategy::Fail)
96//!     .expected_ts(uids::EXPLICIT_VR_LITTLE_ENDIAN)
97//!     .open_file("path/to/file_with_no_meta.dcm")?;
98//!
99//! let mut metadata = InMemDicomObject::new_empty();
100//! collector.read_dataset_up_to_pixeldata(&mut metadata)?;
101//! # Result::<(), Box<dyn std::error::Error>>::Ok(())
102//! ```
103
104use std::{
105    borrow::Cow,
106    fmt,
107    fs::File,
108    io::{BufRead, BufReader, Read, Seek},
109    path::Path,
110};
111
112use dicom_core::{
113    header::HasLength,
114    value::{PixelFragmentSequence, C},
115    DataDictionary, DataElement, DicomValue, Length, Tag, VR,
116};
117use dicom_dictionary_std::{tags, StandardDataDictionary};
118use dicom_encoding::{decode::DecodeFrom, TransferSyntaxIndex};
119use dicom_parser::{
120    dataset::{
121        lazy_read::{LazyDataSetReader, LazyDataSetReaderOptions},
122        DataToken, LazyDataToken,
123    },
124    DynStatefulDecoder, StatefulDecode, StatefulDecoder,
125};
126use dicom_transfer_syntax_registry::TransferSyntaxRegistry;
127use snafu::prelude::*;
128use snafu::Backtrace;
129
130use crate::{
131    file::ReadPreamble,
132    mem::{InMemElement, InMemFragment},
133    FileMetaTable, InMemDicomObject,
134};
135
136// re-export parsing options in public API
137pub use dicom_parser::dataset::read::OddLengthStrategy;
138pub use dicom_parser::stateful::decode::CharacterSetOverride;
139
/// Result type alias for collector operations.
///
/// The error type parameter defaults to the collector's own [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
141
// Opaque public wrapper: the concrete failure variants live in the
// crate-private `InnerError`, so they are not part of the public API.
/// An error which may occur when using the DICOM collector
#[derive(Debug, Snafu)]
pub struct Error(InnerError);
145
// NOTE(review): on variants without an explicit `#[snafu(display(...))]`,
// the `///` doc comment appears to double as the error's display message
// (see the `{ts_uid}` placeholder on `UnrecognizedTransferSyntax`).
// Rewording such a doc comment therefore changes user-visible output.
/// Inner error type for collector API
#[derive(Debug, Snafu)]
#[non_exhaustive]
pub(crate) enum InnerError {
    // raised when the file given to an `open_file*` constructor cannot be opened
    #[snafu(display("Could not open file '{}'", filename.display()))]
    OpenFile {
        filename: std::path::PathBuf,
        backtrace: Backtrace,
        source: std::io::Error,
    },
    /// Could not read preamble bytes
    ReadPreambleBytes {
        backtrace: Backtrace,
        source: std::io::Error,
    },
    // the source errors below are boxed on conversion (`Box::from`),
    // presumably to keep this enum small
    /// Could not create data set parser
    CreateParser {
        #[snafu(
            backtrace,
            source(from(dicom_parser::dataset::lazy_read::Error, Box::from))
        )]
        source: Box<dicom_parser::dataset::lazy_read::Error>,
    },
    /// Could not read data set token
    ReadToken {
        #[snafu(
            backtrace,
            source(from(dicom_parser::dataset::lazy_read::Error, Box::from))
        )]
        source: Box<dicom_parser::dataset::lazy_read::Error>,
    },
    // --- state errors: the requested operation came too late ---
    /// Illegal state for the requested operation: preamble has already been read
    IllegalStateStart { backtrace: Backtrace },
    /// Illegal state for the requested operation: file meta group has already been read
    IllegalStateMeta { backtrace: Backtrace },
    /// Illegal state for the requested operation: basic offset table has already been read
    IllegalStateInPixel { backtrace: Backtrace },
    /// DICOM value not found after non-empty element header
    MissingElementValue { backtrace: Backtrace },
    // the UID was not found in the transfer syntax index
    /// Unrecognized transfer syntax {ts_uid}
    UnrecognizedTransferSyntax {
        ts_uid: String,
        backtrace: Backtrace,
    },
    // no transfer syntax hint was available when a parser had to be created
    /// Could not guess source transfer syntax
    GuessTransferSyntax { backtrace: Backtrace },
    #[snafu(display("Unexpected token {token:?}"))]
    UnexpectedToken {
        token: dicom_parser::dataset::LazyDataTokenRepr,
        backtrace: Backtrace,
    },
    #[snafu(display("Unexpected data token {token:?}"))]
    UnexpectedDataToken {
        token: dicom_parser::dataset::DataToken,
        backtrace: Backtrace,
    },
    #[snafu(display("Could not collect data in {tag}"))]
    CollectDataValue {
        tag: Tag,
        #[snafu(backtrace, source(from(dicom_parser::dataset::Error, Box::from)))]
        source: Box<dicom_parser::dataset::Error>,
    },
    /// Premature data set end
    PrematureEnd { backtrace: Backtrace },
    /// Could not build file meta table
    BuildMetaTable {
        #[snafu(backtrace, source(from(crate::meta::Error, Box::new)))]
        source: Box<crate::meta::Error>,
    },
    /// Could not read item
    ReadItem {
        #[snafu(
            backtrace,
            source(from(dicom_parser::stateful::decode::Error, Box::from))
        )]
        source: Box<dicom_parser::stateful::decode::Error>,
    },
}
224
/// A builder type for setting up a DICOM collector.
///
/// Options are set through the chained methods of this type,
/// and a collector is obtained at the end
/// via `open_file` or `from_reader`.
///
/// See the [module-level documentation](crate::collector) for more details.
#[derive(Debug, Default)]
pub struct DicomCollectorOptions<D = StandardDataDictionary, R = TransferSyntaxRegistry> {
    /// Data element dictionary
    dict: D,
    /// Transfer syntax index (registry)
    ts_index: R,
    /// UID of transfer syntax suggestion
    /// (`None` when no transfer syntax is expected up front)
    ts_hint: Option<Cow<'static, str>>,
    /// Whether to read the 128-byte DICOM file preamble
    read_preamble: ReadPreamble,
    /// How to handle data elements with an odd length
    odd_length: OddLengthStrategy,
    /// Override for how text should be decoded
    charset_override: CharacterSetOverride,
}
243
244impl DicomCollectorOptions {
245    /// Create a new DICOM collector builder.
246    pub fn new() -> Self {
247        Self::default()
248    }
249}
250
251impl<D, R> DicomCollectorOptions<D, R> {
252    /// Override the data element dictionary with the one given,
253    /// potentially replacing the dictionary type.
254    ///
255    /// When not working with custom data dictionaries,
256    /// this method does not have to be called
257    /// (defaults to [`StandardDataDictionary`], which is zero sized).
258    pub fn dict<D2>(self, dict: D2) -> DicomCollectorOptions<D2, R> {
259        DicomCollectorOptions {
260            dict,
261            ts_index: self.ts_index,
262            ts_hint: self.ts_hint,
263            read_preamble: self.read_preamble,
264            odd_length: self.odd_length,
265            charset_override: self.charset_override,
266        }
267    }
268
269    /// Override the transfer syntax index (also called registry) with the one given,
270    /// potentially replacing the transfer syntax index type.
271    ///
272    /// When not working with custom transfer syntax registries,
273    /// this method does not have to be called
274    /// (defaults to [`TransferSyntaxRegistry`], which is zero sized).
275    pub fn ts_index<R2>(self, ts_index: R2) -> DicomCollectorOptions<D, R2> {
276        DicomCollectorOptions {
277            dict: self.dict,
278            ts_index,
279            ts_hint: self.ts_hint,
280            read_preamble: self.read_preamble,
281            odd_length: self.odd_length,
282            charset_override: self.charset_override,
283        }
284    }
285
286    /// Set the UID of the transfer syntax expected from the source.
287    pub fn expected_ts(mut self, ts_uid: impl Into<Cow<'static, str>>) -> Self {
288        self.ts_hint = Some(ts_uid.into());
289        self
290    }
291
292    /// Unset the UID of the transfer syntax expected from the source.
293    pub fn unset_expected_ts(mut self) -> Self {
294        self.ts_hint = None;
295        self
296    }
297
298    /// Set whether to read the 128-byte DICOM file preamble.
299    pub fn read_preamble(mut self, option: ReadPreamble) -> Self {
300        self.read_preamble = option;
301        self
302    }
303
304    /// Set how data elements with an odd length should be handled
305    pub fn odd_length_strategy(mut self, option: OddLengthStrategy) -> Self {
306        self.odd_length = option;
307        self
308    }
309
310    /// Set how text of specific value representations should be decoded
311    pub fn charset_override(mut self, option: CharacterSetOverride) -> Self {
312        self.charset_override = option;
313        self
314    }
315
316    /// Proceed with opening a file for DICOM collecting.
317    pub fn open_file(
318        self,
319        filename: impl AsRef<Path>,
320    ) -> Result<DicomCollector<BufReader<File>, D, R>>
321    where
322        R: TransferSyntaxIndex,
323    {
324        let filename = filename.as_ref();
325        let reader = BufReader::new(File::open(filename).context(OpenFileSnafu { filename })?);
326
327        Ok(DicomCollector {
328            source: CollectionSource::new(
329                reader,
330                self.ts_index,
331                self.odd_length,
332                self.charset_override,
333            ),
334            dictionary: self.dict,
335            ts_hint: self.ts_hint,
336            file_meta: None,
337            read_preamble: self.read_preamble,
338            state: Default::default(),
339        })
340    }
341
342    /// Create a DICOM collector which will read from the given source.
343    pub fn from_reader<S>(self, reader: BufReader<S>) -> DicomCollector<BufReader<S>, D, R>
344    where
345        S: Read + Seek,
346        R: TransferSyntaxIndex,
347    {
348        DicomCollector {
349            source: CollectionSource::new(
350                reader,
351                self.ts_index,
352                self.odd_length,
353                self.charset_override,
354            ),
355            dictionary: self.dict,
356            ts_hint: self.ts_hint,
357            file_meta: None,
358            read_preamble: self.read_preamble,
359            state: Default::default(),
360        }
361    }
362}
363
/// The byte source behind a collector.
///
/// It starts as `Raw`, holding the reader
/// together with the options needed to build a parser,
/// and becomes `Parser` once a transfer syntax has been chosen
/// and a data set parser was created from the reader.
enum CollectionSource<S, R> {
    Raw {
        /// the actual raw byte data as a reader
        /// (wrapped in an `Option` so that it can be moved out
        /// when the parser is constructed)
        reader: Option<S>,
        /// Transfer syntax index (registry)
        ts_index: R,
        /// the strategy for reading data elements with an odd length
        /// (needs to be retained until a parser is constructed)
        odd_length: OddLengthStrategy,
        /// override for how text should be decoded
        charset_override: CharacterSetOverride,
    },
    /// a lazy data set parser which took ownership of the raw reader
    Parser(LazyDataSetReader<DynStatefulDecoder<S>>),
}
378
379impl<S, R> fmt::Debug for CollectionSource<S, R>
380where
381    R: fmt::Debug,
382{
383    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384        match self {
385            CollectionSource::Raw {
386                reader,
387                ts_index,
388                odd_length,
389                charset_override,
390            } => f
391                .debug_struct("Raw")
392                .field("ts_index", ts_index)
393                .field("odd_length", odd_length)
394                .field("charset_override", charset_override)
395                .field(
396                    "reader",
397                    &match reader {
398                        Some(_) => "Some(_)",
399                        None => "None",
400                    },
401                )
402                .finish(),
403            CollectionSource::Parser(_) => f.write_str("Parser(..)"),
404        }
405    }
406}
407
408impl<S, R> CollectionSource<S, R>
409where
410    S: Read + Seek,
411    R: TransferSyntaxIndex,
412{
413    fn new(
414        raw_source: S,
415        ts_index: R,
416        odd_length: OddLengthStrategy,
417        charset_override: CharacterSetOverride,
418    ) -> Self {
419        CollectionSource::Raw {
420            reader: Some(raw_source),
421            ts_index,
422            odd_length,
423            charset_override,
424        }
425    }
426
427    fn has_parser(&self) -> bool {
428        matches!(self, CollectionSource::Parser(_))
429    }
430
431    fn raw_reader_mut(&mut self) -> &mut S {
432        match self {
433            CollectionSource::Raw { reader, .. } => reader.as_mut().unwrap(),
434            CollectionSource::Parser(_) => {
435                panic!("cannot retrieve raw reader after setting parser")
436            }
437        }
438    }
439
440    fn set_parser_with_ts(
441        &mut self,
442        ts_uid: &str,
443    ) -> Result<&mut LazyDataSetReader<DynStatefulDecoder<S>>> {
444        match self {
445            CollectionSource::Raw {
446                reader: src,
447                ts_index,
448                odd_length,
449                charset_override,
450            } => {
451                let src = src.take().unwrap();
452
453                // look up transfer syntax
454                let ts = ts_index
455                    .get(ts_uid)
456                    .context(UnrecognizedTransferSyntaxSnafu {
457                        ts_uid: ts_uid.to_string(),
458                    })?;
459
460                let mut options = LazyDataSetReaderOptions::default();
461                options.odd_length = *odd_length;
462                options.charset_override = *charset_override;
463                *self = CollectionSource::Parser(
464                    LazyDataSetReader::new_with_ts_options(src, ts, options)
465                        .context(CreateParserSnafu)?,
466                );
467                let CollectionSource::Parser(parser) = self else {
468                    unreachable!();
469                };
470                Ok(parser)
471            }
472            CollectionSource::Parser(decoder) => Ok(decoder),
473        }
474    }
475
476    fn parser(&mut self) -> &mut LazyDataSetReader<DynStatefulDecoder<S>> {
477        match self {
478            CollectionSource::Raw { .. } => panic!("parser transfer syntax not set"),
479            CollectionSource::Parser(parser) => parser,
480        }
481    }
482}
483
/// A DICOM collector set up to read from a specific source.
///
/// The collector keeps track of how far into the source it has read,
/// so that the preamble, the file meta group, the main data set
/// and the pixel data can be requested in separate steps.
///
/// See the [module-level documentation](crate::collector) for more details.
pub struct DicomCollector<S, D = StandardDataDictionary, R = TransferSyntaxRegistry> {
    /// the source of byte data to read from
    source: CollectionSource<S, R>,
    /// data dictionary
    dictionary: D,
    /// UID of transfer syntax suggestion
    /// (`None` if no transfer syntax is known so far)
    ts_hint: Option<Cow<'static, str>>,
    /// file meta group information table
    /// (`None` until it is read, or after it is taken out)
    file_meta: Option<FileMetaTable>,
    /// Whether to read the 128-byte DICOM file preamble
    /// (needs to be retained until the preamble is read)
    read_preamble: ReadPreamble,
    /// the state of the collector so as to keep track of what's been read
    state: CollectorState,
}
502
/// A state indicator of what has been collected so far.
///
/// The variants are declared in the order
/// in which a collector is expected to go through them.
#[derive(Debug, Default, Copy, Clone, PartialEq)]
enum CollectorState {
    /// The collector is in the initial state.
    #[default]
    Start,
    /// The collector has read the preamble,
    /// or the preamble has been requested but not collected.
    Preamble,
    /// The collector has read the file meta group data set.
    ///
    /// If this state is reached,
    /// `file_meta` is guaranteed to be `Some`.
    FileMeta,
    /// The collector has read some portion of the main data set.
    InDataset,
    /// The collector has read the pixel data element header.
    InPixelData,
}
522
523impl<S, D, R> fmt::Debug for DicomCollector<S, D, R>
524where
525    D: fmt::Debug,
526    R: fmt::Debug,
527{
528    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
529        f.debug_struct("DicomCollector")
530            .field("source", &self.source)
531            .field("dictionary", &self.dictionary)
532            .field("ts_hint", &self.ts_hint)
533            .field(
534                "file_meta",
535                if self.file_meta.is_some() {
536                    &"Some(...)"
537                } else {
538                    &"None"
539                },
540            )
541            .field("read_preamble", &self.read_preamble)
542            .field("state", &self.state)
543            .finish()
544    }
545}
546
547impl<S> DicomCollector<BufReader<S>>
548where
549    S: Read + Seek,
550{
551    /// Create a new DICOM dataset collector
552    /// which reads from a buffered reader.
553    ///
554    /// The standard data dictionary and standard transfer syntax registry are used.
555    /// The transfer syntax is guessed from the file meta group data set.
556    pub fn new(reader: BufReader<S>) -> Self {
557        DicomCollector {
558            source: CollectionSource::new(
559                reader,
560                TransferSyntaxRegistry,
561                Default::default(),
562                Default::default(),
563            ),
564            dictionary: StandardDataDictionary,
565            ts_hint: None,
566            file_meta: None,
567            read_preamble: Default::default(),
568            state: Default::default(),
569        }
570    }
571
572    /// Create a new DICOM dataset collector
573    /// which reads from a buffered reader
574    /// and expects the given transfer syntax.
575    ///
576    /// The standard data dictionary is used.
577    pub fn new_with_ts(
578        reader: BufReader<S>,
579        transfer_syntax: impl Into<Cow<'static, str>>,
580    ) -> Self {
581        DicomCollector {
582            source: CollectionSource::new(
583                reader,
584                TransferSyntaxRegistry,
585                Default::default(),
586                Default::default(),
587            ),
588            dictionary: StandardDataDictionary,
589            ts_hint: Some(transfer_syntax.into()),
590            file_meta: None,
591            read_preamble: Default::default(),
592            state: Default::default(),
593        }
594    }
595}
596
597impl DicomCollector<BufReader<File>> {
598    /// Create a new DICOM dataset collector
599    /// which reads from a standard DICOM file.
600    ///
601    /// The standard data dictionary is used.
602    /// The transfer syntax is guessed from the file meta group data set.
603    pub fn open_file(filename: impl AsRef<Path>) -> Result<Self> {
604        Self::open_file_with_dict(filename, StandardDataDictionary)
605    }
606}
607
608impl<D> DicomCollector<BufReader<File>, D>
609where
610    D: DataDictionary + Clone,
611{
612    // --- constructors ---
613
614    /// Create a new DICOM dataset collector
615    /// which reads from a standard DICOM file.
616    ///
617    /// The transfer syntax is guessed from the file meta group data set.
618    /// The standard transfer syntax registry is used.
619    pub fn open_file_with_dict(filename: impl AsRef<Path>, dict: D) -> Result<Self> {
620        let filename = filename.as_ref();
621        let reader = BufReader::new(File::open(filename).context(OpenFileSnafu { filename })?);
622        Ok(Self::new_with_dict(reader, dict))
623    }
624}
625
626impl<S, D> DicomCollector<BufReader<S>, D>
627where
628    D: DataDictionary + Clone,
629    S: Read + Seek,
630{
631    /// Create a new DICOM dataset collector
632    /// using the given data element dictionary,
633    /// which reads from a buffered reader.
634    ///
635    /// The transfer syntax is guessed from the file meta group data set.
636    /// The standard transfer syntax registry is used.
637    fn new_with_dict(reader: BufReader<S>, dictionary: D) -> Self {
638        DicomCollector {
639            source: CollectionSource::new(
640                reader,
641                TransferSyntaxRegistry,
642                Default::default(),
643                Default::default(),
644            ),
645            dictionary,
646            ts_hint: None,
647            file_meta: None,
648            read_preamble: Default::default(),
649            state: Default::default(),
650        }
651    }
652}
653
654impl<S, D, R> DicomCollector<BufReader<S>, D, R>
655where
656    D: DataDictionary + Clone,
657    S: Read + Seek,
658    R: TransferSyntaxIndex,
659{
660    /// Read a DICOM file preamble from the given source.
661    ///
662    /// Returns the 128 bytes preceding the DICOM magic code,
663    /// if they were found,
664    /// or according to the `read_preamble` option on construction.
665    pub fn read_preamble(&mut self) -> Result<Option<[u8; 128]>> {
666        ensure!(self.state == CollectorState::Start, IllegalStateStartSnafu);
667
668        if self.read_preamble == ReadPreamble::Never {
669            self.state = CollectorState::Preamble;
670            return Ok(None);
671        }
672
673        let reader = self.source.raw_reader_mut();
674        let preamble = {
675            if self.read_preamble == ReadPreamble::Always {
676                // always assume that there is a preamble
677                let mut buf = [0; 128];
678                reader
679                    .read_exact(&mut buf)
680                    .context(ReadPreambleBytesSnafu)?;
681                Some(buf)
682            } else {
683                // fill the buffer and try to identify where the magic code is
684                let buf = reader.fill_buf().context(ReadPreambleBytesSnafu)?;
685                if buf.len() < 4 {
686                    return PrematureEndSnafu.fail().map_err(From::from);
687                }
688
689                if buf.len() >= 128 + 4 && &buf[128..132] == b"DICM" {
690                    let out: [u8; 128] = std::convert::TryInto::try_into(&buf[0..128])
691                        .expect("128 byte slice into array");
692                    reader.consume(128);
693                    Some(out)
694                } else if &buf[0..4] == b"DICM" {
695                    // assume that there is no preamble after all
696                    None
697                } else {
698                    // take the risk and insist on the first 128 bytes
699                    let mut out = [0; 128];
700                    reader
701                        .read_exact(&mut out)
702                        .context(ReadPreambleBytesSnafu)?;
703                    Some(out)
704                }
705            }
706        };
707        self.state = CollectorState::Preamble;
708        Ok(preamble)
709    }
710
711    /// Read a file meta table from the source,
712    /// retaining it in the reader for future reference.
713    ///
714    /// This method _must_ be called
715    /// whenever the source data is known to have a file meta information group.
716    /// Otherwise, it may fail to recognize the transfer syntax
717    /// and fail on the first data set reading request.
718    ///
719    /// If the file meta information has already been collected,
720    /// the previously saved file meta table is returned.
721    pub fn read_file_meta(&mut self) -> Result<&FileMetaTable> {
722        // check if we are in good position to read the FMI,
723        // or if we need to collect other things first
724
725        if self.state == CollectorState::Start {
726            // read preamble
727            self.read_preamble()?;
728        }
729
730        if self.state == CollectorState::Preamble {
731            let reader = self.source.raw_reader_mut();
732            self.file_meta = Some(FileMetaTable::from_reader(reader).context(BuildMetaTableSnafu)?);
733
734            self.state = CollectorState::FileMeta;
735        }
736
737        self.file_meta
738            .as_ref()
739            .context(IllegalStateMetaSnafu)
740            .map_err(From::from)
741    }
742
743    /// Take the file meta information group table saved in this collector,
744    /// if this information has already been read.
745    ///
746    /// This table will only be available
747    /// after first reading the file meta group
748    /// via [`read_file_meta`](Self::read_file_meta).
749    /// Moreover, main data set reading may be compromised
750    /// if the transfer syntax was not resolved first
751    /// by calling one of the data set reading methods beforehand
752    /// (which triggers a transfer syntax resolution).
753    ///
754    /// # Example
755    ///
756    /// ```no_run
757    /// # use dicom_object::{DicomCollector, FileMetaTable, InMemDicomObject};
758    /// let mut collector = DicomCollector::open_file("file.dcm")?;
759    ///
760    /// // read_file_meta() only returns a reference
761    /// let _: &FileMetaTable  = collector.read_file_meta()?;
762    /// // read some data from the main data set
763    /// let mut main_dataset = InMemDicomObject::new_empty();
764    /// collector.read_dataset_up_to_pixeldata(&mut main_dataset)?;
765    ///
766    /// // take the table out of the collector,
767    /// // as it is no longer needed
768    /// let file_meta: FileMetaTable = collector.take_file_meta()
769    ///     .expect("should have file meta information");
770    ///
771    /// // can still read more data afterwards
772    /// let mut fragment_data = Vec::new();
773    /// collector.read_next_fragment(&mut fragment_data)?;
774    /// # Result::<(), Box<dyn std::error::Error>>::Ok(())
775    /// ```
776    #[inline]
777    pub fn take_file_meta(&mut self) -> Option<FileMetaTable> {
778        self.file_meta.take()
779    }
780
781    /// Read a DICOM data set until it finds its end,
782    /// accumulating the elements into an in-memory object.
783    pub fn read_dataset_to_end(&mut self, to: &mut InMemDicomObject<D>) -> Result<()> {
784        let parser = if !self.source.has_parser() {
785            let ts = {
786                if self.ts_hint.is_none() {
787                    self.populate_ts_hint();
788                }
789                self.ts_hint.as_deref()
790            }
791            .context(GuessTransferSyntaxSnafu)?;
792            self.source.set_parser_with_ts(ts)?
793        } else {
794            self.source.parser()
795        };
796
797        Self::collect_to_object(&mut self.state, parser, false, None, to, &self.dictionary)
798    }
799
800    /// Read a DICOM data set until it reaches the given stop tag
801    /// (excluding it) or finds the end of the data set,
802    /// accumulating the elements into an in-memory object.
803    pub fn read_dataset_up_to(
804        &mut self,
805        stop_tag: Tag,
806        to: &mut InMemDicomObject<D>,
807    ) -> Result<()> {
808        let parser = if !self.source.has_parser() {
809            let ts = {
810                if self.ts_hint.is_none() {
811                    self.populate_ts_hint();
812                }
813                self.ts_hint.as_deref()
814            }
815            .context(GuessTransferSyntaxSnafu)?;
816            self.source.set_parser_with_ts(ts)?
817        } else {
818            self.source.parser()
819        };
820
821        Self::collect_to_object(
822            &mut self.state,
823            parser,
824            false,
825            Some(stop_tag),
826            to,
827            &self.dictionary,
828        )
829    }
830
831    /// Read a DICOM data set until it reaches the object's pixel data.
832    ///
833    /// This is equivalent to `collector.read_dataset_up_to(tags::PIXEL_DATA, to)`.
834    #[inline]
835    pub fn read_dataset_up_to_pixeldata(&mut self, to: &mut InMemDicomObject<D>) -> Result<()> {
836        self.read_dataset_up_to(dicom_dictionary_std::tags::PIXEL_DATA, to)
837    }
838
839    /// Read the DICOM data set until it reaches the pixel data
840    /// (if it has not done so yet)
841    /// and collects the next pixel data fragment,
842    /// appending the bytes into the given destination.
843    /// Returns the number of bytes of the fragment retrieved.
844    ///
845    /// If the data set contains native pixel data,
846    /// the entire value data in the _Pixel Data_ attribute
847    /// is interpreted as a single fragment.
848    ///
849    /// The basic offset table is treated as a fragment,
850    /// which means that the first call to `read_next_fragment`
851    /// on a DICOM object with encapsulated pixel data
852    /// will push the byte values of the basic offset table
853    /// in little endian.
854    /// To retrieve the offset table as a sequence of 32-bit length values,
855    /// use [`read_basic_offset_table`](Self::read_basic_offset_table)
856    /// before reading any fragment.
857    pub fn read_next_fragment(&mut self, to: &mut Vec<u8>) -> Result<Option<u32>> {
858        if self.state == CollectorState::Start || self.state == CollectorState::Preamble {
859            // read file meta information group
860            self.read_file_meta()?;
861        }
862
863        // initialize parser if necessary
864        if !self.source.has_parser() {
865            let ts = {
866                if self.ts_hint.is_none() {
867                    self.populate_ts_hint();
868                }
869                self.ts_hint.as_deref()
870            }
871            .context(GuessTransferSyntaxSnafu)?;
872            self.source.set_parser_with_ts(ts)?;
873        } else {
874            self.source.parser();
875        }
876
877        if self.state != CollectorState::InPixelData {
878            // skip until we reach the pixel data
879
880            self.skip_until(|token| {
881                match token {
882                    // catch either native pixel data
883                    LazyDataToken::ElementHeader(header)
884                        if header.tag == tags::PIXEL_DATA && header.length().is_defined() =>
885                    {
886                        true
887                    }
888                    // or start of pixel data sequencce
889                    LazyDataToken::PixelSequenceStart => true,
890                    _ => false,
891                }
892            })?;
893
894            self.state = CollectorState::InPixelData;
895        }
896
897        let parser = if !self.source.has_parser() {
898            let ts = {
899                if self.ts_hint.is_none() {
900                    self.populate_ts_hint();
901                }
902                self.ts_hint.as_deref()
903            }
904            .context(GuessTransferSyntaxSnafu)?;
905            self.source.set_parser_with_ts(ts)?
906        } else {
907            self.source.parser()
908        };
909
910        // proceed with fetching tokens,
911        // return the first fragment data found
912        while let Some(token) = parser.advance() {
913            match token.context(ReadTokenSnafu)? {
914                // native pixel data
915                LazyDataToken::LazyValue { header, decoder } => {
916                    debug_assert!(header.length().is_defined());
917                    let len = header.length().0;
918                    decoder.read_to_vec(len, to).context(ReadItemSnafu)?;
919                    return Ok(Some(len));
920                }
921                // fragment item data
922                LazyDataToken::LazyItemValue { len, decoder } => {
923                    decoder.read_to_vec(len, to).context(ReadItemSnafu)?;
924                    return Ok(Some(len));
925                }
926                // empty item
927                // (must be accounted for even though it yields no value token)
928                LazyDataToken::ItemStart { len: Length(0) } => return Ok(Some(0)),
929                _ => {
930                    // no-op
931                }
932            }
933        }
934
935        Ok(None)
936    }
937
938    /// Read the DICOM data set until it reaches the pixel data
939    /// (if it has not done so yet)
940    /// and collects the basic offset table.
941    ///
942    /// Returns the byte length of the basic offset table
943    /// on success.
944    /// Returns `Ok(None)` if the DICOM object has no pixel data
945    /// or has native pixel data,
946    /// in which case there is no basic offset table.
947    /// Returns an error if the collector has alread read too far
948    /// to obtain the basic offset table.
949    pub fn read_basic_offset_table(&mut self, to: &mut Vec<u32>) -> Result<Option<u32>> {
950        if self.state == CollectorState::InPixelData {
951            return IllegalStateInPixelSnafu.fail().map_err(From::from);
952        }
953
954        if self.state == CollectorState::Start || self.state == CollectorState::Preamble {
955            // read file meta information group
956            self.read_file_meta()?;
957        }
958
959        // initialize parser if necessary
960        if !self.source.has_parser() {
961            let ts = {
962                if self.ts_hint.is_none() {
963                    self.populate_ts_hint();
964                }
965                self.ts_hint.as_deref()
966            }
967            .context(GuessTransferSyntaxSnafu)?;
968            self.source.set_parser_with_ts(ts)?;
969        } else {
970            self.source.parser();
971        }
972
973        if self.state != CollectorState::InPixelData {
974            // skip until we reach the pixel data
975
976            self.skip_until(|token| {
977                match token {
978                    // catch either native pixel data
979                    LazyDataToken::ElementHeader(header)
980                        if header.tag == tags::PIXEL_DATA && header.length().is_defined() =>
981                    {
982                        true
983                    }
984                    // or start of pixel data sequencce
985                    LazyDataToken::PixelSequenceStart => true,
986                    _ => false,
987                }
988            })?;
989
990            self.state = CollectorState::InPixelData;
991        }
992
993        let parser = if !self.source.has_parser() {
994            let ts = {
995                if self.ts_hint.is_none() {
996                    self.populate_ts_hint();
997                }
998                self.ts_hint.as_deref()
999            }
1000            .context(GuessTransferSyntaxSnafu)?;
1001            self.source.set_parser_with_ts(ts)?
1002        } else {
1003            self.source.parser()
1004        };
1005
1006        // proceed with fetching tokens,
1007        // return the first fragment data found
1008        while let Some(token) = parser.advance() {
1009            match token.context(ReadTokenSnafu)? {
1010                // native pixel data, no offset table
1011                LazyDataToken::LazyValue { .. } => {
1012                    return Ok(None);
1013                }
1014                // fragment item data
1015                LazyDataToken::LazyItemValue { len, decoder } => {
1016                    decoder.read_u32_to_vec(len, to).context(ReadItemSnafu)?;
1017                    return Ok(Some(len));
1018                }
1019                // empty item
1020                // (must be accounted for even though it yields no value token)
1021                LazyDataToken::ItemStart { len: Length(0) } => return Ok(Some(0)),
1022                _ => {
1023                    // no-op
1024                }
1025            }
1026        }
1027
1028        Ok(None)
1029    }
1030
1031    // --- private methods ---
1032
1033    #[inline]
1034    fn populate_ts_hint(&mut self) {
1035        if let Some(meta) = self.file_meta.as_ref() {
1036            self.ts_hint = Some(Cow::Owned(meta.transfer_syntax().to_string()));
1037        }
1038    }
1039
1040    fn skip_until(
1041        &mut self,
1042        mut pred: impl FnMut(
1043            &LazyDataToken<
1044                &mut StatefulDecoder<Box<dyn DecodeFrom<BufReader<S>> + 'static>, BufReader<S>>,
1045            >,
1046        ) -> bool,
1047    ) -> Result<bool> {
1048        let parser = self.source.parser();
1049        while let Some(token) = parser.advance() {
1050            let token = token.context(ReadTokenSnafu)?;
1051            if pred(&token) {
1052                return Ok(true);
1053            }
1054            // skip through values if necessary
1055            token.skip().context(ReadItemSnafu)?;
1056            self.state = CollectorState::InDataset;
1057            // continue
1058        }
1059
1060        Ok(false)
1061    }
1062
1063    // --- private helper functions ---
1064
1065    /// Collect DICOM data elements onto an in-memory DICOM object by consuming a data set parser.
1066    fn collect_to_object(
1067        state: &mut CollectorState,
1068        token_src: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1069        in_item: bool,
1070        read_until: Option<Tag>,
1071        to: &mut InMemDicomObject<D>,
1072        dict: &D,
1073    ) -> Result<()> {
1074        let mut elements = Vec::new();
1075        Self::collect_elements(state, token_src, in_item, read_until, &mut elements, dict)?;
1076        to.extend(elements);
1077        Ok(())
1078    }
1079
1080    /// Collect DICOM data elements onto a vector by consuming a data set parser.
1081    fn collect_elements(
1082        state: &mut CollectorState,
1083        token_src: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1084        in_item: bool,
1085        read_until: Option<Tag>,
1086        to: &mut Vec<DataElement<InMemDicomObject<D>>>,
1087        dict: &D,
1088    ) -> Result<()> {
1089        // perform a structured parsing of incoming tokens
1090        while let Some(token) = token_src.peek().context(ReadTokenSnafu)? {
1091            let token = token.clone();
1092            let elem = match token {
1093                DataToken::PixelSequenceStart => {
1094                    // stop reading if reached `read_until` tag
1095                    if read_until
1096                        .map(|t| t <= Tag(0x7fe0, 0x0010))
1097                        .unwrap_or(false)
1098                    {
1099                        break;
1100                    }
1101                    *state = CollectorState::InPixelData;
1102                    token_src.advance();
1103                    let value = Self::build_encapsulated_data(&mut *token_src)?;
1104                    DataElement::new(Tag(0x7fe0, 0x0010), VR::OB, value)
1105                }
1106                DataToken::ElementHeader(header) => {
1107                    // stop reading if reached `read_until` tag
1108                    if read_until.map(|t| t <= header.tag).unwrap_or(false) {
1109                        break;
1110                    }
1111
1112                    drop(token);
1113
1114                    *state = CollectorState::InDataset;
1115                    token_src.advance();
1116
1117                    // fetch respective value, place it in the output
1118                    let next_token = token_src.advance().context(MissingElementValueSnafu)?;
1119                    match next_token.context(ReadTokenSnafu)? {
1120                        token @ LazyDataToken::LazyValue { .. }
1121                        | token @ LazyDataToken::LazyItemValue { .. } => {
1122                            InMemElement::new_with_len(
1123                                header.tag,
1124                                header.vr,
1125                                header.len,
1126                                token
1127                                    .into_value()
1128                                    .context(CollectDataValueSnafu { tag: header.tag })?,
1129                            )
1130                        }
1131                        token => {
1132                            return UnexpectedTokenSnafu { token }.fail().map_err(From::from);
1133                        }
1134                    }
1135                }
1136                DataToken::SequenceStart { tag, len } => {
1137                    // stop reading if reached `read_until` tag
1138                    if read_until.map(|t| t <= tag).unwrap_or(false) {
1139                        break;
1140                    }
1141                    *state = CollectorState::InDataset;
1142
1143                    token_src.advance();
1144
1145                    // delegate sequence building to another function
1146                    let mut items = C::new();
1147                    Self::collect_sequence(
1148                        &mut *state,
1149                        tag,
1150                        len,
1151                        &mut *token_src,
1152                        dict,
1153                        &mut items,
1154                    )?;
1155                    DataElement::new_with_len(
1156                        tag,
1157                        VR::SQ,
1158                        len,
1159                        DicomValue::new_sequence(items, len),
1160                    )
1161                }
1162                DataToken::ItemEnd if in_item => {
1163                    // end of item, leave now
1164                    token_src.advance();
1165                    return Ok(());
1166                }
1167                token => {
1168                    return UnexpectedDataTokenSnafu {
1169                        token: token.clone(),
1170                    }
1171                    .fail()
1172                    .map_err(From::from)
1173                }
1174            };
1175            to.push(elem);
1176        }
1177
1178        Ok(())
1179    }
1180
1181    /// Build an encapsulated pixel data by collecting all fragments into an
1182    /// in-memory DICOM value.
1183    fn build_encapsulated_data(
1184        dataset: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1185    ) -> Result<DicomValue<InMemDicomObject<D>, InMemFragment>> {
1186        // continue fetching tokens to retrieve:
1187        // - the offset table
1188        // - the various compressed fragments
1189
1190        let mut offset_table = None;
1191
1192        let mut fragments = C::new();
1193
1194        // whether to read the fragment as the basic offset table (true)
1195        // or as a pixel data fragment (false)
1196        let mut first = true;
1197
1198        while let Some(token) = dataset.advance() {
1199            let token = token.context(ReadTokenSnafu)?;
1200            match token {
1201                LazyDataToken::LazyItemValue { decoder, len } => {
1202                    if first {
1203                        let mut table = Vec::new();
1204                        decoder
1205                            .read_u32_to_vec(len, &mut table)
1206                            .context(ReadItemSnafu)?;
1207                        first = false;
1208                    } else {
1209                        let mut data = Vec::new();
1210                        decoder.read_to_vec(len, &mut data).context(ReadItemSnafu)?;
1211                        fragments.push(data);
1212                    }
1213                }
1214                LazyDataToken::ItemEnd => {
1215                    // at the end of the first item ensure the presence of
1216                    // an empty offset_table here, so that the next items
1217                    // are seen as compressed fragments
1218                    if offset_table.is_none() {
1219                        offset_table = Some(Vec::new())
1220                    }
1221                }
1222                LazyDataToken::ItemStart { len: _ } => { /* no-op */ }
1223                LazyDataToken::SequenceEnd => {
1224                    // end of pixel data
1225                    break;
1226                }
1227                // the following variants are unexpected
1228                token @ LazyDataToken::ElementHeader(_)
1229                | token @ LazyDataToken::PixelSequenceStart
1230                | token @ LazyDataToken::SequenceStart { .. }
1231                | token @ LazyDataToken::LazyValue { .. }
1232                | token => {
1233                    return UnexpectedTokenSnafu { token }.fail().map_err(From::from);
1234                }
1235            }
1236        }
1237
1238        Ok(DicomValue::from(PixelFragmentSequence::new(
1239            offset_table.unwrap_or_default(),
1240            fragments,
1241        )))
1242    }
1243
1244    /// Build a DICOM sequence by consuming a data set parser.
1245    fn collect_sequence(
1246        state: &mut CollectorState,
1247        _tag: Tag,
1248        _len: Length,
1249        token_src: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1250        dict: &D,
1251        items: &mut C<InMemDicomObject<D>>,
1252    ) -> Result<()> {
1253        while let Some(token) = token_src.advance() {
1254            match token.context(ReadTokenSnafu)? {
1255                LazyDataToken::ItemStart { len: _ } => {
1256                    let mut obj = InMemDicomObject::new_empty_with_dict(dict.clone());
1257                    Self::collect_to_object(state, token_src, true, None, &mut obj, dict)?;
1258                    items.push(obj);
1259                }
1260                LazyDataToken::SequenceEnd => {
1261                    return Ok(());
1262                }
1263                token => return UnexpectedTokenSnafu { token }.fail().map_err(From::from),
1264            };
1265        }
1266
1267        // iterator fully consumed without a sequence delimiter
1268        PrematureEndSnafu.fail().map_err(From::from)
1269    }
1270}
1271
1272#[cfg(test)]
1273mod tests {
1274    use std::io::{BufReader, Write};
1275
1276    use dicom_core::{prelude::*, value::DataSetSequence, PrimitiveValue};
1277    use dicom_dictionary_std::{tags, uids, StandardDataDictionary};
1278    use dicom_encoding::TransferSyntaxIndex;
1279    use dicom_parser::dataset::read::OddLengthStrategy;
1280    use dicom_transfer_syntax_registry::TransferSyntaxRegistry;
1281
1282    use crate::{
1283        file::ReadPreamble, DicomCollectorOptions, FileMetaTable, FileMetaTableBuilder,
1284        InMemDicomObject,
1285    };
1286
1287    use super::DicomCollector;
1288
1289    /// read a plain data set without file meta group,
1290    /// by specifying the transfer syntax explicitly in the collector
1291    #[test]
1292    fn test_read_dataset_to_end_set_ts() {
1293        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1294            DataElement::new(
1295                tags::SOP_INSTANCE_UID,
1296                VR::UI,
1297                "2.25.51008724832548260562721775118239811861\0",
1298            ),
1299            DataElement::new(
1300                tags::SOP_CLASS_UID,
1301                VR::UI,
1302                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1303            ),
1304            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1305            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1306            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(64_u16)),
1307            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(64_u16)),
1308            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(8_u16)),
1309            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(8_u16)),
1310            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1311            DataElement::new(
1312                tags::PIXEL_DATA,
1313                VR::OB,
1314                PrimitiveValue::from(vec![0x55u8; 64 * 64]),
1315            ),
1316        ]);
1317
1318        let ts_expl_vr_le = TransferSyntaxRegistry
1319            .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1320            .unwrap();
1321
1322        let mut encoded = Vec::new();
1323        dataset1
1324            .write_dataset_with_ts(&mut encoded, ts_expl_vr_le)
1325            .unwrap();
1326
1327        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1328        let mut collector = DicomCollector::new_with_ts(reader, uids::EXPLICIT_VR_LITTLE_ENDIAN);
1329
1330        let mut dset = InMemDicomObject::new_empty();
1331        collector.read_dataset_to_end(&mut dset).unwrap();
1332
1333        assert_eq!(dset, dataset1);
1334    }
1335
1336    /// read a DICOM data set to the end,
1337    /// inferring the transfer syntax from the file meta group
1338    #[test]
1339    fn test_read_dataset_to_end_infer_from_meta() {
1340        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1341            DataElement::new(
1342                tags::SOP_INSTANCE_UID,
1343                VR::UI,
1344                "2.25.245029432991021387484564600987886994494",
1345            ),
1346            DataElement::new(
1347                tags::SOP_CLASS_UID,
1348                VR::UI,
1349                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1350            ),
1351            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1352            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1353            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(128_u16)),
1354            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(128_u16)),
1355            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(16_u16)),
1356            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(16_u16)),
1357            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(15_u16)),
1358            DataElement::new(
1359                tags::PIXEL_DATA,
1360                VR::OB,
1361                PrimitiveValue::from(vec![0x55u8; 128 * 128 * 2]),
1362            ),
1363        ]);
1364
1365        let file_dataset1 = dataset1
1366            .clone()
1367            .with_meta(FileMetaTableBuilder::new().transfer_syntax(uids::EXPLICIT_VR_LITTLE_ENDIAN))
1368            .unwrap();
1369
1370        // write FMI and dataset to the buffer
1371        let mut encoded = Vec::new();
1372        encoded.write_all(b"DICM").unwrap();
1373        file_dataset1.meta().write(&mut encoded).unwrap();
1374        file_dataset1
1375            .write_dataset_with_ts(
1376                &mut encoded,
1377                TransferSyntaxRegistry
1378                    .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1379                    .unwrap(),
1380            )
1381            .unwrap();
1382
1383        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1384        let mut collector = DicomCollector::new(reader);
1385
1386        let mut dset = InMemDicomObject::new_empty();
1387        let file_meta = collector.read_file_meta().unwrap();
1388        assert_eq!(file_meta.transfer_syntax(), uids::EXPLICIT_VR_LITTLE_ENDIAN,);
1389        collector.read_dataset_to_end(&mut dset).unwrap();
1390
1391        assert_eq!(dset, dataset1);
1392    }
1393
1394    /// read some data and then take off the file meta table from the collector
1395    #[test]
1396    fn test_take_file_meta() {
1397        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1398            DataElement::new(
1399                tags::SOP_INSTANCE_UID,
1400                VR::UI,
1401                "2.25.248821220596756482508841578490676982546",
1402            ),
1403            DataElement::new(
1404                tags::SOP_CLASS_UID,
1405                VR::UI,
1406                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1407            ),
1408            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1409            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1410            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(64_u16)),
1411            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(64_u16)),
1412            DataElement::new(tags::SAMPLES_PER_PIXEL, VR::US, PrimitiveValue::from(1_u16)),
1413            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(8_u16)),
1414            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(8_u16)),
1415            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1416            DataElement::new(
1417                tags::PIXEL_DATA,
1418                VR::OB,
1419                PrimitiveValue::from(vec![0x55u8; 64 * 64]),
1420            ),
1421        ]);
1422
1423        let file_dataset1 = dataset1
1424            .clone()
1425            .with_meta(FileMetaTableBuilder::new().transfer_syntax(uids::EXPLICIT_VR_LITTLE_ENDIAN))
1426            .unwrap();
1427
1428        // write FMI and dataset to the buffer
1429        let mut encoded = Vec::new();
1430        encoded.write_all(b"DICM").unwrap();
1431        file_dataset1.meta().write(&mut encoded).unwrap();
1432        file_dataset1
1433            .write_dataset_with_ts(
1434                &mut encoded,
1435                TransferSyntaxRegistry
1436                    .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1437                    .unwrap(),
1438            )
1439            .unwrap();
1440
1441        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1442        let mut collector = DicomCollector::new(reader);
1443
1444        // read_file_meta() only returns a reference
1445        let _: &FileMetaTable = collector.read_file_meta().unwrap();
1446        // read some data from the main data set
1447        let mut main_dataset = InMemDicomObject::new_empty();
1448        collector
1449            .read_dataset_up_to_pixeldata(&mut main_dataset)
1450            .unwrap();
1451
1452        // can reliably take the table out of the collector
1453        let file_meta: FileMetaTable = collector
1454            .take_file_meta()
1455            .expect("should have file meta info");
1456        assert_eq!(
1457            file_meta.media_storage_sop_instance_uid(),
1458            "2.25.248821220596756482508841578490676982546"
1459        );
1460
1461        // can still read more data afterwards
1462        let mut fragment_data = Vec::new();
1463        let bytes_read = collector.read_next_fragment(&mut fragment_data).unwrap();
1464        assert_eq!(bytes_read, Some(64 * 64));
1465        assert_eq!(fragment_data.len(), bytes_read.unwrap() as usize);
1466    }
1467
1468    /// read a DICOM data set with nested sequences
1469    #[test]
1470    fn test_read_dataset_nested() {
1471        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1472            DataElement::new(
1473                tags::SOP_INSTANCE_UID,
1474                VR::UI,
1475                "2.25.245029432991021387484564600987886994494",
1476            ),
1477            DataElement::new(
1478                tags::SOP_CLASS_UID,
1479                VR::UI,
1480                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1481            ),
1482            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1483            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1484            DataElement::new(
1485                tags::ANATOMIC_REGION_SEQUENCE,
1486                VR::SQ,
1487                DataSetSequence::from(vec![InMemDicomObject::from_element_iter([
1488                    DataElement::new(tags::CODE_VALUE, VR::SH, "51185008"),
1489                    DataElement::new(tags::CODING_SCHEME_DESIGNATOR, VR::SH, "SCT"),
1490                    DataElement::new(tags::CODE_MEANING, VR::LO, "chest"),
1491                    DataElement::new(
1492                        tags::ANATOMIC_REGION_MODIFIER_SEQUENCE,
1493                        VR::SQ,
1494                        DataSetSequence::from(vec![InMemDicomObject::from_element_iter([
1495                            DataElement::new(tags::CODE_VALUE, VR::SH, "302551006"),
1496                            DataElement::new(tags::CODING_SCHEME_DESIGNATOR, VR::SH, "SCT"),
1497                            DataElement::new(tags::CODE_MEANING, VR::LO, "entire thorax "),
1498                        ])]),
1499                    ),
1500                ])]),
1501            ),
1502            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(128_u16)),
1503            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(128_u16)),
1504            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(16_u16)),
1505            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(16_u16)),
1506            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1507            DataElement::new(
1508                tags::PIXEL_DATA,
1509                VR::OB,
1510                PrimitiveValue::from(vec![0x55_u8; 128 * 128]),
1511            ),
1512        ]);
1513
1514        let ts_expl_vr_le = TransferSyntaxRegistry
1515            .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1516            .unwrap();
1517
1518        let mut encoded = Vec::new();
1519        dataset1
1520            .write_dataset_with_ts(&mut encoded, ts_expl_vr_le)
1521            .unwrap();
1522
1523        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1524
1525        let mut collector = DicomCollector::new_with_ts(reader, uids::EXPLICIT_VR_LITTLE_ENDIAN);
1526
1527        let mut dset = InMemDicomObject::new_empty();
1528        collector.read_dataset_to_end(&mut dset).unwrap();
1529
1530        // inspect some values using the attribute sequence API
1531        let v = dset
1532            .value_at((tags::ANATOMIC_REGION_SEQUENCE, tags::CODE_VALUE))
1533            .unwrap()
1534            .to_str()
1535            .unwrap();
1536        assert_eq!(v, "51185008");
1537
1538        let v = dset
1539            .value_at((
1540                tags::ANATOMIC_REGION_SEQUENCE,
1541                tags::ANATOMIC_REGION_MODIFIER_SEQUENCE,
1542                tags::CODE_MEANING,
1543            ))
1544            .unwrap()
1545            .to_str()
1546            .unwrap();
1547        assert_eq!(v, "entire thorax");
1548    }
1549
1550    /// read a DICOM data set in two chunks
1551    #[test]
1552    fn test_read_dataset_two_parts() {
1553        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1554            DataElement::new(
1555                tags::SOP_INSTANCE_UID,
1556                VR::UI,
1557                "2.25.245029432991021387484564600987886994494",
1558            ),
1559            DataElement::new(
1560                tags::SOP_CLASS_UID,
1561                VR::UI,
1562                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1563            ),
1564            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1565            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1566            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(128_u16)),
1567            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(128_u16)),
1568            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(16_u16)),
1569            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(16_u16)),
1570            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1571            DataElement::new(
1572                tags::PIXEL_DATA,
1573                VR::OB,
1574                PrimitiveValue::from(vec![0x55_u8; 128 * 128]),
1575            ),
1576        ]);
1577
1578        let ts_expl_vr_le = TransferSyntaxRegistry
1579            .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1580            .unwrap();
1581
1582        let mut encoded = Vec::new();
1583        dataset1
1584            .write_dataset_with_ts(&mut encoded, ts_expl_vr_le)
1585            .unwrap();
1586
1587        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1588
1589        let mut collector = DicomCollectorOptions::new()
1590            .expected_ts(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1591            .read_preamble(ReadPreamble::Never)
1592            .odd_length_strategy(OddLengthStrategy::Fail)
1593            .from_reader(reader);
1594
1595        // read one part of the data set
1596        let mut dset1 = InMemDicomObject::new_empty();
1597
1598        collector
1599            .read_dataset_up_to(tags::ROWS, &mut dset1)
1600            .unwrap();
1601        // it has patient name and study description
1602        assert_eq!(
1603            dset1.get(tags::PATIENT_NAME).unwrap().to_str().unwrap(),
1604            "Doe^John"
1605        );
1606        assert_eq!(
1607            dset1
1608                .get(tags::STUDY_DESCRIPTION)
1609                .unwrap()
1610                .to_str()
1611                .unwrap(),
1612            "Test study"
1613        );
1614        // it does not have rows, or pixel data
1615        assert!(dset1.get(tags::ROWS).is_none());
1616        assert!(dset1.get(tags::PIXEL_DATA).is_none());
1617
1618        // read part two of the data set
1619        let mut dset2 = InMemDicomObject::new_empty();
1620
1621        collector.read_dataset_to_end(&mut dset2).unwrap();
1622
1623        // it has rows and pixel data
1624        assert_eq!(dset2.get(tags::ROWS).unwrap().to_int::<u16>().unwrap(), 128);
1625        assert_eq!(
1626            dset2.get(tags::COLUMNS).unwrap().to_int::<u16>().unwrap(),
1627            128
1628        );
1629        assert_eq!(
1630            &*dset2.get(tags::PIXEL_DATA).unwrap().to_bytes().unwrap(),
1631            &[0x55_u8; 128 * 128]
1632        );
1633
1634        // it does not have the other parts
1635        assert!(dset2.get(tags::SOP_INSTANCE_UID).is_none());
1636        assert!(dset2.get(tags::PATIENT_NAME).is_none());
1637        assert!(dset2.get(tags::STUDY_DESCRIPTION).is_none());
1638    }
1639
1640    /// read the fragments of a DICOM file one by one
1641    #[test]
1642    fn test_read_fragments() {
1643        let filename = dicom_test_files::path("WG04/JPLY/SC1_JPLY").unwrap();
1644
1645        let mut collector = DicomCollector::open_file(filename).unwrap();
1646
1647        let fmi = collector.read_file_meta().unwrap();
1648
1649        assert_eq!(fmi.transfer_syntax(), uids::JPEG_EXTENDED12_BIT);
1650
1651        // collect the basic offset table as a regular fragment
1652
1653        let mut bot = Vec::new();
1654        let len = collector
1655            .read_next_fragment(&mut bot)
1656            .expect("should read basic offset table successfully")
1657            .expect("should have basic offset table fragment");
1658        assert_eq!(len, 0);
1659        assert!(bot.is_empty());
1660
1661        // collect the other fragments
1662
1663        let mut fragment = Vec::with_capacity(131_072);
1664
1665        let len = collector
1666            .read_next_fragment(&mut fragment)
1667            .expect("should read fragment successfully")
1668            .expect("should have fragment #0");
1669        assert_eq!(len, 65_536);
1670
1671        // inspect a few bytes just to be sure
1672        assert_eq!(&fragment[0..4], &[0xFF, 0xD8, 0xFF, 0xC1]);
1673
1674        // read one more
1675
1676        let len = collector
1677            .read_next_fragment(&mut fragment)
1678            .expect("should read fragment successfully")
1679            .expect("should have fragment #1");
1680        assert_eq!(len, 65_536);
1681
1682        // accumulates
1683        assert_eq!(fragment.len(), 131_072);
1684
1685        // inspect a few bytes
1686        assert_eq!(&fragment[0..4], &[0xFF, 0xD8, 0xFF, 0xC1]);
1687        assert_eq!(&fragment[65_536..65_540], &[0x04, 0x6C, 0x3B, 0x60]);
1688
1689        // check that it can fetch the remaining fragments
1690        let mut remaining: i32 = 10; // 12 fragments
1691
1692        fragment.clear();
1693
1694        while let Some(_len) = collector
1695            .read_next_fragment(&mut fragment)
1696            .expect("should have read fragment successfully")
1697        {
1698            remaining -= 1;
1699            assert!(!fragment.is_empty());
1700            fragment.clear();
1701        }
1702
1703        assert_eq!(remaining, 0);
1704    }
1705
1706    /// read the fragments of a DICOM file after reading the basic offset table
1707    #[test]
1708    fn test_read_bot_and_fragments() {
1709        let filename = dicom_test_files::path("pydicom/SC_rgb_rle_2frame.dcm").unwrap();
1710
1711        let mut collector = DicomCollector::open_file(filename).unwrap();
1712
1713        let fmi = collector.read_file_meta().unwrap();
1714
1715        assert_eq!(fmi.transfer_syntax(), uids::RLE_LOSSLESS);
1716
1717        // collect the basic offset table
1718        let mut bot = Vec::new();
1719        let len = collector
1720            .read_basic_offset_table(&mut bot)
1721            .expect("should read basic offset table successfully")
1722            .expect("should have basic offset table fragment");
1723        assert_eq!(len, 8);
1724        assert_eq!(&bot, &[0x0000, 0x02A0]);
1725
1726        // can't read the basic offset table twice
1727        assert!(matches!(
1728            collector.read_basic_offset_table(&mut bot),
1729            Err(super::Error(super::InnerError::IllegalStateInPixel { .. })),
1730        ));
1731
1732        // collect the other fragments
1733
1734        let mut fragment = Vec::with_capacity(2048);
1735
1736        let len = collector
1737            .read_next_fragment(&mut fragment)
1738            .expect("should read fragment successfully")
1739            .expect("should have fragment #0");
1740        assert_eq!(len, 664);
1741
1742        // inspect a few bytes just to be sure
1743        assert_eq!(&fragment[0..5], &[0x03, 0x00, 0x00, 0x00, 0x40]);
1744
1745        // read one more
1746
1747        let len = collector
1748            .read_next_fragment(&mut fragment)
1749            .expect("should read fragment successfully")
1750            .expect("should have fragment #1");
1751        assert_eq!(len, 664);
1752
1753        // accumulates
1754        assert_eq!(fragment.len(), 664 + 664);
1755
1756        // inspect a few bytes
1757        assert_eq!(&fragment[0..5], &[0x03, 0x00, 0x00, 0x00, 0x40]);
1758        assert_eq!(&fragment[664 + 659..], &[0x00, 0x9D, 0x00, 0x9D, 0x00]);
1759
1760        // no more fragments
1761        assert!(collector
1762            .read_next_fragment(&mut fragment)
1763            .expect("attempt to read the next fragment should not have failed")
1764            .is_none());
1765    }
1766}