Skip to main content

dicom_object/
collector.rs

1//! DICOM collector API:
2//! high-level construct for reading DICOM data sets in controlled chunks.
3//!
4//! The DICOM collector API
5//! can be used for reading DICOM objects in cohesive portions.
6//! Unlike [`open_file`](crate::open_file) or [`OpenFileOptions`](crate::OpenFileOptions),
7//! this API makes it possible to read and process pieces of meta-data
8//! without gathering the entire data set in memory,
9//! making it appealing when working with data sets which are known to be large,
10//! such as multi-frame images.
11//!
12//! # Examples
13//!
14//! It is possible to open a DICOM file and collect its file meta information
15//! and main dataset.
16//!
17//! ```no_run
18//! # use dicom_object::InMemDicomObject;
19//! # use dicom_object::collector::DicomCollector;
20//! # use dicom_object::meta::FileMetaTable;
21//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
22//! let mut collector = DicomCollector::open_file("file.dcm")?;
23//!
24//! let fmi: &FileMetaTable = collector.read_file_meta()?;
25//! let mut dset = InMemDicomObject::new_empty();
26//! collector.read_dataset_to_end(&mut dset)?; // populate `dset` with all elements
27//! # Ok(())
28//! # }
29//! ```
30//!
31//! But at the moment,
32//! this example will be no different from using the regular file opening API.
33//! To benefit from the collector,
34//! read smaller portions of the dataset at a time.
35//! For instance, you can first read patient/study attributes,
36//! place image pixel attributes in a separate object,
37//! and only then fetch the pixel data.
38//!
39//! ```no_run
40//! # use dicom_object::InMemDicomObject;
41//! # use dicom_object::collector::DicomCollector;
42//! # use dicom_object::meta::FileMetaTable;
43//! use dicom_core::Tag;
44//!
45//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
46//! let mut collector = DicomCollector::open_file("file.dcm")?;
47//!
48//! let fmi: &FileMetaTable = collector.read_file_meta()?;
49//! // read everything before the image pixel group
50//! let mut dset = InMemDicomObject::new_empty();
51//! collector.read_dataset_up_to(Tag(0x0028, 0x0000), &mut dset)?;
52//!
53//! // read from image pixel group and stop before the pixel data
54//! let mut pixel_image_dset = InMemDicomObject::new_empty();
55//! collector.read_dataset_up_to_pixeldata(&mut pixel_image_dset)?;
56//!
57//! // ... pixel data would be next
58//! # Ok(())
59//! # }
60//! ```
61//!
62//! Moreover, this API has methods to retrieve
63//! each pixel data fragment independently,
64//! which is a significant memory saver in multi-frame scenarios.
65//!
66//! ```no_run
67//! # use dicom_object::collector::DicomCollector;
68//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
69//! # let mut collector = DicomCollector::open_file("file.dcm")?;
70//! // save the basic offset table separately
71//! let mut offset_table = Vec::<u32>::new();
72//! collector.read_basic_offset_table(&mut offset_table)?;
73//!
74//! let mut buf = Vec::new();
75//! while let Some(len) = collector.read_next_fragment(&mut buf)? {
76//!    // should now have the entire fragment data
77//!    assert_eq!(buf.len() as u32, len);
78//!    // process fragment (e.g. accumulate to a frame buffer and save to a file),
79//!    // and clear the buffer when done
80//!    buf.clear();
81//! }
82//! # Ok(())
83//! # }
84//! ```
85//!
86//! More options for DICOM collecting are available
87//! by using [`DicomCollectorOptions`].
88//!
89//! ```no_run
90//! use dicom_dictionary_std::uids;
91//!
92//! # use dicom_object::{DicomCollectorOptions, InMemDicomObject};
93//! let mut collector = DicomCollectorOptions::new()
94//!     .read_preamble(dicom_object::file::ReadPreamble::Never)
95//!     .odd_length_strategy(dicom_object::file::OddLengthStrategy::Fail)
96//!     .expected_ts(uids::EXPLICIT_VR_LITTLE_ENDIAN)
97//!     .open_file("path/to/file_with_no_meta.dcm")?;
98//!
99//! let mut metadata = InMemDicomObject::new_empty();
100//! collector.read_dataset_up_to_pixeldata(&mut metadata)?;
101//! # Result::<(), Box<dyn std::error::Error>>::Ok(())
102//! ```
103
104use std::{
105    borrow::Cow,
106    fmt,
107    fs::File,
108    io::{BufRead, BufReader, Read, Seek},
109    path::Path,
110};
111
112use dicom_core::{
113    DataDictionary, DataElement, DicomValue, Length, Tag, VR,
114    header::HasLength,
115    value::{C, PixelFragmentSequence},
116};
117use dicom_dictionary_std::{StandardDataDictionary, tags};
118use dicom_encoding::{TransferSyntaxIndex, decode::DecodeFrom};
119use dicom_parser::{
120    DynStatefulDecoder, StatefulDecode, StatefulDecoder,
121    dataset::{
122        DataToken, LazyDataToken,
123        lazy_read::{LazyDataSetReader, LazyDataSetReaderOptions},
124    },
125};
126use dicom_transfer_syntax_registry::TransferSyntaxRegistry;
127use snafu::Backtrace;
128use snafu::prelude::*;
129
130use crate::{
131    FileMetaTable, InMemDicomObject,
132    file::ReadPreamble,
133    mem::{InMemElement, InMemFragment},
134};
135
136// re-export parsing options in public API
137pub use dicom_parser::dataset::read::OddLengthStrategy;
138pub use dicom_parser::stateful::decode::CharacterSetOverride;
139
/// Result alias used throughout the collector API,
/// with the error type defaulting to this module's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
141
// Opaque public wrapper: the concrete error variants live in `InnerError`,
// which is only visible within this crate.
/// An error which may occur when using the DICOM collector
#[derive(Debug, Snafu)]
pub struct Error(InnerError);
145
// NOTE(review): variants without an explicit `#[snafu(display(...))]`
// presumably take their `Display` text from the `///` doc comment
// (snafu convention), so editing those doc comments would change
// the messages shown to users. Confirm before rewording them.
/// Inner error type for collector API
#[derive(Debug, Snafu)]
#[non_exhaustive]
pub(crate) enum InnerError {
    // failure to open the file at the given path
    #[snafu(display("Could not open file '{}'", filename.display()))]
    OpenFile {
        filename: std::path::PathBuf,
        backtrace: Backtrace,
        source: std::io::Error,
    },
    /// Could not read preamble bytes
    ReadPreambleBytes {
        backtrace: Backtrace,
        source: std::io::Error,
    },
    /// Could not create data set parser
    CreateParser {
        // the source error is boxed and also provides the backtrace
        #[snafu(
            backtrace,
            source(from(dicom_parser::dataset::lazy_read::Error, Box::from))
        )]
        source: Box<dicom_parser::dataset::lazy_read::Error>,
    },
    /// Could not read data set token
    ReadToken {
        // the source error is boxed and also provides the backtrace
        #[snafu(
            backtrace,
            source(from(dicom_parser::dataset::lazy_read::Error, Box::from))
        )]
        source: Box<dicom_parser::dataset::lazy_read::Error>,
    },
    /// Illegal state for the requested operation: preamble has already been read
    IllegalStateStart { backtrace: Backtrace },
    /// Illegal state for the requested operation: file meta group has already been read
    IllegalStateMeta { backtrace: Backtrace },
    /// Illegal state for the requested operation: basic offset table has already been read
    IllegalStateInPixel { backtrace: Backtrace },
    /// DICOM value not found after non-empty element header
    MissingElementValue { backtrace: Backtrace },
    /// Unrecognized transfer syntax {ts_uid}
    UnrecognizedTransferSyntax {
        ts_uid: String,
        backtrace: Backtrace,
    },
    /// Could not guess source transfer syntax
    GuessTransferSyntax { backtrace: Backtrace },
    // a lazy data set token appeared where it was not expected
    #[snafu(display("Unexpected token {token:?}"))]
    UnexpectedToken {
        token: dicom_parser::dataset::LazyDataTokenRepr,
        backtrace: Backtrace,
    },
    // a (non-lazy) data token appeared where it was not expected
    #[snafu(display("Unexpected data token {token:?}"))]
    UnexpectedDataToken {
        token: dicom_parser::dataset::DataToken,
        backtrace: Backtrace,
    },
    // failure while collecting the value of the element with the given tag
    #[snafu(display("Could not collect data in {tag}"))]
    CollectDataValue {
        tag: Tag,
        #[snafu(backtrace, source(from(dicom_parser::dataset::Error, Box::from)))]
        source: Box<dicom_parser::dataset::Error>,
    },
    /// Premature data set end
    PrematureEnd { backtrace: Backtrace },
    /// Could not build file meta table
    BuildMetaTable {
        #[snafu(backtrace, source(from(crate::meta::Error, Box::new)))]
        source: Box<crate::meta::Error>,
    },
    /// Could not read item
    ReadItem {
        // the source error is boxed and also provides the backtrace
        #[snafu(
            backtrace,
            source(from(dicom_parser::stateful::decode::Error, Box::from))
        )]
        source: Box<dicom_parser::stateful::decode::Error>,
    },
}
224
/// A builder type for setting up a DICOM collector.
///
/// The type parameter `D` is the data element dictionary type
/// (defaults to [`StandardDataDictionary`])
/// and `R` is the transfer syntax index type
/// (defaults to [`TransferSyntaxRegistry`]).
///
/// See the [module-level documentation](crate::collector) for more details.
#[derive(Debug, Default)]
pub struct DicomCollectorOptions<D = StandardDataDictionary, R = TransferSyntaxRegistry> {
    /// Data element dictionary
    dict: D,
    /// Transfer syntax index (registry)
    ts_index: R,
    /// UID of transfer syntax suggestion
    /// (`None` when no transfer syntax is expected up front)
    ts_hint: Option<Cow<'static, str>>,
    /// Whether to read the 128-byte DICOM file preamble
    read_preamble: ReadPreamble,
    /// How to handle data elements with an odd length
    odd_length: OddLengthStrategy,
    /// Override for how text should be decoded
    charset_override: CharacterSetOverride,
}
243
244impl DicomCollectorOptions {
245    /// Create a new DICOM collector builder.
246    pub fn new() -> Self {
247        Self::default()
248    }
249}
250
251impl<D, R> DicomCollectorOptions<D, R> {
252    /// Override the data element dictionary with the one given,
253    /// potentially replacing the dictionary type.
254    ///
255    /// When not working with custom data dictionaries,
256    /// this method does not have to be called
257    /// (defaults to [`StandardDataDictionary`], which is zero sized).
258    pub fn dict<D2>(self, dict: D2) -> DicomCollectorOptions<D2, R> {
259        DicomCollectorOptions {
260            dict,
261            ts_index: self.ts_index,
262            ts_hint: self.ts_hint,
263            read_preamble: self.read_preamble,
264            odd_length: self.odd_length,
265            charset_override: self.charset_override,
266        }
267    }
268
269    /// Override the transfer syntax index (also called registry) with the one given,
270    /// potentially replacing the transfer syntax index type.
271    ///
272    /// When not working with custom transfer syntax registries,
273    /// this method does not have to be called
274    /// (defaults to [`TransferSyntaxRegistry`], which is zero sized).
275    pub fn ts_index<R2>(self, ts_index: R2) -> DicomCollectorOptions<D, R2> {
276        DicomCollectorOptions {
277            dict: self.dict,
278            ts_index,
279            ts_hint: self.ts_hint,
280            read_preamble: self.read_preamble,
281            odd_length: self.odd_length,
282            charset_override: self.charset_override,
283        }
284    }
285
286    /// Set the UID of the transfer syntax expected from the source.
287    pub fn expected_ts(mut self, ts_uid: impl Into<Cow<'static, str>>) -> Self {
288        self.ts_hint = Some(ts_uid.into());
289        self
290    }
291
292    /// Unset the UID of the transfer syntax expected from the source.
293    pub fn unset_expected_ts(mut self) -> Self {
294        self.ts_hint = None;
295        self
296    }
297
298    /// Set whether to read the 128-byte DICOM file preamble.
299    pub fn read_preamble(mut self, option: ReadPreamble) -> Self {
300        self.read_preamble = option;
301        self
302    }
303
304    /// Set how data elements with an odd length should be handled
305    pub fn odd_length_strategy(mut self, option: OddLengthStrategy) -> Self {
306        self.odd_length = option;
307        self
308    }
309
310    /// Set how text of specific value representations should be decoded
311    pub fn charset_override(mut self, option: CharacterSetOverride) -> Self {
312        self.charset_override = option;
313        self
314    }
315
316    /// Proceed with opening a file for DICOM collecting.
317    pub fn open_file(
318        self,
319        filename: impl AsRef<Path>,
320    ) -> Result<DicomCollector<BufReader<File>, D, R>>
321    where
322        R: TransferSyntaxIndex,
323    {
324        let filename = filename.as_ref();
325        let reader = BufReader::new(File::open(filename).context(OpenFileSnafu { filename })?);
326
327        Ok(DicomCollector {
328            source: CollectionSource::new(
329                reader,
330                self.ts_index,
331                self.odd_length,
332                self.charset_override,
333            ),
334            dictionary: self.dict,
335            ts_hint: self.ts_hint,
336            file_meta: None,
337            read_preamble: self.read_preamble,
338            state: Default::default(),
339        })
340    }
341
342    /// Create a DICOM collector which will read from the given source.
343    pub fn from_reader<S>(self, reader: BufReader<S>) -> DicomCollector<BufReader<S>, D, R>
344    where
345        S: Read + Seek,
346        R: TransferSyntaxIndex,
347    {
348        DicomCollector {
349            source: CollectionSource::new(
350                reader,
351                self.ts_index,
352                self.odd_length,
353                self.charset_override,
354            ),
355            dictionary: self.dict,
356            ts_hint: self.ts_hint,
357            file_meta: None,
358            read_preamble: self.read_preamble,
359            state: Default::default(),
360        }
361    }
362}
363
/// The source of data of a collector,
/// which is either the raw reader plus the options needed to build a parser,
/// or the data set parser itself once it has been constructed.
enum CollectionSource<S, R> {
    /// No parser has been constructed yet
    Raw {
        /// the actual raw byte data as a reader
        /// (becomes `None` once taken out to build the parser)
        reader: Option<S>,
        /// Transfer syntax index (registry)
        ts_index: R,
        /// the strategy for reading data elements with an odd length
        /// (needs to be retained until a parser is constructed)
        odd_length: OddLengthStrategy,
        /// override for how text should be decoded
        charset_override: CharacterSetOverride,
    },
    /// The data set parser, which now owns the reader
    Parser(LazyDataSetReader<DynStatefulDecoder<S>>),
}
378
379impl<S, R> fmt::Debug for CollectionSource<S, R>
380where
381    R: fmt::Debug,
382{
383    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384        match self {
385            CollectionSource::Raw {
386                reader,
387                ts_index,
388                odd_length,
389                charset_override,
390            } => f
391                .debug_struct("Raw")
392                .field("ts_index", ts_index)
393                .field("odd_length", odd_length)
394                .field("charset_override", charset_override)
395                .field(
396                    "reader",
397                    &match reader {
398                        Some(_) => "Some(_)",
399                        None => "None",
400                    },
401                )
402                .finish(),
403            CollectionSource::Parser(_) => f.write_str("Parser(..)"),
404        }
405    }
406}
407
408impl<S, R> CollectionSource<S, R>
409where
410    S: Read + Seek,
411    R: TransferSyntaxIndex,
412{
413    fn new(
414        raw_source: S,
415        ts_index: R,
416        odd_length: OddLengthStrategy,
417        charset_override: CharacterSetOverride,
418    ) -> Self {
419        CollectionSource::Raw {
420            reader: Some(raw_source),
421            ts_index,
422            odd_length,
423            charset_override,
424        }
425    }
426
427    fn has_parser(&self) -> bool {
428        matches!(self, CollectionSource::Parser(_))
429    }
430
431    fn raw_reader_mut(&mut self) -> &mut S {
432        match self {
433            CollectionSource::Raw { reader, .. } => reader.as_mut().unwrap(),
434            CollectionSource::Parser(_) => {
435                panic!("cannot retrieve raw reader after setting parser")
436            }
437        }
438    }
439
440    fn set_parser_with_ts(
441        &mut self,
442        ts_uid: &str,
443    ) -> Result<&mut LazyDataSetReader<DynStatefulDecoder<S>>> {
444        match self {
445            CollectionSource::Raw {
446                reader: src,
447                ts_index,
448                odd_length,
449                charset_override,
450            } => {
451                let src = src.take().unwrap();
452
453                // look up transfer syntax
454                let ts = ts_index
455                    .get(ts_uid)
456                    .context(UnrecognizedTransferSyntaxSnafu {
457                        ts_uid: ts_uid.to_string(),
458                    })?;
459
460                let mut options = LazyDataSetReaderOptions::default();
461                options.odd_length = *odd_length;
462                options.charset_override = *charset_override;
463                *self = CollectionSource::Parser(
464                    LazyDataSetReader::new_with_ts_options(src, ts, options)
465                        .context(CreateParserSnafu)?,
466                );
467                let CollectionSource::Parser(parser) = self else {
468                    unreachable!();
469                };
470                Ok(parser)
471            }
472            CollectionSource::Parser(decoder) => Ok(decoder),
473        }
474    }
475
476    fn parser(&mut self) -> &mut LazyDataSetReader<DynStatefulDecoder<S>> {
477        match self {
478            CollectionSource::Raw { .. } => panic!("parser transfer syntax not set"),
479            CollectionSource::Parser(parser) => parser,
480        }
481    }
482}
483
/// A DICOM collector set up to read from a specific source.
///
/// The type parameter `S` is the type of the byte source,
/// `D` is the data element dictionary type
/// (defaults to [`StandardDataDictionary`]),
/// and `R` is the transfer syntax index type
/// (defaults to [`TransferSyntaxRegistry`]).
///
/// See the [module-level documentation](crate::collector) for more details.
pub struct DicomCollector<S, D = StandardDataDictionary, R = TransferSyntaxRegistry> {
    /// the source of byte data to read from
    /// (either the raw reader or the data set parser built on top of it)
    source: CollectionSource<S, R>,
    /// data dictionary
    dictionary: D,
    /// UID of transfer syntax suggestion
    ts_hint: Option<Cow<'static, str>>,
    /// file meta group information table
    /// (`None` until it is read, or after it is taken out of the collector)
    file_meta: Option<FileMetaTable>,
    /// Whether to read the 128-byte DICOM file preamble
    /// (needs to be retained until the preamble is read)
    read_preamble: ReadPreamble,
    /// the state of the collector so as to keep track of what's been read
    state: CollectorState,
}
502
/// A state indicator of what has been collected so far.
#[derive(Debug, Default, Copy, Clone, PartialEq)]
enum CollectorState {
    /// The collector is in the initial state.
    #[default]
    Start,
    /// The collector has read the preamble,
    /// or the preamble has been requested but not collected.
    Preamble,
    /// The collector has read the file meta group data set.
    ///
    /// If this state is reached,
    /// `file_meta` is guaranteed to be `Some`.
    FileMeta,
    /// The collector has read some portion of the main data set.
    InDataset,
    /// The collector has read the pixel data element header.
    InPixelData,
}
522
523impl<S, D, R> fmt::Debug for DicomCollector<S, D, R>
524where
525    D: fmt::Debug,
526    R: fmt::Debug,
527{
528    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
529        f.debug_struct("DicomCollector")
530            .field("source", &self.source)
531            .field("dictionary", &self.dictionary)
532            .field("ts_hint", &self.ts_hint)
533            .field(
534                "file_meta",
535                if self.file_meta.is_some() {
536                    &"Some(...)"
537                } else {
538                    &"None"
539                },
540            )
541            .field("read_preamble", &self.read_preamble)
542            .field("state", &self.state)
543            .finish()
544    }
545}
546
547impl<S> DicomCollector<BufReader<S>>
548where
549    S: Read + Seek,
550{
551    /// Create a new DICOM dataset collector
552    /// which reads from a buffered reader.
553    ///
554    /// The standard data dictionary and standard transfer syntax registry are used.
555    /// The transfer syntax is guessed from the file meta group data set.
556    pub fn new(reader: BufReader<S>) -> Self {
557        DicomCollector {
558            source: CollectionSource::new(
559                reader,
560                TransferSyntaxRegistry,
561                Default::default(),
562                Default::default(),
563            ),
564            dictionary: StandardDataDictionary,
565            ts_hint: None,
566            file_meta: None,
567            read_preamble: Default::default(),
568            state: Default::default(),
569        }
570    }
571
572    /// Create a new DICOM dataset collector
573    /// which reads from a buffered reader
574    /// and expects the given transfer syntax.
575    ///
576    /// The standard data dictionary is used.
577    pub fn new_with_ts(
578        reader: BufReader<S>,
579        transfer_syntax: impl Into<Cow<'static, str>>,
580    ) -> Self {
581        DicomCollector {
582            source: CollectionSource::new(
583                reader,
584                TransferSyntaxRegistry,
585                Default::default(),
586                Default::default(),
587            ),
588            dictionary: StandardDataDictionary,
589            ts_hint: Some(transfer_syntax.into()),
590            file_meta: None,
591            read_preamble: Default::default(),
592            state: Default::default(),
593        }
594    }
595}
596
597impl DicomCollector<BufReader<File>> {
598    /// Create a new DICOM dataset collector
599    /// which reads from a standard DICOM file.
600    ///
601    /// The standard data dictionary is used.
602    /// The transfer syntax is guessed from the file meta group data set.
603    pub fn open_file(filename: impl AsRef<Path>) -> Result<Self> {
604        Self::open_file_with_dict(filename, StandardDataDictionary)
605    }
606}
607
608impl<D> DicomCollector<BufReader<File>, D>
609where
610    D: DataDictionary + Clone,
611{
612    // --- constructors ---
613
614    /// Create a new DICOM dataset collector
615    /// which reads from a standard DICOM file.
616    ///
617    /// The transfer syntax is guessed from the file meta group data set.
618    /// The standard transfer syntax registry is used.
619    pub fn open_file_with_dict(filename: impl AsRef<Path>, dict: D) -> Result<Self> {
620        let filename = filename.as_ref();
621        let reader = BufReader::new(File::open(filename).context(OpenFileSnafu { filename })?);
622        Ok(Self::new_with_dict(reader, dict))
623    }
624}
625
626impl<S, D> DicomCollector<BufReader<S>, D>
627where
628    D: DataDictionary + Clone,
629    S: Read + Seek,
630{
631    /// Create a new DICOM dataset collector
632    /// using the given data element dictionary,
633    /// which reads from a buffered reader.
634    ///
635    /// The transfer syntax is guessed from the file meta group data set.
636    /// The standard transfer syntax registry is used.
637    fn new_with_dict(reader: BufReader<S>, dictionary: D) -> Self {
638        DicomCollector {
639            source: CollectionSource::new(
640                reader,
641                TransferSyntaxRegistry,
642                Default::default(),
643                Default::default(),
644            ),
645            dictionary,
646            ts_hint: None,
647            file_meta: None,
648            read_preamble: Default::default(),
649            state: Default::default(),
650        }
651    }
652}
653
654impl<S, D, R> DicomCollector<BufReader<S>, D, R>
655where
656    D: DataDictionary + Clone,
657    S: Read + Seek,
658    R: TransferSyntaxIndex,
659{
660    /// Read a DICOM file preamble from the given source.
661    ///
662    /// Returns the 128 bytes preceding the DICOM magic code,
663    /// if they were found,
664    /// or according to the `read_preamble` option on construction.
665    pub fn read_preamble(&mut self) -> Result<Option<[u8; 128]>> {
666        ensure!(self.state == CollectorState::Start, IllegalStateStartSnafu);
667
668        if self.read_preamble == ReadPreamble::Never {
669            self.state = CollectorState::Preamble;
670            return Ok(None);
671        }
672
673        let reader = self.source.raw_reader_mut();
674        let preamble = {
675            if self.read_preamble == ReadPreamble::Always {
676                // always assume that there is a preamble
677                let mut buf = [0; 128];
678                reader
679                    .read_exact(&mut buf)
680                    .context(ReadPreambleBytesSnafu)?;
681                Some(buf)
682            } else {
683                // fill the buffer and try to identify where the magic code is
684                let buf = reader.fill_buf().context(ReadPreambleBytesSnafu)?;
685                if buf.len() < 4 {
686                    return PrematureEndSnafu.fail().map_err(From::from);
687                }
688
689                if buf.len() >= 128 + 4 && &buf[128..132] == b"DICM" {
690                    let out: [u8; 128] = std::convert::TryInto::try_into(&buf[0..128])
691                        .expect("128 byte slice into array");
692                    reader.consume(128);
693                    Some(out)
694                } else if &buf[0..4] == b"DICM" {
695                    // assume that there is no preamble after all
696                    None
697                } else {
698                    // take the risk and insist on the first 128 bytes
699                    let mut out = [0; 128];
700                    reader
701                        .read_exact(&mut out)
702                        .context(ReadPreambleBytesSnafu)?;
703                    Some(out)
704                }
705            }
706        };
707        self.state = CollectorState::Preamble;
708        Ok(preamble)
709    }
710
711    /// Read a file meta table from the source,
712    /// retaining it in the reader for future reference.
713    ///
714    /// This method _must_ be called
715    /// whenever the source data is known to have a file meta information group.
716    /// Otherwise, it may fail to recognize the transfer syntax
717    /// and fail on the first data set reading request.
718    ///
719    /// If the file meta information has already been collected,
720    /// the previously saved file meta table is returned.
721    pub fn read_file_meta(&mut self) -> Result<&FileMetaTable> {
722        // check if we are in good position to read the FMI,
723        // or if we need to collect other things first
724
725        if self.state == CollectorState::Start {
726            // read preamble
727            self.read_preamble()?;
728        }
729
730        if self.state == CollectorState::Preamble {
731            let reader = self.source.raw_reader_mut();
732            self.file_meta = Some(FileMetaTable::from_reader(reader).context(BuildMetaTableSnafu)?);
733
734            self.state = CollectorState::FileMeta;
735        }
736
737        self.file_meta
738            .as_ref()
739            .context(IllegalStateMetaSnafu)
740            .map_err(From::from)
741    }
742
743    /// Take the file meta information group table saved in this collector,
744    /// if this information has already been read.
745    ///
746    /// This table will only be available
747    /// after first reading the file meta group
748    /// via [`read_file_meta`](Self::read_file_meta).
749    /// Moreover, main data set reading may be compromised
750    /// if the transfer syntax was not resolved first
751    /// by calling one of the data set reading methods beforehand
752    /// (which triggers a transfer syntax resolution).
753    ///
754    /// # Example
755    ///
756    /// ```no_run
757    /// # use dicom_object::{DicomCollector, FileMetaTable, InMemDicomObject};
758    /// let mut collector = DicomCollector::open_file("file.dcm")?;
759    ///
760    /// // read_file_meta() only returns a reference
761    /// let _: &FileMetaTable  = collector.read_file_meta()?;
762    /// // read some data from the main data set
763    /// let mut main_dataset = InMemDicomObject::new_empty();
764    /// collector.read_dataset_up_to_pixeldata(&mut main_dataset)?;
765    ///
766    /// // take the table out of the collector,
767    /// // as it is no longer needed
768    /// let file_meta: FileMetaTable = collector.take_file_meta()
769    ///     .expect("should have file meta information");
770    ///
771    /// // can still read more data afterwards
772    /// let mut fragment_data = Vec::new();
773    /// collector.read_next_fragment(&mut fragment_data)?;
774    /// # Result::<(), Box<dyn std::error::Error>>::Ok(())
775    /// ```
776    #[inline]
777    pub fn take_file_meta(&mut self) -> Option<FileMetaTable> {
778        self.file_meta.take()
779    }
780
781    /// Read a DICOM data set until it finds its end,
782    /// accumulating the elements into an in-memory object.
783    pub fn read_dataset_to_end(&mut self, to: &mut InMemDicomObject<D>) -> Result<()> {
784        let parser = if !self.source.has_parser() {
785            let ts = {
786                if self.ts_hint.is_none() {
787                    self.populate_ts_hint();
788                }
789                self.ts_hint.as_deref()
790            }
791            .context(GuessTransferSyntaxSnafu)?;
792            self.source.set_parser_with_ts(ts)?
793        } else {
794            self.source.parser()
795        };
796
797        Self::collect_to_object(
798            &mut self.state,
799            parser,
800            false,
801            None,
802            None,
803            to,
804            &self.dictionary,
805        )
806    }
807
808    /// Read a DICOM data set until it reaches the given stop tag
809    /// (excluding it) or finds the end of the data set,
810    /// accumulating the elements into an in-memory object.
811    pub fn read_dataset_up_to(
812        &mut self,
813        stop_tag: Tag,
814        to: &mut InMemDicomObject<D>,
815    ) -> Result<()> {
816        let parser = if !self.source.has_parser() {
817            let ts = {
818                if self.ts_hint.is_none() {
819                    self.populate_ts_hint();
820                }
821                self.ts_hint.as_deref()
822            }
823            .context(GuessTransferSyntaxSnafu)?;
824            self.source.set_parser_with_ts(ts)?
825        } else {
826            self.source.parser()
827        };
828
829        Self::collect_to_object(
830            &mut self.state,
831            parser,
832            false,
833            Some(stop_tag),
834            None,
835            to,
836            &self.dictionary,
837        )
838    }
839
840    /// Read a DICOM data set until it reaches the object's pixel data.
841    ///
842    /// This is equivalent to `collector.read_dataset_up_to(tags::PIXEL_DATA, to)`.
843    #[inline]
844    pub fn read_dataset_up_to_pixeldata(&mut self, to: &mut InMemDicomObject<D>) -> Result<()> {
845        self.read_dataset_up_to(dicom_dictionary_std::tags::PIXEL_DATA, to)
846    }
847
848    /// Read the DICOM data set until it reaches the pixel data
849    /// (if it has not done so yet)
850    /// and collects the next pixel data fragment,
851    /// appending the bytes into the given destination.
852    /// Returns the number of bytes of the fragment retrieved.
853    ///
854    /// If the data set contains native pixel data,
855    /// the entire value data in the _Pixel Data_ attribute
856    /// is interpreted as a single fragment.
857    ///
858    /// The basic offset table is treated as a fragment,
859    /// which means that the first call to `read_next_fragment`
860    /// on a DICOM object with encapsulated pixel data
861    /// will push the byte values of the basic offset table
862    /// in little endian.
863    /// To retrieve the offset table as a sequence of 32-bit length values,
864    /// use [`read_basic_offset_table`](Self::read_basic_offset_table)
865    /// before reading any fragment.
866    pub fn read_next_fragment(&mut self, to: &mut Vec<u8>) -> Result<Option<u32>> {
867        if self.state == CollectorState::Start || self.state == CollectorState::Preamble {
868            // read file meta information group
869            self.read_file_meta()?;
870        }
871
872        // initialize parser if necessary
873        if !self.source.has_parser() {
874            let ts = {
875                if self.ts_hint.is_none() {
876                    self.populate_ts_hint();
877                }
878                self.ts_hint.as_deref()
879            }
880            .context(GuessTransferSyntaxSnafu)?;
881            self.source.set_parser_with_ts(ts)?;
882        } else {
883            self.source.parser();
884        }
885
886        if self.state != CollectorState::InPixelData {
887            // skip until we reach the pixel data
888
889            self.skip_until(|token| {
890                match token {
891                    // catch either native pixel data
892                    LazyDataToken::ElementHeader(header)
893                        if header.tag == tags::PIXEL_DATA && header.length().is_defined() =>
894                    {
895                        true
896                    }
897                    // or start of pixel data sequencce
898                    LazyDataToken::PixelSequenceStart => true,
899                    _ => false,
900                }
901            })?;
902
903            self.state = CollectorState::InPixelData;
904        }
905
906        let parser = if !self.source.has_parser() {
907            let ts = {
908                if self.ts_hint.is_none() {
909                    self.populate_ts_hint();
910                }
911                self.ts_hint.as_deref()
912            }
913            .context(GuessTransferSyntaxSnafu)?;
914            self.source.set_parser_with_ts(ts)?
915        } else {
916            self.source.parser()
917        };
918
919        // proceed with fetching tokens,
920        // return the first fragment data found
921        while let Some(token) = parser.advance() {
922            match token.context(ReadTokenSnafu)? {
923                // native pixel data
924                LazyDataToken::LazyValue { header, decoder } => {
925                    debug_assert!(header.length().is_defined());
926                    let len = header.length().0;
927                    decoder.read_to_vec(len, to).context(ReadItemSnafu)?;
928                    return Ok(Some(len));
929                }
930                // fragment item data
931                LazyDataToken::LazyItemValue { len, decoder } => {
932                    decoder.read_to_vec(len, to).context(ReadItemSnafu)?;
933                    return Ok(Some(len));
934                }
935                // empty item
936                // (must be accounted for even though it yields no value token)
937                LazyDataToken::ItemStart { len: Length(0) } => return Ok(Some(0)),
938                _ => {
939                    // no-op
940                }
941            }
942        }
943
944        Ok(None)
945    }
946
947    /// Read the DICOM data set until it reaches the pixel data
948    /// (if it has not done so yet)
949    /// and collects the basic offset table.
950    ///
951    /// Returns the byte length of the basic offset table
952    /// on success.
953    /// Returns `Ok(None)` if the DICOM object has no pixel data
954    /// or has native pixel data,
955    /// in which case there is no basic offset table.
956    /// Returns an error if the collector has alread read too far
957    /// to obtain the basic offset table.
958    pub fn read_basic_offset_table(&mut self, to: &mut Vec<u32>) -> Result<Option<u32>> {
959        if self.state == CollectorState::InPixelData {
960            return IllegalStateInPixelSnafu.fail().map_err(From::from);
961        }
962
963        if self.state == CollectorState::Start || self.state == CollectorState::Preamble {
964            // read file meta information group
965            self.read_file_meta()?;
966        }
967
968        // initialize parser if necessary
969        if !self.source.has_parser() {
970            let ts = {
971                if self.ts_hint.is_none() {
972                    self.populate_ts_hint();
973                }
974                self.ts_hint.as_deref()
975            }
976            .context(GuessTransferSyntaxSnafu)?;
977            self.source.set_parser_with_ts(ts)?;
978        } else {
979            self.source.parser();
980        }
981
982        if self.state != CollectorState::InPixelData {
983            // skip until we reach the pixel data
984
985            self.skip_until(|token| {
986                match token {
987                    // catch either native pixel data
988                    LazyDataToken::ElementHeader(header)
989                        if header.tag == tags::PIXEL_DATA && header.length().is_defined() =>
990                    {
991                        true
992                    }
993                    // or start of pixel data sequencce
994                    LazyDataToken::PixelSequenceStart => true,
995                    _ => false,
996                }
997            })?;
998
999            self.state = CollectorState::InPixelData;
1000        }
1001
1002        let parser = if !self.source.has_parser() {
1003            let ts = {
1004                if self.ts_hint.is_none() {
1005                    self.populate_ts_hint();
1006                }
1007                self.ts_hint.as_deref()
1008            }
1009            .context(GuessTransferSyntaxSnafu)?;
1010            self.source.set_parser_with_ts(ts)?
1011        } else {
1012            self.source.parser()
1013        };
1014
1015        // proceed with fetching tokens,
1016        // return the first fragment data found
1017        while let Some(token) = parser.advance() {
1018            match token.context(ReadTokenSnafu)? {
1019                // native pixel data, no offset table
1020                LazyDataToken::LazyValue { .. } => {
1021                    return Ok(None);
1022                }
1023                // fragment item data
1024                LazyDataToken::LazyItemValue { len, decoder } => {
1025                    decoder.read_u32_to_vec(len, to).context(ReadItemSnafu)?;
1026                    return Ok(Some(len));
1027                }
1028                // empty item
1029                // (must be accounted for even though it yields no value token)
1030                LazyDataToken::ItemStart { len: Length(0) } => return Ok(Some(0)),
1031                _ => {
1032                    // no-op
1033                }
1034            }
1035        }
1036
1037        Ok(None)
1038    }
1039
1040    // --- private methods ---
1041
1042    #[inline]
1043    fn populate_ts_hint(&mut self) {
1044        if let Some(meta) = self.file_meta.as_ref() {
1045            self.ts_hint = Some(Cow::Owned(meta.transfer_syntax().to_string()));
1046        }
1047    }
1048
1049    fn skip_until(
1050        &mut self,
1051        mut pred: impl FnMut(
1052            &LazyDataToken<
1053                &mut StatefulDecoder<Box<dyn DecodeFrom<BufReader<S>> + 'static>, BufReader<S>>,
1054            >,
1055        ) -> bool,
1056    ) -> Result<bool> {
1057        let parser = self.source.parser();
1058        while let Some(token) = parser.advance() {
1059            let token = token.context(ReadTokenSnafu)?;
1060            if pred(&token) {
1061                return Ok(true);
1062            }
1063            // skip through values if necessary
1064            token.skip().context(ReadItemSnafu)?;
1065            self.state = CollectorState::InDataset;
1066            // continue
1067        }
1068
1069        Ok(false)
1070    }
1071
1072    // --- private helper functions ---
1073
1074    /// Collect DICOM data elements onto an in-memory DICOM object by consuming a data set parser.
1075    fn collect_to_object(
1076        state: &mut CollectorState,
1077        token_src: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1078        in_item: bool,
1079        read_until: Option<Tag>,
1080        read_to: Option<Tag>,
1081        to: &mut InMemDicomObject<D>,
1082        dict: &D,
1083    ) -> Result<()> {
1084        let mut elements = Vec::new();
1085        Self::collect_elements(
1086            state,
1087            token_src,
1088            in_item,
1089            read_until,
1090            read_to,
1091            &mut elements,
1092            dict,
1093        )?;
1094        to.extend(elements);
1095        Ok(())
1096    }
1097
1098    /// Collect DICOM data elements onto a vector by consuming a data set parser.
1099    fn collect_elements(
1100        state: &mut CollectorState,
1101        token_src: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1102        in_item: bool,
1103        read_until: Option<Tag>,
1104        read_to: Option<Tag>,
1105        to: &mut Vec<DataElement<InMemDicomObject<D>>>,
1106        dict: &D,
1107    ) -> Result<()> {
1108        // perform a structured parsing of incoming tokens
1109        while let Some(token) = token_src.peek().context(ReadTokenSnafu)? {
1110            let token = token.clone();
1111            let elem = match token {
1112                DataToken::PixelSequenceStart => {
1113                    // stop reading if reached `read_until` tag (exclusive)
1114                    if read_until
1115                        .map(|t| t <= Tag(0x7fe0, 0x0010))
1116                        .unwrap_or(false)
1117                    {
1118                        break;
1119                    }
1120                    // stop reading if exceeded `read_to` tag (inclusive)
1121                    if read_to.map(|t| t < Tag(0x7fe0, 0x0010)).unwrap_or(false) {
1122                        break;
1123                    }
1124                    *state = CollectorState::InPixelData;
1125                    token_src.advance();
1126                    let value = Self::build_encapsulated_data(&mut *token_src)?;
1127                    DataElement::new(Tag(0x7fe0, 0x0010), VR::OB, value)
1128                }
1129                DataToken::ElementHeader(header) => {
1130                    // stop reading if reached `read_until` tag (exclusive)
1131                    if read_until.map(|t| t <= header.tag).unwrap_or(false) {
1132                        break;
1133                    }
1134                    // stop reading if exceeded `read_to` tag (inclusive)
1135                    if read_to.map(|t| t < header.tag).unwrap_or(false) {
1136                        break;
1137                    }
1138
1139                    drop(token);
1140
1141                    *state = CollectorState::InDataset;
1142                    token_src.advance();
1143
1144                    // fetch respective value, place it in the output
1145                    let next_token = token_src.advance().context(MissingElementValueSnafu)?;
1146                    match next_token.context(ReadTokenSnafu)? {
1147                        token @ LazyDataToken::LazyValue { .. }
1148                        | token @ LazyDataToken::LazyItemValue { .. } => {
1149                            InMemElement::new_with_len(
1150                                header.tag,
1151                                header.vr,
1152                                header.len,
1153                                token
1154                                    .into_value()
1155                                    .context(CollectDataValueSnafu { tag: header.tag })?,
1156                            )
1157                        }
1158                        token => {
1159                            return UnexpectedTokenSnafu { token }.fail().map_err(From::from);
1160                        }
1161                    }
1162                }
1163                DataToken::SequenceStart { tag, len } => {
1164                    // stop reading if reached `read_until` tag (exclusive)
1165                    if read_until.map(|t| t <= tag).unwrap_or(false) {
1166                        break;
1167                    }
1168                    // stop reading if exceeded `read_to` tag (inclusive)
1169                    if read_to.map(|t| t < tag).unwrap_or(false) {
1170                        break;
1171                    }
1172                    *state = CollectorState::InDataset;
1173
1174                    token_src.advance();
1175
1176                    // delegate sequence building to another function
1177                    let mut items = C::new();
1178                    Self::collect_sequence(
1179                        &mut *state,
1180                        tag,
1181                        len,
1182                        &mut *token_src,
1183                        dict,
1184                        &mut items,
1185                    )?;
1186                    DataElement::new_with_len(
1187                        tag,
1188                        VR::SQ,
1189                        len,
1190                        DicomValue::new_sequence(items, len),
1191                    )
1192                }
1193                DataToken::ItemEnd if in_item => {
1194                    // end of item, leave now
1195                    token_src.advance();
1196                    return Ok(());
1197                }
1198                token => {
1199                    return UnexpectedDataTokenSnafu {
1200                        token: token.clone(),
1201                    }
1202                    .fail()
1203                    .map_err(From::from);
1204                }
1205            };
1206            to.push(elem);
1207        }
1208
1209        Ok(())
1210    }
1211
1212    /// Build an encapsulated pixel data by collecting all fragments into an
1213    /// in-memory DICOM value.
1214    fn build_encapsulated_data(
1215        dataset: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1216    ) -> Result<DicomValue<InMemDicomObject<D>, InMemFragment>> {
1217        // continue fetching tokens to retrieve:
1218        // - the offset table
1219        // - the various compressed fragments
1220
1221        let mut offset_table = None;
1222
1223        let mut fragments = C::new();
1224
1225        // whether to read the fragment as the basic offset table (true)
1226        // or as a pixel data fragment (false)
1227        let mut first = true;
1228
1229        while let Some(token) = dataset.advance() {
1230            let token = token.context(ReadTokenSnafu)?;
1231            match token {
1232                LazyDataToken::LazyItemValue { decoder, len } => {
1233                    if first {
1234                        let mut table = Vec::new();
1235                        decoder
1236                            .read_u32_to_vec(len, &mut table)
1237                            .context(ReadItemSnafu)?;
1238                        offset_table = Some(table);
1239                        first = false;
1240                    } else {
1241                        let mut data = Vec::new();
1242                        decoder.read_to_vec(len, &mut data).context(ReadItemSnafu)?;
1243                        fragments.push(data);
1244                    }
1245                }
1246                LazyDataToken::ItemEnd => {
1247                    // at the end of the first item ensure the presence of
1248                    // an empty offset_table here, so that the next items
1249                    // are seen as compressed fragments
1250                    if offset_table.is_none() {
1251                        offset_table = Some(Vec::new())
1252                    }
1253                }
1254                LazyDataToken::ItemStart { len: _ } => { /* no-op */ }
1255                LazyDataToken::SequenceEnd => {
1256                    // end of pixel data
1257                    break;
1258                }
1259                // the following variants are unexpected
1260                token @ LazyDataToken::ElementHeader(_)
1261                | token @ LazyDataToken::PixelSequenceStart
1262                | token @ LazyDataToken::SequenceStart { .. }
1263                | token @ LazyDataToken::LazyValue { .. }
1264                | token => {
1265                    return UnexpectedTokenSnafu { token }.fail().map_err(From::from);
1266                }
1267            }
1268        }
1269
1270        Ok(DicomValue::from(PixelFragmentSequence::new(
1271            offset_table.unwrap_or_default(),
1272            fragments,
1273        )))
1274    }
1275
1276    /// Build a DICOM sequence by consuming a data set parser.
1277    fn collect_sequence(
1278        state: &mut CollectorState,
1279        _tag: Tag,
1280        _len: Length,
1281        token_src: &mut LazyDataSetReader<DynStatefulDecoder<BufReader<S>>>,
1282        dict: &D,
1283        items: &mut C<InMemDicomObject<D>>,
1284    ) -> Result<()> {
1285        while let Some(token) = token_src.advance() {
1286            match token.context(ReadTokenSnafu)? {
1287                LazyDataToken::ItemStart { len: _ } => {
1288                    let mut obj = InMemDicomObject::new_empty_with_dict(dict.clone());
1289                    Self::collect_to_object(state, token_src, true, None, None, &mut obj, dict)?;
1290                    items.push(obj);
1291                }
1292                LazyDataToken::SequenceEnd => {
1293                    return Ok(());
1294                }
1295                token => return UnexpectedTokenSnafu { token }.fail().map_err(From::from),
1296            };
1297        }
1298
1299        // iterator fully consumed without a sequence delimiter
1300        PrematureEndSnafu.fail().map_err(From::from)
1301    }
1302}
1303
1304#[cfg(test)]
1305mod tests {
1306    use std::io::{BufReader, Write};
1307
1308    use dicom_core::{PrimitiveValue, prelude::*, value::DataSetSequence};
1309    use dicom_dictionary_std::{StandardDataDictionary, tags, uids};
1310    use dicom_encoding::TransferSyntaxIndex;
1311    use dicom_parser::dataset::read::OddLengthStrategy;
1312    use dicom_transfer_syntax_registry::TransferSyntaxRegistry;
1313
1314    use crate::{
1315        DicomCollectorOptions, FileMetaTable, FileMetaTableBuilder, InMemDicomObject,
1316        file::ReadPreamble,
1317    };
1318
1319    use super::DicomCollector;
1320
1321    /// read a plain data set without file meta group,
1322    /// by specifying the transfer syntax explicitly in the collector
1323    #[test]
1324    fn test_read_dataset_to_end_set_ts() {
1325        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1326            DataElement::new(
1327                tags::SOP_INSTANCE_UID,
1328                VR::UI,
1329                "2.25.51008724832548260562721775118239811861\0",
1330            ),
1331            DataElement::new(
1332                tags::SOP_CLASS_UID,
1333                VR::UI,
1334                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1335            ),
1336            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1337            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1338            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(64_u16)),
1339            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(64_u16)),
1340            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(8_u16)),
1341            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(8_u16)),
1342            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1343            DataElement::new(
1344                tags::PIXEL_DATA,
1345                VR::OB,
1346                PrimitiveValue::from(vec![0x55u8; 64 * 64]),
1347            ),
1348        ]);
1349
1350        let ts_expl_vr_le = TransferSyntaxRegistry
1351            .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1352            .unwrap();
1353
1354        let mut encoded = Vec::new();
1355        dataset1
1356            .write_dataset_with_ts(&mut encoded, ts_expl_vr_le)
1357            .unwrap();
1358
1359        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1360        let mut collector = DicomCollector::new_with_ts(reader, uids::EXPLICIT_VR_LITTLE_ENDIAN);
1361
1362        let mut dset = InMemDicomObject::new_empty();
1363        collector.read_dataset_to_end(&mut dset).unwrap();
1364
1365        assert_eq!(dset, dataset1);
1366    }
1367
1368    /// read a DICOM data set to the end,
1369    /// inferring the transfer syntax from the file meta group
1370    #[test]
1371    fn test_read_dataset_to_end_infer_from_meta() {
1372        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1373            DataElement::new(
1374                tags::SOP_INSTANCE_UID,
1375                VR::UI,
1376                "2.25.245029432991021387484564600987886994494",
1377            ),
1378            DataElement::new(
1379                tags::SOP_CLASS_UID,
1380                VR::UI,
1381                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1382            ),
1383            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1384            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1385            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(128_u16)),
1386            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(128_u16)),
1387            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(16_u16)),
1388            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(16_u16)),
1389            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(15_u16)),
1390            DataElement::new(
1391                tags::PIXEL_DATA,
1392                VR::OB,
1393                PrimitiveValue::from(vec![0x55u8; 128 * 128 * 2]),
1394            ),
1395        ]);
1396
1397        let file_dataset1 = dataset1
1398            .clone()
1399            .with_meta(FileMetaTableBuilder::new().transfer_syntax(uids::EXPLICIT_VR_LITTLE_ENDIAN))
1400            .unwrap();
1401
1402        // write FMI and dataset to the buffer
1403        let mut encoded = Vec::new();
1404        encoded.write_all(b"DICM").unwrap();
1405        file_dataset1.meta().write(&mut encoded).unwrap();
1406        file_dataset1
1407            .write_dataset_with_ts(
1408                &mut encoded,
1409                TransferSyntaxRegistry
1410                    .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1411                    .unwrap(),
1412            )
1413            .unwrap();
1414
1415        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1416        let mut collector = DicomCollector::new(reader);
1417
1418        let mut dset = InMemDicomObject::new_empty();
1419        let file_meta = collector.read_file_meta().unwrap();
1420        assert_eq!(file_meta.transfer_syntax(), uids::EXPLICIT_VR_LITTLE_ENDIAN,);
1421        collector.read_dataset_to_end(&mut dset).unwrap();
1422
1423        assert_eq!(dset, dataset1);
1424    }
1425
1426    /// read some data and then take off the file meta table from the collector
1427    #[test]
1428    fn test_take_file_meta() {
1429        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1430            DataElement::new(
1431                tags::SOP_INSTANCE_UID,
1432                VR::UI,
1433                "2.25.248821220596756482508841578490676982546",
1434            ),
1435            DataElement::new(
1436                tags::SOP_CLASS_UID,
1437                VR::UI,
1438                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1439            ),
1440            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1441            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1442            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(64_u16)),
1443            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(64_u16)),
1444            DataElement::new(tags::SAMPLES_PER_PIXEL, VR::US, PrimitiveValue::from(1_u16)),
1445            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(8_u16)),
1446            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(8_u16)),
1447            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1448            DataElement::new(
1449                tags::PIXEL_DATA,
1450                VR::OB,
1451                PrimitiveValue::from(vec![0x55u8; 64 * 64]),
1452            ),
1453        ]);
1454
1455        let file_dataset1 = dataset1
1456            .clone()
1457            .with_meta(FileMetaTableBuilder::new().transfer_syntax(uids::EXPLICIT_VR_LITTLE_ENDIAN))
1458            .unwrap();
1459
1460        // write FMI and dataset to the buffer
1461        let mut encoded = Vec::new();
1462        encoded.write_all(b"DICM").unwrap();
1463        file_dataset1.meta().write(&mut encoded).unwrap();
1464        file_dataset1
1465            .write_dataset_with_ts(
1466                &mut encoded,
1467                TransferSyntaxRegistry
1468                    .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1469                    .unwrap(),
1470            )
1471            .unwrap();
1472
1473        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1474        let mut collector = DicomCollector::new(reader);
1475
1476        // read_file_meta() only returns a reference
1477        let _: &FileMetaTable = collector.read_file_meta().unwrap();
1478        // read some data from the main data set
1479        let mut main_dataset = InMemDicomObject::new_empty();
1480        collector
1481            .read_dataset_up_to_pixeldata(&mut main_dataset)
1482            .unwrap();
1483
1484        // can reliably take the table out of the collector
1485        let file_meta: FileMetaTable = collector
1486            .take_file_meta()
1487            .expect("should have file meta info");
1488        assert_eq!(
1489            file_meta.media_storage_sop_instance_uid(),
1490            "2.25.248821220596756482508841578490676982546"
1491        );
1492
1493        // can still read more data afterwards
1494        let mut fragment_data = Vec::new();
1495        let bytes_read = collector.read_next_fragment(&mut fragment_data).unwrap();
1496        assert_eq!(bytes_read, Some(64 * 64));
1497        assert_eq!(fragment_data.len(), bytes_read.unwrap() as usize);
1498    }
1499
1500    /// read a DICOM data set with nested sequences
1501    #[test]
1502    fn test_read_dataset_nested() {
1503        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1504            DataElement::new(
1505                tags::SOP_INSTANCE_UID,
1506                VR::UI,
1507                "2.25.245029432991021387484564600987886994494",
1508            ),
1509            DataElement::new(
1510                tags::SOP_CLASS_UID,
1511                VR::UI,
1512                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1513            ),
1514            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1515            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1516            DataElement::new(
1517                tags::ANATOMIC_REGION_SEQUENCE,
1518                VR::SQ,
1519                DataSetSequence::from(vec![InMemDicomObject::from_element_iter([
1520                    DataElement::new(tags::CODE_VALUE, VR::SH, "51185008"),
1521                    DataElement::new(tags::CODING_SCHEME_DESIGNATOR, VR::SH, "SCT"),
1522                    DataElement::new(tags::CODE_MEANING, VR::LO, "chest"),
1523                    DataElement::new(
1524                        tags::ANATOMIC_REGION_MODIFIER_SEQUENCE,
1525                        VR::SQ,
1526                        DataSetSequence::from(vec![InMemDicomObject::from_element_iter([
1527                            DataElement::new(tags::CODE_VALUE, VR::SH, "302551006"),
1528                            DataElement::new(tags::CODING_SCHEME_DESIGNATOR, VR::SH, "SCT"),
1529                            DataElement::new(tags::CODE_MEANING, VR::LO, "entire thorax "),
1530                        ])]),
1531                    ),
1532                ])]),
1533            ),
1534            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(128_u16)),
1535            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(128_u16)),
1536            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(16_u16)),
1537            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(16_u16)),
1538            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1539            DataElement::new(
1540                tags::PIXEL_DATA,
1541                VR::OB,
1542                PrimitiveValue::from(vec![0x55_u8; 128 * 128]),
1543            ),
1544        ]);
1545
1546        let ts_expl_vr_le = TransferSyntaxRegistry
1547            .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1548            .unwrap();
1549
1550        let mut encoded = Vec::new();
1551        dataset1
1552            .write_dataset_with_ts(&mut encoded, ts_expl_vr_le)
1553            .unwrap();
1554
1555        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1556
1557        let mut collector = DicomCollector::new_with_ts(reader, uids::EXPLICIT_VR_LITTLE_ENDIAN);
1558
1559        let mut dset = InMemDicomObject::new_empty();
1560        collector.read_dataset_to_end(&mut dset).unwrap();
1561
1562        // inspect some values using the attribute sequence API
1563        let v = dset
1564            .value_at((tags::ANATOMIC_REGION_SEQUENCE, tags::CODE_VALUE))
1565            .unwrap()
1566            .to_str()
1567            .unwrap();
1568        assert_eq!(v, "51185008");
1569
1570        let v = dset
1571            .value_at((
1572                tags::ANATOMIC_REGION_SEQUENCE,
1573                tags::ANATOMIC_REGION_MODIFIER_SEQUENCE,
1574                tags::CODE_MEANING,
1575            ))
1576            .unwrap()
1577            .to_str()
1578            .unwrap();
1579        assert_eq!(v, "entire thorax");
1580    }
1581
1582    /// read a DICOM data set in two chunks
1583    #[test]
1584    fn test_read_dataset_two_parts() {
1585        let dataset1 = InMemDicomObject::<StandardDataDictionary>::from_element_iter([
1586            DataElement::new(
1587                tags::SOP_INSTANCE_UID,
1588                VR::UI,
1589                "2.25.245029432991021387484564600987886994494",
1590            ),
1591            DataElement::new(
1592                tags::SOP_CLASS_UID,
1593                VR::UI,
1594                uids::NUCLEAR_MEDICINE_IMAGE_STORAGE,
1595            ),
1596            DataElement::new(tags::PATIENT_NAME, VR::PN, "Doe^John"),
1597            DataElement::new(tags::STUDY_DESCRIPTION, VR::LO, "Test study"),
1598            DataElement::new(tags::ROWS, VR::US, PrimitiveValue::from(128_u16)),
1599            DataElement::new(tags::COLUMNS, VR::US, PrimitiveValue::from(128_u16)),
1600            DataElement::new(tags::BITS_ALLOCATED, VR::US, PrimitiveValue::from(16_u16)),
1601            DataElement::new(tags::BITS_STORED, VR::US, PrimitiveValue::from(16_u16)),
1602            DataElement::new(tags::HIGH_BIT, VR::US, PrimitiveValue::from(7_u16)),
1603            DataElement::new(
1604                tags::PIXEL_DATA,
1605                VR::OB,
1606                PrimitiveValue::from(vec![0x55_u8; 128 * 128]),
1607            ),
1608        ]);
1609
1610        let ts_expl_vr_le = TransferSyntaxRegistry
1611            .get(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1612            .unwrap();
1613
1614        let mut encoded = Vec::new();
1615        dataset1
1616            .write_dataset_with_ts(&mut encoded, ts_expl_vr_le)
1617            .unwrap();
1618
1619        let reader = BufReader::new(std::io::Cursor::new(&encoded));
1620
1621        let mut collector = DicomCollectorOptions::new()
1622            .expected_ts(uids::EXPLICIT_VR_LITTLE_ENDIAN)
1623            .read_preamble(ReadPreamble::Never)
1624            .odd_length_strategy(OddLengthStrategy::Fail)
1625            .from_reader(reader);
1626
1627        // read one part of the data set
1628        let mut dset1 = InMemDicomObject::new_empty();
1629
1630        collector
1631            .read_dataset_up_to(tags::ROWS, &mut dset1)
1632            .unwrap();
1633        // it has patient name and study description
1634        assert_eq!(
1635            dset1.get(tags::PATIENT_NAME).unwrap().to_str().unwrap(),
1636            "Doe^John"
1637        );
1638        assert_eq!(
1639            dset1
1640                .get(tags::STUDY_DESCRIPTION)
1641                .unwrap()
1642                .to_str()
1643                .unwrap(),
1644            "Test study"
1645        );
1646        // it does not have rows, or pixel data
1647        assert!(dset1.get(tags::ROWS).is_none());
1648        assert!(dset1.get(tags::PIXEL_DATA).is_none());
1649
1650        // read part two of the data set
1651        let mut dset2 = InMemDicomObject::new_empty();
1652
1653        collector.read_dataset_to_end(&mut dset2).unwrap();
1654
1655        // it has rows and pixel data
1656        assert_eq!(dset2.get(tags::ROWS).unwrap().to_int::<u16>().unwrap(), 128);
1657        assert_eq!(
1658            dset2.get(tags::COLUMNS).unwrap().to_int::<u16>().unwrap(),
1659            128
1660        );
1661        assert_eq!(
1662            &*dset2.get(tags::PIXEL_DATA).unwrap().to_bytes().unwrap(),
1663            &[0x55_u8; 128 * 128]
1664        );
1665
1666        // it does not have the other parts
1667        assert!(dset2.get(tags::SOP_INSTANCE_UID).is_none());
1668        assert!(dset2.get(tags::PATIENT_NAME).is_none());
1669        assert!(dset2.get(tags::STUDY_DESCRIPTION).is_none());
1670    }
1671
1672    /// read the fragments of a DICOM file one by one
1673    #[test]
1674    fn test_read_fragments() {
1675        let filename = dicom_test_files::path("WG04/JPLY/SC1_JPLY").unwrap();
1676
1677        let mut collector = DicomCollector::open_file(filename).unwrap();
1678
1679        let fmi = collector.read_file_meta().unwrap();
1680
1681        assert_eq!(fmi.transfer_syntax(), uids::JPEG_EXTENDED12_BIT);
1682
1683        // collect the basic offset table as a regular fragment
1684
1685        let mut bot = Vec::new();
1686        let len = collector
1687            .read_next_fragment(&mut bot)
1688            .expect("should read basic offset table successfully")
1689            .expect("should have basic offset table fragment");
1690        assert_eq!(len, 0);
1691        assert!(bot.is_empty());
1692
1693        // collect the other fragments
1694
1695        let mut fragment = Vec::with_capacity(131_072);
1696
1697        let len = collector
1698            .read_next_fragment(&mut fragment)
1699            .expect("should read fragment successfully")
1700            .expect("should have fragment #0");
1701        assert_eq!(len, 65_536);
1702
1703        // inspect a few bytes just to be sure
1704        assert_eq!(&fragment[0..4], &[0xFF, 0xD8, 0xFF, 0xC1]);
1705
1706        // read one more
1707
1708        let len = collector
1709            .read_next_fragment(&mut fragment)
1710            .expect("should read fragment successfully")
1711            .expect("should have fragment #1");
1712        assert_eq!(len, 65_536);
1713
1714        // accumulates
1715        assert_eq!(fragment.len(), 131_072);
1716
1717        // inspect a few bytes
1718        assert_eq!(&fragment[0..4], &[0xFF, 0xD8, 0xFF, 0xC1]);
1719        assert_eq!(&fragment[65_536..65_540], &[0x04, 0x6C, 0x3B, 0x60]);
1720
1721        // check that it can fetch the remaining fragments
1722        let mut remaining: i32 = 10; // 12 fragments
1723
1724        fragment.clear();
1725
1726        while let Some(_len) = collector
1727            .read_next_fragment(&mut fragment)
1728            .expect("should have read fragment successfully")
1729        {
1730            remaining -= 1;
1731            assert!(!fragment.is_empty());
1732            fragment.clear();
1733        }
1734
1735        assert_eq!(remaining, 0);
1736    }
1737
1738    /// read the fragments of a DICOM file after reading the basic offset table
1739    #[test]
1740    fn test_read_bot_and_fragments() {
1741        let filename = dicom_test_files::path("pydicom/SC_rgb_rle_2frame.dcm").unwrap();
1742
1743        let mut collector = DicomCollector::open_file(filename).unwrap();
1744
1745        let fmi = collector.read_file_meta().unwrap();
1746
1747        assert_eq!(fmi.transfer_syntax(), uids::RLE_LOSSLESS);
1748
1749        // collect the basic offset table
1750        let mut bot = Vec::new();
1751        let len = collector
1752            .read_basic_offset_table(&mut bot)
1753            .expect("should read basic offset table successfully")
1754            .expect("should have basic offset table fragment");
1755        assert_eq!(len, 8);
1756        assert_eq!(&bot, &[0x0000, 0x02A0]);
1757
1758        // can't read the basic offset table twice
1759        assert!(matches!(
1760            collector.read_basic_offset_table(&mut bot),
1761            Err(super::Error(super::InnerError::IllegalStateInPixel { .. })),
1762        ));
1763
1764        // collect the other fragments
1765
1766        let mut fragment = Vec::with_capacity(2048);
1767
1768        let len = collector
1769            .read_next_fragment(&mut fragment)
1770            .expect("should read fragment successfully")
1771            .expect("should have fragment #0");
1772        assert_eq!(len, 664);
1773
1774        // inspect a few bytes just to be sure
1775        assert_eq!(&fragment[0..5], &[0x03, 0x00, 0x00, 0x00, 0x40]);
1776
1777        // read one more
1778
1779        let len = collector
1780            .read_next_fragment(&mut fragment)
1781            .expect("should read fragment successfully")
1782            .expect("should have fragment #1");
1783        assert_eq!(len, 664);
1784
1785        // accumulates
1786        assert_eq!(fragment.len(), 664 + 664);
1787
1788        // inspect a few bytes
1789        assert_eq!(&fragment[0..5], &[0x03, 0x00, 0x00, 0x00, 0x40]);
1790        assert_eq!(&fragment[664 + 659..], &[0x00, 0x9D, 0x00, 0x9D, 0x00]);
1791
1792        // no more fragments
1793        assert!(
1794            collector
1795                .read_next_fragment(&mut fragment)
1796                .expect("attempt to read the next fragment should not have failed")
1797                .is_none()
1798        );
1799    }
1800
1801    // test loading portions of a DICOM in steps with the collector API
1802    #[test]
1803    fn test_lazy_dicom_read() {
1804        let file_path_buf = dicom_test_files::path("WG04/J2KR/MG1_J2KR")
1805            .expect("should be able to retrieve test file");
1806
1807        let filename = file_path_buf
1808            .to_str()
1809            .expect("should be able to retrieve test file");
1810
1811        // instantiate collector and read fmi
1812        let mut collector = DicomCollector::open_file(filename)
1813            .expect("should be able to open the test file with the collector");
1814        let _fmi = collector.read_file_meta();
1815
1816        // read the dicom up to the pixel data
1817        let mut obj = InMemDicomObject::new_empty();
1818        collector
1819            .read_dataset_up_to_pixeldata(&mut obj)
1820            .expect("should be able to read up to the PixelData element");
1821
1822        // read the rest of the dicom
1823        collector
1824            .read_dataset_to_end(&mut obj)
1825            .expect("should be able to read the rest of the DICOM");
1826    }
1827
1828    /// read the full DICOM file,
1829    /// checking that everything is in it as expected
1830    #[test]
1831    fn test_read_to_end() {
1832        let filename = dicom_test_files::path("pydicom/SC_rgb_rle_2frame.dcm").unwrap();
1833
1834        let mut collector = DicomCollector::open_file(filename).unwrap();
1835
1836        let fmi = collector
1837            .read_file_meta()
1838            .expect("should read file meta info successfully");
1839
1840        assert_eq!(fmi.transfer_syntax(), uids::RLE_LOSSLESS);
1841
1842        let mut dset = InMemDicomObject::new_empty();
1843        collector
1844            .read_dataset_to_end(&mut dset)
1845            .expect("should read dataset successfully");
1846
1847        // inspect a few attributes
1848        assert_eq!(
1849            dset.get(tags::SPECIFIC_CHARACTER_SET)
1850                .unwrap()
1851                .to_str()
1852                .unwrap(),
1853            "ISO_IR 192"
1854        );
1855        assert_eq!(dset.get(tags::MODALITY).unwrap().to_str().unwrap(), "OT");
1856        assert_eq!(dset.get(tags::PATIENT_ID).unwrap().to_str().unwrap(), "ID1");
1857        assert_eq!(
1858            dset.get(tags::PHOTOMETRIC_INTERPRETATION)
1859                .unwrap()
1860                .to_str()
1861                .unwrap(),
1862            "RGB"
1863        );
1864
1865        assert_eq!(
1866            dset.get(tags::NUMBER_OF_FRAMES)
1867                .unwrap()
1868                .to_int::<u32>()
1869                .unwrap(),
1870            2
1871        );
1872
1873        // inspect the basic offset table
1874        let pixel_data = dset
1875            .get(tags::PIXEL_DATA)
1876            .expect("Should have pixel data")
1877            .value();
1878        let DicomValue::PixelSequence(seq) = pixel_data else {
1879            panic!("Expected encapsulated pixel data");
1880        };
1881        let bot = seq.offset_table();
1882        assert_eq!(bot.len(), 2);
1883        assert_eq!(&bot, &[0x0000, 0x02A0]);
1884
1885        // inspect the other fragments
1886
1887        let fragments = seq.fragments();
1888
1889        assert_eq!(fragments.len(), 2);
1890        assert_eq!(fragments[0].len(), 664);
1891
1892        // inspect a few bytes just to be sure
1893        assert_eq!(&fragments[0][0..5], &[0x03, 0x00, 0x00, 0x00, 0x40]);
1894        assert_eq!(&fragments[1][0..5], &[0x03, 0x00, 0x00, 0x00, 0x40]);
1895    }
1896}