Skip to main content

dicom_encoding/decode/
adaptive_le.rs

//! Adaptive VR Little Endian transfer syntax implementation.
//!
//! This decoder handles non-conformant DICOM files that declare
//! Explicit VR Little Endian in their transfer syntax but actually
//! encode the dataset in Implicit VR. On the first non-delimiter
//! element it decodes (any group other than `FFFE`; the decoder itself
//! does not skip file meta group elements), it probes the bytes after
//! the tag to determine whether they form a valid VR code that the
//! data dictionary does not contradict. If they do,
//! it locks to explicit VR for the rest of the file; if not,
//! it switches to implicit VR.
10
11use crate::decode::basic::LittleEndianBasicDecoder;
12use crate::decode::{
13    BadSequenceHeaderSnafu, BasicDecode, Decode, DecodeFrom, ReadHeaderTagSnafu,
14    ReadItemHeaderSnafu, ReadItemLengthSnafu, ReadLengthSnafu, ReadReservedSnafu, ReadTagSnafu,
15    ReadVrSnafu, Result,
16};
17use byteordered::byteorder::{ByteOrder, LittleEndian};
18use dicom_core::dictionary::{DataDictionary, DataDictionaryEntry, VirtualVr};
19use dicom_core::header::{DataElementHeader, Length, SequenceItemHeader};
20use dicom_core::{Tag, VR};
21use dicom_dictionary_std::StandardDataDictionary;
22use snafu::ResultExt;
23use std::cell::Cell;
24use std::fmt;
25use std::io::Read;
26
27/// An AdaptiveVRLittleEndianDecoder which uses the standard data dictionary.
28pub type StandardAdaptiveVRLittleEndianDecoder =
29    AdaptiveVRLittleEndianDecoder<StandardDataDictionary>;
30
/// The VR encoding which the decoder has committed to, if any.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum VrState {
    /// No decision was made yet:
    /// the next non-delimiter element header will be probed.
    Unknown,
    /// Every following element header is decoded as explicit VR.
    Explicit,
    /// Every following element header is decoded as implicit VR.
    Implicit,
}
40
41/// A data element decoder for Little Endian data that auto-detects
42/// whether the dataset uses explicit or implicit VR encoding.
43///
44/// This is intended for non-conformant files that declare Explicit VR LE
45/// in the transfer syntax but actually contain Implicit VR data.
46/// On the first non-meta element, the decoder probes the two bytes
47/// following the tag: if they form a recognized VR code, it proceeds
48/// as explicit VR; otherwise it falls back to implicit VR.
49pub struct AdaptiveVRLittleEndianDecoder<D> {
50    dict: D,
51    basic: LittleEndianBasicDecoder,
52    state: Cell<VrState>,
53}
54
55impl<D> fmt::Debug for AdaptiveVRLittleEndianDecoder<D> {
56    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
57        f.debug_struct("AdaptiveVRLittleEndianDecoder")
58            .field("dict", &"«omitted»")
59            .field("basic", &self.basic)
60            .field("state", &self.state)
61            .finish()
62    }
63}
64
65impl AdaptiveVRLittleEndianDecoder<StandardDataDictionary> {
66    /// Retrieve this decoder using the standard data dictionary.
67    pub fn with_std_dict() -> Self {
68        AdaptiveVRLittleEndianDecoder {
69            dict: StandardDataDictionary,
70            basic: LittleEndianBasicDecoder,
71            state: Cell::new(VrState::Unknown),
72        }
73    }
74
75    /// Retrieve this decoder using the standard data dictionary.
76    pub fn new() -> Self {
77        Self::with_std_dict()
78    }
79}
80
81impl Default for AdaptiveVRLittleEndianDecoder<StandardDataDictionary> {
82    fn default() -> Self {
83        Self::with_std_dict()
84    }
85}
86
87impl<D> AdaptiveVRLittleEndianDecoder<D>
88where
89    D: DataDictionary,
90{
91    /// Retrieve this decoder using a custom data dictionary.
92    pub fn with_dict(dictionary: D) -> Self {
93        AdaptiveVRLittleEndianDecoder {
94            dict: dictionary,
95            basic: LittleEndianBasicDecoder,
96            state: Cell::new(VrState::Unknown),
97        }
98    }
99
100    /// Resolve VR for implicit mode using the data dictionary.
101    fn resolve_vr(&self, tag: Tag) -> VR {
102        if tag == Tag(0x7FE0, 0x0010) || (tag.0 >> 8 == 0x60 && tag.1 == 0x3000) {
103            VR::OW
104        } else {
105            self.dict
106                .by_tag(tag)
107                .map(|entry| entry.vr().relaxed())
108                .unwrap_or(VR::UN)
109        }
110    }
111}
112
113/// Check whether a probed VR is compatible with a dictionary VirtualVr.
114/// VirtualVr variants like Xs and Ox allow multiple concrete VRs.
115fn vr_compatible_with_virtual(probed: VR, dict_vr: VirtualVr) -> bool {
116    match dict_vr {
117        VirtualVr::Exact(vr) => probed == vr,
118        VirtualVr::Xs => matches!(probed, VR::US | VR::SS),
119        VirtualVr::Ox => matches!(probed, VR::OB | VR::OW),
120        VirtualVr::Px => matches!(probed, VR::OB | VR::OW),
121        VirtualVr::Lt => matches!(probed, VR::US | VR::OW),
122        // unknown variants: treat as incompatible to avoid
123        // false-positive explicit detection
124        _ => false,
125    }
126}
127
128impl<D> Decode for AdaptiveVRLittleEndianDecoder<D>
129where
130    D: DataDictionary,
131{
132    fn decode_header<S>(&self, mut source: &mut S) -> Result<(DataElementHeader, usize)>
133    where
134        S: ?Sized + Read,
135    {
136        // retrieve tag
137        let Tag(group, element) = self
138            .basic
139            .decode_tag(&mut source)
140            .context(ReadHeaderTagSnafu)?;
141
142        let mut buf = [0u8; 4];
143
144        // item delimiters never have VR or reserved fields
145        if group == 0xFFFE {
146            source.read_exact(&mut buf).context(ReadItemLengthSnafu)?;
147            let len = LittleEndian::read_u32(&buf);
148            return Ok((
149                DataElementHeader::new((group, element), VR::UN, Length(len)),
150                8,
151            ));
152        }
153
154        let tag = Tag(group, element);
155        let state = self.state.get();
156
157        match state {
158            VrState::Explicit => decode_explicit_header(&mut source, tag, &mut buf),
159            VrState::Implicit => {
160                let (vr, len, bytes) = decode_implicit_length(&mut source, tag, self, &mut buf)?;
161                Ok((DataElementHeader::new(tag, vr, Length(len)), bytes))
162            }
163            VrState::Unknown => {
164                // Probe: read the 2 bytes after the tag.
165                // If they form a valid VR, we're in explicit mode.
166                // Otherwise, they're the first half of a 4-byte length.
167                source.read_exact(&mut buf[0..2]).context(ReadVrSnafu)?;
168
169                if let Some(vr) = VR::from_binary([buf[0], buf[1]]) {
170                    // Cross-check against the data dictionary:
171                    // if the dictionary knows this tag and the probed VR
172                    // is incompatible, the bytes are actually
173                    // the start of a 4-byte implicit length.
174                    let dict_vvr = self.dict.by_tag(tag).map(|entry| entry.vr());
175                    if dict_vvr.is_some_and(|vvr| !vr_compatible_with_virtual(vr, vvr)) {
176                        self.state.set(VrState::Implicit);
177                        source.read_exact(&mut buf[2..4]).context(ReadLengthSnafu)?;
178                        let len = LittleEndian::read_u32(&buf);
179                        let resolved = self.resolve_vr(tag);
180                        return Ok((DataElementHeader::new(tag, resolved, Length(len)), 8));
181                    }
182
183                    self.state.set(VrState::Explicit);
184                    let (len, bytes_read) = decode_explicit_length(&mut source, vr, &mut buf)?;
185                    Ok((DataElementHeader::new(tag, vr, Length(len)), bytes_read))
186                } else {
187                    self.state.set(VrState::Implicit);
188                    // The 2 bytes we read are the low half of the 4-byte length.
189                    // Read the remaining 2 bytes.
190                    source.read_exact(&mut buf[2..4]).context(ReadLengthSnafu)?;
191                    let len = LittleEndian::read_u32(&buf);
192                    let vr = self.resolve_vr(tag);
193                    Ok((DataElementHeader::new(tag, vr, Length(len)), 8))
194                }
195            }
196        }
197    }
198
199    fn decode_item_header<S>(&self, source: &mut S) -> Result<SequenceItemHeader>
200    where
201        S: ?Sized + Read,
202    {
203        // item headers are the same regardless of VR mode
204        let mut buf = [0u8; 8];
205        source.read_exact(&mut buf).context(ReadItemHeaderSnafu)?;
206        let group = LittleEndian::read_u16(&buf[0..2]);
207        let element = LittleEndian::read_u16(&buf[2..4]);
208        let len = LittleEndian::read_u32(&buf[4..8]);
209        SequenceItemHeader::new((group, element), Length(len)).context(BadSequenceHeaderSnafu)
210    }
211
212    #[inline]
213    fn decode_tag<S>(&self, source: &mut S) -> Result<Tag>
214    where
215        S: ?Sized + Read,
216    {
217        self.basic.decode_tag(source).context(ReadTagSnafu)
218    }
219}
220
221/// Decode an explicit VR element header after the tag has been read.
222fn decode_explicit_header<S>(
223    source: &mut S,
224    tag: Tag,
225    buf: &mut [u8; 4],
226) -> Result<(DataElementHeader, usize)>
227where
228    S: ?Sized + Read,
229{
230    source.read_exact(&mut buf[0..2]).context(ReadVrSnafu)?;
231    let vr = VR::from_binary([buf[0], buf[1]]).unwrap_or(VR::UN);
232    let (len, bytes_read) = decode_explicit_length(source, vr, buf)?;
233    Ok((DataElementHeader::new(tag, vr, Length(len)), bytes_read))
234}
235
236/// Read the length field for an explicit VR element.
237/// Returns (length, total_bytes_read_including_tag_and_vr).
238fn decode_explicit_length<S>(source: &mut S, vr: VR, buf: &mut [u8; 4]) -> Result<(u32, usize)>
239where
240    S: ?Sized + Read,
241{
242    match vr {
243        VR::AE
244        | VR::AS
245        | VR::AT
246        | VR::CS
247        | VR::DA
248        | VR::DS
249        | VR::DT
250        | VR::FL
251        | VR::FD
252        | VR::IS
253        | VR::LO
254        | VR::LT
255        | VR::PN
256        | VR::SH
257        | VR::SL
258        | VR::SS
259        | VR::ST
260        | VR::TM
261        | VR::UI
262        | VR::UL
263        | VR::US => {
264            source.read_exact(&mut buf[0..2]).context(ReadLengthSnafu)?;
265            Ok((u32::from(LittleEndian::read_u16(&buf[0..2])), 8))
266        }
267        _ => {
268            source
269                .read_exact(&mut buf[0..2])
270                .context(ReadReservedSnafu)?;
271            source.read_exact(buf).context(ReadLengthSnafu)?;
272            Ok((LittleEndian::read_u32(buf), 12))
273        }
274    }
275}
276
277/// Read the length field for an implicit VR element (4 bytes)
278/// and resolve VR from the dictionary.
279fn decode_implicit_length<S, D>(
280    source: &mut S,
281    tag: Tag,
282    dec: &AdaptiveVRLittleEndianDecoder<D>,
283    buf: &mut [u8; 4],
284) -> Result<(VR, u32, usize)>
285where
286    S: ?Sized + Read,
287    D: DataDictionary,
288{
289    source.read_exact(buf).context(ReadLengthSnafu)?;
290    let len = LittleEndian::read_u32(buf);
291    let vr = dec.resolve_vr(tag);
292    Ok((vr, len, 8))
293}
294
295impl<S: ?Sized, D> DecodeFrom<S> for AdaptiveVRLittleEndianDecoder<D>
296where
297    S: Read,
298    D: DataDictionary,
299{
300    #[inline]
301    fn decode_header(&self, source: &mut S) -> Result<(DataElementHeader, usize)> {
302        Decode::decode_header(self, source)
303    }
304
305    #[inline]
306    fn decode_item_header(&self, source: &mut S) -> Result<SequenceItemHeader> {
307        Decode::decode_item_header(self, source)
308    }
309
310    #[inline]
311    fn decode_tag(&self, source: &mut S) -> Result<Tag> {
312        Decode::decode_tag(self, source)
313    }
314}
315
#[cfg(test)]
mod tests {
    use super::AdaptiveVRLittleEndianDecoder;
    use crate::decode::Decode;
    use dicom_core::dictionary::stub::StubDataDictionary;
    use dicom_core::header::{HasLength, Header, Length};
    use dicom_core::{Tag, VR};
    use std::io::{Cursor, Read, Seek, SeekFrom};

    /// A dictionary without any entries.
    const DICT: &StubDataDictionary = &StubDataDictionary;

    // Two elements in explicit VR,
    // with the same structure as in the explicit_le tests.
    //   (0002,0002) Media Storage SOP Class UID
    //     VR: UI, Length: 26, Value: "1.2.840.10008.5.1.4.1.1.1\0"
    //   (0002,0010) Transfer Syntax UID
    //     VR: UI, Length: 20, Value: "1.2.840.10008.1.2.1\0"
    #[rustfmt::skip]
    const RAW_EXPLICIT: &[u8] = &[
        0x02, 0x00, 0x02, 0x00,     // tag (0002,0002)
            b'U', b'I',             // VR
            0x1A, 0x00,             // length: 26
            0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e,
            0x31, 0x30, 0x30, 0x30, 0x38, 0x2e, 0x35, 0x2e,
            0x31, 0x2e, 0x34, 0x2e, 0x31, 0x2e, 0x31, 0x2e,
            0x31, 0x00,
        0x02, 0x00, 0x10, 0x00,     // tag (0002,0010)
            b'U', b'I',             // VR
            0x14, 0x00,             // length: 20
            0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e,
            0x31, 0x30, 0x30, 0x30, 0x38, 0x2e, 0x31, 0x2e,
            0x32, 0x2e, 0x31, 0x00,
    ];

    // The same two elements in implicit VR:
    // tag + 4-byte length, without any VR bytes.
    //   (0002,0002) Media Storage SOP Class UID
    //     Length: 26, Value: "1.2.840.10008.5.1.4.1.1.1\0"
    //   (0002,0010) Transfer Syntax UID
    //     Length: 20, Value: "1.2.840.10008.1.2.1\0"
    #[rustfmt::skip]
    const RAW_IMPLICIT: &[u8] = &[
        0x02, 0x00, 0x02, 0x00,     // tag (0002,0002)
            0x1A, 0x00, 0x00, 0x00, // length: 26
            0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e,
            0x31, 0x30, 0x30, 0x30, 0x38, 0x2e, 0x35, 0x2e,
            0x31, 0x2e, 0x34, 0x2e, 0x31, 0x2e, 0x31, 0x2e,
            0x31, 0x00,
        0x02, 0x00, 0x10, 0x00,     // tag (0002,0010)
            0x14, 0x00, 0x00, 0x00, // length: 20
            0x31, 0x2e, 0x32, 0x2e, 0x38, 0x34, 0x30, 0x2e,
            0x31, 0x30, 0x30, 0x30, 0x38, 0x2e, 0x31, 0x2e,
            0x32, 0x2e, 0x31, 0x00,
    ];

    // Items and delimiters, which must decode whatever the VR state is.
    //   (FFFE,E000) Item, Length: 0xFFFFFFFF
    //   (FFFE,E00D) Item Delimitation, Length: 0
    //   (FFFE,E0DD) Sequence Delimitation, Length: 0
    #[rustfmt::skip]
    const RAW_DELIMITERS: &[u8] = &[
        0xFE, 0xFF, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFE, 0xFF, 0x0D, 0xE0, 0x00, 0x00, 0x00, 0x00,
        0xFE, 0xFF, 0xDD, 0xE0, 0x00, 0x00, 0x00, 0x00,
    ];

    // An implicit VR element in which the low bytes of the length
    // happen to spell a valid VR code ("UN" = 0x55 0x4E, length = 20053).
    // The dictionary cross-check is expected to notice the mismatch
    // (Modality is CS, not UN) and decode the element as implicit VR.
    #[rustfmt::skip]
    const RAW_IMPLICIT_VR_COLLISION: &[u8] = &[
        // (0008,0060) Modality, implicit VR, length 20053 (0x00004E55)
        0x08, 0x00, 0x60, 0x00,
            0x55, 0x4E, 0x00, 0x00, // low bytes of length: "UN"
            // (no value bytes, only the header gets parsed)
    ];

    // An explicit VR element followed by a sequence delimiter.
    // Checks that the state is locked after the first probe
    // and that delimiters are still decoded afterwards.
    #[rustfmt::skip]
    const RAW_EXPLICIT_THEN_DELIMITER: &[u8] = &[
        // (0008,0060) Modality, VR: CS, Length: 2, Value: "CT"
        0x08, 0x00, 0x60, 0x00,
            b'C', b'S',
            0x02, 0x00,
            b'C', b'T',
        // (FFFE,E0DD) Sequence Delimitation, Length: 0
        0xFE, 0xFF, 0xDD, 0xE0, 0x00, 0x00, 0x00, 0x00,
    ];

    #[test]
    fn adaptive_reads_explicit_vr() {
        let decoder = AdaptiveVRLittleEndianDecoder::with_std_dict();
        let mut cursor = Cursor::new(RAW_EXPLICIT);

        let (first, first_size) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(first.tag(), Tag(0x0002, 0x0002));
        assert_eq!(first.vr(), VR::UI);
        assert_eq!(first.length(), Length(26));
        assert_eq!(first_size, 8);

        // skip the value of the first element
        cursor.seek(SeekFrom::Current(26)).unwrap();

        let (second, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(second.tag(), Tag(0x0002, 0x0010));
        assert_eq!(second.vr(), VR::UI);
        assert_eq!(second.length(), Length(20));
    }

    #[test]
    fn adaptive_reads_implicit_vr() {
        let decoder = AdaptiveVRLittleEndianDecoder::with_dict(DICT);
        let mut cursor = Cursor::new(RAW_IMPLICIT);

        let (first, first_size) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(first.tag(), Tag(0x0002, 0x0002));
        // the stub dictionary knows no tags, so the VR resolves to UN
        assert_eq!(first.vr(), VR::UN);
        assert_eq!(first.length(), Length(26));
        assert_eq!(first_size, 8);

        // skip the value of the first element
        cursor.seek(SeekFrom::Current(26)).unwrap();

        let (second, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(second.tag(), Tag(0x0002, 0x0010));
        assert_eq!(second.vr(), VR::UN);
        assert_eq!(second.length(), Length(20));
    }

    #[test]
    fn adaptive_reads_implicit_with_standard_dict() {
        let decoder = AdaptiveVRLittleEndianDecoder::with_std_dict();
        let mut cursor = Cursor::new(RAW_IMPLICIT);

        let (first, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(first.tag(), Tag(0x0002, 0x0002));
        assert_eq!(first.vr(), VR::UI);
        assert_eq!(first.length(), Length(26));

        // skip the value of the first element
        cursor.seek(SeekFrom::Current(26)).unwrap();

        let (second, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(second.tag(), Tag(0x0002, 0x0010));
        assert_eq!(second.vr(), VR::UI);
        assert_eq!(second.length(), Length(20));
    }

    #[test]
    fn adaptive_reads_delimiters() {
        let decoder = AdaptiveVRLittleEndianDecoder::with_std_dict();
        let mut cursor = Cursor::new(RAW_DELIMITERS);

        let (item, item_size) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(item.tag(), Tag(0xFFFE, 0xE000));
        assert_eq!(item.vr(), VR::UN);
        assert!(item.length().is_undefined());
        assert_eq!(item_size, 8);

        let (item_end, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(item_end.tag(), Tag(0xFFFE, 0xE00D));
        assert_eq!(item_end.length(), Length(0));

        let (sequence_end, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(sequence_end.tag(), Tag(0xFFFE, 0xE0DD));
        assert_eq!(sequence_end.length(), Length(0));
    }

    #[test]
    fn adaptive_rejects_false_positive_vr() {
        let decoder = AdaptiveVRLittleEndianDecoder::with_std_dict();
        let mut cursor = Cursor::new(RAW_IMPLICIT_VR_COLLISION);

        let (modality, size) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(modality.tag(), Tag(0x0008, 0x0060));
        // The dictionary has Modality as CS whereas the probed bytes say UN:
        // the dictionary prevails, so the element is taken as implicit VR
        // and its VR is resolved to CS from the dictionary.
        assert_eq!(modality.vr(), VR::CS);
        assert_eq!(modality.length(), Length(20053));
        assert_eq!(size, 8);
    }

    #[test]
    fn adaptive_explicit_then_delimiter() {
        let decoder = AdaptiveVRLittleEndianDecoder::with_std_dict();
        let mut cursor = Cursor::new(RAW_EXPLICIT_THEN_DELIMITER);

        let (modality, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find an element");
        assert_eq!(modality.tag(), Tag(0x0008, 0x0060));
        assert_eq!(modality.vr(), VR::CS);
        assert_eq!(modality.length(), Length(2));

        let mut value = [0u8; 2];
        cursor.read_exact(&mut value).unwrap();
        assert_eq!(&value, b"CT");

        let (sequence_end, _) = decoder
            .decode_header(&mut cursor)
            .expect("should find delimiter");
        assert_eq!(sequence_end.tag(), Tag(0xFFFE, 0xE0DD));
        assert_eq!(sequence_end.length(), Length(0));
    }
}