Skip to main content

dicom_toolkit_data/io/
reader.rs

1//! DICOM Part 10 file reader.
2//!
3//! Reads binary DICOM files into a `FileFormat` or raw `DataSet`.
4
5use crate::dataset::DataSet;
6use crate::element::Element;
7use crate::file_format::FileFormat;
8use crate::io::transfer::{implicit_vr_for_tag, TransferSyntaxProperties};
9use crate::meta_info::FileMetaInformation;
10use crate::value::{DicomDate, DicomDateTime, DicomTime, PersonName, PixelData, Value};
11use dicom_toolkit_core::charset::DicomCharsetDecoder;
12use dicom_toolkit_core::error::{DcmError, DcmResult};
13use dicom_toolkit_dict::{tags, Tag, Vr};
14use std::io::Read;
15
16/// Streaming DICOM reader.
17pub struct DicomReader<R: Read> {
18    reader: R,
19}
20
21impl<R: Read> DicomReader<R> {
22    pub fn new(reader: R) -> Self {
23        Self { reader }
24    }
25
26    /// Read a complete DICOM Part 10 file.
27    pub fn read_file(&mut self) -> DcmResult<FileFormat> {
28        let mut data = Vec::new();
29        self.reader.read_to_end(&mut data)?;
30        parse_file(&data)
31    }
32
33    /// Read a raw dataset (no preamble/meta) using the given transfer syntax UID.
34    pub fn read_dataset(&mut self, ts_uid: &str) -> DcmResult<DataSet> {
35        let mut data = Vec::new();
36        self.reader.read_to_end(&mut data)?;
37        let props = TransferSyntaxProperties::from_uid(ts_uid);
38        let actual: std::borrow::Cow<[u8]> = if props.is_deflated {
39            std::borrow::Cow::Owned(decompress_deflated(&data)?)
40        } else {
41            std::borrow::Cow::Borrowed(&data)
42        };
43        let mut cursor = DicomCursor::new(&actual);
44        cursor.read_dataset_impl(
45            props.is_explicit_vr(),
46            props.is_little_endian(),
47            actual.len(),
48        )
49    }
50}
51
52// ── Internal parse entry point ────────────────────────────────────────────────
53
54pub(crate) fn parse_file(data: &[u8]) -> DcmResult<FileFormat> {
55    if payload_starts_with_file_meta(data) {
56        if let Ok(file) = parse_part10_payload(data, 0) {
57            return Ok(file);
58        }
59    }
60
61    // Short files without preamble — try raw implicit VR LE
62    if data.len() < 132 {
63        let mut cursor = DicomCursor::new(data);
64        let ds = cursor.read_dataset_impl(false, true, data.len())?;
65        let meta = FileMetaInformation::new("", "", "1.2.840.10008.1.2");
66        return Ok(FileFormat::new(meta, ds));
67    }
68
69    let has_dicm = &data[128..132] == b"DICM";
70
71    if !has_dicm {
72        // No magic: try implicit VR LE from the start
73        let mut cursor = DicomCursor::new(data);
74        let ds = cursor.read_dataset_impl(false, true, data.len())?;
75        let meta = FileMetaInformation::new("", "", "1.2.840.10008.1.2");
76        return Ok(FileFormat::new(meta, ds));
77    }
78
79    parse_part10_payload(data, 132)
80}
81
/// Report whether `data` holds at least four bytes and opens with the
/// little-endian group number 0x0002.
fn payload_starts_with_file_meta(data: &[u8]) -> bool {
    matches!(data, [0x02, 0x00, _, _, ..])
}
85
86fn parse_part10_payload(data: &[u8], meta_offset: usize) -> DcmResult<FileFormat> {
87    let mut cursor = DicomCursor::new(data);
88    cursor.pos = meta_offset;
89
90    // Read File Meta Information (always explicit VR LE)
91    let meta_ds = cursor.read_meta()?;
92    let meta = FileMetaInformation::from_dataset(&meta_ds)?;
93    let ts_uid = meta.transfer_syntax_uid.clone();
94    let props = TransferSyntaxProperties::from_uid(&ts_uid);
95
96    let dataset = if props.is_deflated {
97        let remaining = &data[cursor.pos..];
98        let decompressed = decompress_deflated(remaining)?;
99        let mut dc = DicomCursor::new(&decompressed);
100        dc.read_dataset_impl(true, true, decompressed.len())?
101    } else {
102        cursor.read_dataset_impl(props.is_explicit_vr(), props.is_little_endian(), data.len())?
103    };
104
105    Ok(FileFormat::new(meta, dataset))
106}
107
108fn decompress_deflated(data: &[u8]) -> DcmResult<Vec<u8>> {
109    use flate2::read::DeflateDecoder;
110    let mut decoder = DeflateDecoder::new(data);
111    let mut out = Vec::new();
112    decoder.read_to_end(&mut out).map_err(DcmError::Io)?;
113    Ok(out)
114}
115
116// ── Cursor ────────────────────────────────────────────────────────────────────
117
118struct DicomCursor<'a> {
119    data: &'a [u8],
120    pos: usize,
121    /// Character set decoder, updated when (0008,0005) is encountered.
122    charset: DicomCharsetDecoder,
123}
124
125impl<'a> DicomCursor<'a> {
126    fn new(data: &'a [u8]) -> Self {
127        Self {
128            data,
129            pos: 0,
130            charset: DicomCharsetDecoder::default_ascii(),
131        }
132    }
133
134    // ── Primitives ────────────────────────────────────────────────────────────
135
136    fn read_u8(&mut self) -> DcmResult<u8> {
137        if self.pos >= self.data.len() {
138            return Err(DcmError::UnexpectedEof {
139                offset: self.pos as u64,
140            });
141        }
142        let b = self.data[self.pos];
143        self.pos += 1;
144        Ok(b)
145    }
146
147    fn read_u16(&mut self, le: bool) -> DcmResult<u16> {
148        let a = self.read_u8()?;
149        let b = self.read_u8()?;
150        Ok(if le {
151            u16::from_le_bytes([a, b])
152        } else {
153            u16::from_be_bytes([a, b])
154        })
155    }
156
157    fn read_u32(&mut self, le: bool) -> DcmResult<u32> {
158        let a = self.read_u8()?;
159        let b = self.read_u8()?;
160        let c = self.read_u8()?;
161        let d = self.read_u8()?;
162        Ok(if le {
163            u32::from_le_bytes([a, b, c, d])
164        } else {
165            u32::from_be_bytes([a, b, c, d])
166        })
167    }
168
169    fn read_bytes(&mut self, n: usize) -> DcmResult<&'a [u8]> {
170        if self.pos + n > self.data.len() {
171            return Err(DcmError::UnexpectedEof {
172                offset: self.pos as u64,
173            });
174        }
175        let slice = &self.data[self.pos..self.pos + n];
176        self.pos += n;
177        Ok(slice)
178    }
179
180    fn peek_tag(&self, le: bool) -> DcmResult<Tag> {
181        if self.pos + 4 > self.data.len() {
182            return Err(DcmError::UnexpectedEof {
183                offset: self.pos as u64,
184            });
185        }
186        let g0 = self.data[self.pos];
187        let g1 = self.data[self.pos + 1];
188        let e0 = self.data[self.pos + 2];
189        let e1 = self.data[self.pos + 3];
190        let group = if le {
191            u16::from_le_bytes([g0, g1])
192        } else {
193            u16::from_be_bytes([g0, g1])
194        };
195        let element = if le {
196            u16::from_le_bytes([e0, e1])
197        } else {
198            u16::from_be_bytes([e0, e1])
199        };
200        Ok(Tag::new(group, element))
201    }
202
203    fn read_tag(&mut self, le: bool) -> DcmResult<Tag> {
204        let tag = self.peek_tag(le)?;
205        self.pos += 4;
206        Ok(tag)
207    }
208
209    // ── Meta ──────────────────────────────────────────────────────────────────
210
211    /// Read group 0002 elements (explicit VR LE). Stops when group != 0002.
212    fn read_meta(&mut self) -> DcmResult<DataSet> {
213        let mut meta = DataSet::new();
214        while self.pos + 4 <= self.data.len() {
215            let group = u16::from_le_bytes([self.data[self.pos], self.data[self.pos + 1]]);
216            if group != 0x0002 {
217                break;
218            }
219            let elem = self.read_element(true, true)?;
220            // Skip group-length element — computed at write time
221            if !elem.tag.is_group_length() {
222                meta.insert(elem);
223            }
224        }
225        Ok(meta)
226    }
227
228    // ── Dataset ───────────────────────────────────────────────────────────────
229
230    fn read_dataset_impl(&mut self, explicit: bool, le: bool, end: usize) -> DcmResult<DataSet> {
231        let mut ds = DataSet::new();
232        while self.pos < end && self.pos + 4 <= self.data.len() {
233            let tag = self.peek_tag(le)?;
234
235            // Stop on sequence or item delimitation
236            if tag.is_sequence_delimitation() || tag.is_item_delimitation() {
237                self.pos += 4;
238                let _ = self.read_u32(true); // consume length
239                break;
240            }
241
242            // ITEM tags shouldn't appear at dataset level; stop gracefully
243            if tag.is_item() {
244                break;
245            }
246
247            let elem = self.read_element(explicit, le)?;
248
249            // Update charset decoder when Specific Character Set is encountered
250            if elem.tag == tags::SPECIFIC_CHARACTER_SET {
251                if let Value::Strings(ref terms) = elem.value {
252                    let charset_value = terms.join("\\");
253                    if let Ok(decoder) = DicomCharsetDecoder::new(&charset_value) {
254                        self.charset = decoder;
255                    }
256                }
257            }
258
259            ds.insert(elem);
260        }
261        Ok(ds)
262    }
263
264    // ── Element ───────────────────────────────────────────────────────────────
265
266    fn read_element(&mut self, explicit: bool, le: bool) -> DcmResult<Element> {
267        let tag = self.read_tag(le)?;
268
269        let (vr, len, undef_len) = if tag.is_delimiter() {
270            let len = self.read_u32(true)?;
271            (Vr::UN, len, false)
272        } else if explicit {
273            let vr_b0 = self.read_u8()?;
274            let vr_b1 = self.read_u8()?;
275            let vr = Vr::from_bytes([vr_b0, vr_b1]).unwrap_or(Vr::UN);
276            if vr.has_long_explicit_length() {
277                let _reserved = self.read_u16(le)?;
278                let len = self.read_u32(le)?;
279                (vr, len, len == 0xFFFF_FFFF)
280            } else {
281                let len = self.read_u16(le)? as u32;
282                (vr, len, false)
283            }
284        } else {
285            let len = self.read_u32(le)?;
286            let vr = implicit_vr_for_tag(tag);
287            (vr, len, len == 0xFFFF_FFFF)
288        };
289
290        let value = self.read_value(tag, vr, len, undef_len, explicit, le)?;
291        let effective_vr = if vr == Vr::UN && undef_len && matches!(value, Value::Sequence(_)) {
292            // Mirror DCMTK CP-246 handling: undefined-length UN is normalized to SQ.
293            Vr::SQ
294        } else {
295            vr
296        };
297        Ok(Element::new(tag, effective_vr, value))
298    }
299
300    // ── Value ─────────────────────────────────────────────────────────────────
301
302    fn read_value(
303        &mut self,
304        tag: Tag,
305        vr: Vr,
306        len: u32,
307        undef_len: bool,
308        explicit: bool,
309        le: bool,
310    ) -> DcmResult<Value> {
311        match vr {
312            Vr::SQ => {
313                let items = self.read_sequence(len, undef_len, explicit, le)?;
314                Ok(Value::Sequence(items))
315            }
316            _ if tag == tags::PIXEL_DATA => self.read_pixel_data(len, undef_len, le),
317            Vr::UN if undef_len => {
318                // CP-246: undefined-length UN is interpreted as a sequence encoded
319                // using Implicit VR Little Endian semantics.
320                let items = self.read_sequence(len, true, false, true)?;
321                Ok(Value::Sequence(items))
322            }
323            _ => {
324                if undef_len {
325                    return Err(DcmError::InvalidLength {
326                        group: tag.group,
327                        element: tag.element,
328                        length: 0xFFFF_FFFF,
329                    });
330                }
331                let bytes = self.read_bytes(len as usize)?;
332                parse_value_bytes(vr, bytes, le, &self.charset)
333            }
334        }
335    }
336
337    // ── Sequence ──────────────────────────────────────────────────────────────
338
339    fn read_sequence(
340        &mut self,
341        len: u32,
342        undef_len: bool,
343        explicit: bool,
344        le: bool,
345    ) -> DcmResult<Vec<DataSet>> {
346        let end = if undef_len {
347            usize::MAX
348        } else {
349            self.pos.saturating_add(len as usize)
350        };
351
352        let mut items = Vec::new();
353
354        while self.pos < end && self.pos + 4 <= self.data.len() {
355            let tag = self.peek_tag(le)?;
356
357            if tag.is_sequence_delimitation() {
358                self.pos += 4;
359                let _ = self.read_u32(true);
360                break;
361            }
362
363            if tag.is_item() {
364                self.pos += 4; // consume ITEM tag
365                let item_len = self.read_u32(le)?;
366                let item_undef = item_len == 0xFFFF_FFFF;
367                let item_end = if item_undef {
368                    usize::MAX
369                } else {
370                    self.pos.saturating_add(item_len as usize)
371                };
372                let item_ds = self.read_dataset_impl(explicit, le, item_end)?;
373                items.push(item_ds);
374            } else {
375                break;
376            }
377        }
378
379        Ok(items)
380    }
381
382    // ── Pixel data ────────────────────────────────────────────────────────────
383
384    fn read_pixel_data(&mut self, len: u32, undef_len: bool, le: bool) -> DcmResult<Value> {
385        if !undef_len {
386            let bytes = self.read_bytes(len as usize)?.to_vec();
387            return Ok(Value::PixelData(PixelData::Native { bytes }));
388        }
389
390        // Encapsulated pixel data — undefined length
391        let mut offset_table: Vec<u32> = Vec::new();
392        let mut fragments: Vec<Vec<u8>> = Vec::new();
393        let mut first_item = true;
394
395        loop {
396            if self.pos + 4 > self.data.len() {
397                break;
398            }
399            let tag = self.peek_tag(le)?;
400
401            if tag.is_sequence_delimitation() {
402                self.pos += 4;
403                let _ = self.read_u32(true);
404                break;
405            }
406
407            if tag.is_item() {
408                self.pos += 4;
409                let item_len = self.read_u32(le)?;
410                let item_bytes = self.read_bytes(item_len as usize)?.to_vec();
411
412                if first_item {
413                    // Basic offset table
414                    let n = item_bytes.len() / 4;
415                    for i in 0..n {
416                        let b = &item_bytes[i * 4..i * 4 + 4];
417                        offset_table.push(u32::from_le_bytes([b[0], b[1], b[2], b[3]]));
418                    }
419                    first_item = false;
420                } else {
421                    fragments.push(item_bytes);
422                }
423            } else {
424                break;
425            }
426        }
427
428        Ok(Value::PixelData(PixelData::Encapsulated {
429            offset_table,
430            fragments,
431        }))
432    }
433}
434
435// ── Value byte parser ─────────────────────────────────────────────────────────
436
437fn parse_value_bytes(
438    vr: Vr,
439    bytes: &[u8],
440    le: bool,
441    charset: &DicomCharsetDecoder,
442) -> DcmResult<Value> {
443    if bytes.is_empty() {
444        return Ok(Value::Empty);
445    }
446
447    match vr {
448        Vr::UI => {
449            // UIDs are always ASCII — no charset decoding needed
450            let s = std::str::from_utf8(bytes)
451                .unwrap_or("")
452                .trim_end_matches('\0');
453            Ok(Value::Uid(s.to_string()))
454        }
455
456        // VRs that use the Specific Character Set:
457        // AE, CS are restricted to ASCII in DICOM, but we decode them through
458        // the charset decoder for robustness (some non-conformant files exist).
459        Vr::AE | Vr::AS | Vr::CS | Vr::LO | Vr::SH => {
460            let s = decode_string_with_charset(bytes, charset);
461            let s = s.trim_end_matches('\0').trim_end_matches(' ');
462            let parts: Vec<String> = s.split('\\').map(str::to_string).collect();
463            Ok(Value::Strings(parts))
464        }
465        Vr::LT | Vr::ST | Vr::UT | Vr::UC | Vr::UR => {
466            let s = decode_string_with_charset(bytes, charset);
467            let s = s.trim_end_matches('\0').trim_end_matches(' ').to_string();
468            Ok(Value::Strings(vec![s]))
469        }
470        Vr::PN => {
471            let s = decode_string_with_charset(bytes, charset);
472            let s = s.trim_end_matches('\0').trim_end_matches(' ');
473            if s.is_empty() {
474                return Ok(Value::Empty);
475            }
476            let names: Vec<PersonName> = s.split('\\').map(PersonName::parse).collect();
477            Ok(Value::PersonNames(names))
478        }
479
480        // Numeric-string VRs — always ASCII content, but decoded through charset
481        // for consistent handling of padding.
482        Vr::DA => {
483            let s = decode_ascii_string(bytes);
484            if s.is_empty() {
485                return Ok(Value::Empty);
486            }
487            let res: Result<Vec<_>, _> = s
488                .split('\\')
489                .map(|p| DicomDate::from_da_str(p.trim()))
490                .collect();
491            res.map(Value::Date)
492                .map_err(|_| DcmError::Other("invalid DA value".into()))
493        }
494        Vr::TM => {
495            let s = decode_ascii_string(bytes);
496            if s.is_empty() {
497                return Ok(Value::Empty);
498            }
499            let res: Result<Vec<_>, _> =
500                s.split('\\').map(|p| DicomTime::parse(p.trim())).collect();
501            res.map(Value::Time)
502                .map_err(|_| DcmError::Other("invalid TM value".into()))
503        }
504        Vr::DT => {
505            let s = decode_ascii_string(bytes);
506            if s.is_empty() {
507                return Ok(Value::Empty);
508            }
509            let res: Result<Vec<_>, _> = s
510                .split('\\')
511                .map(|p| DicomDateTime::parse(p.trim()))
512                .collect();
513            res.map(Value::DateTime)
514                .map_err(|_| DcmError::Other("invalid DT value".into()))
515        }
516        Vr::IS => {
517            let s = decode_ascii_string(bytes);
518            if s.is_empty() {
519                return Ok(Value::Empty);
520            }
521            let res: Result<Vec<i64>, _> = s
522                .split('\\')
523                .map(|p| {
524                    p.trim()
525                        .parse::<i64>()
526                        .map_err(|_| DcmError::Other(format!("invalid IS: {p}")))
527                })
528                .collect();
529            res.map(Value::Ints)
530        }
531        Vr::DS => {
532            let s = decode_ascii_string(bytes);
533            if s.is_empty() {
534                return Ok(Value::Empty);
535            }
536            let res: Result<Vec<f64>, _> = s
537                .split('\\')
538                .map(|p| {
539                    p.trim()
540                        .parse::<f64>()
541                        .map_err(|_| DcmError::Other(format!("invalid DS: {p}")))
542                })
543                .collect();
544            res.map(Value::Decimals)
545        }
546
547        Vr::US | Vr::OW => {
548            if bytes.len() % 2 != 0 {
549                return Err(DcmError::Other(format!(
550                    "{} value has odd byte length",
551                    vr.code()
552                )));
553            }
554            let vals: Vec<u16> = bytes
555                .chunks_exact(2)
556                .map(|c| {
557                    if le {
558                        u16::from_le_bytes([c[0], c[1]])
559                    } else {
560                        u16::from_be_bytes([c[0], c[1]])
561                    }
562                })
563                .collect();
564            Ok(Value::U16(vals))
565        }
566        Vr::SS => {
567            if bytes.len() % 2 != 0 {
568                return Err(DcmError::Other("SS value has odd byte length".into()));
569            }
570            let vals: Vec<i16> = bytes
571                .chunks_exact(2)
572                .map(|c| {
573                    if le {
574                        i16::from_le_bytes([c[0], c[1]])
575                    } else {
576                        i16::from_be_bytes([c[0], c[1]])
577                    }
578                })
579                .collect();
580            Ok(Value::I16(vals))
581        }
582        Vr::UL | Vr::OL => {
583            if bytes.len() % 4 != 0 {
584                return Err(DcmError::Other(format!(
585                    "{} value length not multiple of 4",
586                    vr.code()
587                )));
588            }
589            let vals: Vec<u32> = bytes
590                .chunks_exact(4)
591                .map(|c| {
592                    if le {
593                        u32::from_le_bytes([c[0], c[1], c[2], c[3]])
594                    } else {
595                        u32::from_be_bytes([c[0], c[1], c[2], c[3]])
596                    }
597                })
598                .collect();
599            Ok(Value::U32(vals))
600        }
601        Vr::SL => {
602            if bytes.len() % 4 != 0 {
603                return Err(DcmError::Other("SL value length not multiple of 4".into()));
604            }
605            let vals: Vec<i32> = bytes
606                .chunks_exact(4)
607                .map(|c| {
608                    if le {
609                        i32::from_le_bytes([c[0], c[1], c[2], c[3]])
610                    } else {
611                        i32::from_be_bytes([c[0], c[1], c[2], c[3]])
612                    }
613                })
614                .collect();
615            Ok(Value::I32(vals))
616        }
617        Vr::FL | Vr::OF => {
618            if bytes.len() % 4 != 0 {
619                return Err(DcmError::Other(format!(
620                    "{} value length not multiple of 4",
621                    vr.code()
622                )));
623            }
624            let vals: Vec<f32> = bytes
625                .chunks_exact(4)
626                .map(|c| {
627                    if le {
628                        f32::from_le_bytes([c[0], c[1], c[2], c[3]])
629                    } else {
630                        f32::from_be_bytes([c[0], c[1], c[2], c[3]])
631                    }
632                })
633                .collect();
634            Ok(Value::F32(vals))
635        }
636        Vr::FD | Vr::OD => {
637            if bytes.len() % 8 != 0 {
638                return Err(DcmError::Other(format!(
639                    "{} value length not multiple of 8",
640                    vr.code()
641                )));
642            }
643            let vals: Vec<f64> = bytes
644                .chunks_exact(8)
645                .map(|c| {
646                    if le {
647                        f64::from_le_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]])
648                    } else {
649                        f64::from_be_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]])
650                    }
651                })
652                .collect();
653            Ok(Value::F64(vals))
654        }
655        Vr::SV => {
656            if bytes.len() % 8 != 0 {
657                return Err(DcmError::Other("SV value length not multiple of 8".into()));
658            }
659            let vals: Vec<i64> = bytes
660                .chunks_exact(8)
661                .map(|c| {
662                    if le {
663                        i64::from_le_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]])
664                    } else {
665                        i64::from_be_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]])
666                    }
667                })
668                .collect();
669            Ok(Value::I64(vals))
670        }
671        Vr::UV | Vr::OV => {
672            if bytes.len() % 8 != 0 {
673                return Err(DcmError::Other(format!(
674                    "{} value length not multiple of 8",
675                    vr.code()
676                )));
677            }
678            let vals: Vec<u64> = bytes
679                .chunks_exact(8)
680                .map(|c| {
681                    if le {
682                        u64::from_le_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]])
683                    } else {
684                        u64::from_be_bytes([c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]])
685                    }
686                })
687                .collect();
688            Ok(Value::U64(vals))
689        }
690        Vr::AT => {
691            if bytes.len() % 4 != 0 {
692                return Err(DcmError::Other("AT value length not multiple of 4".into()));
693            }
694            let tags: Vec<Tag> = bytes
695                .chunks_exact(4)
696                .map(|c| {
697                    let g = if le {
698                        u16::from_le_bytes([c[0], c[1]])
699                    } else {
700                        u16::from_be_bytes([c[0], c[1]])
701                    };
702                    let e = if le {
703                        u16::from_le_bytes([c[2], c[3]])
704                    } else {
705                        u16::from_be_bytes([c[2], c[3]])
706                    };
707                    Tag::new(g, e)
708                })
709                .collect();
710            Ok(Value::Tags(tags))
711        }
712        Vr::OB | Vr::UN => Ok(Value::U8(bytes.to_vec())),
713        Vr::SQ => Err(DcmError::Other("parse_value_bytes called for SQ".into())),
714    }
715}
716
717/// Decode a byte slice using the active charset decoder.
718fn decode_string_with_charset(bytes: &[u8], charset: &DicomCharsetDecoder) -> String {
719    charset
720        .decode(bytes)
721        .unwrap_or_else(|_| String::from_utf8_lossy(bytes).into_owned())
722}
723
/// Decode an ASCII-only value (used for VRs such as DA, TM, IS, DS).
///
/// Bytes are read as lossy UTF-8; trailing NULs are stripped first, then
/// trailing spaces.
fn decode_ascii_string(bytes: &[u8]) -> String {
    let text = String::from_utf8_lossy(bytes);
    let without_nul = text.trim_end_matches('\0');
    without_nul.trim_end_matches(' ').to_owned()
}
731
732// ── Tests ─────────────────────────────────────────────────────────────────────
733
#[cfg(test)]
mod tests {
    use super::*;
    use crate::element::Element;
    use crate::file_format::FileFormat;
    use crate::io::writer::DicomWriter;
    use crate::value::Value;
    use dicom_toolkit_dict::{tags, Tag, Vr};

    /// Decoder for the default ASCII repertoire.
    fn ascii() -> DicomCharsetDecoder {
        DicomCharsetDecoder::default_ascii()
    }

    /// Decode `raw` as `vr`, little endian, with the ASCII decoder.
    fn parse_le_ascii(vr: Vr, raw: &[u8]) -> Value {
        parse_value_bytes(vr, raw, true, &ascii()).unwrap()
    }

    #[test]
    fn parse_us_bytes() {
        let raw = 512u16.to_le_bytes();
        let value = parse_le_ascii(Vr::US, &raw);
        assert_eq!(value.as_u16(), Some(512));
    }

    #[test]
    fn parse_ui_bytes() {
        let value = parse_le_ascii(Vr::UI, b"1.2.840.10008.1.2.1");
        assert_eq!(value.as_string(), Some("1.2.840.10008.1.2.1"));
    }

    #[test]
    fn parse_lo_bytes_backslash() {
        // A backslash separates the two values.
        match parse_le_ascii(Vr::LO, b"foo\\bar") {
            Value::Strings(parts) => assert_eq!(parts, &["foo", "bar"]),
            other => panic!("unexpected: {:?}", other),
        }
    }

    #[test]
    fn parse_ds_bytes() {
        match parse_le_ascii(Vr::DS, b"2.78") {
            Value::Decimals(numbers) => assert!((numbers[0] - 2.78).abs() < 1e-9),
            other => panic!("unexpected: {:?}", other),
        }
    }

    #[test]
    fn parse_is_bytes() {
        match parse_le_ascii(Vr::IS, b"-42") {
            Value::Ints(numbers) => assert_eq!(numbers[0], -42),
            other => panic!("unexpected: {:?}", other),
        }
    }

    #[test]
    fn parse_ob_bytes() {
        let raw = vec![0xDE, 0xAD, 0xBE, 0xEF];
        let value = parse_le_ascii(Vr::OB, &raw);
        assert_eq!(value.as_bytes(), Some(raw.as_slice()));
    }

    #[test]
    fn parse_at_bytes() {
        let raw = [
            0x08, 0x00, 0x20, 0x00, // (0008,0020)
            0x08, 0x00, 0x30, 0x00, // (0008,0030)
        ];
        let expected = vec![Tag::new(0x0008, 0x0020), Tag::new(0x0008, 0x0030)];
        match parse_le_ascii(Vr::AT, &raw) {
            Value::Tags(found) => assert_eq!(found, expected),
            other => panic!("unexpected: {:?}", other),
        }
    }

    #[test]
    fn parse_lo_latin1() {
        // "Müller" in ISO-8859-1
        let raw = vec![b'M', 0xFC, b'l', b'l', b'e', b'r'];
        let latin1 = DicomCharsetDecoder::new("ISO_IR 100").unwrap();
        match parse_value_bytes(Vr::LO, &raw, true, &latin1).unwrap() {
            Value::Strings(parts) => assert_eq!(parts, &["Müller"]),
            other => panic!("unexpected: {:?}", other),
        }
    }

    #[test]
    fn read_file_recovers_file_meta_without_preamble() {
        let mut dataset = DataSet::new();
        dataset.insert(Element::u16(tags::ROWS, 64));
        dataset.insert(Element::u16(tags::COLUMNS, 32));

        let file = FileFormat::from_dataset("1.2.840.10008.5.1.4.1.1.2", "1.2.3.4.5", dataset);
        let mut encoded = Vec::new();
        DicomWriter::new(&mut encoded).write_file(&file).unwrap();

        // Drop the first 132 bytes (preamble + magic) so the input starts
        // with the file meta group.
        let parsed = DicomReader::new(&encoded[132..]).read_file().unwrap();

        assert_eq!(parsed.meta.transfer_syntax_uid, "1.2.840.10008.1.2.1");
        assert_eq!(parsed.dataset.get_u16(tags::ROWS), Some(64));
        assert_eq!(parsed.dataset.get_u16(tags::COLUMNS), Some(32));
    }

    #[test]
    fn parse_pn_utf8() {
        let utf8 = DicomCharsetDecoder::new("ISO_IR 192").unwrap();
        let raw = "田中^太郎".as_bytes();
        match parse_value_bytes(Vr::PN, raw, true, &utf8).unwrap() {
            Value::PersonNames(names) => assert_eq!(names[0].to_string(), "田中^太郎"),
            other => panic!("unexpected: {:?}", other),
        }
    }

    #[test]
    fn read_undefined_length_un_as_sequence_cp246() {
        let private_tag = Tag::new(0x7777, 0x0010);

        // Item content: Patient ID encoded as implicit VR LE with value "ABCD".
        let item_payload: Vec<u8> = [
            &tags::PATIENT_ID.group.to_le_bytes()[..],
            &tags::PATIENT_ID.element.to_le_bytes()[..],
            &4u32.to_le_bytes()[..],
            &b"ABCD"[..],
        ]
        .concat();

        // Explicit VR LE stream: private UN element with undefined length
        // wrapping a single item, closed by a sequence delimitation.
        let stream: Vec<u8> = [
            &private_tag.group.to_le_bytes()[..],
            &private_tag.element.to_le_bytes()[..],
            &b"UN"[..],
            &[0, 0][..],
            &0xFFFF_FFFFu32.to_le_bytes()[..],
            &[0xFE, 0xFF, 0x00, 0xE0][..], // Item
            &(item_payload.len() as u32).to_le_bytes()[..],
            &item_payload[..],
            &[0xFE, 0xFF, 0xDD, 0xE0, 0, 0, 0, 0][..], // Sequence delimitation
        ]
        .concat();

        let dataset = DicomReader::new(stream.as_slice())
            .read_dataset("1.2.840.10008.1.2.1")
            .unwrap();

        let elem = dataset.get(private_tag).unwrap();
        assert_eq!(elem.vr, Vr::SQ);
        let items = elem.items().unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].get_string(tags::PATIENT_ID), Some("ABCD"));
    }
}
887}