Skip to main content

dicom_toolkit_data/
dataset.rs

//! DICOM dataset — an ordered map of `Tag → Element`.
//!
//! Ports DCMTK's `DcmDataset` / `DcmItem`. Elements are kept in ascending
//! tag order, matching the DICOM requirement for encoded files.
5
6use crate::element::Element;
7use crate::value::Value;
8use dicom_toolkit_core::error::{DcmError, DcmResult};
9use dicom_toolkit_dict::{Tag, Vr};
10use indexmap::IndexMap;
11
/// A DICOM dataset: an ordered collection of data elements.
///
/// Internally backed by an `IndexMap` that is kept sorted by tag.
/// Each tag appears at most once; inserting an element whose tag is already
/// present replaces the stored element.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct DataSet {
    /// Elements keyed by their tag. `DataSet::insert` is responsible for
    /// keeping the keys in ascending order.
    elements: IndexMap<Tag, Element>,
}
19
/// One segment of a nested DICOM attribute path.
///
/// A path alternates between the two variants: a tag selects an element in
/// the current dataset, and an item index selects one item of that element
/// before the next tag is looked up.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttributePathSegment {
    /// Selects the element with this tag in the current dataset.
    Tag(Tag),
    /// Selects an item of the preceding element by zero-based index.
    Item(usize),
}
26
27impl DataSet {
28    pub fn new() -> Self {
29        Self {
30            elements: IndexMap::new(),
31        }
32    }
33
34    // ── Core map operations ───────────────────────────────────────────────────
35
36    /// Insert an element, maintaining ascending tag order.
37    pub fn insert(&mut self, element: Element) {
38        self.elements.insert(element.tag, element);
39        self.elements.sort_unstable_keys();
40    }
41
42    pub fn get(&self, tag: Tag) -> Option<&Element> {
43        self.elements.get(&tag)
44    }
45
46    pub fn get_mut(&mut self, tag: Tag) -> Option<&mut Element> {
47        self.elements.get_mut(&tag)
48    }
49
50    pub fn remove(&mut self, tag: Tag) -> Option<Element> {
51        self.elements.swap_remove(&tag)
52    }
53
54    pub fn contains(&self, tag: Tag) -> bool {
55        self.elements.contains_key(&tag)
56    }
57
58    pub fn len(&self) -> usize {
59        self.elements.len()
60    }
61
62    pub fn is_empty(&self) -> bool {
63        self.elements.is_empty()
64    }
65
66    pub fn iter(&self) -> impl Iterator<Item = (&Tag, &Element)> {
67        self.elements.iter()
68    }
69
70    pub fn tags(&self) -> impl Iterator<Item = Tag> + '_ {
71        self.elements.keys().copied()
72    }
73
74    /// Return the element for `tag`, or a [`DcmError::UnknownTag`] if absent.
75    pub fn find_element(&self, tag: Tag) -> DcmResult<&Element> {
76        self.elements.get(&tag).ok_or(DcmError::UnknownTag {
77            group: tag.group,
78            element: tag.element,
79        })
80    }
81
82    // ── Convenience getters ───────────────────────────────────────────────────
83
84    pub fn get_string(&self, tag: Tag) -> Option<&str> {
85        self.get(tag)?.string_value()
86    }
87
88    pub fn get_strings(&self, tag: Tag) -> Option<&[String]> {
89        self.get(tag)?.strings_value()
90    }
91
92    pub fn get_u16(&self, tag: Tag) -> Option<u16> {
93        self.get(tag)?.u16_value()
94    }
95
96    pub fn get_u32(&self, tag: Tag) -> Option<u32> {
97        self.get(tag)?.u32_value()
98    }
99
100    pub fn get_i32(&self, tag: Tag) -> Option<i32> {
101        self.get(tag)?.i32_value()
102    }
103
104    pub fn get_f64(&self, tag: Tag) -> Option<f64> {
105        self.get(tag)?.f64_value()
106    }
107
108    pub fn get_bytes(&self, tag: Tag) -> Option<&[u8]> {
109        self.get(tag)?.bytes_value()
110    }
111
112    pub fn get_items(&self, tag: Tag) -> Option<&[DataSet]> {
113        self.get(tag)?.items()
114    }
115
116    // ── Convenience setters ───────────────────────────────────────────────────
117
118    pub fn set_string(&mut self, tag: Tag, vr: Vr, value: &str) {
119        self.insert(Element::string(tag, vr, value));
120    }
121
122    pub fn set_strings(&mut self, tag: Tag, vr: Vr, values: Vec<String>) {
123        self.insert(Element::new(tag, vr, Value::Strings(values)));
124    }
125
126    pub fn set_u16(&mut self, tag: Tag, value: u16) {
127        self.insert(Element::u16(tag, value));
128    }
129
130    pub fn set_u32(&mut self, tag: Tag, value: u32) {
131        self.insert(Element::u32(tag, value));
132    }
133
134    pub fn set_i32(&mut self, tag: Tag, value: i32) {
135        self.insert(Element::i32(tag, value));
136    }
137
138    pub fn set_f64(&mut self, tag: Tag, value: f64) {
139        self.insert(Element::f64(tag, value));
140    }
141
142    pub fn set_bytes(&mut self, tag: Tag, vr: Vr, data: Vec<u8>) {
143        self.insert(Element::bytes(tag, vr, data));
144    }
145
146    pub fn set_sequence(&mut self, tag: Tag, items: Vec<DataSet>) {
147        self.insert(Element::sequence(tag, items));
148    }
149
150    pub fn set_uid(&mut self, tag: Tag, uid: &str) {
151        self.insert(Element::uid(tag, uid));
152    }
153}
154
155/// Parse an attribute path of the form `TAG[/ITEM/TAG]*`.
156///
157/// Tags are 8 hexadecimal digits (`GGGGEEEE`). Item indices are zero-based
158/// decimal integers. Leading and trailing slashes are ignored.
159pub fn parse_attribute_path(path: &str) -> DcmResult<Vec<AttributePathSegment>> {
160    let trimmed = path.trim_matches('/');
161    if trimmed.is_empty() {
162        return Err(DcmError::Other("attribute path must not be empty".into()));
163    }
164
165    let parts: Vec<&str> = trimmed.split('/').collect();
166    if parts.len() % 2 == 0 {
167        return Err(DcmError::Other(format!(
168            "attribute path must end with a tag, got {path:?}"
169        )));
170    }
171
172    let mut segments = Vec::with_capacity(parts.len());
173    for (index, part) in parts.iter().enumerate() {
174        if index % 2 == 0 {
175            segments.push(AttributePathSegment::Tag(parse_path_tag(part)?));
176        } else {
177            segments.push(AttributePathSegment::Item(parse_path_item(part)?));
178        }
179    }
180
181    Ok(segments)
182}
183
184/// Resolve an attribute path into a concrete element.
185pub fn resolve_attribute_path<'a>(
186    dataset: &'a DataSet,
187    path: &[AttributePathSegment],
188) -> DcmResult<&'a Element> {
189    if path.is_empty() {
190        return Err(DcmError::Other("attribute path must not be empty".into()));
191    }
192
193    let mut current = dataset;
194    let mut index = 0usize;
195    while index < path.len() {
196        let AttributePathSegment::Tag(tag) = path[index] else {
197            return Err(DcmError::Other(
198                "attribute paths must start with a tag segment".into(),
199            ));
200        };
201
202        let element = current.find_element(tag)?;
203        if index == path.len() - 1 {
204            return Ok(element);
205        }
206
207        let AttributePathSegment::Item(item_index) = path[index + 1] else {
208            return Err(DcmError::Other(format!(
209                "tag ({:04X},{:04X}) must be followed by an item index before descending",
210                tag.group, tag.element
211            )));
212        };
213
214        let items = element.items().ok_or_else(|| {
215            DcmError::Other(format!(
216                "tag ({:04X},{:04X}) is not a sequence and cannot be indexed",
217                tag.group, tag.element
218            ))
219        })?;
220
221        current = items.get(item_index).ok_or_else(|| {
222            DcmError::Other(format!(
223                "item index {} is out of range for sequence ({:04X},{:04X}) with {} item(s)",
224                item_index,
225                tag.group,
226                tag.element,
227                items.len()
228            ))
229        })?;
230        index += 2;
231    }
232
233    Err(DcmError::Other(
234        "attribute path did not resolve to an element".into(),
235    ))
236}
237
238fn parse_path_tag(segment: &str) -> DcmResult<Tag> {
239    if segment.len() != 8 || !segment.bytes().all(|b| b.is_ascii_hexdigit()) {
240        return Err(DcmError::Other(format!(
241            "invalid tag path segment {segment:?}; expected 8 hexadecimal digits"
242        )));
243    }
244
245    let group = u16::from_str_radix(&segment[..4], 16)
246        .map_err(|_| DcmError::Other(format!("invalid tag group in {segment:?}")))?;
247    let element = u16::from_str_radix(&segment[4..], 16)
248        .map_err(|_| DcmError::Other(format!("invalid tag element in {segment:?}")))?;
249    Ok(Tag::new(group, element))
250}
251
252fn parse_path_item(segment: &str) -> DcmResult<usize> {
253    let raw = segment
254        .strip_prefix('[')
255        .and_then(|s| s.strip_suffix(']'))
256        .unwrap_or(segment);
257    raw.parse::<usize>().map_err(|_| {
258        DcmError::Other(format!(
259            "invalid item path segment {segment:?}; expected a zero-based item index"
260        ))
261    })
262}
263
264// ── Tests ─────────────────────────────────────────────────────────────────────
265
/// Unit tests for `DataSet` and the attribute-path helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use dicom_toolkit_dict::tags;

    /// A value stored through a setter can be read back through the matching getter.
    #[test]
    fn dataset_insert_and_get() {
        let mut ds = DataSet::new();
        ds.set_u16(tags::ROWS, 512);
        assert_eq!(ds.get_u16(tags::ROWS), Some(512));
    }

    /// `remove` hands back the stored element and the tag is gone afterwards.
    #[test]
    fn dataset_contains_remove() {
        let mut ds = DataSet::new();
        ds.set_string(tags::PATIENT_NAME, Vr::PN, "Smith^John");
        assert!(ds.contains(tags::PATIENT_NAME));
        let removed = ds.remove(tags::PATIENT_NAME).unwrap();
        assert_eq!(removed.string_value(), Some("Smith^John"));
        assert!(!ds.contains(tags::PATIENT_NAME));
    }

    /// `len` and `is_empty` track the number of stored elements.
    #[test]
    fn dataset_len_is_empty() {
        let mut ds = DataSet::new();
        assert!(ds.is_empty());
        assert_eq!(ds.len(), 0);
        ds.set_u16(tags::ROWS, 1);
        assert!(!ds.is_empty());
        assert_eq!(ds.len(), 1);
    }

    /// Insertion order does not matter: tags come back strictly ascending.
    #[test]
    fn dataset_tag_order_ascending() {
        // Insert in reverse order; tags() should return in ascending order.
        let mut ds = DataSet::new();
        ds.set_u16(tags::COLUMNS, 256); // (0028,0011)
        ds.set_u16(tags::ROWS, 512); // (0028,0010)
        ds.set_string(tags::PATIENT_NAME, Vr::PN, "Doe^Jane"); // (0010,0010)

        let tags: Vec<Tag> = ds.tags().collect();
        assert!(
            tags.windows(2).all(|w| w[0] < w[1]),
            "tags not in order: {:?}",
            tags
        );
    }

    /// Round-trips one value through each typed setter/getter pair.
    #[test]
    fn dataset_convenience_getters() {
        let mut ds = DataSet::new();
        ds.set_string(tags::PATIENT_ID, Vr::LO, "PID001");
        ds.set_strings(
            tags::IMAGE_TYPE,
            Vr::CS,
            vec!["ORIGINAL".into(), "PRIMARY".into()],
        );
        ds.set_u16(tags::ROWS, 512);
        ds.set_u32(Tag::new(0x0028, 0x0000), 42);
        ds.set_i32(Tag::new(0x0020, 0x0013), -1);
        ds.set_f64(Tag::new(0x0028, 0x1050), 1024.0);
        ds.set_uid(tags::SOP_CLASS_UID, "1.2.840.10008.1.1");

        assert_eq!(ds.get_string(tags::PATIENT_ID), Some("PID001"));
        assert_eq!(ds.get_strings(tags::IMAGE_TYPE).unwrap().len(), 2);
        assert_eq!(ds.get_u16(tags::ROWS), Some(512));
        assert_eq!(ds.get_u32(Tag::new(0x0028, 0x0000)), Some(42));
        assert_eq!(ds.get_i32(Tag::new(0x0020, 0x0013)), Some(-1));
        // Floating-point value: compare with a tolerance rather than exactly.
        assert!((ds.get_f64(Tag::new(0x0028, 0x1050)).unwrap() - 1024.0).abs() < 1e-9);
        assert_eq!(
            ds.get_string(tags::SOP_CLASS_UID),
            Some("1.2.840.10008.1.1")
        );
    }

    /// Raw bytes stored with `set_bytes` are returned unchanged by `get_bytes`.
    #[test]
    fn dataset_set_bytes() {
        let mut ds = DataSet::new();
        let data = vec![0u8, 1, 2, 3];
        ds.set_bytes(Tag::new(0x0042, 0x0011), Vr::OB, data.clone());
        assert_eq!(
            ds.get_bytes(Tag::new(0x0042, 0x0011)),
            Some(data.as_slice())
        );
    }

    /// A sequence element exposes its item datasets through `get_items`.
    #[test]
    fn dataset_nested_sequence() {
        let mut item = DataSet::new();
        item.set_string(tags::PATIENT_NAME, Vr::PN, "Jones^Bob");

        let mut ds = DataSet::new();
        ds.set_sequence(Tag::new(0x0008, 0x1115), vec![item]);

        let items = ds.get_items(Tag::new(0x0008, 0x1115)).unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].get_string(tags::PATIENT_NAME), Some("Jones^Bob"));
    }

    /// `find_element` succeeds for a tag that is present.
    #[test]
    fn dataset_find_element_ok() {
        let mut ds = DataSet::new();
        ds.set_u16(tags::ROWS, 512);
        assert!(ds.find_element(tags::ROWS).is_ok());
    }

    /// `find_element` reports `UnknownTag` for a tag that is absent.
    #[test]
    fn dataset_find_element_not_found() {
        let ds = DataSet::new();
        let err = ds.find_element(tags::ROWS).unwrap_err();
        // Should be UnknownTag
        assert!(matches!(err, DcmError::UnknownTag { .. }));
    }

    /// `iter` yields one pair per stored element.
    #[test]
    fn dataset_iter() {
        let mut ds = DataSet::new();
        ds.set_u16(tags::ROWS, 512);
        ds.set_u16(tags::COLUMNS, 256);
        let count = ds.iter().count();
        assert_eq!(count, 2);
    }

    /// Setting the same tag twice replaces the value instead of adding an entry.
    #[test]
    fn dataset_overwrite() {
        let mut ds = DataSet::new();
        ds.set_u16(tags::ROWS, 512);
        ds.set_u16(tags::ROWS, 1024);
        assert_eq!(ds.len(), 1);
        assert_eq!(ds.get_u16(tags::ROWS), Some(1024));
    }

    /// A bare 8-hex-digit string parses to a single tag segment.
    #[test]
    fn parse_attribute_path_top_level_tag() {
        let path = parse_attribute_path("7FE00010").unwrap();
        assert_eq!(path, vec![AttributePathSegment::Tag(tags::PIXEL_DATA)]);
    }

    /// `TAG/ITEM/TAG` parses to alternating tag and item segments.
    #[test]
    fn parse_attribute_path_nested_sequence() {
        let path = parse_attribute_path("00081115/0/00081155").unwrap();
        assert_eq!(
            path,
            vec![
                AttributePathSegment::Tag(tags::REFERENCED_SOP_SEQUENCE),
                AttributePathSegment::Item(0),
                AttributePathSegment::Tag(tags::REFERENCED_SOP_INSTANCE_UID),
            ]
        );
    }

    /// Empty paths, paths ending on an item, non-hex tags and non-numeric
    /// item indices are all rejected.
    #[test]
    fn parse_attribute_path_rejects_malformed_paths() {
        assert!(parse_attribute_path("").is_err());
        assert!(parse_attribute_path("00081140/0").is_err());
        assert!(parse_attribute_path("GGGG1140").is_err());
        assert!(parse_attribute_path("00081140/not-an-item/00081155").is_err());
    }

    /// A single-tag path resolves to the top-level element.
    #[test]
    fn resolve_attribute_path_top_level_tag() {
        let mut ds = DataSet::new();
        ds.set_u16(tags::ROWS, 512);

        let path = parse_attribute_path("00280010").unwrap();
        let element = resolve_attribute_path(&ds, &path).unwrap();
        assert_eq!(element.u16_value(), Some(512));
    }

    /// A `TAG/ITEM/TAG` path descends into the selected sequence item.
    #[test]
    fn resolve_attribute_path_nested_sequence_item() {
        let mut item = DataSet::new();
        item.set_uid(tags::REFERENCED_SOP_INSTANCE_UID, "1.2.3");

        let mut ds = DataSet::new();
        ds.set_sequence(tags::REFERENCED_SOP_SEQUENCE, vec![item]);

        let path = parse_attribute_path("00081115/0/00081155").unwrap();
        let element = resolve_attribute_path(&ds, &path).unwrap();
        assert_eq!(element.string_value(), Some("1.2.3"));
    }

    /// Indexing past the last item of a sequence produces an "out of range" error.
    #[test]
    fn resolve_attribute_path_rejects_out_of_range_item() {
        let mut ds = DataSet::new();
        ds.set_sequence(tags::REFERENCED_SOP_SEQUENCE, vec![DataSet::new()]);

        // The sequence has exactly one item, so index 1 is out of range.
        let path = parse_attribute_path("00081115/1/00081155").unwrap();
        let err = resolve_attribute_path(&ds, &path).unwrap_err();
        assert!(err.to_string().contains("out of range"));
    }
}