dicom_parser/stateful/
encode.rs

1//! Module holding a stateful DICOM data encoding abstraction.
2//!
3//! The [`StatefulEncoder`] supports encoding of binary data and text
4//! while applying the necessary padding to conform to DICOM encoding rules.
5
6use dicom_core::{value::PrimitiveValue, DataElementHeader, Length, Tag, VR};
7use dicom_encoding::transfer_syntax::DynEncoder;
8use dicom_encoding::{
9    encode::EncodeTo,
10    text::{DefaultCharacterSetCodec, SpecificCharacterSet, TextCodec},
11    TransferSyntax,
12};
13use snafu::{Backtrace, OptionExt, ResultExt, Snafu};
14use std::io::Write;
15
/// Errors that may be produced by the stateful encoder in this module.
///
/// Variants carrying a `position` record the value of the encoder's
/// byte counter at the moment the failing operation was attempted.
#[derive(Debug, Snafu)]
#[non_exhaustive]
pub enum Error {
    /// No encoder could be obtained for the requested transfer syntax.
    #[snafu(display("Encoding in transfer syntax {} is unsupported", ts))]
    UnsupportedTransferSyntax {
        /// identifier of the transfer syntax, as given by its `uid()`
        ts: &'static str,
        backtrace: Backtrace,
    },

    /// The given character set is not supported.
    // NOTE(review): no code visible in this file raises this variant
    #[snafu(display("Unsupported character set {:?}", charset))]
    UnsupportedCharacterSet {
        /// the character set which was rejected
        charset: SpecificCharacterSet,
        backtrace: Backtrace,
    },

    /// The underlying encoder failed to encode a header, delimiter,
    /// offset table, or primitive value.
    #[snafu(display("Failed to encode a data piece at position {}", position))]
    EncodeData {
        /// number of bytes written before the failed operation
        position: u64,
        source: dicom_encoding::encode::Error,
    },

    /// The text codec failed to encode a textual value.
    #[snafu(display("Could not encode text at position {}", position))]
    EncodeText {
        /// number of bytes written before the failed operation
        position: u64,
        source: dicom_encoding::text::EncodeTextError,
    },

    /// Writing value bytes (or their padding) to the write target failed.
    #[snafu(display("Could not write value data at position {}", position))]
    WriteValueData {
        /// number of bytes written before the failed operation
        position: u64,
        source: std::io::Error,
        backtrace: Backtrace,
    },
}
50
/// Result type for stateful encoding operations,
/// with this module's [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
52
/// Also called a printer, this encoder type provides a stateful mid-level
/// abstraction for writing DICOM content. Unlike `Encode`,
/// the stateful encoder knows how to write text values and keeps track
/// of how many bytes were written.
/// `W` is the write target, `E` is the encoder, and `T` is the text codec.
#[derive(Debug)]
pub struct StatefulEncoder<W, E, T = SpecificCharacterSet> {
    /// the write target, which receives all encoded data
    to: W,
    /// the encoder for headers, delimiters, and primitive values
    encoder: E,
    /// the text codec currently in use for textual values
    text: T,
    /// the number of bytes written to the target so far
    bytes_written: u64,
    /// scratch buffer, reused when assembling multi-valued text
    buffer: Vec<u8>,
}
66
/// A stateful encoder over a boxed writer and a dynamically dispatched encoder,
/// using the default text codec type ([`SpecificCharacterSet`]).
pub type DynStatefulEncoder<'w> = StatefulEncoder<Box<dyn Write + 'w>, DynEncoder<'w, dyn Write>>;
68
69impl<W, E, T> StatefulEncoder<W, E, T> {
70    pub fn new(to: W, encoder: E, text: T) -> Self {
71        StatefulEncoder {
72            to,
73            encoder,
74            text,
75            bytes_written: 0,
76            buffer: Vec::with_capacity(128),
77        }
78    }
79}
80
81impl<'s> DynStatefulEncoder<'s> {
82    pub fn from_transfer_syntax(
83        to: Box<dyn Write + 's>,
84        ts: TransferSyntax,
85        charset: SpecificCharacterSet,
86    ) -> Result<Self> {
87        let encoder = ts
88            .encoder()
89            .context(UnsupportedTransferSyntaxSnafu { ts: ts.uid() })?;
90        Ok(StatefulEncoder::new(to, encoder, charset))
91    }
92}
93
94impl<W, E> StatefulEncoder<W, E>
95where
96    W: Write,
97    E: EncodeTo<W>,
98{
99    /// Encode and write a data element header.
100    pub fn encode_element_header(&mut self, mut de: DataElementHeader) -> Result<()> {
101        if let Some(len) = de.len.get() {
102            de.len = Length(even_len(len))
103        }
104        let bytes = self
105            .encoder
106            .encode_element_header(&mut self.to, de)
107            .context(EncodeDataSnafu {
108                position: self.bytes_written,
109            })?;
110        self.bytes_written += bytes as u64;
111        Ok(())
112    }
113
114    /// Encode and write an item header,
115    /// where `len` is the specified length of the item
116    /// (can be `0xFFFF_FFFF` for undefined length).
117    pub fn encode_item_header(&mut self, len: u32) -> Result<()> {
118        let len = if len == 0xFFFF_FFFF {
119            len
120        } else {
121            even_len(len)
122        };
123        self.encoder
124            .encode_item_header(&mut self.to, len)
125            .context(EncodeDataSnafu {
126                position: self.bytes_written,
127            })?;
128        self.bytes_written += 8;
129        Ok(())
130    }
131
132    /// Encode and write an item delimiter.
133    pub fn encode_item_delimiter(&mut self) -> Result<()> {
134        self.encoder
135            .encode_item_delimiter(&mut self.to)
136            .context(EncodeDataSnafu {
137                position: self.bytes_written,
138            })?;
139        self.bytes_written += 8;
140        Ok(())
141    }
142
143    /// Encode and write a sequence delimiter.
144    pub fn encode_sequence_delimiter(&mut self) -> Result<()> {
145        self.encoder
146            .encode_sequence_delimiter(&mut self.to)
147            .context(EncodeDataSnafu {
148                position: self.bytes_written,
149            })?;
150        self.bytes_written += 8;
151        Ok(())
152    }
153
154    /// Write the given bytes directly to the inner writer.
155    ///
156    /// Note that this method
157    /// (unlike [`write_bytes`](StatefulEncoder::write_bytes))
158    /// does not perform any additional padding.
159    pub fn write_raw_bytes(&mut self, bytes: &[u8]) -> Result<()> {
160        self.to.write_all(bytes).context(WriteValueDataSnafu {
161            position: self.bytes_written,
162        })?;
163        self.bytes_written += bytes.len() as u64;
164        Ok(())
165    }
166
167    /// Write a primitive DICOM value as a bunch of bytes
168    /// directly to the inner writer.
169    ///
170    /// This method will perform the necessary padding
171    /// (always with zeros)
172    /// to ensure that the encoded value has an even number of bytes.
173    pub fn write_bytes(&mut self, bytes: &[u8]) -> Result<()> {
174        debug_assert!(bytes.len() < u32::MAX as usize);
175        self.to.write_all(bytes).context(WriteValueDataSnafu {
176            position: self.bytes_written,
177        })?;
178        self.bytes_written += bytes.len() as u64;
179        if bytes.len() % 2 != 0 {
180            self.to.write_all(&[0]).context(WriteValueDataSnafu {
181                position: self.bytes_written,
182            })?;
183            self.bytes_written += 1;
184        }
185        Ok(())
186    }
187
    /// Retrieve the number of bytes written so far by this printer.
    ///
    /// This is the encoder's own running count,
    /// updated after each successful write through it.
    pub fn bytes_written(&self) -> u64 {
        self.bytes_written
    }
192
193    /// Encode and write the values of a pixel data offset table.
194    pub fn encode_offset_table(&mut self, table: &[u32]) -> Result<()> {
195        self.encoder
196            .encode_offset_table(&mut self.to, table)
197            .context(EncodeDataSnafu {
198                position: self.bytes_written,
199            })?;
200
201        self.bytes_written += table.len() as u64 * 4;
202        Ok(())
203    }
204
205    /// Encode and write a data element with a primitive value.
206    ///
207    /// This method will perform the necessary padding to ensure that the
208    /// encoded value is an even number of bytes.
209    /// Where applicable,
210    /// this will use the inner text codec for textual values.
211    /// The length property of the header is ignored,
212    /// the true byte length of the value in its encoded form is used instead.
213    pub fn encode_primitive_element(
214        &mut self,
215        de: &DataElementHeader,
216        value: &PrimitiveValue,
217    ) -> Result<()> {
218        // intercept string encoding calls to use the text codec
219        match value {
220            PrimitiveValue::Str(text) => {
221                self.encode_text_element(text, *de)?;
222                Ok(())
223            }
224            PrimitiveValue::Strs(texts) => {
225                self.encode_texts_element(&texts[..], *de)?;
226                Ok(())
227            }
228            _ => {
229                // if VR is DS or IS and the value is binary,
230                // write value as a string instead
231                if let VR::DS | VR::IS = de.vr {
232                    return self.encode_element_as_text(value, de);
233                }
234
235                let byte_len = value.calculate_byte_len();
236                self.encode_element_header(DataElementHeader {
237                    tag: de.tag,
238                    vr: de.vr,
239                    len: Length(byte_len as u32),
240                })?;
241
242                let bytes = self.encoder.encode_primitive(&mut self.to, value).context(
243                    EncodeDataSnafu {
244                        position: self.bytes_written,
245                    },
246                )?;
247
248                self.bytes_written += bytes as u64;
249                if bytes % 2 != 0 {
250                    let padding = match de.vr {
251                        VR::DA | VR::DT | VR::TM => b' ',
252                        _ => 0,
253                    };
254                    self.to.write_all(&[padding]).context(WriteValueDataSnafu {
255                        position: self.bytes_written,
256                    })?;
257                    self.bytes_written += 1;
258                }
259
260                Ok(())
261            }
262        }
263    }
264
265    fn try_new_codec(&mut self, name: &str) {
266        if let Some(codec) = SpecificCharacterSet::from_code(name) {
267            self.text = codec;
268        } else {
269            tracing::warn!("Unsupported character set `{}`, ignoring", name);
270        }
271    }
272
273    fn encode_text_element(&mut self, text: &str, de: DataElementHeader) -> Result<()> {
274        // encode it in memory first so that we know the real length
275        let mut encoded_value = self.convert_text_untrailed(text, de.vr)?;
276        // pad to even length
277        if encoded_value.len() % 2 == 1 {
278            let pad = if de.vr == VR::UI { b'\0' } else { b' ' };
279            encoded_value.push(pad);
280        }
281
282        // now we can write the header with the correct length
283        self.encode_element_header(DataElementHeader {
284            tag: de.tag,
285            vr: de.vr,
286            len: Length(encoded_value.len() as u32),
287        })?;
288        self.to
289            .write_all(&encoded_value)
290            .context(WriteValueDataSnafu {
291                position: self.bytes_written,
292            })?;
293        self.bytes_written += encoded_value.len() as u64;
294
295        // if element is Specific Character Set,
296        // update the text codec
297        if de.tag == Tag(0x0008, 0x0005) {
298            self.try_new_codec(text);
299        }
300
301        Ok(())
302    }
303
304    fn encode_texts_element<S>(&mut self, texts: &[S], de: DataElementHeader) -> Result<()>
305    where
306        S: AsRef<str>,
307    {
308        self.buffer.clear();
309        for (i, t) in texts.iter().enumerate() {
310            self.buffer
311                .extend_from_slice(&self.convert_text_untrailed(t.as_ref(), de.vr)?);
312            if i < texts.len() - 1 {
313                self.buffer.push(b'\\');
314            }
315        }
316        // pad to even length
317        if self.buffer.len() % 2 == 1 {
318            let pad = if de.vr == VR::UI { b'\0' } else { b' ' };
319            self.buffer.push(pad);
320        }
321
322        // now we can write the header with the correct length
323        self.encode_element_header(DataElementHeader {
324            tag: de.tag,
325            vr: de.vr,
326            len: Length(self.buffer.len() as u32),
327        })?;
328
329        self.to
330            .write_all(&self.buffer)
331            .context(WriteValueDataSnafu {
332                position: self.bytes_written,
333            })?;
334        self.bytes_written += self.buffer.len() as u64;
335
336        // if element is Specific Character Set,
337        // update the text codec
338        if de.tag == Tag(0x0008, 0x0005) {
339            if let Some(charset_name) = texts.first() {
340                self.try_new_codec(charset_name.as_ref());
341            }
342        }
343
344        Ok(())
345    }
346
347    fn convert_text_untrailed(&self, text: &str, vr: VR) -> Result<Vec<u8>> {
348        match vr {
349            VR::AE | VR::AS | VR::CS | VR::DA | VR::DS | VR::DT | VR::IS | VR::TM | VR::UI => {
350                // these VRs always use the default character repertoire
351                DefaultCharacterSetCodec
352                    .encode(text)
353                    .context(EncodeTextSnafu {
354                        position: self.bytes_written,
355                    })
356            }
357            _ => self.text.encode(text).context(EncodeTextSnafu {
358                position: self.bytes_written,
359            }),
360        }
361    }
362
363    /// edge case method for encoding data elements with IS and VR values
364    /// (always as text)
365    fn encode_element_as_text(
366        &mut self,
367        value: &PrimitiveValue,
368        de: &DataElementHeader,
369    ) -> Result<()> {
370        match value {
371            PrimitiveValue::Empty => {
372                self.encode_element_header(DataElementHeader {
373                    tag: de.tag,
374                    vr: de.vr,
375                    len: Length(0),
376                })?;
377                Ok(())
378            }
379            PrimitiveValue::U8(_)
380            | PrimitiveValue::I16(_)
381            | PrimitiveValue::U16(_)
382            | PrimitiveValue::I32(_)
383            | PrimitiveValue::U32(_)
384            | PrimitiveValue::I64(_)
385            | PrimitiveValue::U64(_)
386            | PrimitiveValue::F32(_)
387            | PrimitiveValue::F64(_) => {
388                let textual_value = value.to_str();
389                self.encode_element_header(DataElementHeader {
390                    tag: de.tag,
391                    vr: de.vr,
392                    len: Length(even_len(textual_value.len() as u32)),
393                })?;
394
395                write!(self.to, "{textual_value}").context(WriteValueDataSnafu {
396                    position: self.bytes_written,
397                })?;
398                let len = if textual_value.len() % 2 == 1 {
399                    self.to.write_all(b" ").context(WriteValueDataSnafu {
400                        position: self.bytes_written,
401                    })?;
402                    textual_value.len() as u64 + 1
403                } else {
404                    textual_value.len() as u64
405                };
406                self.bytes_written += len;
407                Ok(())
408            }
409            PrimitiveValue::Date(_)
410            | PrimitiveValue::DateTime(_)
411            | PrimitiveValue::Time(_)
412            | PrimitiveValue::Tags(_)
413            | PrimitiveValue::Strs(_)
414            | PrimitiveValue::Str(_) => unreachable!(),
415        }
416    }
417
    /// Flush the inner writer.
    ///
    /// Any error is returned as given by the writer,
    /// without being wrapped in this module's [`Error`] type.
    pub fn flush(&mut self) -> std::io::Result<()> {
        self.to.flush()
    }
422}
423
/// Round a length up to the nearest even number.
///
/// Even lengths are returned unchanged.
/// The value `0xFFFF_FFFF` (used in this module for undefined length)
/// cannot be rounded up within a `u32`, and is returned unchanged
/// instead of overflowing.
#[inline]
fn even_len(l: u32) -> u32 {
    // adds 1 only to odd values; saturation keeps `u32::MAX` in place
    l.saturating_add(l & 1)
}
428
429#[cfg(test)]
430mod tests {
431    use dicom_core::{
432        dicom_value, value::DicomTime, DataElement, DataElementHeader, DicomValue, Length,
433        PrimitiveValue, Tag, VR,
434    };
435    use dicom_encoding::{
436        encode::{explicit_le::ExplicitVRLittleEndianEncoder, EncoderFor},
437        text::{SpecificCharacterSet, TextCodec},
438    };
439
440    use super::StatefulEncoder;
441
442    /// Odd lengthed values convert to tokens with even padding (PN)
443    #[test]
444    fn encode_odd_length_element_pn() {
445        let element = DataElement::new(
446            Tag(0x0010, 0x0010),
447            VR::PN,
448            DicomValue::new(dicom_value!(Strs, ["Dall^John"])),
449        );
450
451        let mut out: Vec<_> = Vec::new();
452
453        {
454            let mut encoder = StatefulEncoder::new(
455                &mut out,
456                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
457                SpecificCharacterSet::default(),
458            );
459
460            encoder
461                .encode_primitive_element(element.header(), element.value().primitive().unwrap())
462                .unwrap();
463        }
464
465        assert_eq!(
466            &out,
467            &[
468                0x10, 0x00, 0x10, 0x00, // tag
469                b'P', b'N', // VR
470                0x0A, 0x00, // length
471                // ---------- value ----------
472                b'D', b'a', b'l', b'l', b'^', b'J', b'o', b'h', b'n', b' ',
473            ],
474        )
475    }
476
477    /// Odd lengthed values are encoded with even padding (bytes)
478    #[test]
479    fn encode_odd_length_element_bytes() {
480        let element = DataElement::new(
481            Tag(0x7FE0, 0x0010),
482            VR::OB,
483            DicomValue::new(vec![1; 9].into()),
484        );
485
486        let mut out: Vec<_> = Vec::new();
487
488        {
489            let mut encoder = StatefulEncoder::new(
490                &mut out,
491                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
492                SpecificCharacterSet::default(),
493            );
494
495            encoder
496                .encode_primitive_element(element.header(), element.value().primitive().unwrap())
497                .unwrap();
498        }
499
500        assert_eq!(
501            &out,
502            &[
503                0xE0, 0x7F, 0x10, 0x00, // tag
504                b'O', b'B', // VR
505                0x00, 0x00, // reserved
506                0x0A, 0x00, 0x00, 0x00, // length
507                // ---------- value ----------
508                1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
509            ],
510        )
511    }
512
513    /// Odd lengthed values are encoded with even padding (UIDs)
514    #[test]
515    fn encode_odd_length_element_uid() {
516        let element = DataElement::new(
517            Tag(0x0000, 0x0002),
518            VR::UI,
519            DicomValue::new("1.2.840.10008.1.1".into()),
520        );
521
522        let mut out: Vec<_> = Vec::new();
523
524        {
525            let mut encoder = StatefulEncoder::new(
526                &mut out,
527                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
528                SpecificCharacterSet::default(),
529            );
530
531            encoder
532                .encode_primitive_element(element.header(), element.value().primitive().unwrap())
533                .unwrap();
534        }
535
536        assert_eq!(
537            &out,
538            &[
539                // tag
540                0x00, 0x00, 0x02, 0x00, // VR
541                b'U', b'I', // length
542                0x12, 0x00, // length
543                // ---------- value ----------
544                b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
545                b'1', b'.', b'1', b'\0',
546            ],
547        )
548    }
549
550    /// Odd lengthed item values are encoded with even padding
551    #[test]
552    fn encode_odd_length_item_bytes() {
553        let mut out: Vec<_> = Vec::new();
554
555        {
556            let mut encoder = StatefulEncoder::new(
557                &mut out,
558                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
559                SpecificCharacterSet::default(),
560            );
561
562            encoder.encode_item_header(9).unwrap();
563            encoder.write_bytes(&[5; 9]).unwrap();
564        }
565
566        assert_eq!(
567            &out,
568            &[
569                0xFE, 0xFF, 0x00, 0xE0, // tag (0xFFFE, 0xE000)
570                0x0A, 0x00, 0x00, 0x00, // length
571                // ---------- value ----------
572                5, 5, 5, 5, 5, 5, 5, 5, 5, 0,
573            ],
574        )
575    }
576
577    /// Odd lengthed textual values are encoded to even padding with a space
578    #[test]
579    fn encode_odd_length_text() {
580        let mut out: Vec<_> = Vec::new();
581
582        {
583            let mut encoder = StatefulEncoder::new(
584                &mut out,
585                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
586                SpecificCharacterSet::default(),
587            );
588
589            let tm = DicomTime::from_hms_micro(23, 57, 59, 999_999).unwrap();
590
591            encoder
592                .encode_primitive_element(
593                    &DataElementHeader::new(Tag(0x0008, 0x0030), VR::TM, Length(14)),
594                    &PrimitiveValue::from(tm),
595                )
596                .unwrap();
597        }
598
599        assert_eq!(
600            &out,
601            &[
602                0x08, 0x00, 0x30, 0x00, // tag (0x0008, 0x0030)
603                b'T', b'M', // VR
604                0x0E, 0x00, // length
605                // ---------- value ----------
606                b'2', b'3', b'5', b'7', b'5', b'9', // time
607                b'.', b'9', b'9', b'9', b'9', b'9', b'9', // second fragment
608                b' ', // padding
609            ],
610        )
611    }
612
613    #[test]
614    fn test_even_len() {
615        use super::even_len;
616
617        assert_eq!(even_len(0), 0);
618        assert_eq!(even_len(1), 2);
619        assert_eq!(even_len(2), 2);
620        assert_eq!(even_len(3), 4);
621        assert_eq!(even_len(4), 4);
622        assert_eq!(even_len(5), 6);
623        assert_eq!(even_len(6), 6);
624        assert_eq!(even_len(0xFFFF_FFFD), 0xFFFF_FFFE);
625    }
626
627    /// Test that the stateful encoder updates
628    /// the active character set after writing a Specific Character Set element
629    /// with a supported text encoding.
630    #[test]
631    fn update_character_set() {
632        const GT: &[u8; 54] = &[
633            // Tag: (0008,0005) Specific Character Set
634            0x08, 0x00, 0x05, 0x00, // VR: CS
635            b'C', b'S', // Length: 10
636            0x0a, 0x00, // Value: "ISO_IR 192"
637            b'I', b'S', b'O', b'_', b'I', b'R', b' ', b'1', b'9', b'2',
638            // Tag: (0010,0010) Patient Name
639            0x10, 0x00, 0x10, 0x00, // VR: PN
640            b'P', b'N', // Length: 28
641            0x1c, 0x00, // Value: "Иванков^Андрей "
642            0xd0, 0x98, 0xd0, 0xb2, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb2,
643            0x5e, 0xd0, 0x90, 0xd0, 0xbd, 0xd0, 0xb4, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb9, b' ',
644        ];
645
646        let mut sink = Vec::with_capacity(GT.len());
647
648        let mut encoder = StatefulEncoder::new(
649            &mut sink,
650            EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
651            SpecificCharacterSet::default(),
652        );
653
654        // encode specific character set
655        let scs = DataElementHeader {
656            tag: Tag(0x0008, 0x0005),
657            vr: VR::CS,
658            len: Length(10),
659        };
660        let scs_value = PrimitiveValue::from("ISO_IR 192");
661
662        encoder.encode_primitive_element(&scs, &scs_value).unwrap();
663
664        // check that the encoder has changed
665        assert_eq!(encoder.text.name(), "ISO_IR 192");
666
667        // now encode something non-ASCII
668        let pn = DataElementHeader {
669            tag: Tag(0x0010, 0x0010),
670            vr: VR::PN,
671            len: Length(28),
672        };
673        let pn_value = PrimitiveValue::from("Иванков^Андрей ");
674        encoder.encode_primitive_element(&pn, &pn_value).unwrap();
675
676        // test all output against ground truth
677        assert_eq!(&sink, GT);
678    }
679}