dicom_parser/stateful/
encode.rs

1//! Module holding a stateful DICOM data encoding abstraction.
2//!
3//! The [`StatefulEncoder`] supports encoding of binary data and text
4//! while applying the necessary padding to conform to DICOM encoding rules.
5
6use dicom_core::{value::PrimitiveValue, DataElementHeader, Length, Tag, VR};
7use dicom_encoding::transfer_syntax::DynEncoder;
8use dicom_encoding::{
9    encode::EncodeTo,
10    text::{DefaultCharacterSetCodec, SpecificCharacterSet, TextCodec},
11    TransferSyntax,
12};
13use snafu::{Backtrace, OptionExt, ResultExt, Snafu};
14use std::io::Write;
15
/// Errors which may be raised by the stateful encoder in this module.
///
/// The `position` field found in several variants holds
/// the encoder's byte counter at the moment the failing operation started.
#[derive(Debug, Snafu)]
#[non_exhaustive]
pub enum Error {
    /// No encoder could be obtained for the requested transfer syntax;
    /// `ts` is the transfer syntax UID.
    #[snafu(display("Encoding in transfer syntax {} is unsupported", ts))]
    UnsupportedTransferSyntax {
        ts: &'static str,
        backtrace: Backtrace,
    },

    /// The given specific character set is not supported.
    #[snafu(display("Unsupported character set {:?}", charset))]
    UnsupportedCharacterSet {
        charset: SpecificCharacterSet,
        backtrace: Backtrace,
    },

    /// The inner data encoder failed
    /// (headers, delimiters, offset tables, or primitive values).
    #[snafu(display("Failed to encode a data piece at position {}", position))]
    EncodeData {
        position: u64,
        source: dicom_encoding::encode::Error,
    },

    /// A text value could not be converted to bytes by the text codec.
    #[snafu(display("Could not encode text at position {}", position))]
    EncodeText {
        position: u64,
        source: dicom_encoding::text::EncodeTextError,
    },

    /// Writing value bytes (or their padding) to the target failed.
    #[snafu(display("Could not write value data at position {}", position))]
    WriteValueData {
        position: u64,
        source: std::io::Error,
        backtrace: Backtrace,
    },
}
50
/// Result alias for operations in this module, with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
52
/// Also called a printer, this encoder type provides a stateful mid-level
/// abstraction for writing DICOM content. Unlike `Encode`,
/// the stateful encoder knows how to write text values and keeps track
/// of how many bytes were written.
/// `W` is the write target, `E` is the encoder, and `T` is the text codec.
#[derive(Debug)]
pub struct StatefulEncoder<W, E, T = SpecificCharacterSet> {
    /// the write target receiving all encoded bytes
    to: W,
    /// the data encoder used for headers, delimiters and primitive values
    encoder: E,
    /// the text codec currently in effect for textual values
    text: T,
    /// running count of the bytes written through this encoder
    bytes_written: u64,
    /// scratch buffer in which multi-valued text is assembled
    /// before it is written out
    buffer: Vec<u8>,
}
66
/// A stateful encoder over a boxed writer and a dynamically dispatched
/// data encoder, using the default text codec type parameter
/// ([`SpecificCharacterSet`]).
pub type DynStatefulEncoder<'w> = StatefulEncoder<Box<dyn Write + 'w>, DynEncoder<'w, dyn Write>>;
68
69impl<W, E, T> StatefulEncoder<W, E, T> {
70    pub fn new(to: W, encoder: E, text: T) -> Self {
71        StatefulEncoder {
72            to,
73            encoder,
74            text,
75            bytes_written: 0,
76            buffer: Vec::with_capacity(128),
77        }
78    }
79}
80
81impl<'s> DynStatefulEncoder<'s> {
82    pub fn from_transfer_syntax(
83        to: Box<dyn Write + 's>,
84        ts: TransferSyntax,
85        charset: SpecificCharacterSet,
86    ) -> Result<Self> {
87        let encoder = ts
88            .encoder()
89            .context(UnsupportedTransferSyntaxSnafu { ts: ts.uid() })?;
90        Ok(StatefulEncoder::new(to, encoder, charset))
91    }
92}
93
94impl<W, E> StatefulEncoder<W, E>
95where
96    W: Write,
97    E: EncodeTo<W>,
98{
99    /// Encode and write a data element header.
100    pub fn encode_element_header(&mut self, mut de: DataElementHeader) -> Result<()> {
101        if let Some(len) = de.len.get() {
102            de.len = Length(even_len(len))
103        }
104        let bytes = self
105            .encoder
106            .encode_element_header(&mut self.to, de)
107            .context(EncodeDataSnafu {
108                position: self.bytes_written,
109            })?;
110        self.bytes_written += bytes as u64;
111        Ok(())
112    }
113
114    /// Encode and write an item header,
115    /// where `len` is the specified length of the item
116    /// (can be `0xFFFF_FFFF` for undefined length).
117    pub fn encode_item_header(&mut self, len: u32) -> Result<()> {
118        let len = if len == 0xFFFF_FFFF {
119            len
120        } else {
121            even_len(len)
122        };
123        self.encoder
124            .encode_item_header(&mut self.to, len)
125            .context(EncodeDataSnafu {
126                position: self.bytes_written,
127            })?;
128        self.bytes_written += 8;
129        Ok(())
130    }
131
132    /// Encode and write an item delimiter.
133    pub fn encode_item_delimiter(&mut self) -> Result<()> {
134        self.encoder
135            .encode_item_delimiter(&mut self.to)
136            .context(EncodeDataSnafu {
137                position: self.bytes_written,
138            })?;
139        self.bytes_written += 8;
140        Ok(())
141    }
142
143    /// Encode and write a sequence delimiter.
144    pub fn encode_sequence_delimiter(&mut self) -> Result<()> {
145        self.encoder
146            .encode_sequence_delimiter(&mut self.to)
147            .context(EncodeDataSnafu {
148                position: self.bytes_written,
149            })?;
150        self.bytes_written += 8;
151        Ok(())
152    }
153
154    /// Write the given bytes directly to the inner writer.
155    ///
156    /// Note that this method
157    /// (unlike [`write_bytes`](StatefulEncoder::write_bytes))
158    /// does not perform any additional padding.
159    pub fn write_raw_bytes(&mut self, bytes: &[u8]) -> Result<()> {
160        self.to.write_all(bytes).context(WriteValueDataSnafu {
161            position: self.bytes_written,
162        })?;
163        self.bytes_written += bytes.len() as u64;
164        Ok(())
165    }
166
167    /// Write a primitive DICOM value as a bunch of bytes
168    /// directly to the inner writer.
169    ///
170    /// This method will perform the necessary padding
171    /// (always with zeros)
172    /// to ensure that the encoded value has an even number of bytes.
173    pub fn write_bytes(&mut self, bytes: &[u8]) -> Result<()> {
174        debug_assert!(bytes.len() < u32::MAX as usize);
175        self.to.write_all(bytes).context(WriteValueDataSnafu {
176            position: self.bytes_written,
177        })?;
178        self.bytes_written += bytes.len() as u64;
179        if bytes.len() % 2 != 0 {
180            self.to.write_all(&[0]).context(WriteValueDataSnafu {
181                position: self.bytes_written,
182            })?;
183            self.bytes_written += 1;
184        }
185        Ok(())
186    }
187
    /// Retrieve the number of bytes written so far by this printer.
    ///
    /// This is the running counter which the encoding and writing methods
    /// of this type advance after each successful write.
    pub fn bytes_written(&self) -> u64 {
        self.bytes_written
    }
192
193    /// Encode and write the values of a pixel data offset table.
194    pub fn encode_offset_table(&mut self, table: &[u32]) -> Result<()> {
195        self.encoder
196            .encode_offset_table(&mut self.to, table)
197            .context(EncodeDataSnafu {
198                position: self.bytes_written,
199            })?;
200
201        self.bytes_written += table.len() as u64 * 4;
202        Ok(())
203    }
204
205    /// Encode and write a data element with a primitive value.
206    ///
207    /// This method will perform the necessary padding to ensure that the
208    /// encoded value is an even number of bytes.
209    /// Where applicable,
210    /// this will use the inner text codec for textual values.
211    /// The length property of the header is ignored,
212    /// the true byte length of the value in its encoded form is used instead.
213    pub fn encode_primitive_element(
214        &mut self,
215        de: &DataElementHeader,
216        value: &PrimitiveValue,
217    ) -> Result<()> {
218        // intercept string encoding calls to use the text codec
219        match value {
220            PrimitiveValue::Str(text) => {
221                self.encode_text_element(text, *de)?;
222                Ok(())
223            }
224            PrimitiveValue::Strs(texts) => {
225                self.encode_texts_element(&texts[..], *de)?;
226                Ok(())
227            }
228            _ => {
229                // if VR is DS or IS and the value is binary,
230                // write value as a string instead
231                if let VR::DS | VR::IS = de.vr {
232                    return self.encode_element_as_text(value, de);
233                }
234
235                let byte_len = value.calculate_byte_len();
236                self.encode_element_header(DataElementHeader {
237                    tag: de.tag,
238                    vr: de.vr,
239                    len: Length(byte_len as u32),
240                })?;
241
242                let bytes = self.encoder.encode_primitive(&mut self.to, value).context(
243                    EncodeDataSnafu {
244                        position: self.bytes_written,
245                    },
246                )?;
247
248                self.bytes_written += bytes as u64;
249                if bytes % 2 != 0 {
250                    let padding = match de.vr {
251                        VR::DA | VR::DT | VR::TM => b' ',
252                        _ => 0,
253                    };
254                    self.to.write_all(&[padding]).context(WriteValueDataSnafu {
255                        position: self.bytes_written,
256                    })?;
257                    self.bytes_written += 1;
258                }
259
260                Ok(())
261            }
262        }
263    }
264
265    fn try_new_codec(&mut self, name: &str) {
266        if let Some(codec) = SpecificCharacterSet::from_code(name) {
267            self.text = codec;
268        } else {
269            tracing::warn!("Unsupported character set `{}`, ignoring", name);
270        }
271    }
272
273    fn encode_text_element(&mut self, text: &str, de: DataElementHeader) -> Result<()> {
274        // encode it in memory first so that we know the real length
275        let mut encoded_value = self.convert_text_untrailed(text, de.vr)?;
276        // pad to even length
277        if encoded_value.len() % 2 == 1 {
278            let pad = if de.vr == VR::UI { b'\0' } else { b' ' };
279            encoded_value.push(pad);
280        }
281
282        // now we can write the header with the correct length
283        self.encode_element_header(DataElementHeader {
284            tag: de.tag,
285            vr: de.vr,
286            len: Length(encoded_value.len() as u32),
287        })?;
288        self.to
289            .write_all(&encoded_value)
290            .context(WriteValueDataSnafu {
291                position: self.bytes_written,
292            })?;
293        self.bytes_written += encoded_value.len() as u64;
294
295        // if element is Specific Character Set,
296        // update the text codec
297        if de.tag == Tag(0x0008, 0x0005) {
298            self.try_new_codec(text);
299        }
300
301        Ok(())
302    }
303
304    fn encode_texts_element<S>(&mut self, texts: &[S], de: DataElementHeader) -> Result<()>
305    where
306        S: AsRef<str>,
307    {
308        self.buffer.clear();
309        for (i, t) in texts.iter().enumerate() {
310            self.buffer
311                .extend_from_slice(&self.convert_text_untrailed(t.as_ref(), de.vr)?);
312            if i < texts.len() - 1 {
313                self.buffer.push(b'\\');
314            }
315        }
316        // pad to even length
317        if self.buffer.len() % 2 == 1 {
318            let pad = if de.vr == VR::UI { b'\0' } else { b' ' };
319            self.buffer.push(pad);
320        }
321
322        // now we can write the header with the correct length
323        self.encode_element_header(DataElementHeader {
324            tag: de.tag,
325            vr: de.vr,
326            len: Length(self.buffer.len() as u32),
327        })?;
328
329        self.to
330            .write_all(&self.buffer)
331            .context(WriteValueDataSnafu {
332                position: self.bytes_written,
333            })?;
334        self.bytes_written += self.buffer.len() as u64;
335
336        // if element is Specific Character Set,
337        // update the text codec
338        if de.tag == Tag(0x0008, 0x0005) {
339            if let Some(charset_name) = texts.first() {
340                self.try_new_codec(charset_name.as_ref());
341            }
342        }
343
344        Ok(())
345    }
346
347    fn convert_text_untrailed(&self, text: &str, vr: VR) -> Result<Vec<u8>> {
348        match vr {
349            VR::AE | VR::AS | VR::CS | VR::DA | VR::DS | VR::DT | VR::IS | VR::TM | VR::UI => {
350                // these VRs always use the default character repertoire
351                DefaultCharacterSetCodec
352                    .encode(text)
353                    .context(EncodeTextSnafu {
354                        position: self.bytes_written,
355                    })
356            }
357            _ => self.text.encode(text).context(EncodeTextSnafu {
358                position: self.bytes_written,
359            }),
360        }
361    }
362
363    /// edge case method for encoding data elements with IS and VR values
364    /// (always as text)
365    fn encode_element_as_text(
366        &mut self,
367        value: &PrimitiveValue,
368        de: &DataElementHeader,
369    ) -> Result<()> {
370        match value {
371            PrimitiveValue::Empty => {
372                self.encode_element_header(DataElementHeader {
373                    tag: de.tag,
374                    vr: de.vr,
375                    len: Length(0),
376                })?;
377                Ok(())
378            }
379            PrimitiveValue::U8(_)
380            | PrimitiveValue::I16(_)
381            | PrimitiveValue::U16(_)
382            | PrimitiveValue::I32(_)
383            | PrimitiveValue::U32(_)
384            | PrimitiveValue::I64(_)
385            | PrimitiveValue::U64(_)
386            | PrimitiveValue::F32(_)
387            | PrimitiveValue::F64(_) => {
388                let textual_value = value.to_str();
389                self.encode_element_header(DataElementHeader {
390                    tag: de.tag,
391                    vr: de.vr,
392                    len: Length(even_len(textual_value.len() as u32)),
393                })?;
394
395                write!(self.to, "{textual_value}").context(WriteValueDataSnafu {
396                    position: self.bytes_written,
397                })?;
398                let len = if textual_value.len() % 2 == 1 {
399                    self.to.write_all(b" ").context(WriteValueDataSnafu {
400                        position: self.bytes_written,
401                    })?;
402                    textual_value.len() as u64 + 1
403                } else {
404                    textual_value.len() as u64
405                };
406                self.bytes_written += len;
407                Ok(())
408            }
409            PrimitiveValue::Date(_)
410            | PrimitiveValue::DateTime(_)
411            | PrimitiveValue::Time(_)
412            | PrimitiveValue::Tags(_)
413            | PrimitiveValue::Strs(_)
414            | PrimitiveValue::Str(_) => unreachable!(),
415        }
416    }
417}
418
/// Round `l` up to the nearest even number.
///
/// Even inputs are returned unchanged and odd inputs are incremented by one.
/// The addition saturates, so `0xFFFF_FFFF`
/// (the value used for undefined length)
/// is returned as is instead of overflowing.
#[inline]
fn even_len(l: u32) -> u32 {
    // `l & 1` is 1 exactly when `l` is odd
    l.saturating_add(l & 1)
}
423
424#[cfg(test)]
425mod tests {
426    use dicom_core::{
427        dicom_value, value::DicomTime, DataElement, DataElementHeader, DicomValue, Length,
428        PrimitiveValue, Tag, VR,
429    };
430    use dicom_encoding::{
431        encode::{explicit_le::ExplicitVRLittleEndianEncoder, EncoderFor},
432        text::{SpecificCharacterSet, TextCodec},
433    };
434
435    use super::StatefulEncoder;
436
437    /// Odd lengthed values convert to tokens with even padding (PN)
438    #[test]
439    fn encode_odd_length_element_pn() {
440        let element = DataElement::new(
441            Tag(0x0010, 0x0010),
442            VR::PN,
443            DicomValue::new(dicom_value!(Strs, ["Dall^John"])),
444        );
445
446        let mut out: Vec<_> = Vec::new();
447
448        {
449            let mut encoder = StatefulEncoder::new(
450                &mut out,
451                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
452                SpecificCharacterSet::default(),
453            );
454
455            encoder
456                .encode_primitive_element(element.header(), element.value().primitive().unwrap())
457                .unwrap();
458        }
459
460        assert_eq!(
461            &out,
462            &[
463                0x10, 0x00, 0x10, 0x00, // tag
464                b'P', b'N', // VR
465                0x0A, 0x00, // length
466                // ---------- value ----------
467                b'D', b'a', b'l', b'l', b'^', b'J', b'o', b'h', b'n', b' ',
468            ],
469        )
470    }
471
472    /// Odd lengthed values are encoded with even padding (bytes)
473    #[test]
474    fn encode_odd_length_element_bytes() {
475        let element = DataElement::new(
476            Tag(0x7FE0, 0x0010),
477            VR::OB,
478            DicomValue::new(vec![1; 9].into()),
479        );
480
481        let mut out: Vec<_> = Vec::new();
482
483        {
484            let mut encoder = StatefulEncoder::new(
485                &mut out,
486                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
487                SpecificCharacterSet::default(),
488            );
489
490            encoder
491                .encode_primitive_element(element.header(), element.value().primitive().unwrap())
492                .unwrap();
493        }
494
495        assert_eq!(
496            &out,
497            &[
498                0xE0, 0x7F, 0x10, 0x00, // tag
499                b'O', b'B', // VR
500                0x00, 0x00, // reserved
501                0x0A, 0x00, 0x00, 0x00, // length
502                // ---------- value ----------
503                1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
504            ],
505        )
506    }
507
508    /// Odd lengthed values are encoded with even padding (UIDs)
509    #[test]
510    fn encode_odd_length_element_uid() {
511        let element = DataElement::new(
512            Tag(0x0000, 0x0002),
513            VR::UI,
514            DicomValue::new("1.2.840.10008.1.1".into()),
515        );
516
517        let mut out: Vec<_> = Vec::new();
518
519        {
520            let mut encoder = StatefulEncoder::new(
521                &mut out,
522                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
523                SpecificCharacterSet::default(),
524            );
525
526            encoder
527                .encode_primitive_element(element.header(), element.value().primitive().unwrap())
528                .unwrap();
529        }
530
531        assert_eq!(
532            &out,
533            &[
534                // tag
535                0x00, 0x00, 0x02, 0x00, // VR
536                b'U', b'I', // length
537                0x12, 0x00, // length
538                // ---------- value ----------
539                b'1', b'.', b'2', b'.', b'8', b'4', b'0', b'.', b'1', b'0', b'0', b'0', b'8', b'.',
540                b'1', b'.', b'1', b'\0',
541            ],
542        )
543    }
544
545    /// Odd lengthed item values are encoded with even padding
546    #[test]
547    fn encode_odd_length_item_bytes() {
548        let mut out: Vec<_> = Vec::new();
549
550        {
551            let mut encoder = StatefulEncoder::new(
552                &mut out,
553                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
554                SpecificCharacterSet::default(),
555            );
556
557            encoder.encode_item_header(9).unwrap();
558            encoder.write_bytes(&[5; 9]).unwrap();
559        }
560
561        assert_eq!(
562            &out,
563            &[
564                0xFE, 0xFF, 0x00, 0xE0, // tag (0xFFFE, 0xE000)
565                0x0A, 0x00, 0x00, 0x00, // length
566                // ---------- value ----------
567                5, 5, 5, 5, 5, 5, 5, 5, 5, 0,
568            ],
569        )
570    }
571
572    /// Odd lengthed textual values are encoded to even padding with a space
573    #[test]
574    fn encode_odd_length_text() {
575        let mut out: Vec<_> = Vec::new();
576
577        {
578            let mut encoder = StatefulEncoder::new(
579                &mut out,
580                EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
581                SpecificCharacterSet::default(),
582            );
583
584            let tm = DicomTime::from_hms_micro(23, 57, 59, 999_999).unwrap();
585
586            encoder
587                .encode_primitive_element(
588                    &DataElementHeader::new(Tag(0x0008, 0x0030), VR::TM, Length(14)),
589                    &PrimitiveValue::from(tm),
590                )
591                .unwrap();
592        }
593
594        assert_eq!(
595            &out,
596            &[
597                0x08, 0x00, 0x30, 0x00, // tag (0x0008, 0x0030)
598                b'T', b'M', // VR
599                0x0E, 0x00, // length
600                // ---------- value ----------
601                b'2', b'3', b'5', b'7', b'5', b'9', // time
602                b'.', b'9', b'9', b'9', b'9', b'9', b'9', // second fragment
603                b' ', // padding
604            ],
605        )
606    }
607
608    #[test]
609    fn test_even_len() {
610        use super::even_len;
611
612        assert_eq!(even_len(0), 0);
613        assert_eq!(even_len(1), 2);
614        assert_eq!(even_len(2), 2);
615        assert_eq!(even_len(3), 4);
616        assert_eq!(even_len(4), 4);
617        assert_eq!(even_len(5), 6);
618        assert_eq!(even_len(6), 6);
619        assert_eq!(even_len(0xFFFF_FFFD), 0xFFFF_FFFE);
620    }
621
622    /// Test that the stateful encoder updates
623    /// the active character set after writing a Specific Character Set element
624    /// with a supported text encoding.
625    #[test]
626    fn update_character_set() {
627        const GT: &[u8; 54] = &[
628            // Tag: (0008,0005) Specific Character Set
629            0x08, 0x00, 0x05, 0x00, // VR: CS
630            b'C', b'S', // Length: 10
631            0x0a, 0x00, // Value: "ISO_IR 192"
632            b'I', b'S', b'O', b'_', b'I', b'R', b' ', b'1', b'9', b'2',
633            // Tag: (0010,0010) Patient Name
634            0x10, 0x00, 0x10, 0x00, // VR: PN
635            b'P', b'N', // Length: 28
636            0x1c, 0x00, // Value: "Иванков^Андрей "
637            0xd0, 0x98, 0xd0, 0xb2, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb2,
638            0x5e, 0xd0, 0x90, 0xd0, 0xbd, 0xd0, 0xb4, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb9, b' ',
639        ];
640
641        let mut sink = Vec::with_capacity(GT.len());
642
643        let mut encoder = StatefulEncoder::new(
644            &mut sink,
645            EncoderFor::new(ExplicitVRLittleEndianEncoder::default()),
646            SpecificCharacterSet::default(),
647        );
648
649        // encode specific character set
650        let scs = DataElementHeader {
651            tag: Tag(0x0008, 0x0005),
652            vr: VR::CS,
653            len: Length(10),
654        };
655        let scs_value = PrimitiveValue::from("ISO_IR 192");
656
657        encoder.encode_primitive_element(&scs, &scs_value).unwrap();
658
659        // check that the encoder has changed
660        assert_eq!(encoder.text.name(), "ISO_IR 192");
661
662        // now encode something non-ASCII
663        let pn = DataElementHeader {
664            tag: Tag(0x0010, 0x0010),
665            vr: VR::PN,
666            len: Length(28),
667        };
668        let pn_value = PrimitiveValue::from("Иванков^Андрей ");
669        encoder.encode_primitive_element(&pn, &pn_value).unwrap();
670
671        // test all output against ground truth
672        assert_eq!(&sink, GT);
673    }
674}