// dicom_encoding/text.rs

//! This module contains reusable components for encoding and decoding text in DICOM
//! data structures, including support for character repertoires.
//!
//! At the moment the following character sets are supported:
//!
//! | Character Set                 | decoding support | encoding support |
//! |-------------------------------|------------------|------------------|
//! | ISO-IR 6 (default)            | ✓ | ✓ |
//! | ISO-IR 13 (WINDOWS_31J): The JIS X 0201-1976 character set (Japanese single-byte) | ✓ | ✓ |
//! | ISO-IR 87 (ISO_2022_JP): The JIS X 0208-1990 character set (Japanese multi-byte) | ✓ | ✓ |
//! | ISO-IR 100 (ISO-8859-1): Right-hand part of the Latin alphabet no. 1, the Western Europe character set | ✓ | ✓ |
//! | ISO-IR 101 (ISO-8859-2): Right-hand part of the Latin alphabet no. 2, the Central/Eastern Europe character set | ✓ | ✓ |
//! | ISO-IR 109 (ISO-8859-3): Right-hand part of the Latin alphabet no. 3, the South Europe character set | ✓ | ✓ |
//! | ISO-IR 110 (ISO-8859-4): Right-hand part of the Latin alphabet no. 4, the North Europe character set | ✓ | ✓ |
//! | ISO-IR 126 (ISO-8859-7): The Latin/Greek character set | ✓ | ✓ |
//! | ISO-IR 127 (ISO-8859-6): The Latin/Arabic character set | ✓ | ✓ |
//! | ISO-IR 138 (ISO-8859-8): The Latin/Hebrew character set | ✓ | ✓ |
//! | ISO-IR 144 (ISO-8859-5): The Latin/Cyrillic character set | ✓ | ✓ |
//! | ISO-IR 148 (ISO-8859-9): Latin no. 5, the Turkish character set  | x | x |
//! | ISO-IR 149 (WINDOWS_949): The KS X 1001 character set (Korean) | ✓ | ✓ |
//! | ISO-IR 159: The JIS X 0212-1990 character set (supplementary Japanese characters) | x | x |
//! | ISO-IR 166 (WINDOWS_874): The TIS 620-2533 character set (Thai) | ✓ | ✓ |
//! | ISO-IR 192: The Unicode character set based on the UTF-8 encoding | ✓ | ✓ |
//! | GB18030: The Simplified Chinese character set | ✓ | ✓ |
//! | GB2312: Simplified Chinese character set | ✓ | ✓ |
//! | GBK: Simplified Chinese character set | ✓ | ✓ |
//!
//! These capabilities are available through [`SpecificCharacterSet`].
28
29use encoding::all::{
30    GB18030, GBK, ISO_2022_JP, ISO_8859_1, ISO_8859_2, ISO_8859_3, ISO_8859_4, ISO_8859_5,
31    ISO_8859_6, ISO_8859_7, ISO_8859_8, UTF_8, WINDOWS_31J, WINDOWS_874, WINDOWS_949,
32};
33use encoding::{DecoderTrap, EncoderTrap, Encoding, RawDecoder, StringWriter};
34use snafu::{Backtrace, Snafu};
35use std::borrow::Cow;
36use std::fmt::Debug;
37
38/// An error type for text encoding issues.
39#[derive(Debug, Snafu)]
40#[non_exhaustive]
41pub enum EncodeTextError {
42    /// A custom error message,
43    /// for when the underlying error type does not encode error semantics
44    /// into type variants.
45    #[snafu(display("{}", message))]
46    EncodeCustom {
47        /// The error message in plain text.
48        message: Cow<'static, str>,
49        /// The generated backtrace, if available.
50        backtrace: Backtrace,
51    },
52}
53
54/// An error type for text decoding issues.
55#[derive(Debug, Snafu)]
56#[non_exhaustive]
57pub enum DecodeTextError {
58    /// A custom error message,
59    /// for when the underlying error type does not encode error semantics
60    /// into type variants.
61    #[snafu(display("{}", message))]
62    DecodeCustom {
63        /// The error message in plain text.
64        message: Cow<'static, str>,
65        /// The generated backtrace, if available.
66        backtrace: Backtrace,
67    },
68}
69
70type EncodeResult<T> = Result<T, EncodeTextError>;
71type DecodeResult<T> = Result<T, DecodeTextError>;
72
73/// A holder of encoding and decoding mechanisms for text in DICOM content,
74/// which according to the standard, depends on the specific character set.
75pub trait TextCodec {
76    /// Obtain the defined term (unique name) of the text encoding,
77    /// which may be used as the value of a
78    /// Specific Character Set (0008, 0005) element to refer to this codec.
79    ///
80    /// Should contain no leading or trailing spaces.
81    /// This method may be useful for testing purposes, considering that
82    /// `TextCodec` is often used as a trait object.
83    fn name(&self) -> Cow<'static, str>;
84
85    /// Decode the given byte buffer as a single string. The resulting string
86    /// _may_ contain backslash characters ('\') to delimit individual values,
87    /// and should be split later on if required.
88    fn decode(&self, text: &[u8]) -> DecodeResult<String>;
89
90    /// Encode a text value into a byte vector. The input string can
91    /// feature multiple text values by using the backslash character ('\')
92    /// as the value delimiter.
93    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>>;
94}
95
96impl<T: ?Sized> TextCodec for Box<T>
97where
98    T: TextCodec,
99{
100    fn name(&self) -> Cow<'static, str> {
101        self.as_ref().name()
102    }
103
104    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
105        self.as_ref().decode(text)
106    }
107
108    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
109        self.as_ref().encode(text)
110    }
111}
112
113impl<T: ?Sized> TextCodec for &'_ T
114where
115    T: TextCodec,
116{
117    fn name(&self) -> Cow<'static, str> {
118        (**self).name()
119    }
120
121    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
122        (**self).decode(text)
123    }
124
125    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
126        (**self).encode(text)
127    }
128}
129
130/// A descriptor for a specific character set,
131/// taking part in text encoding and decoding
132/// as per [PS3.5 ch 6 6.1](https://dicom.nema.org/medical/dicom/2023e/output/chtml/part05/chapter_6.html#sect_6.1).
133///
134/// # Example
135///
136/// Use [`from_code`](SpecificCharacterSet::from_code)
137/// or one of the associated constants to create a character set.
138/// From there, use the [`TextCodec`] trait to encode and decode text.
139///
140/// ```
141/// use dicom_encoding::text::{SpecificCharacterSet, TextCodec};
142///
143/// let character_set = SpecificCharacterSet::from_code("ISO_IR 100").unwrap();
144/// assert_eq!(character_set, SpecificCharacterSet::ISO_IR_100);
145/// ```
146#[derive(Debug, Default, Clone, PartialEq)]
147pub struct SpecificCharacterSet(CharsetImpl);
148
149impl SpecificCharacterSet {
150    /// ISO IR 6: The default character set, as defined by the DICOM standard.
151    pub const ISO_IR_6: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::Default);
152
153    // ISO IR 100: ISO 8859-1, the Western Europe character set
154    pub const ISO_IR_100: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::IsoIr100);
155
156    /// ISO IR 192: UTF-8 encoding
157    pub const ISO_IR_192: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::IsoIr192);
158
159    /// Obtain the specific character set identified by the given code string.
160    ///
161    /// Supported code strings include the possible values
162    /// in the respective DICOM element (0008, 0005).
163    ///
164    /// # Example
165    ///
166    /// ```
167    /// use dicom_encoding::text::{SpecificCharacterSet, TextCodec};
168    ///
169    /// let character_set = SpecificCharacterSet::from_code("ISO_IR 100").unwrap();
170    /// assert_eq!(character_set.name(), "ISO_IR 100");
171    /// ```
172    pub fn from_code(code: &str) -> Option<Self> {
173        CharsetImpl::from_code(code).map(SpecificCharacterSet)
174    }
175}
176
177impl TextCodec for SpecificCharacterSet {
178    fn name(&self) -> Cow<'static, str> {
179        self.0.name()
180    }
181
182    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
183        self.0.decode(text)
184    }
185
186    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
187        self.0.encode(text)
188    }
189}
190
/// An enum type for individual supported character sets.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
#[non_exhaustive]
enum CharsetImpl {
    /// **ISO-IR 6**: the default character set.
    #[default]
    Default,
    /// **ISO-IR 13**: The JIS X 0201-1976 character set (Japanese single-byte).
    IsoIr13,
    /// **ISO-IR 87**: The JIS X 0208-1990 character set (Japanese multi-byte).
    IsoIr87,
    /// **ISO-IR 100** (ISO-8859-1): Right-hand part of the Latin alphabet no. 1,
    /// the Western Europe character set.
    IsoIr100,
    /// **ISO-IR 101** (ISO-8859-2): Right-hand part of the Latin alphabet no. 2,
    /// the Central/Eastern Europe character set.
    IsoIr101,
    /// **ISO-IR 109** (ISO-8859-3): Right-hand part of the Latin alphabet no. 3,
    /// the South Europe character set.
    IsoIr109,
    /// **ISO-IR 110** (ISO-8859-4): Right-hand part of the Latin alphabet no. 4,
    /// the North Europe character set.
    IsoIr110,
    /// **ISO-IR 126** (ISO-8859-7): The Latin/Greek character set.
    IsoIr126,
    /// **ISO-IR 127** (ISO-8859-6): The Latin/Arabic character set.
    IsoIr127,
    /// **ISO-IR 138** (ISO-8859-8): The Latin/Hebrew character set.
    IsoIr138,
    /// **ISO-IR 144** (ISO-8859-5): The Latin/Cyrillic character set.
    IsoIr144,
    /// **ISO-IR 149**: The KS X 1001 character set (Korean).
    IsoIr149,
    /// **ISO-IR 166**: The TIS 620-2533 character set (Thai).
    IsoIr166,
    /// **ISO-IR 192**: The Unicode character set based on the UTF-8 encoding.
    IsoIr192,
    /// **GB18030**: The Simplified Chinese character set.
    Gb18030,
    /// **GBK**: The Simplified Chinese character set.
    Gbk,
    // Support for more text encodings is tracked in issue #40.
}
234
235impl CharsetImpl {
236    /// Obtain the specific character set identified by the given code string.
237    ///
238    /// Supported code strings include the possible values
239    /// in the respective DICOM element (0008, 0005).
240    pub fn from_code(uid: &str) -> Option<Self> {
241        use self::CharsetImpl::*;
242        match uid.trim_end() {
243            "Default" | "ISO_IR_6" | "ISO_IR 6" | "ISO 2022 IR 6" => Some(Default),
244            "ISO_IR_13" | "ISO_IR 13" | "ISO 2022 IR 13" => Some(IsoIr13),
245            "ISO_IR_87" | "ISO_IR 87" | "ISO 2022 IR 87" => Some(IsoIr87),
246            "ISO_IR_100" | "ISO_IR 100" | "ISO 2022 IR 100" => Some(IsoIr100),
247            "ISO_IR_101" | "ISO_IR 101" | "ISO 2022 IR 101" => Some(IsoIr101),
248            "ISO_IR_109" | "ISO_IR 109" | "ISO 2022 IR 109" => Some(IsoIr109),
249            "ISO_IR_110" | "ISO_IR 110" | "ISO 2022 IR 110" => Some(IsoIr110),
250            "ISO_IR_126" | "ISO_IR 126" | "ISO 2022 IR 126" => Some(IsoIr126),
251            "ISO_IR_127" | "ISO_IR 127" | "ISO 2022 IR 127" => Some(IsoIr127),
252            "ISO_IR_138" | "ISO_IR 138" | "ISO 2022 IR 138" => Some(IsoIr138),
253            "ISO_IR_144" | "ISO_IR 144" | "ISO 2022 IR 144" => Some(IsoIr144),
254            "ISO_IR_149" | "ISO_IR 149" | "ISO 2022 IR 149" => Some(IsoIr149),
255            "ISO_IR_166" | "ISO_IR 166" | "ISO 2022 IR 166" => Some(IsoIr166),
256            "ISO_IR_192" | "ISO_IR 192" => Some(IsoIr192),
257            "GB18030" => Some(Gb18030),
258            "GBK" | "GB2312" | "ISO 2022 IR 58" => Some(Gbk),
259            _ => None,
260        }
261    }
262}
263
264impl TextCodec for CharsetImpl {
265    fn name(&self) -> Cow<'static, str> {
266        Cow::Borrowed(match self {
267            CharsetImpl::Default => "ISO_IR 6",
268            CharsetImpl::IsoIr13 => "ISO_IR 13",
269            CharsetImpl::IsoIr87 => "ISO_IR 87",
270            CharsetImpl::IsoIr100 => "ISO_IR 100",
271            CharsetImpl::IsoIr101 => "ISO_IR 101",
272            CharsetImpl::IsoIr109 => "ISO_IR 109",
273            CharsetImpl::IsoIr110 => "ISO_IR 110",
274            CharsetImpl::IsoIr126 => "ISO_IR 126",
275            CharsetImpl::IsoIr127 => "ISO_IR 127",
276            CharsetImpl::IsoIr138 => "ISO_IR 138",
277            CharsetImpl::IsoIr144 => "ISO_IR 144",
278            CharsetImpl::IsoIr149 => "ISO_IR 149",
279            CharsetImpl::IsoIr166 => "ISO_IR 166",
280            CharsetImpl::IsoIr192 => "ISO_IR 192",
281            CharsetImpl::Gb18030 => "GB18030",
282            CharsetImpl::Gbk => "GBK",
283        })
284    }
285
286    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
287        match self {
288            CharsetImpl::Default => DefaultCharacterSetCodec.decode(text),
289            CharsetImpl::IsoIr13 => IsoIr13CharacterSetCodec.decode(text),
290            CharsetImpl::IsoIr87 => IsoIr87CharacterSetCodec.decode(text),
291            CharsetImpl::IsoIr100 => IsoIr100CharacterSetCodec.decode(text),
292            CharsetImpl::IsoIr101 => IsoIr101CharacterSetCodec.decode(text),
293            CharsetImpl::IsoIr109 => IsoIr109CharacterSetCodec.decode(text),
294            CharsetImpl::IsoIr110 => IsoIr110CharacterSetCodec.decode(text),
295            CharsetImpl::IsoIr126 => IsoIr126CharacterSetCodec.decode(text),
296            CharsetImpl::IsoIr127 => IsoIr127CharacterSetCodec.decode(text),
297            CharsetImpl::IsoIr138 => IsoIr138CharacterSetCodec.decode(text),
298            CharsetImpl::IsoIr144 => IsoIr144CharacterSetCodec.decode(text),
299            CharsetImpl::IsoIr149 => IsoIr149CharacterSetCodec.decode(text),
300            CharsetImpl::IsoIr166 => IsoIr166CharacterSetCodec.decode(text),
301            CharsetImpl::IsoIr192 => Utf8CharacterSetCodec.decode(text),
302            CharsetImpl::Gb18030 => Gb18030CharacterSetCodec.decode(text),
303            CharsetImpl::Gbk => GBKCharacterSetCodec.decode(text),
304        }
305    }
306
307    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
308        match self {
309            CharsetImpl::Default => DefaultCharacterSetCodec.encode(text),
310            CharsetImpl::IsoIr13 => IsoIr13CharacterSetCodec.encode(text),
311            CharsetImpl::IsoIr87 => IsoIr87CharacterSetCodec.encode(text),
312            CharsetImpl::IsoIr100 => IsoIr100CharacterSetCodec.encode(text),
313            CharsetImpl::IsoIr101 => IsoIr101CharacterSetCodec.encode(text),
314            CharsetImpl::IsoIr109 => IsoIr109CharacterSetCodec.encode(text),
315            CharsetImpl::IsoIr110 => IsoIr110CharacterSetCodec.encode(text),
316            CharsetImpl::IsoIr126 => IsoIr126CharacterSetCodec.encode(text),
317            CharsetImpl::IsoIr127 => IsoIr127CharacterSetCodec.encode(text),
318            CharsetImpl::IsoIr138 => IsoIr138CharacterSetCodec.encode(text),
319            CharsetImpl::IsoIr144 => IsoIr144CharacterSetCodec.encode(text),
320            CharsetImpl::IsoIr149 => IsoIr149CharacterSetCodec.encode(text),
321            CharsetImpl::IsoIr166 => IsoIr166CharacterSetCodec.encode(text),
322            CharsetImpl::IsoIr192 => Utf8CharacterSetCodec.encode(text),
323            CharsetImpl::Gb18030 => Gb18030CharacterSetCodec.encode(text),
324            CharsetImpl::Gbk => GBKCharacterSetCodec.encode(text),
325        }
326    }
327}
328
329fn decode_text_trap(
330    _decoder: &mut dyn RawDecoder,
331    input: &[u8],
332    output: &mut dyn StringWriter,
333) -> bool {
334    let c = input[0];
335    let o0 = c & 7;
336    let o1 = (c & 56) >> 3;
337    let o2 = (c & 192) >> 6;
338    output.write_char('\\');
339    output.write_char((o2 + b'0') as char);
340    output.write_char((o1 + b'0') as char);
341    output.write_char((o0 + b'0') as char);
342    true
343}
344
/// Create and implement a character set type using the `encoding` crate.
///
/// - `$typ`: name of the unit struct to declare;
/// - `$term`: the string literal returned by `name()`;
/// - `$val`: the `encoding` crate codec value which does the actual work.
///
/// Decoding routes undecodable input through `decode_text_trap`,
/// whereas encoding is strict.
macro_rules! decl_character_set {
    ($typ: ident, $term: literal, $val: expr) => {
        #[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
        #[doc = "Data type for the "]
        #[doc = $term]
        #[doc = "character set encoding."]
        pub struct $typ;

        impl TextCodec for $typ {
            fn name(&self) -> Cow<'static, str> {
                Cow::Borrowed($term)
            }

            fn decode(&self, text: &[u8]) -> DecodeResult<String> {
                $val.decode(text, DecoderTrap::Call(decode_text_trap))
                    .map_err(|message| DecodeCustomSnafu { message }.build())
            }

            fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
                $val.encode(text, EncoderTrap::Strict)
                    .map_err(|message| EncodeCustomSnafu { message }.build())
            }
        }
    };
}
371
/// Data type representing the default character set.
#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
pub struct DefaultCharacterSetCodec;
375
376impl TextCodec for DefaultCharacterSetCodec {
377    fn name(&self) -> Cow<'static, str> {
378        Cow::Borrowed("ISO_IR 6")
379    }
380
381    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
382        // Using 8859-1 because it is a superset. Reiterations of this impl
383        // should check for invalid character codes (#40).
384        ISO_8859_1
385            .decode(text, DecoderTrap::Call(decode_text_trap))
386            .map_err(|message| DecodeCustomSnafu { message }.build())
387    }
388
389    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
390        ISO_8859_1
391            .encode(text, EncoderTrap::Strict)
392            .map_err(|message| EncodeCustomSnafu { message }.build())
393    }
394}
395
396decl_character_set!(IsoIr13CharacterSetCodec, "ISO_IR 13", WINDOWS_31J);
397decl_character_set!(IsoIr87CharacterSetCodec, "ISO_IR 87", ISO_2022_JP);
398decl_character_set!(IsoIr100CharacterSetCodec, "ISO_IR 100", ISO_8859_1);
399decl_character_set!(IsoIr101CharacterSetCodec, "ISO_IR 101", ISO_8859_2);
400decl_character_set!(IsoIr109CharacterSetCodec, "ISO_IR 109", ISO_8859_3);
401decl_character_set!(IsoIr110CharacterSetCodec, "ISO_IR 110", ISO_8859_4);
402decl_character_set!(IsoIr126CharacterSetCodec, "ISO_IR 126", ISO_8859_7);
403decl_character_set!(IsoIr127CharacterSetCodec, "ISO_IR 127", ISO_8859_6);
404decl_character_set!(IsoIr138CharacterSetCodec, "ISO_IR 138", ISO_8859_8);
405decl_character_set!(IsoIr144CharacterSetCodec, "ISO_IR 144", ISO_8859_5);
406decl_character_set!(IsoIr149CharacterSetCodec, "ISO_IR 149", WINDOWS_949);
407decl_character_set!(IsoIr166CharacterSetCodec, "ISO_IR 166", WINDOWS_874);
408decl_character_set!(Utf8CharacterSetCodec, "ISO_IR 192", UTF_8);
409decl_character_set!(Gb18030CharacterSetCodec, "GB18030", GB18030);
410decl_character_set!(GBKCharacterSetCodec, "GBK", GBK);
411
/// The result of a text validation procedure (please see [`validate_iso_8859`]).
///
/// Variants are declared from best to worst outcome,
/// so the derived ordering satisfies `Ok < BadCharacters < NotOk`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum TextValidationOutcome {
    /// The text is fully valid and can be safely decoded.
    Ok,
    /// Some characters may have to be replaced, other than that the text can be safely decoded.
    BadCharacters,
    /// The text cannot be decoded.
    NotOk,
}
422
423/// Check whether the given byte slice contains valid text from the default character repertoire.
424pub fn validate_iso_8859(text: &[u8]) -> TextValidationOutcome {
425    if ISO_8859_1.decode(text, DecoderTrap::Strict).is_err() {
426        match ISO_8859_1.decode(text, DecoderTrap::Call(decode_text_trap)) {
427            Ok(_) => TextValidationOutcome::BadCharacters,
428            Err(_) => TextValidationOutcome::NotOk,
429        }
430    } else {
431        TextValidationOutcome::Ok
432    }
433}
434
435/// Check whether the given byte slice contains only valid characters for a
436/// Date value representation.
437pub fn validate_da(text: &[u8]) -> TextValidationOutcome {
438    if text.iter().cloned().all(|c| c.is_ascii_digit()) {
439        TextValidationOutcome::Ok
440    } else {
441        TextValidationOutcome::NotOk
442    }
443}
444
445/// Check whether the given byte slice contains only valid characters for a
446/// Time value representation.
447pub fn validate_tm(text: &[u8]) -> TextValidationOutcome {
448    if text.iter().cloned().all(|c| match c {
449        b'\\' | b'.' | b'-' | b' ' => true,
450        c => c.is_ascii_digit(),
451    }) {
452        TextValidationOutcome::Ok
453    } else {
454        TextValidationOutcome::NotOk
455    }
456}
457
458/// Check whether the given byte slice contains only valid characters for a
459/// Date Time value representation.
460pub fn validate_dt(text: &[u8]) -> TextValidationOutcome {
461    if text.iter().cloned().all(|c| match c {
462        b'.' | b'-' | b'+' | b' ' | b'\\' => true,
463        c => c.is_ascii_digit(),
464    }) {
465        TextValidationOutcome::Ok
466    } else {
467        TextValidationOutcome::NotOk
468    }
469}
470
471/// Check whether the given byte slice contains only valid characters for a
472/// Code String value representation.
473pub fn validate_cs(text: &[u8]) -> TextValidationOutcome {
474    if text.iter().cloned().all(|c| match c {
475        b' ' | b'_' => true,
476        c => c.is_ascii_digit() || c.is_ascii_uppercase(),
477    }) {
478        TextValidationOutcome::Ok
479    } else {
480        TextValidationOutcome::NotOk
481    }
482}
483
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `codec` encodes `string` into `bytes`
    /// and decodes `bytes` back into `string`.
    fn test_codec<T>(codec: T, string: &str, bytes: &[u8])
    where
        T: TextCodec,
    {
        assert_eq!(codec.encode(string).expect("encoding"), bytes);
        assert_eq!(codec.decode(bytes).expect("decoding"), string);
    }

    #[test]
    fn iso_ir_6_baseline() {
        let codec = SpecificCharacterSet::default();
        test_codec(codec, "Smith^John", b"Smith^John");
    }

    #[test]
    fn iso_ir_13_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr13);
        test_codec(codec, "ヤマダ^タロウ", b"\xd4\xcf\xc0\xde^\xc0\xdb\xb3");
    }

    #[test]
    fn iso_ir_87_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr87);
        test_codec(&codec, "山田^太郎", b"\x1b$B;3ED\x1b(B^\x1b$BB@O:");
        test_codec(&codec, "やまだ^たろう", b"\x1b$B$d$^$@\x1b(B^\x1b$B$?$m$&");
    }

    #[test]
    fn iso_ir_192_baseline() {
        let codec = SpecificCharacterSet::ISO_IR_192;
        test_codec(&codec, "Simões^John", "Simões^John".as_bytes());
        test_codec(codec, "Иванков^Андрей", "Иванков^Андрей".as_bytes());
    }

    #[test]
    fn iso_ir_100_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr100);
        test_codec(&codec, "Simões^João", b"Sim\xF5es^Jo\xE3o");
        test_codec(codec, "Günther^Hans", b"G\xfcnther^Hans");
    }

    #[test]
    fn iso_ir_101_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr101);
        test_codec(codec, "Günther^Hans", b"G\xfcnther^Hans");
    }

    #[test]
    fn iso_ir_110_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr110);
        // One character per expected byte; bytes A6 A7 are "Ļ§" and D7 D8 are "×Ø".
        test_codec(codec, "ĄĸŖĨĻ§ŠĒĢŦŽĀÁÂÃÄÅÆĮČÉ^ĘËĖÍÎĪĐŅŌĶÔÕÖ×ØŲÚÛÜŨŪß", b"\xA1\xA2\xA3\xA5\xA6\xA7\xA9\xAA\xAB\xAC\xAE\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9^\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF");
    }

    #[test]
    fn iso_ir_126_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr126);
        test_codec(codec, "Διονυσιος", b"\xC4\xE9\xEF\xED\xF5\xF3\xE9\xEF\xF2");
    }

    #[test]
    fn iso_ir_127_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr127);
        test_codec(
            codec,
            "قباني^لنزار",
            b"\xE2\xC8\xC7\xE6\xEA^\xE4\xE6\xD2\xC7\xD1",
        );
    }

    #[test]
    fn iso_ir_138_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr138);
        test_codec(
            &codec,
            "מקור השם עברית",
            b"\xEE\xF7\xE5\xF8\x20\xE4\xF9\xED\x20\xF2\xE1\xF8\xE9\xFA",
        );
        test_codec(
            codec,
            "שרון^דבורה",
            b"\xF9\xF8\xE5\xEF^\xE3\xE1\xE5\xF8\xE4",
        );
    }

    #[test]
    fn iso_ir_144_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr144);
        test_codec(
            &codec,
            "Иванков^Андрей",
            b"\xb8\xd2\xd0\xdd\xda\xde\xd2^\xb0\xdd\xd4\xe0\xd5\xd9",
        );
        test_codec(
            &codec,
            "Гол. мозг стандарт",
            b"\xB3\xDE\xDB.\x20\xDC\xDE\xD7\xD3\x20\xE1\xE2\xD0\xDD\xD4\xD0\xE0\xE2",
        );
        test_codec(&codec, "мозг 2мм", b"\xDC\xDE\xD7\xD3\x202\xDC\xDC");
    }

    #[test]
    fn iso_ir_149_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr149);
        test_codec(&codec, "김희중", b"\xB1\xE8\xC8\xF1\xC1\xDF");
        test_codec(
            codec,
            "Hong^Gildong=洪^吉洞=홍^길동",
            b"Hong^Gildong=\xFB\xF3^\xD1\xCE\xD4\xD7=\xC8\xAB^\xB1\xE6\xB5\xBF",
        );
    }

    #[test]
    fn iso_ir_166_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr166);
        test_codec(&codec, "ประเทศไทย", b"\xBB\xC3\xD0\xE0\xB7\xC8\xE4\xB7\xC2");
        test_codec(codec, "รหัสสำหรับอักขระไทยที่ใช้กับคอมพิวเตอร์", b"\xC3\xCB\xD1\xCA\xCA\xD3\xCB\xC3\xD1\xBA\xCD\xD1\xA1\xA2\xC3\xD0\xE4\xB7\xC2\xB7\xD5\xE8\xE3\xAA\xE9\xA1\xD1\xBA\xA4\xCD\xC1\xBE\xD4\xC7\xE0\xB5\xCD\xC3\xEC");
    }

    #[test]
    fn gb_18030_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::Gb18030);
        test_codec(
            &codec,
            "Wang^XiaoDong=王^小东",
            b"Wang^XiaoDong=\xCD\xF5^\xD0\xA1\xB6\xAB",
        );
    }

    #[test]
    fn gb_gbk_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::Gbk);

        let iso2022_ir58_bytes: &[u8] = &[
            0xB0, 0xB2, 0xBB, 0xD5, 0xD0, 0xC7, 0xC1, 0xE9, 0xD0, 0xC5, 0xCF, 0xA2, 0xBF, 0xC6,
            0xBC, 0xBC, 0xD3, 0xD0, 0xCF, 0xDE, 0xB9, 0xAB, 0xCB, 0xBE,
        ];
        let rw = codec.decode(iso2022_ir58_bytes).expect("decoding");

        assert_eq!(rw, "安徽星灵信息科技有限公司");

        let gb2312_bytes: &[u8] = &[
            0xCA, 0xB9, 0xC6, 0xE4, 0xD3, 0xEB, 0xD4, 0xAD, 0xCA, 0xBC, 0xB2, 0xD6, 0xBF, 0xE2,
            0xB1, 0xA3, 0xB3, 0xD6, 0xD2, 0xBB, 0xD6, 0xC2,
        ];
        let rw2 = codec.decode(gb2312_bytes).expect("decoding");

        assert_eq!(rw2, "使其与原始仓库保持一致");
    }
}