dicom_encoding/
text.rs

//! This module contains reusable components for encoding and decoding text in DICOM
//! data structures, including support for character repertoires.
//!
//! The table below lists the known character sets
//! and whether decoding and encoding are supported for each of them at the moment:
//!
//! | Character Set                 | decoding support | encoding support |
//! |-------------------------------|------------------|------------------|
//! | ISO-IR 6 (default)            | ✓ | ✓ |
//! | ISO-IR 13 (WINDOWS_31J): The JIS X 0201-1976 character set (Japanese single-byte) | ✓ | ✓ |
//! | ISO-IR 87 (ISO_2022_JP): The JIS X 0208-1990 character set (Japanese multi-byte) | ✓ | ✓ |
//! | ISO-IR 100 (ISO-8859-1): Right-hand part of the Latin alphabet no. 1, the Western Europe character set | ✓ | ✓ |
//! | ISO-IR 101 (ISO-8859-2): Right-hand part of the Latin alphabet no. 2, the Central/Eastern Europe character set | ✓ | ✓ |
//! | ISO-IR 109 (ISO-8859-3): Right-hand part of the Latin alphabet no. 3, the South Europe character set | ✓ | ✓ |
//! | ISO-IR 110 (ISO-8859-4): Right-hand part of the Latin alphabet no. 4, the North Europe character set | ✓ | ✓ |
//! | ISO-IR 126 (ISO-8859-7): The Latin/Greek character set | ✓ | ✓ |
//! | ISO-IR 127 (ISO-8859-6): The Latin/Arabic character set | ✓ | ✓ |
//! | ISO-IR 138 (ISO-8859-8): The Latin/Hebrew character set | ✓ | ✓ |
//! | ISO-IR 144 (ISO-8859-5): The Latin/Cyrillic character set | ✓ | ✓ |
//! | ISO-IR 148 (ISO-8859-9): Latin no. 5, the Turkish character set | x | x |
//! | ISO-IR 149 (WINDOWS_949): The KS X 1001 character set (Korean) | ✓ | ✓ |
//! | ISO-IR 159: The JIS X 0212-1990 character set (supplementary Japanese characters) | x | x |
//! | ISO-IR 166 (WINDOWS_874): The TIS 620-2533 character set (Thai) | ✓ | ✓ |
//! | ISO-IR 192: The Unicode character set based on the UTF-8 encoding | ✓ | ✓ |
//! | GB18030: The Simplified Chinese character set | ✓ | ✓ |
//! | GB2312: Simplified Chinese character set | x | x |
//!
//! These capabilities are available through [`SpecificCharacterSet`].
28
29use encoding::all::{
30    GB18030, ISO_2022_JP, ISO_8859_1, ISO_8859_2, ISO_8859_3, ISO_8859_4, ISO_8859_5, ISO_8859_6,
31    ISO_8859_7, ISO_8859_8, UTF_8, WINDOWS_31J, WINDOWS_874, WINDOWS_949,
32};
33use encoding::{DecoderTrap, EncoderTrap, Encoding, RawDecoder, StringWriter};
34use snafu::{Backtrace, Snafu};
35use std::borrow::Cow;
36use std::fmt::Debug;
37
38/// An error type for text encoding issues.
39#[derive(Debug, Snafu)]
40#[non_exhaustive]
41pub enum EncodeTextError {
42    /// A custom error message,
43    /// for when the underlying error type does not encode error semantics
44    /// into type variants.
45    #[snafu(display("{}", message))]
46    EncodeCustom {
47        /// The error message in plain text.
48        message: Cow<'static, str>,
49        /// The generated backtrace, if available.
50        backtrace: Backtrace,
51    },
52}
53
54/// An error type for text decoding issues.
55#[derive(Debug, Snafu)]
56#[non_exhaustive]
57pub enum DecodeTextError {
58    /// A custom error message,
59    /// for when the underlying error type does not encode error semantics
60    /// into type variants.
61    #[snafu(display("{}", message))]
62    DecodeCustom {
63        /// The error message in plain text.
64        message: Cow<'static, str>,
65        /// The generated backtrace, if available.
66        backtrace: Backtrace,
67    },
68}
69
70type EncodeResult<T> = Result<T, EncodeTextError>;
71type DecodeResult<T> = Result<T, DecodeTextError>;
72
73/// A holder of encoding and decoding mechanisms for text in DICOM content,
74/// which according to the standard, depends on the specific character set.
75pub trait TextCodec {
76    /// Obtain the defined term (unique name) of the text encoding,
77    /// which may be used as the value of a
78    /// Specific Character Set (0008, 0005) element to refer to this codec.
79    ///
80    /// Should contain no leading or trailing spaces.
81    /// This method may be useful for testing purposes, considering that
82    /// `TextCodec` is often used as a trait object.
83    fn name(&self) -> Cow<'static, str>;
84
85    /// Decode the given byte buffer as a single string. The resulting string
86    /// _may_ contain backslash characters ('\') to delimit individual values,
87    /// and should be split later on if required.
88    fn decode(&self, text: &[u8]) -> DecodeResult<String>;
89
90    /// Encode a text value into a byte vector. The input string can
91    /// feature multiple text values by using the backslash character ('\')
92    /// as the value delimiter.
93    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>>;
94}
95
96impl<T: ?Sized> TextCodec for Box<T>
97where
98    T: TextCodec,
99{
100    fn name(&self) -> Cow<'static, str> {
101        self.as_ref().name()
102    }
103
104    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
105        self.as_ref().decode(text)
106    }
107
108    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
109        self.as_ref().encode(text)
110    }
111}
112
113impl<T: ?Sized> TextCodec for &'_ T
114where
115    T: TextCodec,
116{
117    fn name(&self) -> Cow<'static, str> {
118        (**self).name()
119    }
120
121    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
122        (**self).decode(text)
123    }
124
125    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
126        (**self).encode(text)
127    }
128}
129
130/// A descriptor for a specific character set,
131/// taking part in text encoding and decoding
132/// as per [PS3.5 ch 6 6.1](https://dicom.nema.org/medical/dicom/2023e/output/chtml/part05/chapter_6.html#sect_6.1).
133///
134/// # Example
135///
136/// Use [`from_code`](SpecificCharacterSet::from_code)
137/// or one of the associated constants to create a character set.
138/// From there, use the [`TextCodec`] trait to encode and decode text.
139///
140/// ```
141/// use dicom_encoding::text::{SpecificCharacterSet, TextCodec};
142///
143/// let character_set = SpecificCharacterSet::from_code("ISO_IR 100").unwrap();
144/// assert_eq!(character_set, SpecificCharacterSet::ISO_IR_100);
145/// ```
146#[derive(Debug, Default, Clone, PartialEq)]
147pub struct SpecificCharacterSet(CharsetImpl);
148
149impl SpecificCharacterSet {
150    /// ISO IR 6: The default character set, as defined by the DICOM standard.
151    pub const ISO_IR_6: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::Default);
152
153    // ISO IR 100: ISO 8859-1, the Western Europe character set
154    pub const ISO_IR_100: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::IsoIr100);
155
156    /// ISO IR 192: UTF-8 encoding
157    pub const ISO_IR_192: SpecificCharacterSet = SpecificCharacterSet(CharsetImpl::IsoIr192);
158
159    /// Obtain the specific character set identified by the given code string.
160    ///
161    /// Supported code strings include the possible values
162    /// in the respective DICOM element (0008, 0005).
163    ///
164    /// # Example
165    ///
166    /// ```
167    /// use dicom_encoding::text::{SpecificCharacterSet, TextCodec};
168    ///
169    /// let character_set = SpecificCharacterSet::from_code("ISO_IR 100").unwrap();
170    /// assert_eq!(character_set.name(), "ISO_IR 100");
171    /// ```
172    pub fn from_code(code: &str) -> Option<Self> {
173        CharsetImpl::from_code(code).map(SpecificCharacterSet)
174    }
175}
176
177impl TextCodec for SpecificCharacterSet {
178    fn name(&self) -> Cow<'static, str> {
179        self.0.name()
180    }
181
182    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
183        self.0.decode(text)
184    }
185
186    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
187        self.0.encode(text)
188    }
189}
190
/// An enum type for individual supported character sets.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)]
#[non_exhaustive]
enum CharsetImpl {
    /// **ISO-IR 6**: the default character set.
    #[default]
    Default,
    /// **ISO-IR 13**: The Simplified Japanese single byte character set.
    IsoIr13,
    /// **ISO-IR 87**: The Simplified Japanese multi byte character set.
    IsoIr87,
    /// **ISO-IR 100** (ISO-8859-1): Right-hand part of the Latin alphabet no. 1,
    /// the Western Europe character set.
    IsoIr100,
    /// **ISO-IR 101** (ISO-8859-2): Right-hand part of the Latin alphabet no. 2,
    /// the Central/Eastern Europe character set.
    IsoIr101,
    /// **ISO-IR 109** (ISO-8859-3): Right-hand part of the Latin alphabet no. 3,
    /// the South Europe character set.
    IsoIr109,
    /// **ISO-IR 110** (ISO-8859-4): Right-hand part of the Latin alphabet no. 4,
    /// the North Europe character set.
    IsoIr110,
    /// **ISO-IR 126** (ISO-8859-7): The Greek character set.
    IsoIr126,
    /// **ISO-IR 127** (ISO-8859-6): The Arabic character set.
    IsoIr127,
    /// **ISO-IR 138** (ISO-8859-8): The Hebrew character set.
    IsoIr138,
    /// **ISO-IR 144** (ISO-8859-5): The Latin/Cyrillic character set.
    IsoIr144,
    /// **ISO-IR 149**: The Korean character set.
    IsoIr149,
    /// **ISO-IR 166**: The Thai character set.
    IsoIr166,
    /// **ISO-IR 192**: The Unicode character set based on the UTF-8 encoding.
    IsoIr192,
    /// **GB18030**: The Simplified Chinese character set.
    Gb18030,
    // Support for more text encodings is tracked in issue #40.
}

impl CharsetImpl {
    /// Obtain the specific character set identified by the given code string.
    ///
    /// Supported code strings include the possible values
    /// in the respective DICOM element (0008, 0005),
    /// with either an underscore or a space before the registration number,
    /// as well as the `ISO 2022 IR` forms
    /// and the plain name `Default` for the default character set.
    /// Whitespace around the code string is ignored.
    ///
    /// Returns `None` if the code string is not recognized.
    pub fn from_code(uid: &str) -> Option<Self> {
        use self::CharsetImpl::*;
        // Code strings may come padded:
        // neither leading nor trailing spaces are significant,
        // so both ends are trimmed before matching.
        match uid.trim() {
            "Default" | "ISO_IR_6" | "ISO_IR 6" | "ISO 2022 IR 6" => Some(Default),
            "ISO_IR_13" | "ISO_IR 13" | "ISO 2022 IR 13" => Some(IsoIr13),
            "ISO_IR_87" | "ISO_IR 87" | "ISO 2022 IR 87" => Some(IsoIr87),
            "ISO_IR_100" | "ISO_IR 100" | "ISO 2022 IR 100" => Some(IsoIr100),
            "ISO_IR_101" | "ISO_IR 101" | "ISO 2022 IR 101" => Some(IsoIr101),
            "ISO_IR_109" | "ISO_IR 109" | "ISO 2022 IR 109" => Some(IsoIr109),
            "ISO_IR_110" | "ISO_IR 110" | "ISO 2022 IR 110" => Some(IsoIr110),
            "ISO_IR_126" | "ISO_IR 126" | "ISO 2022 IR 126" => Some(IsoIr126),
            "ISO_IR_127" | "ISO_IR 127" | "ISO 2022 IR 127" => Some(IsoIr127),
            "ISO_IR_138" | "ISO_IR 138" | "ISO 2022 IR 138" => Some(IsoIr138),
            "ISO_IR_144" | "ISO_IR 144" | "ISO 2022 IR 144" => Some(IsoIr144),
            "ISO_IR_149" | "ISO_IR 149" | "ISO 2022 IR 149" => Some(IsoIr149),
            "ISO_IR_166" | "ISO_IR 166" | "ISO 2022 IR 166" => Some(IsoIr166),
            "ISO_IR_192" | "ISO_IR 192" => Some(IsoIr192),
            "GB18030" => Some(Gb18030),
            _ => None,
        }
    }
}
260
261impl TextCodec for CharsetImpl {
262    fn name(&self) -> Cow<'static, str> {
263        Cow::Borrowed(match self {
264            CharsetImpl::Default => "ISO_IR 6",
265            CharsetImpl::IsoIr13 => "ISO_IR 13",
266            CharsetImpl::IsoIr87 => "ISO_IR 87",
267            CharsetImpl::IsoIr100 => "ISO_IR 100",
268            CharsetImpl::IsoIr101 => "ISO_IR 101",
269            CharsetImpl::IsoIr109 => "ISO_IR 109",
270            CharsetImpl::IsoIr110 => "ISO_IR 110",
271            CharsetImpl::IsoIr126 => "ISO_IR 126",
272            CharsetImpl::IsoIr127 => "ISO_IR 127",
273            CharsetImpl::IsoIr138 => "ISO_IR 138",
274            CharsetImpl::IsoIr144 => "ISO_IR 144",
275            CharsetImpl::IsoIr149 => "ISO_IR 149",
276            CharsetImpl::IsoIr166 => "ISO_IR 166",
277            CharsetImpl::IsoIr192 => "ISO_IR 192",
278            CharsetImpl::Gb18030 => "GB18030",
279        })
280    }
281
282    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
283        match self {
284            CharsetImpl::Default => DefaultCharacterSetCodec.decode(text),
285            CharsetImpl::IsoIr13 => IsoIr13CharacterSetCodec.decode(text),
286            CharsetImpl::IsoIr87 => IsoIr87CharacterSetCodec.decode(text),
287            CharsetImpl::IsoIr100 => IsoIr100CharacterSetCodec.decode(text),
288            CharsetImpl::IsoIr101 => IsoIr101CharacterSetCodec.decode(text),
289            CharsetImpl::IsoIr109 => IsoIr109CharacterSetCodec.decode(text),
290            CharsetImpl::IsoIr110 => IsoIr110CharacterSetCodec.decode(text),
291            CharsetImpl::IsoIr126 => IsoIr126CharacterSetCodec.decode(text),
292            CharsetImpl::IsoIr127 => IsoIr127CharacterSetCodec.decode(text),
293            CharsetImpl::IsoIr138 => IsoIr138CharacterSetCodec.decode(text),
294            CharsetImpl::IsoIr144 => IsoIr144CharacterSetCodec.decode(text),
295            CharsetImpl::IsoIr149 => IsoIr149CharacterSetCodec.decode(text),
296            CharsetImpl::IsoIr166 => IsoIr166CharacterSetCodec.decode(text),
297            CharsetImpl::IsoIr192 => Utf8CharacterSetCodec.decode(text),
298            CharsetImpl::Gb18030 => Gb18030CharacterSetCodec.decode(text),
299        }
300    }
301
302    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
303        match self {
304            CharsetImpl::Default => DefaultCharacterSetCodec.encode(text),
305            CharsetImpl::IsoIr13 => IsoIr13CharacterSetCodec.encode(text),
306            CharsetImpl::IsoIr87 => IsoIr87CharacterSetCodec.encode(text),
307            CharsetImpl::IsoIr100 => IsoIr100CharacterSetCodec.encode(text),
308            CharsetImpl::IsoIr101 => IsoIr101CharacterSetCodec.encode(text),
309            CharsetImpl::IsoIr109 => IsoIr109CharacterSetCodec.encode(text),
310            CharsetImpl::IsoIr110 => IsoIr110CharacterSetCodec.encode(text),
311            CharsetImpl::IsoIr126 => IsoIr126CharacterSetCodec.encode(text),
312            CharsetImpl::IsoIr127 => IsoIr127CharacterSetCodec.encode(text),
313            CharsetImpl::IsoIr138 => IsoIr138CharacterSetCodec.encode(text),
314            CharsetImpl::IsoIr144 => IsoIr144CharacterSetCodec.encode(text),
315            CharsetImpl::IsoIr149 => IsoIr149CharacterSetCodec.encode(text),
316            CharsetImpl::IsoIr166 => IsoIr166CharacterSetCodec.encode(text),
317            CharsetImpl::IsoIr192 => Utf8CharacterSetCodec.encode(text),
318            CharsetImpl::Gb18030 => Gb18030CharacterSetCodec.encode(text),
319        }
320    }
321}
322
323fn decode_text_trap(
324    _decoder: &mut dyn RawDecoder,
325    input: &[u8],
326    output: &mut dyn StringWriter,
327) -> bool {
328    let c = input[0];
329    let o0 = c & 7;
330    let o1 = (c & 56) >> 3;
331    let o2 = (c & 192) >> 6;
332    output.write_char('\\');
333    output.write_char((o2 + b'0') as char);
334    output.write_char((o1 + b'0') as char);
335    output.write_char((o0 + b'0') as char);
336    true
337}
338
/// Declare a unit struct for a character set
/// and implement [`TextCodec`] for it on top of the `encoding` crate.
///
/// The macro takes, in this order:
/// the name of the new type,
/// the defined term of the character set as a string literal,
/// and the `encoding` value which does the actual conversion.
macro_rules! decl_character_set {
    ($codec: ident, $defined_term: literal, $encoding: expr) => {
        #[doc = "Data type for the "]
        #[doc = $defined_term]
        #[doc = "character set encoding."]
        #[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
        pub struct $codec;

        impl TextCodec for $codec {
            fn name(&self) -> Cow<'static, str> {
                Cow::Borrowed($defined_term)
            }

            fn decode(&self, text: &[u8]) -> DecodeResult<String> {
                // bytes which cannot be decoded are handed over to the trap
                match $encoding.decode(text, DecoderTrap::Call(decode_text_trap)) {
                    Ok(decoded) => Ok(decoded),
                    Err(message) => Err(DecodeCustomSnafu { message }.build()),
                }
            }

            fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
                // strict: characters outside of the repertoire are an error
                match $encoding.encode(text, EncoderTrap::Strict) {
                    Ok(encoded) => Ok(encoded),
                    Err(message) => Err(EncodeCustomSnafu { message }.build()),
                }
            }
        }
    };
}
365
366/// Data type representing the default character set.
367#[derive(Debug, Default, Copy, Clone, Eq, Hash, PartialEq)]
368pub struct DefaultCharacterSetCodec;
369
370impl TextCodec for DefaultCharacterSetCodec {
371    fn name(&self) -> Cow<'static, str> {
372        Cow::Borrowed("ISO_IR 6")
373    }
374
375    fn decode(&self, text: &[u8]) -> DecodeResult<String> {
376        // Using 8859-1 because it is a superset. Reiterations of this impl
377        // should check for invalid character codes (#40).
378        ISO_8859_1
379            .decode(text, DecoderTrap::Call(decode_text_trap))
380            .map_err(|message| DecodeCustomSnafu { message }.build())
381    }
382
383    fn encode(&self, text: &str) -> EncodeResult<Vec<u8>> {
384        ISO_8859_1
385            .encode(text, EncoderTrap::Strict)
386            .map_err(|message| EncodeCustomSnafu { message }.build())
387    }
388}
389
// Codec types for the remaining character sets.
// Each line declares one unit struct implementing `TextCodec`,
// given the name of the type, the defined term reported by `name`,
// and the value from the `encoding` crate which performs the conversion.
decl_character_set!(IsoIr13CharacterSetCodec, "ISO_IR 13", WINDOWS_31J);
decl_character_set!(IsoIr87CharacterSetCodec, "ISO_IR 87", ISO_2022_JP);
decl_character_set!(IsoIr100CharacterSetCodec, "ISO_IR 100", ISO_8859_1);
decl_character_set!(IsoIr101CharacterSetCodec, "ISO_IR 101", ISO_8859_2);
decl_character_set!(IsoIr109CharacterSetCodec, "ISO_IR 109", ISO_8859_3);
decl_character_set!(IsoIr110CharacterSetCodec, "ISO_IR 110", ISO_8859_4);
decl_character_set!(IsoIr126CharacterSetCodec, "ISO_IR 126", ISO_8859_7);
decl_character_set!(IsoIr127CharacterSetCodec, "ISO_IR 127", ISO_8859_6);
decl_character_set!(IsoIr138CharacterSetCodec, "ISO_IR 138", ISO_8859_8);
decl_character_set!(IsoIr144CharacterSetCodec, "ISO_IR 144", ISO_8859_5);
decl_character_set!(IsoIr149CharacterSetCodec, "ISO_IR 149", WINDOWS_949);
decl_character_set!(IsoIr166CharacterSetCodec, "ISO_IR 166", WINDOWS_874);
decl_character_set!(Utf8CharacterSetCodec, "ISO_IR 192", UTF_8);
decl_character_set!(Gb18030CharacterSetCodec, "GB18030", GB18030);
404
/// The possible outcomes of validating a piece of text
/// (please see [`validate_iso_8859`]).
///
/// The variants are declared from the best outcome to the worst,
/// and compare in that order.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum TextValidationOutcome {
    /// The text is entirely valid, so it can be safely decoded.
    Ok,
    /// The text can be safely decoded,
    /// although some characters may have to be replaced.
    BadCharacters,
    /// The text cannot be decoded.
    NotOk,
}
415
416/// Check whether the given byte slice contains valid text from the default character repertoire.
417pub fn validate_iso_8859(text: &[u8]) -> TextValidationOutcome {
418    if ISO_8859_1.decode(text, DecoderTrap::Strict).is_err() {
419        match ISO_8859_1.decode(text, DecoderTrap::Call(decode_text_trap)) {
420            Ok(_) => TextValidationOutcome::BadCharacters,
421            Err(_) => TextValidationOutcome::NotOk,
422        }
423    } else {
424        TextValidationOutcome::Ok
425    }
426}
427
428/// Check whether the given byte slice contains only valid characters for a
429/// Date value representation.
430pub fn validate_da(text: &[u8]) -> TextValidationOutcome {
431    if text.iter().cloned().all(|c| c.is_ascii_digit()) {
432        TextValidationOutcome::Ok
433    } else {
434        TextValidationOutcome::NotOk
435    }
436}
437
438/// Check whether the given byte slice contains only valid characters for a
439/// Time value representation.
440pub fn validate_tm(text: &[u8]) -> TextValidationOutcome {
441    if text.iter().cloned().all(|c| match c {
442        b'\\' | b'.' | b'-' | b' ' => true,
443        c => c.is_ascii_digit(),
444    }) {
445        TextValidationOutcome::Ok
446    } else {
447        TextValidationOutcome::NotOk
448    }
449}
450
451/// Check whether the given byte slice contains only valid characters for a
452/// Date Time value representation.
453pub fn validate_dt(text: &[u8]) -> TextValidationOutcome {
454    if text.iter().cloned().all(|c| match c {
455        b'.' | b'-' | b'+' | b' ' | b'\\' => true,
456        c => c.is_ascii_digit(),
457    }) {
458        TextValidationOutcome::Ok
459    } else {
460        TextValidationOutcome::NotOk
461    }
462}
463
464/// Check whether the given byte slice contains only valid characters for a
465/// Code String value representation.
466pub fn validate_cs(text: &[u8]) -> TextValidationOutcome {
467    if text.iter().cloned().all(|c| match c {
468        b' ' | b'_' => true,
469        c => c.is_ascii_digit() || c.is_ascii_uppercase(),
470    }) {
471        TextValidationOutcome::Ok
472    } else {
473        TextValidationOutcome::NotOk
474    }
475}
476
#[cfg(test)]
mod tests {
    use super::*;

    /// Check that `codec` encodes `string` into `bytes`
    /// and decodes `bytes` back into `string`.
    fn test_codec<T>(codec: T, string: &str, bytes: &[u8])
    where
        T: TextCodec,
    {
        assert_eq!(codec.encode(string).expect("encoding"), bytes);
        assert_eq!(codec.decode(bytes).expect("decoding"), string);
    }

    #[test]
    fn iso_ir_6_baseline() {
        let codec = SpecificCharacterSet::default();
        test_codec(codec, "Smith^John", b"Smith^John");
    }

    #[test]
    fn iso_ir_13_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr13);
        // ISO-IR 13 is a single-byte repertoire,
        // so the name is written in half-width katakana
        // (one character per byte, including the voiced sound mark).
        // It is spelled out with escapes
        // so that Unicode normalization cannot turn it into full-width text.
        test_codec(
            codec,
            "\u{ff94}\u{ff8f}\u{ff80}\u{ff9e}^\u{ff80}\u{ff9b}\u{ff73}",
            b"\xd4\xcf\xc0\xde^\xc0\xdb\xb3",
        );
    }

    #[test]
    fn iso_ir_87_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr87);
        test_codec(&codec, "山田^太郎", b"\x1b$B;3ED\x1b(B^\x1b$BB@O:");
        test_codec(&codec, "やまだ^たろう", b"\x1b$B$d$^$@\x1b(B^\x1b$B$?$m$&");
    }

    #[test]
    fn iso_ir_192_baseline() {
        let codec = SpecificCharacterSet::ISO_IR_192;
        test_codec(&codec, "Simões^John", "Simões^John".as_bytes());
        test_codec(codec, "Иванков^Андрей", "Иванков^Андрей".as_bytes());
    }

    #[test]
    fn iso_ir_100_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr100);
        test_codec(&codec, "Simões^João", b"Sim\xF5es^Jo\xE3o");
        test_codec(codec, "Günther^Hans", b"G\xfcnther^Hans");
    }

    #[test]
    fn iso_ir_101_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr101);
        test_codec(codec, "Günther^Hans", b"G\xfcnther^Hans");
    }

    #[test]
    fn iso_ir_110_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr110);
        // One character per byte.
        // The characters for 0xA6 0xA7 (U+013B, U+00A7)
        // and for 0xD7 0xD8 (U+00D7, U+00D8) are written as escapes.
        test_codec(
            codec,
            "ĄĸŖĨ\u{013b}\u{00a7}ŠĒĢŦŽĀÁÂÃÄÅÆĮČÉ^ĘËĖÍÎĪĐŅŌĶÔÕÖ\u{00d7}\u{00d8}ŲÚÛÜŨŪß",
            b"\xA1\xA2\xA3\xA5\xA6\xA7\xA9\xAA\xAB\xAC\xAE\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9^\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF",
        );
    }

    #[test]
    fn iso_ir_126_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr126);
        test_codec(codec, "Διονυσιος", b"\xC4\xE9\xEF\xED\xF5\xF3\xE9\xEF\xF2");
    }

    #[test]
    fn iso_ir_127_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr127);
        test_codec(
            codec,
            "قباني^لنزار",
            b"\xE2\xC8\xC7\xE6\xEA^\xE4\xE6\xD2\xC7\xD1",
        );
    }

    #[test]
    fn iso_ir_138_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr138);
        test_codec(
            &codec,
            "מקור השם עברית",
            b"\xEE\xF7\xE5\xF8\x20\xE4\xF9\xED\x20\xF2\xE1\xF8\xE9\xFA",
        );
        test_codec(
            codec,
            "שרון^דבורה",
            b"\xF9\xF8\xE5\xEF^\xE3\xE1\xE5\xF8\xE4",
        );
    }

    #[test]
    fn iso_ir_144_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr144);
        test_codec(
            &codec,
            "Иванков^Андрей",
            b"\xb8\xd2\xd0\xdd\xda\xde\xd2^\xb0\xdd\xd4\xe0\xd5\xd9",
        );
        test_codec(
            &codec,
            "Гол. мозг стандарт",
            b"\xB3\xDE\xDB.\x20\xDC\xDE\xD7\xD3\x20\xE1\xE2\xD0\xDD\xD4\xD0\xE0\xE2",
        );
        test_codec(&codec, "мозг 2мм", b"\xDC\xDE\xD7\xD3\x202\xDC\xDC");
    }

    #[test]
    fn iso_ir_149_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr149);
        test_codec(&codec, "김희중", b"\xB1\xE8\xC8\xF1\xC1\xDF");
        test_codec(
            codec,
            "Hong^Gildong=洪^吉洞=홍^길동",
            b"Hong^Gildong=\xFB\xF3^\xD1\xCE\xD4\xD7=\xC8\xAB^\xB1\xE6\xB5\xBF",
        );
    }

    #[test]
    fn iso_ir_166_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::IsoIr166);
        test_codec(&codec, "ประเทศไทย", b"\xBB\xC3\xD0\xE0\xB7\xC8\xE4\xB7\xC2");
        test_codec(codec, "รหัสสำหรับอักขระไทยที่ใช้กับคอมพิวเตอร์", b"\xC3\xCB\xD1\xCA\xCA\xD3\xCB\xC3\xD1\xBA\xCD\xD1\xA1\xA2\xC3\xD0\xE4\xB7\xC2\xB7\xD5\xE8\xE3\xAA\xE9\xA1\xD1\xBA\xA4\xCD\xC1\xBE\xD4\xC7\xE0\xB5\xCD\xC3\xEC");
    }

    #[test]
    fn gb_18030_baseline() {
        let codec = SpecificCharacterSet(CharsetImpl::Gb18030);
        test_codec(
            &codec,
            "Wang^XiaoDong=王^小东",
            b"Wang^XiaoDong=\xCD\xF5^\xD0\xA1\xB6\xAB",
        );
    }
}