ibm1047/
lib.rs

1#![no_std]
2
//! Convert strings to and from IBM-1047.
//!
//! ```rust
//! # #[cfg(feature = "alloc")]
//! # {
//! use ibm1047::Encode;
//! // Warning: `flatten` silently discards any character that cannot be encoded as IBM-1047.
//! let ebcdic: Vec<u8> = "Hello, World!\n".encode_ibm1047().flatten().collect();
//! let string: String = ibm1047::decode(&ebcdic).collect();
//!
//! assert_eq!(string, "Hello, World!\n");
//! # }
//! ```
16
17#[cfg(feature = "alloc")]
18extern crate alloc;
19#[cfg(feature = "alloc")]
20use alloc::string::String;
21
/// Lookup table from Unicode code points U+0000..=U+00FF to IBM-1047 bytes.
///
/// The table is indexed by code point, one row of sixteen entries per high
/// nibble: entry `0x41` (`'A'`) holds the EBCDIC byte `0xC1`. Every byte value
/// occurs exactly once, so the table is a bijection and the exact inverse of
/// `IBM1047_TO_UNICODE`. Note that line feed (U+000A) is mapped to `0x15` and
/// NEL (U+0085) to `0x25`.
const UNICODE_TO_IBM1047: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
    0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
    0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
    0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
    0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
    0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
    0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
    0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
    0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF,
];
40
/// Lookup table from IBM-1047 bytes to Unicode characters.
///
/// The table is indexed by the EBCDIC byte value: entry `0xC1` holds `'A'`.
/// Every entry lies in U+0000..=U+00FF and occurs exactly once, so the table
/// is the exact inverse of `UNICODE_TO_IBM1047` and decoding can never fail.
/// Byte `0x15` decodes to line feed (U+000A) and `0x25` to NEL (U+0085).
const IBM1047_TO_UNICODE: [char; 256] = [
    '\u{00}', '\u{01}', '\u{02}', '\u{03}', '\u{9C}', '\u{09}', '\u{86}', '\u{7F}', '\u{97}',
    '\u{8D}', '\u{8E}', '\u{0B}', '\u{0C}', '\u{0D}', '\u{0E}', '\u{0F}', '\u{10}', '\u{11}',
    '\u{12}', '\u{13}', '\u{9D}', '\u{0A}', '\u{08}', '\u{87}', '\u{18}', '\u{19}', '\u{92}',
    '\u{8F}', '\u{1C}', '\u{1D}', '\u{1E}', '\u{1F}', '\u{80}', '\u{81}', '\u{82}', '\u{83}',
    '\u{84}', '\u{85}', '\u{17}', '\u{1B}', '\u{88}', '\u{89}', '\u{8A}', '\u{8B}', '\u{8C}',
    '\u{05}', '\u{06}', '\u{07}', '\u{90}', '\u{91}', '\u{16}', '\u{93}', '\u{94}', '\u{95}',
    '\u{96}', '\u{04}', '\u{98}', '\u{99}', '\u{9A}', '\u{9B}', '\u{14}', '\u{15}', '\u{9E}',
    '\u{1A}', '\u{20}', '\u{A0}', '\u{E2}', '\u{E4}', '\u{E0}', '\u{E1}', '\u{E3}', '\u{E5}',
    '\u{E7}', '\u{F1}', '\u{A2}', '\u{2E}', '\u{3C}', '\u{28}', '\u{2B}', '\u{7C}', '\u{26}',
    '\u{E9}', '\u{EA}', '\u{EB}', '\u{E8}', '\u{ED}', '\u{EE}', '\u{EF}', '\u{EC}', '\u{DF}',
    '\u{21}', '\u{24}', '\u{2A}', '\u{29}', '\u{3B}', '\u{5E}', '\u{2D}', '\u{2F}', '\u{C2}',
    '\u{C4}', '\u{C0}', '\u{C1}', '\u{C3}', '\u{C5}', '\u{C7}', '\u{D1}', '\u{A6}', '\u{2C}',
    '\u{25}', '\u{5F}', '\u{3E}', '\u{3F}', '\u{F8}', '\u{C9}', '\u{CA}', '\u{CB}', '\u{C8}',
    '\u{CD}', '\u{CE}', '\u{CF}', '\u{CC}', '\u{60}', '\u{3A}', '\u{23}', '\u{40}', '\u{27}',
    '\u{3D}', '\u{22}', '\u{D8}', '\u{61}', '\u{62}', '\u{63}', '\u{64}', '\u{65}', '\u{66}',
    '\u{67}', '\u{68}', '\u{69}', '\u{AB}', '\u{BB}', '\u{F0}', '\u{FD}', '\u{FE}', '\u{B1}',
    '\u{B0}', '\u{6A}', '\u{6B}', '\u{6C}', '\u{6D}', '\u{6E}', '\u{6F}', '\u{70}', '\u{71}',
    '\u{72}', '\u{AA}', '\u{BA}', '\u{E6}', '\u{B8}', '\u{C6}', '\u{A4}', '\u{B5}', '\u{7E}',
    '\u{73}', '\u{74}', '\u{75}', '\u{76}', '\u{77}', '\u{78}', '\u{79}', '\u{7A}', '\u{A1}',
    '\u{BF}', '\u{D0}', '\u{5B}', '\u{DE}', '\u{AE}', '\u{AC}', '\u{A3}', '\u{A5}', '\u{B7}',
    '\u{A9}', '\u{A7}', '\u{B6}', '\u{BC}', '\u{BD}', '\u{BE}', '\u{DD}', '\u{A8}', '\u{AF}',
    '\u{5D}', '\u{B4}', '\u{D7}', '\u{7B}', '\u{41}', '\u{42}', '\u{43}', '\u{44}', '\u{45}',
    '\u{46}', '\u{47}', '\u{48}', '\u{49}', '\u{AD}', '\u{F4}', '\u{F6}', '\u{F2}', '\u{F3}',
    '\u{F5}', '\u{7D}', '\u{4A}', '\u{4B}', '\u{4C}', '\u{4D}', '\u{4E}', '\u{4F}', '\u{50}',
    '\u{51}', '\u{52}', '\u{B9}', '\u{FB}', '\u{FC}', '\u{F9}', '\u{FA}', '\u{FF}', '\u{5C}',
    '\u{F7}', '\u{53}', '\u{54}', '\u{55}', '\u{56}', '\u{57}', '\u{58}', '\u{59}', '\u{5A}',
    '\u{B2}', '\u{D4}', '\u{D6}', '\u{D2}', '\u{D3}', '\u{D5}', '\u{30}', '\u{31}', '\u{32}',
    '\u{33}', '\u{34}', '\u{35}', '\u{36}', '\u{37}', '\u{38}', '\u{39}', '\u{B3}', '\u{DB}',
    '\u{DC}', '\u{D9}', '\u{DA}', '\u{9F}',
];
72
73pub struct EncodeIbm1047<'a> {
74    chars: core::str::Chars<'a>,
75}
76impl<'a> Iterator for EncodeIbm1047<'a> {
77    type Item = Result<u8, char>;
78    fn next(&mut self) -> Option<Self::Item> {
79        let c = self.chars.next()?;
80        if c as u32 >= UNICODE_TO_IBM1047.len().try_into().unwrap() {
81            Some(Err(c))
82        } else {
83            Some(Ok(UNICODE_TO_IBM1047[c as usize]))
84        }
85    }
86}
87
88pub struct DecodeIbm1047<'a> {
89    bytes: core::slice::Iter<'a, u8>,
90}
91impl<'a> Iterator for DecodeIbm1047<'a> {
92    type Item = char;
93    fn next(&mut self) -> Option<Self::Item> {
94        self.bytes.next().map(|&i| IBM1047_TO_UNICODE[i as usize])
95    }
96}
97
98pub trait Encode {
99    fn encode_ibm1047(&self) -> EncodeIbm1047;
100}
/// Owned text that can be built from IBM-1047 bytes.
///
/// Only available when the `alloc` feature is enabled.
#[cfg(feature = "alloc")]
pub trait Decode {
    /// Decodes `bytes` as IBM-1047 and returns the resulting string.
    fn from_ibm1047(bytes: &[u8]) -> String;
}
105pub fn decode(bytes: &[u8]) -> DecodeIbm1047 {
106    DecodeIbm1047 { bytes: bytes.iter() }
107}
108
109impl Encode for str {
110    fn encode_ibm1047(&self) -> EncodeIbm1047 {
111        EncodeIbm1047 {
112            chars: self.chars(),
113        }
114    }
115}
#[cfg(feature = "alloc")]
impl Decode for String {
    /// Decodes `bytes` as IBM-1047 into a newly allocated `String`.
    ///
    /// Every byte value is decodable, so this cannot fail.
    fn from_ibm1047(bytes: &[u8]) -> String {
        // `bytes.len()` is a lower bound on the UTF-8 length: characters at or
        // above U+0080 occupy two bytes, so the buffer may still grow.
        let mut decoded = String::with_capacity(bytes.len());
        // Reuse the streaming decoder rather than duplicating the table lookup.
        decoded.extend(decode(bytes));
        decoded
    }
}
126
#[cfg(test)]
mod test {

    #[cfg(feature = "alloc")]
    use crate::Decode;
    use crate::Encode;

    // Every character in U+0000..=U+00FF exactly once, in shuffled order.
    //
    // `Ä§` (U+00C4, U+00A7) is spelled with escapes on purpose: its Latin-1
    // bytes C4 A7 form the valid UTF-8 sequence for `ħ` (U+0127), and a
    // mis-decoded copy of this file silently loses one character and makes
    // the fixture unencodable.
    const UTF8: &str = "\u{10}\u{89}\u{8}\u{84}\u{1d}»¸¬\u{95}7Bôú|Ëk$Zíîhó5f\u{17} ¡\u{94}\u{9c}/y%\u{8c}Þ,X\u{9d}ß\u{c4}\u{a7}\u{5}\u{a0}\u{e}\u{88}vÊ\u{80}\u{1e}x×r\u{3}JõFæ\u{9e}t\u{15}.\u{92}W¹!ÁÖ\u{16}\u{9a}nÌ\u{1b}ý¾Åzì\u{7f}cLÿ¦w\u{2}oE\u{4}¯\u{f}\u{14}\u{98}[º`ãK\u{96}\u{9b}\u{87}Ã\n¤\u{81}ÒS\u{18}Çë8\u{ad}TU:+ép\u{1}¼2<V½Ï\"\u{c}³Y\u{13}Éa\u{8a}²\u{19}~¶µ?e\u{90}\u{8f}Güs\u{86}ø\u{97}Îq=i\u{9f}Õ\u{85}bÍ\u{83}\u{6}#{;_ä\u{11}\u{91}]¥mèRÀÑâ±^Ø\u{1c}\u{99}ûÈ*\u{8e}Qê\u{82}þM\u{8d}}¨O\u{1f}çÐ1ò\u{b}Ô\\\tÛ\u{93}áPà9·ï®Aù«Âª\0÷\u{8b}\u{12}>0ÚCÙ&ñl\u{7}@\rð)Ü-(4¿3£©´åÝ6Dd'öÓ\u{1a}jgH¢uÆIN°";
    // The IBM-1047 encoding of `UTF8`, byte for character.
    const IBM1047: &[u8] = &[
        0x10, 0x29, 0x16, 0x24, 0x1d, 0x8b, 0x9d, 0xb0, 0x35, 0xf7, 0xc2, 0xcb, 0xde, 0x4f, 0x73,
        0x92, 0x5b, 0xe9, 0x55, 0x56, 0x88, 0xce, 0xf5, 0x86, 0x26, 0x40, 0xaa, 0x34, 0x04, 0x61,
        0xa8, 0x6c, 0x2c, 0xae, 0x6b, 0xe7, 0x14, 0x59, 0x63, 0xb5, 0x2d, 0x41, 0x0e, 0x28, 0xa5,
        0x72, 0x20, 0x1e, 0xa7, 0xbf, 0x99, 0x03, 0xd1, 0xcf, 0xc6, 0x9c, 0x3e, 0xa3, 0x3d, 0x4b,
        0x1a, 0xe6, 0xda, 0x5a, 0x65, 0xec, 0x32, 0x3a, 0x95, 0x78, 0x27, 0x8d, 0xb9, 0x67, 0xa9,
        0x58, 0x07, 0x83, 0xd3, 0xdf, 0x6a, 0xa6, 0x02, 0x96, 0xc5, 0x37, 0xbc, 0x0f, 0x3c, 0x38,
        0xad, 0x9b, 0x79, 0x46, 0xd2, 0x36, 0x3b, 0x17, 0x66, 0x15, 0x9f, 0x21, 0xed, 0xe2, 0x18,
        0x68, 0x53, 0xf8, 0xca, 0xe3, 0xe4, 0x7a, 0x4e, 0x51, 0x97, 0x01, 0xb7, 0xf2, 0x4c, 0xe5,
        0xb8, 0x77, 0x7f, 0x0c, 0xfa, 0xe8, 0x13, 0x71, 0x81, 0x2a, 0xea, 0x19, 0xa1, 0xb6, 0xa0,
        0x6f, 0x85, 0x30, 0x1b, 0xc7, 0xdc, 0xa2, 0x06, 0x70, 0x08, 0x76, 0x98, 0x7e, 0x89, 0xff,
        0xef, 0x25, 0x82, 0x75, 0x23, 0x2e, 0x7b, 0xc0, 0x5e, 0x6d, 0x43, 0x11, 0x31, 0xbd, 0xb2,
        0x94, 0x54, 0xd9, 0x64, 0x69, 0x42, 0x8f, 0x5f, 0x80, 0x1c, 0x39, 0xdb, 0x74, 0x5c, 0x0a,
        0xd8, 0x52, 0x22, 0x8e, 0xd4, 0x09, 0xd0, 0xbb, 0xd6, 0x1f, 0x48, 0xac, 0xf1, 0xcd, 0x0b,
        0xeb, 0xe0, 0x05, 0xfb, 0x33, 0x45, 0xd7, 0x44, 0xf9, 0xb3, 0x57, 0xaf, 0xc1, 0xdd, 0x8a,
        0x62, 0x9a, 0x00, 0xe1, 0x2b, 0x12, 0x6e, 0xf0, 0xfe, 0xc3, 0xfd, 0x50, 0x49, 0x93, 0x2f,
        0x7c, 0x0d, 0x8c, 0x5d, 0xfc, 0x60, 0x4d, 0xf4, 0xab, 0xf3, 0xb1, 0xb4, 0xbe, 0x47, 0xba,
        0xf6, 0xc4, 0x84, 0x7d, 0xcc, 0xee, 0x3f, 0x91, 0x87, 0xc8, 0x4a, 0xa4, 0x9e, 0xc9, 0xd5,
        0x90,
    ];

    /// Encoding the fixture must yield every expected byte with no `Err`
    /// items; comparing against `Ok(..)` keeps a failure from being dropped.
    #[test]
    fn encode() {
        assert!(UTF8
            .encode_ibm1047()
            .eq(IBM1047.iter().copied().map(Ok::<u8, char>)))
    }

    /// Decoding the byte fixture must reproduce the text character by character.
    #[test]
    fn decode() {
        assert!(super::decode(IBM1047).eq(UTF8.chars()))
    }

    /// The allocating decoder must agree with the streaming one.
    #[cfg(feature = "alloc")]
    #[test]
    fn decode_string() {
        assert_eq!(alloc::string::String::from_ibm1047(IBM1047), UTF8)
    }
}