1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
2pub use copybook_codepage::{Codepage, UnmappablePolicy, get_zoned_sign_table, space_byte};
28use copybook_error::{Error, ErrorCode, Result};
29use std::convert::TryFrom;
30use tracing::warn;
31
/// Decode table for `Codepage::CP037`.
///
/// Entry `i` is the Unicode code point that EBCDIC byte `i` decodes to; the
/// table is indexed directly by the input byte in `ebcdic_to_utf8` and is
/// inverted on the fly by `utf8_to_ebcdic`.
static CP037_TO_UNICODE: [u32; 256] = [
    // Bytes 0x00-0x46: 0x00-0x3F decode to control code points, 0x40 is the space.
    0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F, 0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087, 0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007, 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, 0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A, 0x0020, 0x00A0, 0x00E2, 0x00E4, 0x00E0, 0x00E1, 0x00E3,
    // Bytes 0x47-0x4E.
    0x00E5, 0x00E7, 0x00F1, 0x00A2, 0x002E, 0x003C, 0x0028, 0x002B,
    // Bytes 0x4F-0x56.
    0x007C, 0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE,
    // Bytes 0x57-0x5E.
    0x00EF, 0x00EC, 0x00DF, 0x0021, 0x0024, 0x002A, 0x0029, 0x003B,
    // Bytes 0x5F-0x66.
    0x00AC, 0x002D, 0x002F, 0x00C2, 0x00C4, 0x00C0, 0x00C1, 0x00C3,
    // Bytes 0x67-0x6E.
    0x00C5, 0x00C7, 0x00D1, 0x00A6, 0x002C, 0x0025, 0x005F, 0x003E,
    // Bytes 0x6F-0x76.
    0x003F, 0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE,
    // Bytes 0x77-0x7E.
    0x00CF, 0x00CC, 0x0060, 0x003A, 0x0023, 0x0040, 0x0027, 0x003D,
    // Bytes 0x7F-0x8E ('a'..'i' at 0x81-0x89).
    0x0022, 0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE,
    // Bytes 0x8F-0x9E ('j'..'r' at 0x91-0x99).
    0x00B1, 0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6,
    // Bytes 0x9F-0xAE ('s'..'z' at 0xA2-0xA9); 0x9F is U+00A4, the currency sign.
    0x00A4, 0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x00DD, 0x00DE,
    // Bytes 0xAF-0xB6.
    0x00AE, 0x005E, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x00A7, 0x00B6,
    // Bytes 0xB7-0xBE.
    0x00BC, 0x00BD, 0x00BE, 0x005B, 0x005D, 0x00AF, 0x00A8, 0x00B4,
    // Bytes 0xBF-0xCE ('A'..'I' at 0xC1-0xC9).
    0x00D7, 0x007B, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x00AD, 0x00F4, 0x00F6, 0x00F2, 0x00F3,
    // Bytes 0xCF-0xDE ('J'..'R' at 0xD1-0xD9).
    0x00F5, 0x007D, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x00B9, 0x00FB, 0x00FC, 0x00F9, 0x00FA,
    // Bytes 0xDF-0xEE ('S'..'Z' at 0xE2-0xE9).
    0x00FF, 0x005C, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00B2, 0x00D4, 0x00D6, 0x00D2, 0x00D3,
    // Bytes 0xEF-0xFE ('0'..'9' at 0xF0-0xF9).
    0x00D5, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x00B3, 0x00DB, 0x00DC, 0x00D9, 0x00DA,
    // Byte 0xFF.
    0x009F, ];
91
/// Decode table for `Codepage::CP273`.
///
/// Entry `i` is the Unicode code point that EBCDIC byte `i` decodes to. Letters
/// and digits sit at the same bytes as in the other tables in this file; the
/// differences are in the punctuation and accented-letter positions (for
/// example 0x4A is U+00C4, 0x6A is U+00F6 and 0xC0 is U+00E4).
static CP273_TO_UNICODE: [u32; 256] = [
    // Bytes 0x00-0x46: 0x00-0x3F decode to control code points, 0x40 is the space.
    0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F, 0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087, 0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007, 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, 0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A, 0x0020, 0x00A0, 0x00E2, 0x007B, 0x00E0, 0x00E1, 0x00E3,
    // Bytes 0x47-0x4E.
    0x00E5, 0x00E7, 0x00F1, 0x00C4, 0x002E, 0x003C, 0x0028, 0x002B,
    // Bytes 0x4F-0x56.
    0x0021, 0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE,
    // Bytes 0x57-0x5E.
    0x00EF, 0x00EC, 0x00DF, 0x00DC, 0x0024, 0x002A, 0x0029, 0x003B,
    // Bytes 0x5F-0x66.
    0x005E, 0x002D, 0x002F, 0x00C2, 0x005B, 0x00C0, 0x00C1, 0x00C3,
    // Bytes 0x67-0x6E.
    0x00C5, 0x00C7, 0x00D1, 0x00F6, 0x002C, 0x0025, 0x005F, 0x003E,
    // Bytes 0x6F-0x76.
    0x003F, 0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE,
    // Bytes 0x77-0x7E.
    0x00CF, 0x00CC, 0x0060, 0x003A, 0x0023, 0x00A7, 0x0027, 0x003D,
    // Bytes 0x7F-0x8E ('a'..'i' at 0x81-0x89).
    0x0022, 0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE,
    // Bytes 0x8F-0x9E ('j'..'r' at 0x91-0x99).
    0x00B1, 0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6,
    // Bytes 0x9F-0xAE ('s'..'z' at 0xA2-0xA9).
    0x00A4, 0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x00DD, 0x00DE,
    // Bytes 0xAF-0xB6.
    0x00AE, 0x00A2, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x0040, 0x00B6,
    // Bytes 0xB7-0xBE.
    0x00BC, 0x00BD, 0x00BE, 0x00AC, 0x007C, 0x00AF, 0x00A8, 0x00B4,
    // Bytes 0xBF-0xCE ('A'..'I' at 0xC1-0xC9).
    0x00D7, 0x00E4, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x00AD, 0x00F4, 0x00A6, 0x00F2, 0x00F3,
    // Bytes 0xCF-0xDE ('J'..'R' at 0xD1-0xD9).
    0x00F5, 0x00FC, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x00B9, 0x00FB, 0x007D, 0x00F9, 0x00FA,
    // Bytes 0xDF-0xEE ('S'..'Z' at 0xE2-0xE9).
    0x00FF, 0x00D6, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00B2, 0x00D4, 0x005C, 0x00D2, 0x00D3,
    // Bytes 0xEF-0xFE ('0'..'9' at 0xF0-0xF9).
    0x00D5, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x00B3, 0x00DB, 0x005D, 0x00D9, 0x00DA,
    // Byte 0xFF.
    0x009F, ];
144
/// Decode table for `Codepage::CP500`.
///
/// Entry `i` is the Unicode code point that EBCDIC byte `i` decodes to. Compared
/// with `CP037_TO_UNICODE` only a handful of punctuation positions differ
/// (0x4A, 0x4F, 0x5A, 0x5F, 0xB0, 0xBA and 0xBB).
static CP500_TO_UNICODE: [u32; 256] = [
    // Bytes 0x00-0x46: 0x00-0x3F decode to control code points, 0x40 is the space.
    0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F, 0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087, 0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007, 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, 0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A, 0x0020, 0x00A0, 0x00E2, 0x00E4, 0x00E0, 0x00E1, 0x00E3,
    // Bytes 0x47-0x4E.
    0x00E5, 0x00E7, 0x00F1, 0x005B, 0x002E, 0x003C, 0x0028, 0x002B,
    // Bytes 0x4F-0x56.
    0x0021, 0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE,
    // Bytes 0x57-0x5E.
    0x00EF, 0x00EC, 0x00DF, 0x005D, 0x0024, 0x002A, 0x0029, 0x003B,
    // Bytes 0x5F-0x66.
    0x005E, 0x002D, 0x002F, 0x00C2, 0x00C4, 0x00C0, 0x00C1, 0x00C3,
    // Bytes 0x67-0x6E.
    0x00C5, 0x00C7, 0x00D1, 0x00A6, 0x002C, 0x0025, 0x005F, 0x003E,
    // Bytes 0x6F-0x76.
    0x003F, 0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE,
    // Bytes 0x77-0x7E.
    0x00CF, 0x00CC, 0x0060, 0x003A, 0x0023, 0x0040, 0x0027, 0x003D,
    // Bytes 0x7F-0x8E ('a'..'i' at 0x81-0x89).
    0x0022, 0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE,
    // Bytes 0x8F-0x9E ('j'..'r' at 0x91-0x99).
    0x00B1, 0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6,
    // Bytes 0x9F-0xAE ('s'..'z' at 0xA2-0xA9).
    0x00A4, 0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x00DD, 0x00DE,
    // Bytes 0xAF-0xB6.
    0x00AE, 0x00A2, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x00A7, 0x00B6,
    // Bytes 0xB7-0xBE.
    0x00BC, 0x00BD, 0x00BE, 0x00AC, 0x007C, 0x00AF, 0x00A8, 0x00B4,
    // Bytes 0xBF-0xCE ('A'..'I' at 0xC1-0xC9).
    0x00D7, 0x007B, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x00AD, 0x00F4, 0x00F6, 0x00F2, 0x00F3,
    // Bytes 0xCF-0xDE ('J'..'R' at 0xD1-0xD9).
    0x00F5, 0x007D, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x00B9, 0x00FB, 0x00FC, 0x00F9, 0x00FA,
    // Bytes 0xDF-0xEE ('S'..'Z' at 0xE2-0xE9).
    0x00FF, 0x005C, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00B2, 0x00D4, 0x00D6, 0x00D2, 0x00D3,
    // Bytes 0xEF-0xFE ('0'..'9' at 0xF0-0xF9).
    0x00D5, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x00B3, 0x00DB, 0x00DC, 0x00D9, 0x00DA,
    // Byte 0xFF.
    0x009F, ];
197
/// Decode table for `Codepage::CP1047`.
///
/// Entry `i` is the Unicode code point that EBCDIC byte `i` decodes to. Compared
/// with `CP037_TO_UNICODE` the entries at 0xAD, 0xBA, 0xBB and 0xBD differ
/// (the square brackets move to 0xAD / 0xBD).
///
/// NOTE(review): this table keeps U+00AC at 0x5F and U+005E at 0xB0, exactly as
/// in the CP037 table. Published IBM-1047 charts appear to swap those two
/// (U+005E at 0x5F, U+00AC at 0xB0) — confirm against an authoritative mapping
/// before relying on '^' / '¬' for this code page.
static CP1047_TO_UNICODE: [u32; 256] = [
    // Bytes 0x00-0x46: 0x00-0x3F decode to control code points, 0x40 is the space.
    0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F, 0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087, 0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007, 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, 0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A, 0x0020, 0x00A0, 0x00E2, 0x00E4, 0x00E0, 0x00E1, 0x00E3,
    // Bytes 0x47-0x4E.
    0x00E5, 0x00E7, 0x00F1, 0x00A2, 0x002E, 0x003C, 0x0028, 0x002B,
    // Bytes 0x4F-0x56.
    0x007C, 0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE,
    // Bytes 0x57-0x5E.
    0x00EF, 0x00EC, 0x00DF, 0x0021, 0x0024, 0x002A, 0x0029, 0x003B,
    // Bytes 0x5F-0x66.
    0x00AC, 0x002D, 0x002F, 0x00C2, 0x00C4, 0x00C0, 0x00C1, 0x00C3,
    // Bytes 0x67-0x6E.
    0x00C5, 0x00C7, 0x00D1, 0x00A6, 0x002C, 0x0025, 0x005F, 0x003E,
    // Bytes 0x6F-0x76.
    0x003F, 0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE,
    // Bytes 0x77-0x7E.
    0x00CF, 0x00CC, 0x0060, 0x003A, 0x0023, 0x0040, 0x0027, 0x003D,
    // Bytes 0x7F-0x8E ('a'..'i' at 0x81-0x89).
    0x0022, 0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE,
    // Bytes 0x8F-0x9E ('j'..'r' at 0x91-0x99).
    0x00B1, 0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6,
    // Bytes 0x9F-0xAE ('s'..'z' at 0xA2-0xA9); 0xAD is '['.
    0x00A4, 0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x005B, 0x00DE,
    // Bytes 0xAF-0xB6.
    0x00AE, 0x005E, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x00A7, 0x00B6,
    // Bytes 0xB7-0xBE; 0xBD is ']'.
    0x00BC, 0x00BD, 0x00BE, 0x00DD, 0x00A8, 0x00AF, 0x005D, 0x00B4,
    // Bytes 0xBF-0xCE ('A'..'I' at 0xC1-0xC9).
    0x00D7, 0x007B, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x00AD, 0x00F4, 0x00F6, 0x00F2, 0x00F3,
    // Bytes 0xCF-0xDE ('J'..'R' at 0xD1-0xD9).
    0x00F5, 0x007D, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x00B9, 0x00FB, 0x00FC, 0x00F9, 0x00FA,
    // Bytes 0xDF-0xEE ('S'..'Z' at 0xE2-0xE9).
    0x00FF, 0x005C, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00B2, 0x00D4, 0x00D6, 0x00D2, 0x00D3,
    // Bytes 0xEF-0xFE ('0'..'9' at 0xF0-0xF9).
    0x00D5, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x00B3, 0x00DB, 0x00DC, 0x00D9, 0x00DA,
    // Byte 0xFF.
    0x009F, ];
250
/// Decode table for `Codepage::CP1140`.
///
/// Entry `i` is the Unicode code point that EBCDIC byte `i` decodes to. The
/// table is identical to `CP037_TO_UNICODE` except for the final entry: byte
/// 0xFF decodes to U+20AC (euro sign) instead of U+009F.
///
/// NOTE(review): published CCSID 1140 charts appear to place the euro sign at
/// byte 0x9F (replacing U+00A4) and leave 0xFF as U+009F. Here 0x9F is still
/// U+00A4 and the euro sits at 0xFF, and the tests in this file pin that
/// layout — confirm against an authoritative mapping before exchanging data
/// with other CP1140 producers/consumers.
static CP1140_TO_UNICODE: [u32; 256] = [
    // Bytes 0x00-0x46: 0x00-0x3F decode to control code points, 0x40 is the space.
    0x0000, 0x0001, 0x0002, 0x0003, 0x009C, 0x0009, 0x0086, 0x007F, 0x0097, 0x008D, 0x008E, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x009D, 0x0085, 0x0008, 0x0087, 0x0018, 0x0019, 0x0092, 0x008F, 0x001C, 0x001D, 0x001E, 0x001F, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000A, 0x0017, 0x001B, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x0005, 0x0006, 0x0007, 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004, 0x0098, 0x0099, 0x009A, 0x009B, 0x0014, 0x0015, 0x009E, 0x001A, 0x0020, 0x00A0, 0x00E2, 0x00E4, 0x00E0, 0x00E1, 0x00E3,
    // Bytes 0x47-0x4E.
    0x00E5, 0x00E7, 0x00F1, 0x00A2, 0x002E, 0x003C, 0x0028, 0x002B,
    // Bytes 0x4F-0x56.
    0x007C, 0x0026, 0x00E9, 0x00EA, 0x00EB, 0x00E8, 0x00ED, 0x00EE,
    // Bytes 0x57-0x5E.
    0x00EF, 0x00EC, 0x00DF, 0x0021, 0x0024, 0x002A, 0x0029, 0x003B,
    // Bytes 0x5F-0x66.
    0x00AC, 0x002D, 0x002F, 0x00C2, 0x00C4, 0x00C0, 0x00C1, 0x00C3,
    // Bytes 0x67-0x6E.
    0x00C5, 0x00C7, 0x00D1, 0x00A6, 0x002C, 0x0025, 0x005F, 0x003E,
    // Bytes 0x6F-0x76.
    0x003F, 0x00F8, 0x00C9, 0x00CA, 0x00CB, 0x00C8, 0x00CD, 0x00CE,
    // Bytes 0x77-0x7E.
    0x00CF, 0x00CC, 0x0060, 0x003A, 0x0023, 0x0040, 0x0027, 0x003D,
    // Bytes 0x7F-0x8E ('a'..'i' at 0x81-0x89).
    0x0022, 0x00D8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x00AB, 0x00BB, 0x00F0, 0x00FD, 0x00FE,
    // Bytes 0x8F-0x9E ('j'..'r' at 0x91-0x99).
    0x00B1, 0x00B0, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x00AA, 0x00BA, 0x00E6, 0x00B8, 0x00C6,
    // Bytes 0x9F-0xAE ('s'..'z' at 0xA2-0xA9); 0x9F is still U+00A4 here.
    0x00A4, 0x00B5, 0x007E, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00A1, 0x00BF, 0x00D0, 0x00DD, 0x00DE,
    // Bytes 0xAF-0xB6.
    0x00AE, 0x005E, 0x00A3, 0x00A5, 0x00B7, 0x00A9, 0x00A7, 0x00B6,
    // Bytes 0xB7-0xBE.
    0x00BC, 0x00BD, 0x00BE, 0x005B, 0x005D, 0x00AF, 0x00A8, 0x00B4,
    // Bytes 0xBF-0xCE ('A'..'I' at 0xC1-0xC9).
    0x00D7, 0x007B, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x00AD, 0x00F4, 0x00F6, 0x00F2, 0x00F3,
    // Bytes 0xCF-0xDE ('J'..'R' at 0xD1-0xD9).
    0x00F5, 0x007D, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x00B9, 0x00FB, 0x00FC, 0x00F9, 0x00FA,
    // Bytes 0xDF-0xEE ('S'..'Z' at 0xE2-0xE9).
    0x00FF, 0x005C, 0x00F7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00B2, 0x00D4, 0x00D6, 0x00D2, 0x00D3,
    // Bytes 0xEF-0xFE ('0'..'9' at 0xF0-0xF9).
    0x00D5, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x00B3, 0x00DB, 0x00DC, 0x00D9, 0x00DA,
    // Byte 0xFF: the euro sign, the only entry that differs from the CP037 table.
    0x20AC, ];
303
304fn get_ebcdic_table(codepage: Codepage) -> Option<&'static [u32; 256]> {
306 match codepage {
307 Codepage::ASCII => None,
308 Codepage::CP037 => Some(&CP037_TO_UNICODE),
309 Codepage::CP273 => Some(&CP273_TO_UNICODE),
310 Codepage::CP500 => Some(&CP500_TO_UNICODE),
311 Codepage::CP1047 => Some(&CP1047_TO_UNICODE),
312 Codepage::CP1140 => Some(&CP1140_TO_UNICODE),
313 }
314}
315
316#[inline]
335#[must_use = "Handle the Result or propagate the error"]
336pub fn ebcdic_to_utf8(data: &[u8], codepage: Codepage, policy: UnmappablePolicy) -> Result<String> {
337 if codepage == Codepage::ASCII {
339 return Ok(String::from_utf8_lossy(data).into_owned());
340 }
341
342 let table = get_ebcdic_table(codepage).ok_or_else(|| {
343 Error::new(
344 ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
345 format!("Unsupported codepage: {codepage:?}"),
346 )
347 })?;
348
349 let mut result = String::with_capacity(data.len());
350
351 for &byte in data {
352 let unicode_point = table[byte as usize];
353
354 if unicode_point < 0x20
356 && unicode_point != 0x09
357 && unicode_point != 0x0A
358 && unicode_point != 0x0D
359 {
360 match policy {
361 UnmappablePolicy::Error => {
362 return Err(Error::new(
363 ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
364 format!("Unmappable EBCDIC byte: 0x{byte:02X} -> U+{unicode_point:04X}"),
365 ));
366 }
367 UnmappablePolicy::Replace => {
368 warn!(
369 "CBKC301_INVALID_EBCDIC_BYTE: Unmappable EBCDIC byte 0x{:02X}, replacing with U+FFFD",
370 byte
371 );
372 result.push('\u{FFFD}'); continue;
374 }
375 UnmappablePolicy::Skip => {
376 warn!(
377 "CBKC301_INVALID_EBCDIC_BYTE: Unmappable EBCDIC byte 0x{:02X}, skipping",
378 byte
379 );
380 continue;
381 }
382 }
383 }
384
385 if let Some(ch) = char::from_u32(unicode_point) {
387 result.push(ch);
388 } else {
389 match policy {
390 UnmappablePolicy::Error => {
391 return Err(Error::new(
392 ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
393 format!("Invalid Unicode code point: U+{unicode_point:04X}"),
394 ));
395 }
396 UnmappablePolicy::Replace => {
397 warn!(
398 "CBKC301_INVALID_EBCDIC_BYTE: Invalid Unicode code point U+{:04X}, replacing with U+FFFD",
399 unicode_point
400 );
401 result.push('\u{FFFD}');
402 }
403 UnmappablePolicy::Skip => {
404 warn!(
405 "CBKC301_INVALID_EBCDIC_BYTE: Invalid Unicode code point U+{:04X}, skipping",
406 unicode_point
407 );
408 }
409 }
410 }
411 }
412
413 Ok(result)
414}
415
416#[inline]
432#[must_use = "Handle the Result or propagate the error"]
433pub fn utf8_to_ebcdic(text: &str, codepage: Codepage) -> Result<Vec<u8>> {
434 if codepage == Codepage::ASCII {
436 return Ok(text.as_bytes().to_vec());
437 }
438
439 let table = get_ebcdic_table(codepage).ok_or_else(|| {
440 Error::new(
441 ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
442 format!("Unsupported codepage: {codepage:?}"),
443 )
444 })?;
445
446 let mut reverse_table = std::collections::HashMap::new();
448 for (ebcdic_index, &unicode_point) in table.iter().enumerate() {
449 if let Some(ch) = char::from_u32(unicode_point) {
450 let ebcdic_byte = u8::try_from(ebcdic_index).map_err(|_| {
451 Error::new(
452 ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
453 format!("EBCDIC byte index {ebcdic_index} exceeds u8 range"),
454 )
455 })?;
456 reverse_table.insert(ch, ebcdic_byte);
457 }
458 }
459
460 let mut result = Vec::with_capacity(text.len());
461
462 for ch in text.chars() {
463 if let Some(&ebcdic_byte) = reverse_table.get(&ch) {
464 result.push(ebcdic_byte);
465 } else {
466 return Err(Error::new(
467 ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
468 format!("Character '{ch}' cannot be mapped to {codepage:?}"),
469 ));
470 }
471 }
472
473 Ok(result)
474}
475
/// Unit tests for the re-exported code page helpers and for both conversion
/// directions (`ebcdic_to_utf8`, `utf8_to_ebcdic`).
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;
    use copybook_error::ErrorCode;

    // ---- Re-exported code page helpers ----

    #[test]
    fn test_space_byte_ascii() {
        assert_eq!(space_byte(Codepage::ASCII), 0x20);
    }

    #[test]
    fn test_space_byte_ebcdic() {
        // Every EBCDIC code page handled here encodes the space as 0x40.
        assert_eq!(space_byte(Codepage::CP037), 0x40);
        assert_eq!(space_byte(Codepage::CP273), 0x40);
        assert_eq!(space_byte(Codepage::CP500), 0x40);
        assert_eq!(space_byte(Codepage::CP1047), 0x40);
        assert_eq!(space_byte(Codepage::CP1140), 0x40);
    }

    #[test]
    fn test_codepage_is_ascii() {
        assert!(Codepage::ASCII.is_ascii());
        assert!(!Codepage::CP037.is_ascii());
    }

    #[test]
    fn test_codepage_is_ebcdic() {
        assert!(!Codepage::ASCII.is_ebcdic());
        assert!(Codepage::CP037.is_ebcdic());
    }

    #[test]
    fn test_codepage_code_page_number() {
        assert_eq!(Codepage::ASCII.code_page_number(), None);
        assert_eq!(Codepage::CP037.code_page_number(), Some(37));
        assert_eq!(Codepage::CP1140.code_page_number(), Some(1140));
    }

    // ---- ebcdic_to_utf8: basic decoding ----

    #[test]
    fn test_ebcdic_to_utf8_empty_input() {
        let result = ebcdic_to_utf8(&[], Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "");
    }

    #[test]
    fn test_ebcdic_to_utf8_ascii_passthrough() {
        let data = b"Hello, World!";
        let result = ebcdic_to_utf8(data, Codepage::ASCII, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "Hello, World!");
    }

    #[test]
    fn test_ebcdic_to_utf8_ascii_passthrough_non_utf8() {
        // Invalid UTF-8 in ASCII mode is replaced lossily rather than rejected.
        let data: &[u8] = &[0xFF, 0xFE];
        let result = ebcdic_to_utf8(data, Codepage::ASCII, UnmappablePolicy::Error).unwrap();
        assert!(result.contains('\u{FFFD}'));
    }

    #[test]
    fn test_ebcdic_to_utf8_cp037_space() {
        let data: &[u8] = &[0x40];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, " ");
    }

    #[test]
    fn test_ebcdic_to_utf8_cp037_digits() {
        // 0xF0..=0xF9 are the digits '0'..='9'.
        let data: Vec<u8> = (0xF0..=0xF9).collect();
        let result = ebcdic_to_utf8(&data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "0123456789");
    }

    #[test]
    fn test_ebcdic_to_utf8_cp037_uppercase() {
        let data: &[u8] = &[0xC1, 0xC2, 0xC3];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "ABC");
    }

    #[test]
    fn test_ebcdic_to_utf8_cp037_lowercase() {
        let data: &[u8] = &[0x81, 0x82, 0x83];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "abc");
    }

    // ---- ebcdic_to_utf8: unmappable-byte policies ----

    #[test]
    fn test_ebcdic_to_utf8_unmappable_error_policy() {
        // 0x00 decodes to NUL, a rejected control character.
        let data: &[u8] = &[0x00];
        let err = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap_err();
        assert_eq!(err.code, ErrorCode::CBKC301_INVALID_EBCDIC_BYTE);
    }

    #[test]
    fn test_ebcdic_to_utf8_unmappable_replace_policy() {
        let data: &[u8] = &[0x00];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Replace).unwrap();
        assert_eq!(result, "\u{FFFD}");
    }

    #[test]
    fn test_ebcdic_to_utf8_unmappable_skip_policy() {
        let data: &[u8] = &[0x00];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Skip).unwrap();
        assert_eq!(result, "");
    }

    #[test]
    fn test_ebcdic_to_utf8_mixed_valid_and_unmappable_skip() {
        let data: &[u8] = &[0x00, 0xC1, 0x00, 0xC2];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Skip).unwrap();
        assert_eq!(result, "AB");
    }

    #[test]
    fn test_ebcdic_to_utf8_all_codepages_digits() {
        let data: Vec<u8> = (0xF0..=0xF9).collect();
        // Iterate the shared list so that CP1140 is covered as well.
        for cp in ALL_EBCDIC {
            let result = ebcdic_to_utf8(&data, cp, UnmappablePolicy::Error).unwrap();
            assert_eq!(result, "0123456789", "Failed for {cp:?}");
        }
    }

    #[test]
    fn test_ebcdic_to_utf8_cp1140_euro_sign() {
        // Pins the table as shipped: the euro sign lives at 0xFF.
        let data: &[u8] = &[0xFF];
        let result = ebcdic_to_utf8(data, Codepage::CP1140, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "€");
    }

    // ---- ebcdic_to_utf8: whitelisted control characters ----

    #[test]
    fn test_ebcdic_to_utf8_cp037_tab_allowed() {
        let data: &[u8] = &[0x05];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "\t");
    }

    #[test]
    fn test_ebcdic_to_utf8_cp037_lf_allowed() {
        let data: &[u8] = &[0x25];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "\n");
    }

    #[test]
    fn test_ebcdic_to_utf8_cp037_cr_allowed() {
        let data: &[u8] = &[0x0D];
        let result = ebcdic_to_utf8(data, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result, "\r");
    }

    // ---- utf8_to_ebcdic: basic encoding ----

    #[test]
    fn test_utf8_to_ebcdic_empty_input() {
        let result = utf8_to_ebcdic("", Codepage::CP037).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn test_utf8_to_ebcdic_ascii_passthrough() {
        let result = utf8_to_ebcdic("Hello", Codepage::ASCII).unwrap();
        assert_eq!(result, b"Hello");
    }

    #[test]
    fn test_utf8_to_ebcdic_cp037_space() {
        let result = utf8_to_ebcdic(" ", Codepage::CP037).unwrap();
        assert_eq!(result, &[0x40]);
    }

    #[test]
    fn test_utf8_to_ebcdic_cp037_digits() {
        let result = utf8_to_ebcdic("0123456789", Codepage::CP037).unwrap();
        let expected: Vec<u8> = (0xF0..=0xF9).collect();
        assert_eq!(result, expected);
    }

    #[test]
    fn test_utf8_to_ebcdic_cp037_uppercase() {
        let result = utf8_to_ebcdic("ABC", Codepage::CP037).unwrap();
        assert_eq!(result, &[0xC1, 0xC2, 0xC3]);
    }

    #[test]
    fn test_utf8_to_ebcdic_unmappable_character() {
        // A CJK character has no entry in any single-byte table.
        let err = utf8_to_ebcdic("日", Codepage::CP037).unwrap_err();
        assert_eq!(err.code, ErrorCode::CBKC301_INVALID_EBCDIC_BYTE);
    }

    // ---- Round trips on short strings ----

    #[test]
    fn test_ebcdic_roundtrip_cp037() {
        let original = "Hello World 123";
        let ebcdic = utf8_to_ebcdic(original, Codepage::CP037).unwrap();
        let roundtrip = ebcdic_to_utf8(&ebcdic, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(roundtrip, original);
    }

    #[test]
    fn test_ebcdic_roundtrip_cp500() {
        let original = "Test 789";
        let ebcdic = utf8_to_ebcdic(original, Codepage::CP500).unwrap();
        let roundtrip = ebcdic_to_utf8(&ebcdic, Codepage::CP500, UnmappablePolicy::Error).unwrap();
        assert_eq!(roundtrip, original);
    }

    #[test]
    fn test_ebcdic_roundtrip_cp1047() {
        let original = "COBOL DATA";
        let ebcdic = utf8_to_ebcdic(original, Codepage::CP1047).unwrap();
        let roundtrip = ebcdic_to_utf8(&ebcdic, Codepage::CP1047, UnmappablePolicy::Error).unwrap();
        assert_eq!(roundtrip, original);
    }

    /// Every table-backed code page, for tests that must hold across all of them.
    const ALL_EBCDIC: [Codepage; 5] = [
        Codepage::CP037,
        Codepage::CP273,
        Codepage::CP500,
        Codepage::CP1047,
        Codepage::CP1140,
    ];

    // ---- Printable ASCII round trips ----

    #[test]
    fn test_printable_ascii_roundtrip_cp037() {
        roundtrip_printable_ascii(Codepage::CP037);
    }

    #[test]
    fn test_printable_ascii_roundtrip_cp273() {
        roundtrip_printable_ascii(Codepage::CP273);
    }

    #[test]
    fn test_printable_ascii_roundtrip_cp500() {
        roundtrip_printable_ascii(Codepage::CP500);
    }

    #[test]
    fn test_printable_ascii_roundtrip_cp1047() {
        roundtrip_printable_ascii(Codepage::CP1047);
    }

    #[test]
    fn test_printable_ascii_roundtrip_cp1140() {
        roundtrip_printable_ascii(Codepage::CP1140);
    }

    /// Encodes U+0020..=U+007E into `cp` and checks that decoding restores it.
    fn roundtrip_printable_ascii(cp: Codepage) {
        let printable: String = (0x20u8..=0x7Eu8).map(|b| b as char).collect();
        let ebcdic =
            utf8_to_ebcdic(&printable, cp).unwrap_or_else(|e| panic!("{cp:?} encode failed: {e}"));
        let back = ebcdic_to_utf8(&ebcdic, cp, UnmappablePolicy::Error)
            .unwrap_or_else(|e| panic!("{cp:?} decode failed: {e}"));
        assert_eq!(back, printable, "Round-trip mismatch for {cp:?}");
    }

    // ---- Code-page-specific characters ----

    #[test]
    fn test_cp1140_euro_sign_roundtrip() {
        let decoded = ebcdic_to_utf8(&[0xFF], Codepage::CP1140, UnmappablePolicy::Error).unwrap();
        assert_eq!(decoded, "€");
        let encoded = utf8_to_ebcdic("€", Codepage::CP1140).unwrap();
        assert_eq!(encoded, &[0xFF]);
    }

    #[test]
    fn test_cp037_currency_sign_at_9f() {
        let decoded = ebcdic_to_utf8(&[0x9F], Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(decoded, "¤");
    }

    #[test]
    fn test_cp273_national_chars() {
        let data: &[u8] = &[0x4A, 0x6A, 0xC0];
        let decoded = ebcdic_to_utf8(data, Codepage::CP273, UnmappablePolicy::Error).unwrap();
        assert_eq!(decoded, "Äöä");
        let encoded = utf8_to_ebcdic("Äöä", Codepage::CP273).unwrap();
        assert_eq!(encoded, data);
    }

    #[test]
    fn test_cp1140_vs_cp037_difference() {
        // The Replace policy keeps the CP037 decode of 0xFF from being able to fail.
        let cp037_ff = ebcdic_to_utf8(&[0xFF], Codepage::CP037, UnmappablePolicy::Replace).unwrap();
        let cp1140_ff = ebcdic_to_utf8(&[0xFF], Codepage::CP1140, UnmappablePolicy::Error).unwrap();
        assert_ne!(cp037_ff, cp1140_ff, "CP037 and CP1140 must differ at 0xFF");
        assert_eq!(cp1140_ff, "€");
    }

    // ---- Control characters across all code pages ----

    #[test]
    fn test_control_chars_error_policy_all_codepages() {
        for cp in ALL_EBCDIC {
            let err = ebcdic_to_utf8(&[0x00], cp, UnmappablePolicy::Error).unwrap_err();
            assert_eq!(
                err.code,
                ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
                "Expected error for NUL on {cp:?}"
            );
        }
    }

    #[test]
    fn test_control_chars_replace_policy_all_codepages() {
        for cp in ALL_EBCDIC {
            let result = ebcdic_to_utf8(&[0x00], cp, UnmappablePolicy::Replace).unwrap();
            assert_eq!(result, "\u{FFFD}", "Replace policy failed for {cp:?}");
        }
    }

    #[test]
    fn test_control_chars_skip_policy_all_codepages() {
        for cp in ALL_EBCDIC {
            let result = ebcdic_to_utf8(&[0x00], cp, UnmappablePolicy::Skip).unwrap();
            assert_eq!(result, "", "Skip policy failed for {cp:?}");
        }
    }

    #[test]
    fn test_allowed_control_chars_tab_lf_cr_all_codepages() {
        for cp in ALL_EBCDIC {
            let tab = ebcdic_to_utf8(&[0x05], cp, UnmappablePolicy::Error).unwrap();
            assert_eq!(tab, "\t", "Tab failed for {cp:?}");

            let lf = ebcdic_to_utf8(&[0x25], cp, UnmappablePolicy::Error).unwrap();
            assert_eq!(lf, "\n", "LF failed for {cp:?}");

            let cr = ebcdic_to_utf8(&[0x0D], cp, UnmappablePolicy::Error).unwrap();
            assert_eq!(cr, "\r", "CR failed for {cp:?}");
        }
    }

    // ---- Unmappable characters when encoding ----

    #[test]
    fn test_utf8_to_ebcdic_unmappable_cjk_all_codepages() {
        for cp in ALL_EBCDIC {
            let err = utf8_to_ebcdic("日", cp).unwrap_err();
            assert_eq!(
                err.code,
                ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
                "Expected unmappable error for {cp:?}"
            );
        }
    }

    #[test]
    fn test_utf8_to_ebcdic_emoji_unmappable() {
        let err = utf8_to_ebcdic("😀", Codepage::CP037).unwrap_err();
        assert_eq!(err.code, ErrorCode::CBKC301_INVALID_EBCDIC_BYTE);
    }

    // ---- Empty input ----

    #[test]
    fn test_empty_input_all_codepages_both_directions() {
        for cp in ALL_EBCDIC {
            let decoded = ebcdic_to_utf8(&[], cp, UnmappablePolicy::Error).unwrap();
            assert_eq!(decoded, "", "Empty decode failed for {cp:?}");

            let encoded = utf8_to_ebcdic("", cp).unwrap();
            assert!(encoded.is_empty(), "Empty encode failed for {cp:?}");
        }
        // ASCII is not part of ALL_EBCDIC, so it is checked separately.
        let decoded = ebcdic_to_utf8(&[], Codepage::ASCII, UnmappablePolicy::Error).unwrap();
        assert_eq!(decoded, "");
        let encoded = utf8_to_ebcdic("", Codepage::ASCII).unwrap();
        assert!(encoded.is_empty());
    }

    // ---- Full byte-range round trips ----

    #[test]
    fn test_full_byte_roundtrip_cp037() {
        full_byte_roundtrip(Codepage::CP037);
    }

    #[test]
    fn test_full_byte_roundtrip_cp273() {
        full_byte_roundtrip(Codepage::CP273);
    }

    #[test]
    fn test_full_byte_roundtrip_cp500() {
        full_byte_roundtrip(Codepage::CP500);
    }

    #[test]
    fn test_full_byte_roundtrip_cp1047() {
        full_byte_roundtrip(Codepage::CP1047);
    }

    #[test]
    fn test_full_byte_roundtrip_cp1140() {
        full_byte_roundtrip(Codepage::CP1140);
    }

    /// Decodes every byte value individually and checks that whatever decodes
    /// to a character encodes back to the same byte.
    fn full_byte_roundtrip(cp: Codepage) {
        for byte in 0x00u8..=0xFF {
            let Ok(decoded) = ebcdic_to_utf8(&[byte], cp, UnmappablePolicy::Skip) else {
                continue;
            };
            if decoded.is_empty() {
                // Dropped by the Skip policy (rejected control character).
                continue;
            }
            // A character that came out of the table must go back in; a failure
            // here is exactly the asymmetry this test exists to catch.
            let re_encoded = utf8_to_ebcdic(&decoded, cp).unwrap_or_else(|e| {
                panic!("{cp:?}: byte 0x{byte:02X} decoded to {decoded:?} but failed to re-encode: {e}")
            });
            assert_eq!(
                re_encoded,
                &[byte],
                "{cp:?}: byte 0x{byte:02X} decoded to {decoded:?} but re-encoded to {re_encoded:?}"
            );
        }
    }

    // ---- Large buffers ----

    #[test]
    fn test_large_buffer_decode_cp037() {
        let large_input = vec![0x40u8; 10_000];
        let result =
            ebcdic_to_utf8(&large_input, Codepage::CP037, UnmappablePolicy::Error).unwrap();
        assert_eq!(result.len(), 10_000);
        assert!(result.chars().all(|c| c == ' '));
    }

    #[test]
    fn test_large_buffer_encode_cp037() {
        let large_text: String = std::iter::repeat_n('A', 10_000).collect();
        let encoded = utf8_to_ebcdic(&large_text, Codepage::CP037).unwrap();
        assert_eq!(encoded.len(), 10_000);
        assert!(encoded.iter().all(|&b| b == 0xC1));
    }

    #[test]
    fn test_large_buffer_roundtrip_all_codepages() {
        let pattern = "HELLO WORLD 12345 ";
        let large_text: String = pattern.repeat(500);
        for cp in ALL_EBCDIC {
            let encoded = utf8_to_ebcdic(&large_text, cp)
                .unwrap_or_else(|e| panic!("{cp:?} large encode failed: {e}"));
            let decoded = ebcdic_to_utf8(&encoded, cp, UnmappablePolicy::Error)
                .unwrap_or_else(|e| panic!("{cp:?} large decode failed: {e}"));
            assert_eq!(decoded, large_text, "Large roundtrip failed for {cp:?}");
        }
    }

    // ---- Mixed valid and rejected bytes ----

    #[test]
    fn test_mixed_valid_and_control_replace_all_codepages() {
        for cp in ALL_EBCDIC {
            let data: &[u8] = &[0x00, 0xC1, 0x00];
            let result = ebcdic_to_utf8(data, cp, UnmappablePolicy::Replace).unwrap();
            assert_eq!(
                result.matches('\u{FFFD}').count(),
                2,
                "Replace count wrong for {cp:?}"
            );
            assert!(result.contains('A'), "Missing 'A' for {cp:?}");
        }
    }

    #[test]
    fn test_mixed_valid_and_control_skip_preserves_valid() {
        for cp in ALL_EBCDIC {
            let data: &[u8] = &[0x00, 0x40, 0xF1, 0x00];
            let result = ebcdic_to_utf8(data, cp, UnmappablePolicy::Skip).unwrap();
            assert_eq!(result, " 1", "Skip mixed content wrong for {cp:?}");
        }
    }

    // ---- Letters and digits share positions across code pages ----

    #[test]
    fn test_uppercase_letters_all_codepages() {
        let alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        for cp in ALL_EBCDIC {
            let encoded =
                utf8_to_ebcdic(alpha, cp).unwrap_or_else(|e| panic!("{cp:?} alpha encode: {e}"));
            let decoded = ebcdic_to_utf8(&encoded, cp, UnmappablePolicy::Error)
                .unwrap_or_else(|e| panic!("{cp:?} alpha decode: {e}"));
            assert_eq!(decoded, alpha, "Alphabet roundtrip failed for {cp:?}");
            // Spot-check the first letter of each of the three non-contiguous runs.
            assert_eq!(encoded[0], 0xC1, "{cp:?}: 'A' should be 0xC1");
            assert_eq!(encoded[9], 0xD1, "{cp:?}: 'J' should be 0xD1");
            assert_eq!(encoded[18], 0xE2, "{cp:?}: 'S' should be 0xE2");
        }
    }

    #[test]
    fn test_lowercase_letters_all_codepages() {
        let alpha = "abcdefghijklmnopqrstuvwxyz";
        for cp in ALL_EBCDIC {
            let encoded =
                utf8_to_ebcdic(alpha, cp).unwrap_or_else(|e| panic!("{cp:?} lower encode: {e}"));
            let decoded = ebcdic_to_utf8(&encoded, cp, UnmappablePolicy::Error)
                .unwrap_or_else(|e| panic!("{cp:?} lower decode: {e}"));
            assert_eq!(decoded, alpha, "Lowercase roundtrip failed for {cp:?}");
            // Spot-check the first letter of each of the three non-contiguous runs.
            assert_eq!(encoded[0], 0x81, "{cp:?}: 'a' should be 0x81");
            assert_eq!(encoded[9], 0x91, "{cp:?}: 'j' should be 0x91");
            assert_eq!(encoded[18], 0xA2, "{cp:?}: 's' should be 0xA2");
        }
    }

    #[test]
    fn test_digits_all_codepages() {
        let digits = "0123456789";
        for cp in ALL_EBCDIC {
            let encoded =
                utf8_to_ebcdic(digits, cp).unwrap_or_else(|e| panic!("{cp:?} digit encode: {e}"));
            assert_eq!(encoded.len(), 10);
            for (i, &b) in encoded.iter().enumerate() {
                // Digit `i` must land on byte 0xF0 + i.
                assert_eq!(
                    b,
                    0xF0 + u8::try_from(i).unwrap(),
                    "{cp:?}: digit {i} wrong"
                );
            }
        }
    }
}