1use std::collections::HashMap;
8
/// Identifies one of the four built-in single-byte encoding tables.
///
/// Each variant selects the like-named static table defined in this file
/// (`WIN_ANSI_TABLE`, `MAC_ROMAN_TABLE`, `MAC_EXPERT_TABLE`, `STANDARD_TABLE`).
// NOTE(review): the variant names look like the PDF predefined encodings
// (WinAnsiEncoding, MacRomanEncoding, ...) — confirm against the module docs.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StandardEncoding {
    /// Decodes through `WIN_ANSI_TABLE`.
    WinAnsi,
    /// Decodes through `MAC_ROMAN_TABLE`.
    MacRoman,
    /// Decodes through `MAC_EXPERT_TABLE`.
    MacExpert,
    /// Decodes through `STANDARD_TABLE`.
    Standard,
}
21
22impl StandardEncoding {
23 pub fn decode(&self, code: u8) -> Option<char> {
28 let table = match self {
29 StandardEncoding::WinAnsi => &WIN_ANSI_TABLE,
30 StandardEncoding::MacRoman => &MAC_ROMAN_TABLE,
31 StandardEncoding::MacExpert => &MAC_EXPERT_TABLE,
32 StandardEncoding::Standard => &STANDARD_TABLE,
33 };
34 table[code as usize]
35 }
36
37 pub fn decode_bytes(&self, bytes: &[u8]) -> String {
41 bytes
42 .iter()
43 .map(|&b| self.decode(b).unwrap_or('\u{FFFD}'))
44 .collect()
45 }
46}
47
/// A 256-entry lookup table from a one-byte code to an optional character.
///
/// Built from one of the built-in tables or from a caller-supplied table, and
/// adjustable per code through `apply_differences`.
#[derive(Clone, Debug)]
pub struct FontEncoding {
    /// Entry `i` is the character for code `i`, or `None` when unassigned.
    table: [Option<char>; 256],
}
58
59impl FontEncoding {
60 pub fn from_standard(encoding: StandardEncoding) -> Self {
62 let table = match encoding {
63 StandardEncoding::WinAnsi => WIN_ANSI_TABLE,
64 StandardEncoding::MacRoman => MAC_ROMAN_TABLE,
65 StandardEncoding::MacExpert => MAC_EXPERT_TABLE,
66 StandardEncoding::Standard => STANDARD_TABLE,
67 };
68 Self { table }
69 }
70
71 pub fn from_standard_with_differences(
78 encoding: StandardEncoding,
79 differences: &[(u8, char)],
80 ) -> Self {
81 let mut enc = Self::from_standard(encoding);
82 enc.apply_differences(differences);
83 enc
84 }
85
86 pub fn from_table(table: [Option<char>; 256]) -> Self {
88 Self { table }
89 }
90
91 pub fn apply_differences(&mut self, differences: &[(u8, char)]) {
93 for &(code, ch) in differences {
94 self.table[code as usize] = Some(ch);
95 }
96 }
97
98 pub fn decode(&self, code: u8) -> Option<char> {
100 self.table[code as usize]
101 }
102
103 pub fn decode_bytes(&self, bytes: &[u8]) -> String {
107 bytes
108 .iter()
109 .map(|&b| self.decode(b).unwrap_or('\u{FFFD}'))
110 .collect()
111 }
112}
113
114#[derive(Debug, Clone)]
121pub struct EncodingResolver {
122 to_unicode: Option<HashMap<u16, String>>,
124 font_encoding: Option<FontEncoding>,
126 default_encoding: FontEncoding,
128}
129
130impl EncodingResolver {
131 pub fn new(default_encoding: FontEncoding) -> Self {
133 Self {
134 to_unicode: None,
135 font_encoding: None,
136 default_encoding,
137 }
138 }
139
140 pub fn with_to_unicode(mut self, to_unicode: HashMap<u16, String>) -> Self {
142 self.to_unicode = Some(to_unicode);
143 self
144 }
145
146 pub fn with_font_encoding(mut self, encoding: FontEncoding) -> Self {
148 self.font_encoding = Some(encoding);
149 self
150 }
151
152 pub fn resolve(&self, code: u16) -> Option<String> {
161 if let Some(ref to_unicode) = self.to_unicode {
163 if let Some(s) = to_unicode.get(&code) {
164 return Some(s.clone());
165 }
166 }
167
168 if code <= 255 {
170 let byte = code as u8;
171
172 if let Some(ref enc) = self.font_encoding {
174 if let Some(ch) = enc.decode(byte) {
175 return Some(ch.to_string());
176 }
177 }
178
179 if let Some(ch) = self.default_encoding.decode(byte) {
181 return Some(ch.to_string());
182 }
183 }
184
185 None
186 }
187
188 pub fn decode_bytes(&self, bytes: &[u8]) -> String {
192 bytes
193 .iter()
194 .map(|&b| {
195 self.resolve(b as u16)
196 .unwrap_or_else(|| "\u{FFFD}".to_string())
197 })
198 .collect()
199 }
200}
201
/// Byte → character table selected by `StandardEncoding::WinAnsi`.
///
/// * `0x00..=0x7E` and `0xA0..=0xFF` map to the Unicode scalar with the same
///   numeric value.
/// * `0x80..=0x9F` map to the entries of `C1_RANGE` below.
/// * `0x7F`, `0x81`, `0x8D`, `0x8F`, `0x90` and `0x9D` are unassigned (`None`).
static WIN_ANSI_TABLE: [Option<char>; 256] = {
    // Entry `i` belongs to code `0x80 + i`.
    const C1_RANGE: [Option<char>; 32] = [
        Some('\u{20AC}'), // 0x80
        None,             // 0x81
        Some('\u{201A}'), // 0x82
        Some('\u{0192}'), // 0x83
        Some('\u{201E}'), // 0x84
        Some('\u{2026}'), // 0x85
        Some('\u{2020}'), // 0x86
        Some('\u{2021}'), // 0x87
        Some('\u{02C6}'), // 0x88
        Some('\u{2030}'), // 0x89
        Some('\u{0160}'), // 0x8A
        Some('\u{2039}'), // 0x8B
        Some('\u{0152}'), // 0x8C
        None,             // 0x8D
        Some('\u{017D}'), // 0x8E
        None,             // 0x8F
        None,             // 0x90
        Some('\u{2018}'), // 0x91
        Some('\u{2019}'), // 0x92
        Some('\u{201C}'), // 0x93
        Some('\u{201D}'), // 0x94
        Some('\u{2022}'), // 0x95
        Some('\u{2013}'), // 0x96
        Some('\u{2014}'), // 0x97
        Some('\u{02DC}'), // 0x98
        Some('\u{2122}'), // 0x99
        Some('\u{0161}'), // 0x9A
        Some('\u{203A}'), // 0x9B
        Some('\u{0153}'), // 0x9C
        None,             // 0x9D
        Some('\u{017E}'), // 0x9E
        Some('\u{0178}'), // 0x9F
    ];

    let mut table = [None; 256];
    let mut code = 0usize;
    // `while` rather than `for`: iterators are not usable in a static initializer.
    while code < 256 {
        table[code] = match code {
            0x7F => None,
            0x80..=0x9F => C1_RANGE[code - 0x80],
            // Everything else is the same-valued Unicode scalar.
            _ => Some(code as u8 as char),
        };
        code += 1;
    }
    table
};
475
/// Byte → character table selected by `StandardEncoding::MacRoman`.
///
/// * `0x00..=0x7E` map to the Unicode scalar with the same numeric value.
/// * `0x7F` is unassigned (`None`).
/// * `0x80..=0xFF` map to the entries of `UPPER_HALF` below; all 128 are assigned.
// NOTE(review): the upper half looks like the Mac OS Roman code page (for
// example 0xDB → U+20AC, 0xB0 → U+221E) — confirm this is intended rather than
// the PDF MacRomanEncoding subset.
static MAC_ROMAN_TABLE: [Option<char>; 256] = {
    // Entry `i` belongs to code `0x80 + i`; four codes per row.
    const UPPER_HALF: [char; 128] = [
        '\u{00C4}', '\u{00C5}', '\u{00C7}', '\u{00C9}', // 0x80
        '\u{00D1}', '\u{00D6}', '\u{00DC}', '\u{00E1}', // 0x84
        '\u{00E0}', '\u{00E2}', '\u{00E4}', '\u{00E3}', // 0x88
        '\u{00E5}', '\u{00E7}', '\u{00E9}', '\u{00E8}', // 0x8C
        '\u{00EA}', '\u{00EB}', '\u{00ED}', '\u{00EC}', // 0x90
        '\u{00EE}', '\u{00EF}', '\u{00F1}', '\u{00F3}', // 0x94
        '\u{00F2}', '\u{00F4}', '\u{00F6}', '\u{00F5}', // 0x98
        '\u{00FA}', '\u{00F9}', '\u{00FB}', '\u{00FC}', // 0x9C
        '\u{2020}', '\u{00B0}', '\u{00A2}', '\u{00A3}', // 0xA0
        '\u{00A7}', '\u{2022}', '\u{00B6}', '\u{00DF}', // 0xA4
        '\u{00AE}', '\u{00A9}', '\u{2122}', '\u{00B4}', // 0xA8
        '\u{00A8}', '\u{2260}', '\u{00C6}', '\u{00D8}', // 0xAC
        '\u{221E}', '\u{00B1}', '\u{2264}', '\u{2265}', // 0xB0
        '\u{00A5}', '\u{00B5}', '\u{2202}', '\u{2211}', // 0xB4
        '\u{220F}', '\u{03C0}', '\u{222B}', '\u{00AA}', // 0xB8
        '\u{00BA}', '\u{2126}', '\u{00E6}', '\u{00F8}', // 0xBC
        '\u{00BF}', '\u{00A1}', '\u{00AC}', '\u{221A}', // 0xC0
        '\u{0192}', '\u{2248}', '\u{2206}', '\u{00AB}', // 0xC4
        '\u{00BB}', '\u{2026}', '\u{00A0}', '\u{00C0}', // 0xC8
        '\u{00C3}', '\u{00D5}', '\u{0152}', '\u{0153}', // 0xCC
        '\u{2013}', '\u{2014}', '\u{201C}', '\u{201D}', // 0xD0
        '\u{2018}', '\u{2019}', '\u{00F7}', '\u{25CA}', // 0xD4
        '\u{00FF}', '\u{0178}', '\u{2044}', '\u{20AC}', // 0xD8
        '\u{2039}', '\u{203A}', '\u{FB01}', '\u{FB02}', // 0xDC
        '\u{2021}', '\u{00B7}', '\u{201A}', '\u{201E}', // 0xE0
        '\u{2030}', '\u{00C2}', '\u{00CA}', '\u{00C1}', // 0xE4
        '\u{00CB}', '\u{00C8}', '\u{00CD}', '\u{00CE}', // 0xE8
        '\u{00CF}', '\u{00CC}', '\u{00D3}', '\u{00D4}', // 0xEC
        '\u{F8FF}', '\u{00D2}', '\u{00DA}', '\u{00DB}', // 0xF0
        '\u{00D9}', '\u{0131}', '\u{02C6}', '\u{02DC}', // 0xF4
        '\u{00AF}', '\u{02D8}', '\u{02D9}', '\u{02DA}', // 0xF8
        '\u{00B8}', '\u{02DD}', '\u{02DB}', '\u{02C7}', // 0xFC
    ];

    let mut table = [None; 256];
    let mut code = 0usize;
    // `while` rather than `for`: iterators are not usable in a static initializer.
    while code < 256 {
        table[code] = match code {
            0x7F => None,
            0x80..=0xFF => Some(UPPER_HALF[code - 0x80]),
            // 0x00..=0x7E: the same-valued Unicode scalar.
            _ => Some(code as u8 as char),
        };
        code += 1;
    }
    table
};
739
/// Byte → character table selected by `StandardEncoding::MacExpert`.
///
/// Only the codes listed in `ASSIGNED` have an entry; every other code
/// (including all of `0x00..=0x1F`) is `None`. Many targets are in the
/// U+F6xx/U+F7xx private-use range.
// NOTE(review): several code points are assigned to two codes (U+F6DC at 0x25
// and 0x7C, U+F7A3 at 0x63 and 0xA3, U+F7B4 at 0x27 and 0xB3, U+F6F4 at 0x64
// and 0xE1, ...). Verify the list against the published MacExpertEncoding
// table before relying on it.
static MAC_EXPERT_TABLE: [Option<char>; 256] = {
    // `(code, character)` pairs in ascending code order; each code appears once.
    const ASSIGNED: &[(u8, char)] = &[
        (0x20, ' '), (0x21, '\u{F721}'), (0x22, '\u{F6C9}'), (0x23, '\u{F7A2}'),
        (0x24, '\u{F724}'), (0x25, '\u{F6DC}'), (0x26, '\u{F726}'), (0x27, '\u{F7B4}'),
        (0x28, '\u{207D}'), (0x29, '\u{207E}'), (0x2A, '\u{2025}'), (0x2B, '\u{2024}'),
        (0x2C, ','), (0x2D, '\u{002D}'), (0x2E, '.'), (0x2F, '\u{2044}'),
        (0x30, '\u{F730}'), (0x31, '\u{F731}'), (0x32, '\u{F732}'), (0x33, '\u{F733}'),
        (0x34, '\u{F734}'), (0x35, '\u{F735}'), (0x36, '\u{F736}'), (0x37, '\u{F737}'),
        (0x38, '\u{F738}'), (0x39, '\u{F739}'), (0x3A, ':'), (0x3B, ';'),
        (0x3F, '\u{F73F}'), (0x5B, '\u{F6E2}'), (0x61, '\u{F6F1}'), (0x62, '\u{F6F2}'),
        (0x63, '\u{F7A3}'), (0x64, '\u{F6F4}'), (0x65, '\u{F6F5}'), (0x66, '\u{F6F6}'),
        (0x67, '\u{F6F7}'), (0x68, '\u{F6F8}'), (0x69, '\u{F6E3}'), (0x6A, '\u{F6FA}'),
        (0x6B, '\u{F6FB}'), (0x6C, '\u{F6FC}'), (0x6D, '\u{F6FD}'), (0x6E, '\u{F6FE}'),
        (0x6F, '\u{F6FF}'), (0x70, '\u{F700}'), (0x71, '\u{F701}'), (0x72, '\u{F702}'),
        (0x73, '\u{F703}'), (0x74, '\u{F704}'), (0x75, '\u{F705}'), (0x76, '\u{F706}'),
        (0x77, '\u{F707}'), (0x78, '\u{F708}'), (0x79, '\u{F709}'), (0x7A, '\u{F70A}'),
        (0x7B, '\u{20A1}'), (0x7C, '\u{F6DC}'), (0x7D, '\u{F6DD}'), (0x7E, '\u{F6DE}'),
        (0x87, '\u{F6E4}'), (0x88, '\u{F7A8}'), (0x8A, '\u{F6E5}'), (0x8B, '\u{F6E6}'),
        (0x8D, '\u{F7AF}'), (0x90, '\u{F6E7}'), (0x93, '\u{F6E8}'), (0x95, '\u{F6EA}'),
        (0x96, '\u{F7B8}'), (0x9A, '\u{F6EB}'), (0x9B, '\u{F6EC}'), (0x9D, '\u{F6ED}'),
        (0xA0, '\u{F6EE}'), (0xA1, '\u{F6EF}'), (0xA2, '\u{F6F0}'), (0xA3, '\u{F7A3}'),
        (0xA4, '\u{F7A4}'), (0xA5, '\u{F7A5}'), (0xA6, '\u{F7A6}'), (0xA7, '\u{F7A7}'),
        (0xA8, '\u{F7A9}'), (0xA9, '\u{F7AA}'), (0xAA, '\u{F7AB}'), (0xAB, '\u{F7AC}'),
        (0xAC, '\u{F7AD}'), (0xAD, '\u{F7AE}'), (0xAE, '\u{F7AF}'), (0xAF, '\u{F7B0}'),
        (0xB0, '\u{F7B1}'), (0xB1, '\u{F7B2}'), (0xB2, '\u{F7B3}'), (0xB3, '\u{F7B4}'),
        (0xB4, '\u{F7B5}'), (0xB5, '\u{F7B6}'), (0xB6, '\u{F7B7}'), (0xB7, '\u{F7B8}'),
        (0xB8, '\u{F7B9}'), (0xB9, '\u{F7BA}'), (0xBA, '\u{F7BB}'), (0xBB, '\u{F7BC}'),
        (0xBC, '\u{F7BD}'), (0xBD, '\u{F7BE}'), (0xBE, '\u{F7BF}'), (0xBF, '\u{F7C0}'),
        (0xC0, '\u{F7C1}'), (0xC1, '\u{2153}'), (0xC2, '\u{2154}'), (0xC4, '\u{215B}'),
        (0xC5, '\u{215C}'), (0xC6, '\u{215D}'), (0xC7, '\u{215E}'), (0xC8, '\u{2070}'),
        (0xCA, '\u{F6F3}'), (0xCD, '\u{2074}'), (0xCF, '\u{2075}'), (0xD1, '\u{2076}'),
        (0xD2, '\u{2077}'), (0xD3, '\u{2078}'), (0xD4, '\u{2079}'), (0xD5, '\u{2080}'),
        (0xD6, '\u{2081}'), (0xD7, '\u{2082}'), (0xD8, '\u{2083}'), (0xD9, '\u{2084}'),
        (0xDA, '\u{2085}'), (0xDB, '\u{2086}'), (0xDC, '\u{2087}'), (0xDD, '\u{2088}'),
        (0xDE, '\u{2089}'), (0xE0, '\u{2215}'), (0xE1, '\u{F6F4}'), (0xE2, '\u{F6F5}'),
        (0xE3, '\u{F6F6}'), (0xE4, '\u{F6F7}'), (0xE5, '\u{F6F8}'), (0xE6, '\u{F7E6}'),
        (0xE8, '\u{F7E8}'), (0xEA, '\u{F6F9}'), (0xEB, '\u{F6FA}'), (0xED, '\u{F6E9}'),
        (0xF0, '\u{F7F0}'), (0xF1, '\u{00BC}'), (0xF2, '\u{00BD}'), (0xF3, '\u{00BE}'),
        (0xF4, '\u{215F}'), (0xF8, '\u{F6FA}'),
    ];

    let mut table = [None; 256];
    let mut next = 0usize;
    // `while` rather than `for`: iterators are not usable in a static initializer.
    while next < ASSIGNED.len() {
        let (code, character) = ASSIGNED[next];
        table[code as usize] = Some(character);
        next += 1;
    }
    table
};
928
/// Byte → character table selected by `StandardEncoding::Standard`.
///
/// * `0x20..=0x7E` map to the same-valued Unicode scalar, except `0x27`
///   (→ U+2019) and `0x60` (→ U+2018).
/// * Codes at `0xA1` and above are assigned only where listed in
///   `UPPER_ASSIGNED`.
/// * Everything else — `0x00..=0x1F`, `0x7F..=0xA0` and the gaps in the
///   upper range — is `None`.
static STANDARD_TABLE: [Option<char>; 256] = {
    // `(code, character)` pairs in ascending code order; each code appears once.
    const UPPER_ASSIGNED: &[(u8, char)] = &[
        (0xA1, '\u{00A1}'), (0xA2, '\u{00A2}'), (0xA3, '\u{00A3}'), (0xA4, '\u{2044}'),
        (0xA5, '\u{00A5}'), (0xA6, '\u{0192}'), (0xA7, '\u{00A7}'), (0xA8, '\u{00A4}'),
        (0xA9, '\''), (0xAA, '\u{201C}'), (0xAB, '\u{00AB}'), (0xAC, '\u{2039}'),
        (0xAD, '\u{203A}'), (0xAE, '\u{FB01}'), (0xAF, '\u{FB02}'), (0xB1, '\u{2013}'),
        (0xB2, '\u{2020}'), (0xB3, '\u{2021}'), (0xB4, '\u{00B7}'), (0xB6, '\u{00B6}'),
        (0xB7, '\u{2022}'), (0xB8, '\u{201A}'), (0xB9, '\u{201E}'), (0xBA, '\u{201D}'),
        (0xBB, '\u{00BB}'), (0xBC, '\u{2026}'), (0xBD, '\u{2030}'), (0xBF, '\u{00BF}'),
        (0xC1, '\u{0060}'), (0xC2, '\u{00B4}'), (0xC3, '\u{02C6}'), (0xC4, '\u{02DC}'),
        (0xC5, '\u{00AF}'), (0xC6, '\u{02D8}'), (0xC7, '\u{02D9}'), (0xC8, '\u{00A8}'),
        (0xCA, '\u{02DA}'), (0xCB, '\u{00B8}'), (0xCD, '\u{02DD}'), (0xCE, '\u{02DB}'),
        (0xCF, '\u{02C7}'), (0xD0, '\u{2014}'), (0xE1, '\u{00C6}'), (0xE3, '\u{00AA}'),
        (0xE8, '\u{0141}'), (0xE9, '\u{00D8}'), (0xEA, '\u{0152}'), (0xEB, '\u{00BA}'),
        (0xF1, '\u{00E6}'), (0xF5, '\u{0131}'), (0xF8, '\u{0142}'), (0xF9, '\u{00F8}'),
        (0xFA, '\u{0153}'), (0xFB, '\u{00DF}'),
    ];

    let mut table = [None; 256];

    // Printable ASCII range, with the two quote codes redirected.
    let mut code = 0x20usize;
    while code <= 0x7E {
        table[code] = match code {
            0x27 => Some('\u{2019}'),
            0x60 => Some('\u{2018}'),
            _ => Some(code as u8 as char),
        };
        code += 1;
    }

    // Sparse upper range.
    let mut next = 0usize;
    while next < UPPER_ASSIGNED.len() {
        let (upper_code, character) = UPPER_ASSIGNED[next];
        table[upper_code as usize] = Some(character);
        next += 1;
    }
    table
};
1107
1108#[cfg(test)]
1109mod tests {
1110 use super::*;
1111
1112 #[test]
1117 fn win_ansi_ascii_printable() {
1118 let enc = StandardEncoding::WinAnsi;
1119 assert_eq!(enc.decode(0x20), Some(' '));
1120 assert_eq!(enc.decode(0x41), Some('A'));
1121 assert_eq!(enc.decode(0x5A), Some('Z'));
1122 assert_eq!(enc.decode(0x61), Some('a'));
1123 assert_eq!(enc.decode(0x7A), Some('z'));
1124 assert_eq!(enc.decode(0x30), Some('0'));
1125 assert_eq!(enc.decode(0x39), Some('9'));
1126 }
1127
1128 #[test]
1129 fn win_ansi_extended_characters() {
1130 let enc = StandardEncoding::WinAnsi;
1131 assert_eq!(enc.decode(0x80), Some('\u{20AC}')); assert_eq!(enc.decode(0x85), Some('\u{2026}')); assert_eq!(enc.decode(0x93), Some('\u{201C}')); assert_eq!(enc.decode(0x94), Some('\u{201D}')); assert_eq!(enc.decode(0x96), Some('\u{2013}')); assert_eq!(enc.decode(0x97), Some('\u{2014}')); assert_eq!(enc.decode(0x99), Some('\u{2122}')); }
1140
1141 #[test]
1142 fn win_ansi_undefined_codes() {
1143 let enc = StandardEncoding::WinAnsi;
1144 assert_eq!(enc.decode(0x81), None); assert_eq!(enc.decode(0x8D), None); assert_eq!(enc.decode(0x8F), None); assert_eq!(enc.decode(0x90), None); assert_eq!(enc.decode(0x9D), None); }
1150
1151 #[test]
1152 fn win_ansi_latin_extended() {
1153 let enc = StandardEncoding::WinAnsi;
1154 assert_eq!(enc.decode(0xC0), Some('\u{00C0}')); assert_eq!(enc.decode(0xC9), Some('\u{00C9}')); assert_eq!(enc.decode(0xE9), Some('\u{00E9}')); assert_eq!(enc.decode(0xF1), Some('\u{00F1}')); assert_eq!(enc.decode(0xFC), Some('\u{00FC}')); assert_eq!(enc.decode(0xFF), Some('\u{00FF}')); }
1162
1163 #[test]
1164 fn win_ansi_decode_bytes() {
1165 let enc = StandardEncoding::WinAnsi;
1166 let result = enc.decode_bytes(&[0x48, 0x65, 0x6C, 0x6C, 0x6F]);
1168 assert_eq!(result, "Hello");
1169 }
1170
1171 #[test]
1172 fn win_ansi_decode_bytes_with_extended() {
1173 let enc = StandardEncoding::WinAnsi;
1174 let result = enc.decode_bytes(&[0x63, 0x61, 0x66, 0xE9]);
1176 assert_eq!(result, "caf\u{00E9}");
1177 }
1178
1179 #[test]
1180 fn win_ansi_decode_bytes_with_undefined() {
1181 let enc = StandardEncoding::WinAnsi;
1182 let result = enc.decode_bytes(&[0x41, 0x81, 0x42]);
1184 assert_eq!(result, "A\u{FFFD}B");
1185 }
1186
1187 #[test]
1192 fn mac_roman_ascii_printable() {
1193 let enc = StandardEncoding::MacRoman;
1194 assert_eq!(enc.decode(0x41), Some('A'));
1195 assert_eq!(enc.decode(0x61), Some('a'));
1196 assert_eq!(enc.decode(0x20), Some(' '));
1197 }
1198
1199 #[test]
1200 fn mac_roman_extended_characters() {
1201 let enc = StandardEncoding::MacRoman;
1202 assert_eq!(enc.decode(0x80), Some('\u{00C4}')); assert_eq!(enc.decode(0x83), Some('\u{00C9}')); assert_eq!(enc.decode(0x84), Some('\u{00D1}')); assert_eq!(enc.decode(0x87), Some('\u{00E1}')); assert_eq!(enc.decode(0x8E), Some('\u{00E9}')); assert_eq!(enc.decode(0x96), Some('\u{00F1}')); assert_eq!(enc.decode(0xCA), Some('\u{00A0}')); assert_eq!(enc.decode(0xD2), Some('\u{201C}')); assert_eq!(enc.decode(0xD3), Some('\u{201D}')); assert_eq!(enc.decode(0xDB), Some('\u{20AC}')); }
1213
1214 #[test]
1215 fn mac_roman_special_symbols() {
1216 let enc = StandardEncoding::MacRoman;
1217 assert_eq!(enc.decode(0xA5), Some('\u{2022}')); assert_eq!(enc.decode(0xB0), Some('\u{221E}')); assert_eq!(enc.decode(0xB9), Some('\u{03C0}')); assert_eq!(enc.decode(0xC5), Some('\u{2248}')); assert_eq!(enc.decode(0xDE), Some('\u{FB01}')); assert_eq!(enc.decode(0xDF), Some('\u{FB02}')); }
1224
1225 #[test]
1226 fn mac_roman_decode_bytes() {
1227 let enc = StandardEncoding::MacRoman;
1228 let result = enc.decode_bytes(&[0x84]);
1230 assert_eq!(result, "\u{00D1}");
1231 }
1232
1233 #[test]
1238 fn mac_expert_fractions() {
1239 let enc = StandardEncoding::MacExpert;
1240 assert_eq!(enc.decode(0xF1), Some('\u{00BC}')); assert_eq!(enc.decode(0xF2), Some('\u{00BD}')); assert_eq!(enc.decode(0xF3), Some('\u{00BE}')); assert_eq!(enc.decode(0xC1), Some('\u{2153}')); assert_eq!(enc.decode(0xC2), Some('\u{2154}')); }
1246
1247 #[test]
1248 fn mac_expert_superscripts_subscripts() {
1249 let enc = StandardEncoding::MacExpert;
1250 assert_eq!(enc.decode(0x28), Some('\u{207D}')); assert_eq!(enc.decode(0x29), Some('\u{207E}')); assert_eq!(enc.decode(0xD5), Some('\u{2080}')); assert_eq!(enc.decode(0xD6), Some('\u{2081}')); assert_eq!(enc.decode(0xDE), Some('\u{2089}')); }
1258
1259 #[test]
1260 fn mac_expert_space_and_basic() {
1261 let enc = StandardEncoding::MacExpert;
1262 assert_eq!(enc.decode(0x20), Some(' '));
1263 assert_eq!(enc.decode(0x2C), Some(','));
1264 assert_eq!(enc.decode(0x2E), Some('.'));
1265 assert_eq!(enc.decode(0x2F), Some('\u{2044}')); }
1267
1268 #[test]
1269 fn mac_expert_undefined_codes() {
1270 let enc = StandardEncoding::MacExpert;
1271 assert_eq!(enc.decode(0x00), None);
1273 assert_eq!(enc.decode(0x01), None);
1274 assert_eq!(enc.decode(0x10), None);
1275 }
1276
1277 #[test]
1282 fn standard_ascii_letters() {
1283 let enc = StandardEncoding::Standard;
1284 assert_eq!(enc.decode(0x41), Some('A'));
1285 assert_eq!(enc.decode(0x5A), Some('Z'));
1286 assert_eq!(enc.decode(0x61), Some('a'));
1287 assert_eq!(enc.decode(0x7A), Some('z'));
1288 }
1289
1290 #[test]
1291 fn standard_differs_from_ascii() {
1292 let enc = StandardEncoding::Standard;
1293 assert_eq!(enc.decode(0x27), Some('\u{2019}'));
1296 assert_eq!(enc.decode(0x60), Some('\u{2018}'));
1298 }
1299
1300 #[test]
1301 fn standard_extended_characters() {
1302 let enc = StandardEncoding::Standard;
1303 assert_eq!(enc.decode(0xA1), Some('\u{00A1}')); assert_eq!(enc.decode(0xA4), Some('\u{2044}')); assert_eq!(enc.decode(0xAE), Some('\u{FB01}')); assert_eq!(enc.decode(0xAF), Some('\u{FB02}')); assert_eq!(enc.decode(0xB1), Some('\u{2013}')); assert_eq!(enc.decode(0xD0), Some('\u{2014}')); assert_eq!(enc.decode(0xE1), Some('\u{00C6}')); assert_eq!(enc.decode(0xF1), Some('\u{00E6}')); assert_eq!(enc.decode(0xFA), Some('\u{0153}')); assert_eq!(enc.decode(0xFB), Some('\u{00DF}')); }
1314
1315 #[test]
1316 fn standard_undefined_ranges() {
1317 let enc = StandardEncoding::Standard;
1318 for code in 0x80..=0x9F {
1320 assert_eq!(enc.decode(code), None, "code 0x{code:02X} should be None");
1321 }
1322 }
1323
1324 #[test]
1325 fn standard_diacritics() {
1326 let enc = StandardEncoding::Standard;
1327 assert_eq!(enc.decode(0xC1), Some('\u{0060}')); assert_eq!(enc.decode(0xC2), Some('\u{00B4}')); assert_eq!(enc.decode(0xC3), Some('\u{02C6}')); assert_eq!(enc.decode(0xC4), Some('\u{02DC}')); assert_eq!(enc.decode(0xCA), Some('\u{02DA}')); assert_eq!(enc.decode(0xCF), Some('\u{02C7}')); }
1334
1335 #[test]
1340 fn font_encoding_from_standard() {
1341 let enc = FontEncoding::from_standard(StandardEncoding::WinAnsi);
1342 assert_eq!(enc.decode(0x41), Some('A'));
1343 assert_eq!(enc.decode(0x80), Some('\u{20AC}'));
1344 }
1345
/// Differences replace only the codes they list; every other code keeps the
/// mapping of the base encoding.
#[test]
fn font_encoding_differences_override() {
    // The constructor only borrows a slice, so a borrowed array literal is
    // enough; no `Vec` allocation is needed.
    let enc = FontEncoding::from_standard_with_differences(
        StandardEncoding::WinAnsi,
        &[(0x41, '\u{0391}'), (0x42, '\u{0392}')],
    );

    // The overridden codes now yield Greek capital Alpha and Beta.
    assert_eq!(enc.decode(0x41), Some('\u{0391}'));
    assert_eq!(enc.decode(0x42), Some('\u{0392}'));

    // Codes that were not listed still decode through WinAnsi.
    assert_eq!(enc.decode(0x43), Some('C'));
    assert_eq!(enc.decode(0x80), Some('\u{20AC}'));
}
1364
/// A difference may assign a character to code 0x81, which other tests in
/// this module expect to be unmapped under WinAnsi.
#[test]
fn font_encoding_differences_fill_undefined() {
    // The constructor only borrows a slice, so a borrowed array literal is
    // enough; no `Vec` allocation is needed.
    let enc = FontEncoding::from_standard_with_differences(
        StandardEncoding::WinAnsi,
        &[(0x81, '\u{2603}')],
    );

    assert_eq!(enc.decode(0x81), Some('\u{2603}'));
}
1376
/// `apply_differences` can be called on an existing encoding and changes
/// what the affected code decodes to from then on.
#[test]
fn font_encoding_apply_differences_incrementally() {
    let mut encoding = FontEncoding::from_standard(StandardEncoding::Standard);

    let before = encoding.decode(0x27);
    encoding.apply_differences(&[(0x27, '\'')]);
    let after = encoding.decode(0x27);

    // Curly quote from the Standard table first, plain apostrophe afterwards.
    assert_eq!((before, after), (Some('\u{2019}'), Some('\'')));
}
1386
/// `decode_bytes` honours differences: with 0xE9 remapped to U+00E9, the
/// bytes `63 61 66 E9` decode to "café".
#[test]
fn font_encoding_decode_bytes() {
    // The constructor only borrows a slice, so a borrowed array literal is
    // enough; no `Vec` allocation is needed.
    let enc = FontEncoding::from_standard_with_differences(
        StandardEncoding::Standard,
        &[(0xE9, '\u{00E9}')],
    );

    let result = enc.decode_bytes(&[0x63, 0x61, 0x66, 0xE9]);
    assert_eq!(result, "caf\u{00E9}");
}
1395
/// `from_table` uses the supplied table verbatim: populated slots decode,
/// empty slots yield `None`.
#[test]
fn font_encoding_from_custom_table() {
    let mut custom: [Option<char>; 256] = [None; 256];
    for (slot, glyph) in [(0x41_usize, 'X'), (0x42, 'Y')] {
        custom[slot] = Some(glyph);
    }

    let encoding = FontEncoding::from_table(custom);
    let decoded = [0x41, 0x42, 0x43].map(|code| encoding.decode(code));
    assert_eq!(decoded, [Some('X'), Some('Y'), None]);
}
1406
/// With only a default encoding configured, the resolver answers straight
/// from that encoding, including `None` for a code it does not map.
#[test]
fn resolver_default_only() {
    let win_ansi = FontEncoding::from_standard(StandardEncoding::WinAnsi);
    let resolver = EncodingResolver::new(win_ansi);

    let expectations = [
        (0x41, Some("A")),
        (0x80, Some("\u{20AC}")),
        (0x81, None),
    ];
    for (code, text) in expectations {
        assert_eq!(resolver.resolve(code), text.map(String::from));
    }
}
1420
/// A font encoding takes precedence over the default encoding: with Standard
/// as default and WinAnsi as font encoding, 0x27 resolves to the plain
/// apostrophe.
#[test]
fn resolver_font_encoding_over_default() {
    let resolver = EncodingResolver::new(FontEncoding::from_standard(StandardEncoding::Standard))
        .with_font_encoding(FontEncoding::from_standard(StandardEncoding::WinAnsi));

    assert_eq!(resolver.resolve(0x27).as_deref(), Some("'"));
}
1432
/// A ToUnicode entry overrides the encoding for its code, while codes
/// without an entry still resolve through the encoding.
#[test]
fn resolver_to_unicode_highest_priority() {
    let overrides = HashMap::from([(0x41, "X".to_string())]);
    let resolver = EncodingResolver::new(FontEncoding::from_standard(StandardEncoding::WinAnsi))
        .with_to_unicode(overrides);

    // 0x41 comes from the map; 0x42 falls through to WinAnsi.
    let resolved = [0x41, 0x42].map(|code| resolver.resolve(code));
    assert_eq!(resolved, [Some("X".to_string()), Some("B".to_string())]);
}
1447
/// Exercises every tier of the resolver in one go: a ToUnicode entry, a
/// font-encoding entry, the default encoding, and a code none of them define.
#[test]
fn resolver_full_chain() {
    // Font-level table that defines nothing except code 0x02.
    let mut glyphs = [None; 256];
    glyphs[0x02] = Some('F');

    let resolver = EncodingResolver::new(FontEncoding::from_standard(StandardEncoding::WinAnsi))
        .with_font_encoding(FontEncoding::from_table(glyphs))
        .with_to_unicode(HashMap::from([(0x01, "TOUNICODE".to_string())]));

    let expectations = [
        (0x01, Some("TOUNICODE")), // ToUnicode map
        (0x02, Some("F")),         // font encoding
        (0x41, Some("A")),         // default encoding
        (0x81, None),              // defined by no tier
    ];
    for (code, text) in expectations {
        assert_eq!(resolver.resolve(code), text.map(String::from));
    }
}
1472
/// A ToUnicode entry may map one code (here 0xFB01, beyond the single-byte
/// range) to a string of several characters.
#[test]
fn resolver_to_unicode_multi_char() {
    let ligatures = HashMap::from([(0xFB01, "fi".to_string())]);
    let resolver = EncodingResolver::new(FontEncoding::from_standard(StandardEncoding::WinAnsi))
        .with_to_unicode(ligatures);

    assert_eq!(resolver.resolve(0xFB01).as_deref(), Some("fi"));
}
1486
/// `decode_bytes` applies the ToUnicode override per byte: 0x41 becomes "X"
/// while 0x42 and 0x43 decode through WinAnsi.
#[test]
fn resolver_decode_bytes() {
    let resolver = EncodingResolver::new(FontEncoding::from_standard(StandardEncoding::WinAnsi))
        .with_to_unicode(HashMap::from([(0x41, "X".to_string())]));

    let input = [0x41, 0x42, 0x43];
    assert_eq!(resolver.decode_bytes(&input), "XBC");
}
1499
/// A byte that nothing maps (0x81 here) shows up as U+FFFD in the output of
/// `decode_bytes`, without disturbing its neighbours.
#[test]
fn resolver_decode_bytes_with_undefined() {
    let win_ansi = FontEncoding::from_standard(StandardEncoding::WinAnsi);
    let resolver = EncodingResolver::new(win_ansi);

    let input = [0x41, 0x81, 0x42];
    assert_eq!(resolver.decode_bytes(&input), "A\u{FFFD}B");
}
1508
/// WinAnsi and MacRoman must agree on every code in 0x20..=0x7E.
/// `StandardEncoding::Standard` is not part of this comparison.
#[test]
fn ascii_range_consistent_except_standard() {
    for code in 0x20..=0x7E_u8 {
        let [win, mac] =
            [StandardEncoding::WinAnsi, StandardEncoding::MacRoman].map(|enc| enc.decode(code));
        assert_eq!(win, mac, "WinAnsi and MacRoman disagree at 0x{code:02X}");
    }
}
1522
/// At 0x27 and 0x60 the Standard encoding yields curly quotes where WinAnsi
/// yields the plain apostrophe and backtick.
#[test]
fn standard_encoding_quote_marks_differ() {
    // (code, expected under Standard, expected under WinAnsi)
    let quotes = [(0x27, '\u{2019}', '\''), (0x60, '\u{2018}', '`')];
    for (code, curly, plain) in quotes {
        assert_eq!(StandardEncoding::Standard.decode(code), Some(curly));
        assert_eq!(StandardEncoding::WinAnsi.decode(code), Some(plain));
    }
}
1531
/// Code 0x20 decodes to a space in each of the four standard encodings.
#[test]
fn all_encodings_have_space() {
    let encodings = [
        StandardEncoding::WinAnsi,
        StandardEncoding::MacRoman,
        StandardEncoding::MacExpert,
        StandardEncoding::Standard,
    ];
    for encoding in encodings {
        assert_eq!(encoding.decode(0x20), Some(' '));
    }
}
1539}