/// Tag naming the encoding a PDF font declares for its character codes.
///
/// Every variant except [`PdfEncoding::Custom`] is a plain marker with no
/// payload.
#[derive(Debug, Clone, PartialEq)]
pub enum PdfEncoding {
    /// The `StandardEncoding` tag.
    StandardEncoding,
    /// The `MacRomanEncoding` tag.
    MacRomanEncoding,
    /// The `WinAnsiEncoding` tag.
    WinAnsiEncoding,
    /// The `PDFDocEncoding` tag.
    PDFDocEncoding,
    /// The `MacExpertEncoding` tag.
    MacExpertEncoding,
    /// The `Identity` tag.
    Identity,
    /// An explicit table of optional strings.
    // NOTE(review): presumably indexed by character code like `Encoding::table` — confirm.
    Custom(Vec<Option<String>>),
}
17
/// Lookup table that turns single-byte character codes into Unicode text.
#[derive(Debug)]
pub struct Encoding {
    /// Indexed by character code; `None` marks a code with no mapping.
    table: Vec<Option<String>>,
}
23
24impl Encoding {
25 pub fn from_name(name: &[u8]) -> Self {
27 match name {
28 b"WinAnsiEncoding" => Self::win_ansi(),
29 b"MacRomanEncoding" => Self::mac_roman(),
30 b"StandardEncoding" => Self::standard(),
31 b"ZapfDingbatsEncoding" => Self::zapf_dingbats(),
32 b"SymbolEncoding" => Self::symbol(),
33 _ => Self::win_ansi(),
34 }
35 }
36
37 pub fn win_ansi() -> Self {
39 let mut table = vec![None; 256];
40 let cp1252_bytes: Vec<u8> = (0..=255u16).map(|i| i as u8).collect();
43 let (decoded, _, _) = encoding_rs::WINDOWS_1252.decode(&cp1252_bytes);
44 for (i, ch) in decoded.chars().enumerate() {
45 if i < 256 && (ch != '\0' || i == 0) {
46 table[i] = Some(ch.to_string());
47 }
48 }
49 table[0x09] = Some("\t".into());
51 table[0x0A] = Some("\n".into());
52 table[0x0D] = Some("\r".into());
53 table[0x20] = Some(" ".into());
54 Self { table }
55 }
56
57 pub fn mac_roman() -> Self {
59 let mut table = vec![None; 256];
60 let mac_bytes: Vec<u8> = (0..=255u16).map(|i| i as u8).collect();
62 let (decoded, _, _) = encoding_rs::MACINTOSH.decode(&mac_bytes);
63 for (i, ch) in decoded.chars().enumerate() {
64 if i < 256 {
65 table[i] = Some(ch.to_string());
66 }
67 }
68 table[0x09] = Some("\t".into());
69 table[0x0A] = Some("\n".into());
70 table[0x0D] = Some("\r".into());
71 table[0x20] = Some(" ".into());
72 Self { table }
73 }
74
75 pub fn standard() -> Self {
77 let mut enc = Self::win_ansi();
80 enc.table[0x60] = Some("\u{2018}".into()); enc.table[0x27] = Some("\u{2019}".into()); enc.table[0xA1] = Some("\u{00A1}".into()); enc.table[0xA2] = Some("\u{00A2}".into()); enc.table[0xA3] = Some("\u{00A3}".into()); enc.table[0xA4] = Some("\u{2044}".into()); enc.table[0xA5] = Some("\u{00A5}".into()); enc.table[0xA6] = Some("\u{0192}".into()); enc.table[0xA7] = Some("\u{00A7}".into()); enc.table[0xA8] = Some("\u{00A4}".into()); enc.table[0xAC] = Some("\u{FB01}".into()); enc.table[0xAD] = Some("\u{FB02}".into()); enc.table[0xB0] = Some("\u{2013}".into()); enc.table[0xB1] = Some("\u{2020}".into()); enc.table[0xB2] = Some("\u{2021}".into()); enc.table[0xB3] = Some("\u{00B7}".into()); enc.table[0xB7] = Some("\u{2022}".into()); enc.table[0xB8] = Some("\u{201A}".into()); enc.table[0xB9] = Some("\u{201E}".into()); enc.table[0xBA] = Some("\u{201D}".into()); enc.table[0xBB] = Some("\u{00BB}".into()); enc.table[0xBC] = Some("\u{2026}".into()); enc.table[0xBD] = Some("\u{2030}".into()); enc.table[0xC1] = Some("\u{0060}".into()); enc.table[0xC2] = Some("\u{00B4}".into()); enc.table[0xC3] = Some("\u{02C6}".into()); enc.table[0xC4] = Some("\u{02DC}".into()); enc.table[0xC5] = Some("\u{00AF}".into()); enc.table[0xC6] = Some("\u{02D8}".into()); enc.table[0xC7] = Some("\u{02D9}".into()); enc.table[0xC8] = Some("\u{00A8}".into()); enc.table[0xCA] = Some("\u{02DA}".into()); enc.table[0xCB] = Some("\u{00B8}".into()); enc.table[0xCD] = Some("\u{02DD}".into()); enc.table[0xCE] = Some("\u{02DB}".into()); enc.table[0xCF] = Some("\u{02C7}".into()); enc.table[0xD0] = Some("\u{2014}".into()); enc.table[0xE1] = Some("\u{00C6}".into()); enc.table[0xE3] = Some("\u{00AA}".into()); enc.table[0xE8] = Some("\u{0141}".into()); enc.table[0xE9] = Some("\u{00D8}".into()); enc.table[0xEA] = Some("\u{0152}".into()); enc.table[0xEB] = Some("\u{00BA}".into()); enc.table[0xF1] = Some("\u{00E6}".into()); enc.table[0xF5] = Some("\u{0131}".into()); enc.table[0xF8] = Some("\u{0142}".into()); enc.table[0xF9] = 
Some("\u{00F8}".into()); enc.table[0xFA] = Some("\u{0153}".into()); enc.table[0xFB] = Some("\u{00DF}".into()); enc
131 }
132
133 pub fn zapf_dingbats() -> Self {
138 let mut table = vec![None; 256];
139 table[0x20] = Some(" ".into());
141 let mappings: &[(u8, char)] = &[
143 (0x21, '\u{2701}'), (0x22, '\u{2702}'), (0x23, '\u{2703}'), (0x24, '\u{2704}'), (0x25, '\u{260E}'), (0x26, '\u{2706}'), (0x27, '\u{2707}'), (0x28, '\u{2708}'), (0x29, '\u{2709}'), (0x2A, '\u{261B}'), (0x2B, '\u{261E}'), (0x2C, '\u{270C}'), (0x2D, '\u{270D}'), (0x2E, '\u{270E}'), (0x2F, '\u{270F}'), (0x30, '\u{2710}'), (0x31, '\u{2711}'), (0x32, '\u{2712}'), (0x33, '\u{2713}'), (0x34, '\u{2714}'), (0x35, '\u{2715}'), (0x36, '\u{2716}'), (0x37, '\u{2717}'), (0x38, '\u{2718}'), (0x39, '\u{2719}'), (0x3A, '\u{271A}'), (0x3B, '\u{271B}'), (0x3C, '\u{271C}'), (0x3D, '\u{271D}'), (0x3E, '\u{271E}'), (0x3F, '\u{271F}'), (0x40, '\u{2720}'), (0x41, '\u{2721}'), (0x42, '\u{2722}'), (0x43, '\u{2723}'), (0x44, '\u{2724}'), (0x45, '\u{2725}'), (0x46, '\u{2726}'), (0x47, '\u{2727}'), (0x48, '\u{2605}'), (0x49, '\u{2729}'), (0x4A, '\u{272A}'), (0x4B, '\u{272B}'), (0x4C, '\u{272C}'), (0x4D, '\u{272D}'), (0x4E, '\u{272E}'), (0x4F, '\u{272F}'), (0x50, '\u{2730}'), (0x51, '\u{2731}'), (0x52, '\u{2732}'), (0x53, '\u{2733}'), (0x54, '\u{2734}'), (0x55, '\u{2735}'), (0x56, '\u{2736}'), (0x57, '\u{2737}'), (0x58, '\u{2738}'), (0x59, '\u{2739}'), (0x5A, '\u{273A}'), (0x5B, '\u{273B}'), (0x5C, '\u{273C}'), (0x5D, '\u{273D}'), (0x5E, '\u{273E}'), (0x5F, '\u{273F}'), (0x60, '\u{2740}'), (0x61, '\u{2741}'), (0x62, '\u{2742}'), (0x63, '\u{2743}'), (0x64, '\u{2744}'), (0x65, '\u{2745}'), (0x66, '\u{2746}'), (0x67, '\u{2747}'), (0x68, '\u{2748}'), (0x69, '\u{2749}'), (0x6A, '\u{274A}'), (0x6B, '\u{274B}'), (0x6C, '\u{25CF}'), (0x6D, '\u{274D}'), (0x6E, '\u{25A0}'), (0x6F, '\u{274F}'), (0x70, '\u{2750}'), (0x71, '\u{2751}'), (0x72, '\u{2752}'), (0x73, '\u{25B2}'), (0x74, '\u{25BC}'), (0x75, '\u{25C6}'), (0x76, '\u{2756}'), (0x77, '\u{25D7}'), (0x78, '\u{2758}'), (0x79, '\u{2759}'), (0x7A, '\u{275A}'), (0x7B, '\u{275B}'), (0x7C, '\u{275C}'), (0x7D, '\u{275D}'), (0x7E, '\u{275E}'), ];
239 for &(code, ch) in mappings {
240 table[code as usize] = Some(ch.to_string());
241 }
242 let high_mappings: &[(u8, char)] = &[
244 (0x80, '\u{2768}'),
245 (0x81, '\u{2769}'),
246 (0x82, '\u{276A}'),
247 (0x83, '\u{276B}'),
248 (0x84, '\u{276C}'),
249 (0x85, '\u{276D}'),
250 (0x86, '\u{276E}'),
251 (0x87, '\u{276F}'),
252 (0x88, '\u{2770}'),
253 (0x89, '\u{2771}'),
254 (0x8A, '\u{2772}'),
255 (0x8B, '\u{2773}'),
256 (0x8C, '\u{2774}'),
257 (0x8D, '\u{2775}'),
258 (0xA1, '\u{2761}'),
259 (0xA2, '\u{2762}'),
260 (0xA3, '\u{2763}'),
261 (0xA4, '\u{2764}'),
262 (0xA5, '\u{2765}'),
263 (0xA6, '\u{2766}'),
264 (0xA7, '\u{2767}'),
265 (0xB1, '\u{2460}'),
266 (0xB2, '\u{2461}'),
267 (0xB3, '\u{2462}'),
268 (0xB4, '\u{2463}'),
269 (0xB5, '\u{2464}'),
270 (0xB6, '\u{2465}'),
271 (0xB7, '\u{2466}'),
272 (0xB8, '\u{2467}'),
273 (0xB9, '\u{2468}'),
274 (0xBA, '\u{2469}'),
275 (0xC0, '\u{2776}'),
276 (0xC1, '\u{2777}'),
277 (0xC2, '\u{2778}'),
278 (0xC3, '\u{2779}'),
279 (0xC4, '\u{277A}'),
280 (0xC5, '\u{277B}'),
281 (0xC6, '\u{277C}'),
282 (0xC7, '\u{277D}'),
283 (0xC8, '\u{277E}'),
284 (0xC9, '\u{277F}'),
285 (0xD1, '\u{2780}'),
286 (0xD2, '\u{2781}'),
287 (0xD3, '\u{2782}'),
288 (0xD4, '\u{2783}'),
289 (0xD5, '\u{2784}'),
290 (0xD6, '\u{2785}'),
291 (0xD7, '\u{2786}'),
292 (0xD8, '\u{2787}'),
293 (0xD9, '\u{2788}'),
294 (0xDA, '\u{2789}'),
295 (0xE1, '\u{278A}'),
296 (0xE2, '\u{278B}'),
297 (0xE3, '\u{278C}'),
298 (0xE4, '\u{278D}'),
299 (0xE5, '\u{278E}'),
300 (0xE6, '\u{278F}'),
301 (0xE7, '\u{2790}'),
302 (0xE8, '\u{2791}'),
303 (0xE9, '\u{2792}'),
304 (0xEA, '\u{2793}'),
305 (0xF1, '\u{2794}'),
306 (0xF2, '\u{2192}'), (0xF3, '\u{2194}'), (0xF4, '\u{2195}'), ];
310 for &(code, ch) in high_mappings {
311 table[code as usize] = Some(ch.to_string());
312 }
313 Self { table }
314 }
315
316 pub fn symbol() -> Self {
318 let mut table = vec![None; 256];
319 table[0x20] = Some(" ".into());
322 let mappings: &[(u8, char)] = &[
323 (0x21, '!'),
324 (0x22, '\u{2200}'), (0x23, '#'),
326 (0x24, '\u{2203}'), (0x25, '%'),
328 (0x26, '&'),
329 (0x27, '\u{220B}'), (0x28, '('),
331 (0x29, ')'),
332 (0x2A, '*'),
333 (0x2B, '+'),
334 (0x2C, ','),
335 (0x2D, '\u{2212}'), (0x2E, '.'),
337 (0x2F, '/'),
338 (0x30, '0'),
339 (0x31, '1'),
340 (0x32, '2'),
341 (0x33, '3'),
342 (0x34, '4'),
343 (0x35, '5'),
344 (0x36, '6'),
345 (0x37, '7'),
346 (0x38, '8'),
347 (0x39, '9'),
348 (0x3A, ':'),
349 (0x3B, ';'),
350 (0x3C, '<'),
351 (0x3D, '='),
352 (0x3E, '>'),
353 (0x3F, '?'),
354 (0x40, '\u{2245}'), (0x41, '\u{0391}'), (0x42, '\u{0392}'), (0x43, '\u{03A7}'), (0x44, '\u{0394}'), (0x45, '\u{0395}'), (0x46, '\u{03A6}'), (0x47, '\u{0393}'), (0x48, '\u{0397}'), (0x49, '\u{0399}'), (0x4B, '\u{039A}'), (0x4C, '\u{039B}'), (0x4D, '\u{039C}'), (0x4E, '\u{039D}'), (0x4F, '\u{039F}'), (0x50, '\u{03A0}'), (0x51, '\u{0398}'), (0x52, '\u{03A1}'), (0x53, '\u{03A3}'), (0x54, '\u{03A4}'), (0x55, '\u{03A5}'), (0x57, '\u{03A9}'), (0x58, '\u{039E}'), (0x59, '\u{03A8}'), (0x5A, '\u{0396}'), (0x5B, '['),
380 (0x5D, ']'),
381 (0x5E, '\u{22A5}'), (0x5F, '_'),
383 (0x61, '\u{03B1}'), (0x62, '\u{03B2}'), (0x63, '\u{03C7}'), (0x64, '\u{03B4}'), (0x65, '\u{03B5}'), (0x66, '\u{03C6}'), (0x67, '\u{03B3}'), (0x68, '\u{03B7}'), (0x69, '\u{03B9}'), (0x6B, '\u{03BA}'), (0x6C, '\u{03BB}'), (0x6D, '\u{03BC}'), (0x6E, '\u{03BD}'), (0x6F, '\u{03BF}'), (0x70, '\u{03C0}'), (0x71, '\u{03B8}'), (0x72, '\u{03C1}'), (0x73, '\u{03C3}'), (0x74, '\u{03C4}'), (0x75, '\u{03C5}'), (0x77, '\u{03C9}'), (0x78, '\u{03BE}'), (0x79, '\u{03C8}'), (0x7A, '\u{03B6}'), (0x7B, '{'),
408 (0x7C, '|'),
409 (0x7D, '}'),
410 (0x7E, '\u{223C}'), (0xA0, '\u{20AC}'), (0xB1, '\u{00B1}'), (0xB4, '\u{00D7}'), (0xB5, '\u{221D}'), (0xB6, '\u{2202}'), (0xB7, '\u{2022}'), (0xB8, '\u{00F7}'), (0xB9, '\u{2260}'), (0xBA, '\u{2261}'), (0xBB, '\u{2248}'), (0xBC, '\u{2026}'), (0xC0, '\u{2135}'), (0xC1, '\u{2111}'), (0xC2, '\u{211C}'), (0xC3, '\u{2118}'), (0xC5, '\u{2297}'), (0xC6, '\u{2295}'), (0xC7, '\u{2205}'), (0xC8, '\u{2229}'), (0xC9, '\u{222A}'), (0xCA, '\u{2283}'), (0xCB, '\u{2287}'), (0xCC, '\u{2284}'), (0xCD, '\u{2282}'), (0xCE, '\u{2286}'), (0xCF, '\u{2208}'), (0xD0, '\u{2209}'), (0xD1, '\u{2220}'), (0xD2, '\u{2207}'), (0xD5, '\u{220F}'), (0xD6, '\u{221A}'), (0xE0, '\u{25CA}'), (0xE5, '\u{2211}'), (0xF2, '\u{222B}'), (0xF5, '\u{221E}'), ];
447 for &(code, ch) in mappings {
448 table[code as usize] = Some(ch.to_string());
449 }
450 Self { table }
451 }
452
453 pub fn apply_differences(&mut self, differences: &[folio_cos::PdfObject]) {
455 let mut code = 0u16;
456 for item in differences {
457 match item {
458 folio_cos::PdfObject::Integer(n) => code = *n as u16,
459 folio_cos::PdfObject::Name(name) => {
460 if (code as usize) < self.table.len() {
461 let unicode = glyph_name_to_unicode(name);
462 self.table[code as usize] = Some(unicode);
463 }
464 code += 1;
465 }
466 _ => {}
467 }
468 }
469 }
470
471 pub fn decode_char(&self, code: u8) -> Option<&str> {
473 self.table.get(code as usize).and_then(|s| s.as_deref())
474 }
475
476 pub fn decode_bytes(&self, data: &[u8]) -> String {
478 let mut result = String::new();
479 for &byte in data {
480 match self.decode_char(byte) {
481 Some(s) => result.push_str(s),
482 None => result.push(char::REPLACEMENT_CHARACTER),
483 }
484 }
485 result
486 }
487}
488
/// Translates a PostScript glyph name into the text it represents.
///
/// Resolution order:
/// 1. the fixed name table in `lookup_named_glyph`;
/// 2. the `uniXXXX` and `uXXXX…` hexadecimal forms (`parse_hex_glyph`);
/// 3. otherwise the glyph name itself is returned unchanged.
///
/// A name that is not valid UTF-8 is treated as the empty string.
fn glyph_name_to_unicode(name: &[u8]) -> String {
    let name_str = std::str::from_utf8(name).unwrap_or("");
    if let Some(text) = lookup_named_glyph(name_str) {
        return text.to_owned();
    }
    parse_hex_glyph(name_str).map_or_else(|| name_str.to_owned(), String::from)
}

/// Parses `uniXXXX` (exactly four hex digits) or `uXXXX…` (four or more hex
/// digits) into the scalar value it names.
///
/// Returns `None` for anything that is not pure hexadecimal or does not
/// denote a valid `char`.
fn parse_hex_glyph(name: &str) -> Option<char> {
    let digits = match name.strip_prefix("uni") {
        Some(rest) if rest.len() == 4 => rest,
        _ => name.strip_prefix('u').filter(|rest| rest.len() >= 4)?,
    };
    // `from_str_radix` would accept a leading '+', so check the digits first.
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(digits, 16).ok().and_then(char::from_u32)
}

/// Fixed table of well-known glyph names and the text they stand for.
fn lookup_named_glyph(name: &str) -> Option<&'static str> {
    let text = match name {
        // ASCII punctuation, digits and signs.
        "space" | "nbspace" => " ",
        "exclam" => "!",
        "quotedbl" => "\"",
        "numbersign" => "#",
        "dollar" => "$",
        "percent" => "%",
        "ampersand" => "&",
        "quotesingle" => "'",
        "parenleft" => "(",
        "parenright" => ")",
        "asterisk" => "*",
        "plus" => "+",
        "comma" => ",",
        "hyphen" | "minus" | "hyphenchar" => "-",
        "period" => ".",
        "slash" => "/",
        "zero" => "0",
        "one" => "1",
        "two" => "2",
        "three" => "3",
        "four" => "4",
        "five" => "5",
        "six" => "6",
        "seven" => "7",
        "eight" => "8",
        "nine" => "9",
        "colon" => ":",
        "semicolon" => ";",
        "less" => "<",
        "equal" => "=",
        "greater" => ">",
        "question" => "?",
        "at" => "@",
        "bracketleft" => "[",
        "backslash" => "\\",
        "bracketright" => "]",
        "asciicircum" => "^",
        "underscore" => "_",
        // The grave accent is U+0060; only `quoteleft` is the curly quote.
        "grave" => "\u{0060}",
        "quoteleft" => "\u{2018}",
        "braceleft" => "{",
        "bar" => "|",
        "braceright" => "}",
        "asciitilde" => "~",

        // Quotation marks.
        "quoteright" => "\u{2019}",
        "quotedblleft" => "\u{201C}",
        "quotedblright" => "\u{201D}",
        "quotedblbase" => "\u{201E}",
        "quotesinglbase" => "\u{201A}",
        "guillemotleft" => "\u{00AB}",
        "guillemotright" => "\u{00BB}",
        // Single guillemets are distinct characters from the double ones.
        "guilsinglleft" => "\u{2039}",
        "guilsinglright" => "\u{203A}",

        // Typographic punctuation and signs.
        "endash" => "\u{2013}",
        "emdash" => "\u{2014}",
        "bullet" => "\u{2022}",
        "ellipsis" => "\u{2026}",
        "dagger" => "\u{2020}",
        "daggerdbl" => "\u{2021}",
        "perthousand" => "\u{2030}",
        "trademark" => "\u{2122}",
        "copyright" => "\u{00A9}",
        "registered" => "\u{00AE}",

        // Ligatures and miscellaneous symbols.
        "fi" => "\u{FB01}",
        "fl" => "\u{FB02}",
        "ff" => "\u{FB00}",
        "ffi" => "\u{FB03}",
        "ffl" => "\u{FB04}",
        "lozenge" => "\u{25CA}",
        "Euro" => "\u{20AC}",
        "degree" => "\u{00B0}",
        "section" => "\u{00A7}",
        "paragraph" | "pilcrow" => "\u{00B6}",
        "fraction" => "\u{2044}",
        "florin" => "\u{0192}",

        // Latin-1 accented letters.
        "Agrave" => "\u{00C0}",
        "Aacute" => "\u{00C1}",
        "Acircumflex" => "\u{00C2}",
        "Atilde" => "\u{00C3}",
        "Adieresis" => "\u{00C4}",
        "Aring" => "\u{00C5}",
        "AE" => "\u{00C6}",
        "Ccedilla" => "\u{00C7}",
        "Egrave" => "\u{00C8}",
        "Eacute" => "\u{00C9}",
        "Ecircumflex" => "\u{00CA}",
        "Edieresis" => "\u{00CB}",
        "Igrave" => "\u{00CC}",
        "Iacute" => "\u{00CD}",
        "Icircumflex" => "\u{00CE}",
        "Idieresis" => "\u{00CF}",
        "Eth" => "\u{00D0}",
        "Ntilde" => "\u{00D1}",
        "Ograve" => "\u{00D2}",
        "Oacute" => "\u{00D3}",
        "Ocircumflex" => "\u{00D4}",
        "Otilde" => "\u{00D5}",
        "Odieresis" => "\u{00D6}",
        "Oslash" => "\u{00D8}",
        "Ugrave" => "\u{00D9}",
        "Uacute" => "\u{00DA}",
        "Ucircumflex" => "\u{00DB}",
        "Udieresis" => "\u{00DC}",
        "Yacute" => "\u{00DD}",
        "Thorn" => "\u{00DE}",
        "germandbls" => "\u{00DF}",
        "agrave" => "\u{00E0}",
        "aacute" => "\u{00E1}",
        "acircumflex" => "\u{00E2}",
        "atilde" => "\u{00E3}",
        "adieresis" => "\u{00E4}",
        "aring" => "\u{00E5}",
        "ae" => "\u{00E6}",
        "ccedilla" => "\u{00E7}",
        "egrave" => "\u{00E8}",
        "eacute" => "\u{00E9}",
        "ecircumflex" => "\u{00EA}",
        "edieresis" => "\u{00EB}",
        "igrave" => "\u{00EC}",
        "iacute" => "\u{00ED}",
        "icircumflex" => "\u{00EE}",
        "idieresis" => "\u{00EF}",
        "eth" => "\u{00F0}",
        "ntilde" => "\u{00F1}",
        "ograve" => "\u{00F2}",
        "oacute" => "\u{00F3}",
        "ocircumflex" => "\u{00F4}",
        "otilde" => "\u{00F5}",
        "odieresis" => "\u{00F6}",
        "oslash" => "\u{00F8}",
        "ugrave" => "\u{00F9}",
        "uacute" => "\u{00FA}",
        "ucircumflex" => "\u{00FB}",
        "udieresis" => "\u{00FC}",
        "yacute" => "\u{00FD}",
        "thorn" => "\u{00FE}",
        "ydieresis" => "\u{00FF}",

        // Greek capitals.
        "Alpha" => "\u{0391}",
        "Beta" => "\u{0392}",
        "Gamma" => "\u{0393}",
        "Delta" => "\u{0394}",
        "Epsilon" => "\u{0395}",
        "Zeta" => "\u{0396}",
        "Eta" => "\u{0397}",
        "Theta" => "\u{0398}",
        "Iota" => "\u{0399}",
        "Kappa" => "\u{039A}",
        "Lambda" => "\u{039B}",
        "Mu" => "\u{039C}",
        "Nu" => "\u{039D}",
        "Xi" => "\u{039E}",
        "Omicron" => "\u{039F}",
        "Pi" => "\u{03A0}",
        "Rho" => "\u{03A1}",
        "Sigma" => "\u{03A3}",
        "Tau" => "\u{03A4}",
        "Upsilon" => "\u{03A5}",
        "Phi" => "\u{03A6}",
        "Chi" => "\u{03A7}",
        "Psi" => "\u{03A8}",
        "Omega" => "\u{03A9}",

        // Greek lowercase and variant forms.
        "alpha" => "\u{03B1}",
        "beta" => "\u{03B2}",
        "gamma" => "\u{03B3}",
        "delta" => "\u{03B4}",
        "epsilon" | "varepsilon" => "\u{03B5}",
        "zeta" => "\u{03B6}",
        "eta" => "\u{03B7}",
        "theta" => "\u{03B8}",
        "iota" => "\u{03B9}",
        "kappa" => "\u{03BA}",
        "lambda" => "\u{03BB}",
        "mu" => "\u{03BC}",
        "nu" => "\u{03BD}",
        "xi" => "\u{03BE}",
        "omicron" => "\u{03BF}",
        "pi" => "\u{03C0}",
        "rho" => "\u{03C1}",
        "sigma" => "\u{03C3}",
        "varsigma" | "sigmafinal" => "\u{03C2}",
        "tau" => "\u{03C4}",
        "upsilon" => "\u{03C5}",
        "phi" | "varphi" => "\u{03C6}",
        "chi" => "\u{03C7}",
        "psi" => "\u{03C8}",
        "omega" => "\u{03C9}",
        "vartheta" => "\u{03D1}",
        "varpi" => "\u{03D6}",

        // Mathematical operators, relations and arrows.
        "multiply" => "\u{00D7}",
        "divide" => "\u{00F7}",
        "plusminus" => "\u{00B1}",
        "minusmath" => "\u{2212}",
        "notequal" => "\u{2260}",
        "lessequal" | "leq" => "\u{2264}",
        "greaterequal" | "geq" => "\u{2265}",
        "infinity" => "\u{221E}",
        "summation" => "\u{2211}",
        "product" => "\u{220F}",
        "integral" => "\u{222B}",
        "radical" | "sqrt" => "\u{221A}",
        "approxequal" | "approx" => "\u{2248}",
        "nabla" | "gradient" => "\u{2207}",
        "partial" | "partialdiff" => "\u{2202}",
        "element" | "in" => "\u{2208}",
        "notelement" | "notin" => "\u{2209}",
        "propersubset" | "subset" => "\u{2282}",
        "propersuperset" | "superset" => "\u{2283}",
        "reflexsubset" | "subseteq" => "\u{2286}",
        "reflexsuperset" | "supseteq" => "\u{2287}",
        "union" | "cup" => "\u{222A}",
        "intersection" | "cap" => "\u{2229}",
        "emptyset" => "\u{2205}",
        "forall" | "universal" => "\u{2200}",
        "existential" | "exists" => "\u{2203}",
        "logicaland" | "wedge" => "\u{2227}",
        "logicalor" | "vee" => "\u{2228}",
        "logicalnot" | "neg" => "\u{00AC}",
        "therefore" => "\u{2234}",
        "because" => "\u{2235}",
        "equivalence" | "equiv" => "\u{2261}",
        "proportional" | "propto" => "\u{221D}",
        "perpendicular" | "perp" => "\u{22A5}",
        "angle" => "\u{2220}",
        "arrowleft" | "leftarrow" => "\u{2190}",
        "arrowup" | "uparrow" => "\u{2191}",
        "arrowright" | "rightarrow" => "\u{2192}",
        "arrowdown" | "downarrow" => "\u{2193}",
        "arrowboth" | "leftrightarrow" => "\u{2194}",
        "Arrowleft" | "Leftarrow" => "\u{21D0}",
        "Arrowright" | "Rightarrow" => "\u{21D2}",
        "Arrowboth" | "Leftrightarrow" => "\u{21D4}",
        "aleph" => "\u{2135}",
        "Ifraktur" | "Im" => "\u{2111}",
        "Rfraktur" | "Re" => "\u{211C}",
        "weierstrass" | "wp" => "\u{2118}",
        "circleplus" | "oplus" => "\u{2295}",
        "circlemultiply" | "otimes" => "\u{2297}",
        "dotmath" | "cdot" | "periodcentered" | "middot" => "\u{00B7}",
        "times" => "\u{00D7}",
        "star" => "\u{22C6}",

        // Extended Latin letters and spacing accents.
        "Lslash" => "\u{0141}",
        "lslash" => "\u{0142}",
        "OE" => "\u{0152}",
        "oe" => "\u{0153}",
        "Scaron" => "\u{0160}",
        "scaron" => "\u{0161}",
        "Zcaron" => "\u{017D}",
        "zcaron" => "\u{017E}",
        "Ydieresis" => "\u{0178}",
        "dotlessi" => "\u{0131}",
        "circumflex" => "\u{02C6}",
        "tilde" => "\u{02DC}",
        "breve" => "\u{02D8}",
        "dotaccent" => "\u{02D9}",
        "ring" => "\u{02DA}",
        "cedilla" => "\u{00B8}",
        "hungarumlaut" => "\u{02DD}",
        "ogonek" => "\u{02DB}",
        "caron" => "\u{02C7}",
        "macron" => "\u{00AF}",
        "dieresis" => "\u{00A8}",
        "acute" => "\u{00B4}",

        // Geometric shapes.
        "blackdiamond" | "diamond" => "\u{25C6}",
        "filledbox" | "blacksquare" => "\u{25A0}",
        "filledcircle" | "blackcircle" => "\u{25CF}",
        "opendiamond" => "\u{25C7}",
        "openbox" | "square" => "\u{25A1}",
        "circle" | "opencircle" => "\u{25CB}",
        "triagup" => "\u{25B2}",
        "triagdn" => "\u{25BC}",

        _ => return None,
    };
    Some(text)
}
805
806pub fn decode_text(
808 data: &[u8],
809 encoding: &Encoding,
810 tounicode: Option<&super::cmap::ToUnicodeCMap>,
811) -> String {
812 if let Some(cmap) = tounicode {
813 return cmap.decode(data);
814 }
815 encoding.decode_bytes(data)
816}