use super::adobe_glyph_list::ADOBE_GLYPH_LIST;
use super::cmap::CMap;
use std::collections::HashMap;
/// Selects which predefined CID-to-Unicode table a [`CharacterMapper`] consults.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly
/// instead of field by field.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PredefinedCMapConfig {
    /// Character collection ordering. The mapper recognises `"GB1"`,
    /// `"Japan1"`, `"CNS1"`, `"Korea1"` and `"Identity"`; any other value
    /// yields no mapping from the predefined-CMap stage.
    pub ordering: String,
}
/// Maps character codes to Unicode strings.
///
/// Holds up to three optional mapping sources; each one is `None` until the
/// corresponding setter is called. `map_character` consults them in a fixed
/// order and falls back to U+FFFD when nothing matches.
#[derive(Clone)]
pub struct CharacterMapper {
/// Explicit code-to-string map; checked first by `map_character`.
tounicode_cmap: Option<CMap>,
/// Predefined CID collection to use when the glyph-name lookup fails.
predefined_cmap: Option<PredefinedCMapConfig>,
/// Per-font code-to-character table; checked last before the U+FFFD fallback.
font_encoding: Option<HashMap<u32, char>>,
}
impl CharacterMapper {
/// Creates a mapper with no ToUnicode CMap, predefined CMap, or font
/// encoding configured.
pub fn new() -> Self {
    let tounicode_cmap = None;
    let predefined_cmap = None;
    let font_encoding = None;
    Self {
        tounicode_cmap,
        predefined_cmap,
        font_encoding,
    }
}
/// Installs (`Some`) or removes (`None`) the ToUnicode CMap, replacing any
/// previously stored one.
pub fn set_tounicode_cmap(&mut self, cmap: Option<CMap>) {
    let slot = &mut self.tounicode_cmap;
    *slot = cmap;
}
/// Installs (`Some`) or removes (`None`) the predefined CMap configuration,
/// replacing any previously stored one.
pub fn set_predefined_cmap(&mut self, config: Option<PredefinedCMapConfig>) {
    let slot = &mut self.predefined_cmap;
    *slot = config;
}
/// Installs (`Some`) or removes (`None`) the per-font code-to-character
/// table, replacing any previously stored one.
pub fn set_font_encoding(&mut self, encoding: Option<HashMap<u32, char>>) {
    let slot = &mut self.font_encoding;
    *slot = encoding;
}
pub fn map_character(&self, code: u32) -> Option<String> {
if let Some(ref cmap) = self.tounicode_cmap {
if let Some(mapped) = cmap.get(&code) {
return Some(mapped.clone());
}
}
if let Some(glyph_name) = self.code_to_glyph_name(code) {
if let Some(unicode_str) = self.map_glyph_name_internal(&glyph_name) {
return Some(unicode_str);
}
}
if let Some(ref config) = self.predefined_cmap {
if let Some(unicode_str) = self.lookup_predefined_cmap(config, code) {
return Some(unicode_str);
}
}
if let Some(ref encoding) = self.font_encoding {
if let Some(&ch) = encoding.get(&code) {
return Some(ch.to_string());
}
}
Some("\u{FFFD}".to_string())
}
/// Looks up `code` as a CID in the predefined collection named by
/// `config.ordering` and returns the resulting character as a string.
///
/// Returns `None` when the ordering is not recognised, when the collection
/// has no entry for the CID, when the resulting value is not a valid Unicode
/// scalar (e.g. a surrogate under `"Identity"`), or when `code` does not fit
/// in 16 bits.
fn lookup_predefined_cmap(&self, config: &PredefinedCMapConfig, code: u32) -> Option<String> {
    // The collection tables are keyed by 16-bit CIDs. A plain `as u16` cast
    // would wrap larger codes onto unrelated CIDs (0x1034B -> 0x034B) and
    // return a wrong character, so reject them up front.
    if code > 0xFFFF {
        return None;
    }
    let cid = code as u16; // lossless: range checked above

    let unicode_codepoint = match config.ordering.as_str() {
        "GB1" => super::cid_mappings::lookup_adobe_gb1(cid),
        "Japan1" => super::cid_mappings::lookup_adobe_japan1(cid),
        "CNS1" => super::cid_mappings::lookup_adobe_cns1(cid),
        "Korea1" => super::cid_mappings::lookup_adobe_korea1(cid),
        // Identity: the code is taken as the code point itself.
        "Identity" => Some(code),
        _ => None,
    };

    unicode_codepoint
        .and_then(char::from_u32)
        .map(|ch| ch.to_string())
}
pub fn map_glyph_name(&self, glyph_name: &str) -> Option<String> {
self.map_glyph_name_internal(glyph_name)
}
/// Shared glyph-name lookup against `ADOBE_GLYPH_LIST`; returns the mapped
/// value rendered as a `String`, or `None` for an unlisted name.
fn map_glyph_name_internal(&self, glyph_name: &str) -> Option<String> {
    let entry = ADOBE_GLYPH_LIST.get(glyph_name)?;
    Some(entry.to_string())
}
/// Returns the built-in glyph name for a character code, if any.
///
/// Codes up to 0x7E are served by the ASCII table, everything else by the
/// extended table (which yields `None` for codes it does not list).
fn code_to_glyph_name(&self, code: u32) -> Option<String> {
    match code {
        0..=0x7E => self.code_to_glyph_name_ascii(code),
        _ => self.code_to_glyph_name_extended(code),
    }
}
/// Returns the glyph name for a printable ASCII code (0x20..=0x7E), or
/// `None` for any other code.
///
/// The names follow the ASCII range of PDF WinAnsiEncoding, matching the
/// extended table used for codes above 0x7E. In particular 0x27 is
/// `quotesingle` (U+0027) and 0x60 is `grave`. Previously 0x27 used the
/// StandardEncoding name `quoteright`, which turned a plain apostrophe into
/// U+2019 while 0x60 was still treated the WinAnsi way.
fn code_to_glyph_name_ascii(&self, code: u32) -> Option<String> {
    let name = match code {
        // Letter glyph names are the letters themselves.
        0x41..=0x5A | 0x61..=0x7A => {
            return char::from_u32(code).map(|letter| letter.to_string());
        }
        0x20 => "space",
        0x21 => "exclam",
        0x22 => "quotedbl",
        0x23 => "numbersign",
        0x24 => "dollar",
        0x25 => "percent",
        0x26 => "ampersand",
        0x27 => "quotesingle",
        0x28 => "parenleft",
        0x29 => "parenright",
        0x2A => "asterisk",
        0x2B => "plus",
        0x2C => "comma",
        0x2D => "hyphen",
        0x2E => "period",
        0x2F => "slash",
        0x30 => "zero",
        0x31 => "one",
        0x32 => "two",
        0x33 => "three",
        0x34 => "four",
        0x35 => "five",
        0x36 => "six",
        0x37 => "seven",
        0x38 => "eight",
        0x39 => "nine",
        0x3A => "colon",
        0x3B => "semicolon",
        0x3C => "less",
        0x3D => "equal",
        0x3E => "greater",
        0x3F => "question",
        0x40 => "at",
        0x5B => "bracketleft",
        0x5C => "backslash",
        0x5D => "bracketright",
        0x5E => "asciicircum",
        0x5F => "underscore",
        0x60 => "grave",
        0x7B => "braceleft",
        0x7C => "bar",
        0x7D => "braceright",
        0x7E => "asciitilde",
        _ => return None,
    };
    Some(name.to_string())
}
/// Returns the glyph name for a code in 0x80..=0xFF, or `None` for codes
/// outside that range and for the unassigned slots 0x81, 0x8D, 0x8F, 0x90
/// and 0x9D.
///
/// The table follows PDF WinAnsiEncoding. Names are the standard glyph names
/// so that they resolve through the glyph list: the ligatures are `OE`, `oe`,
/// `AE`, `ae` (not `OEligature` etc.), and 0x9F is the capital `Ydieresis`
/// (U+0178) while 0xFF is the lowercase `ydieresis`.
pub fn code_to_glyph_name_extended(&self, code: u32) -> Option<String> {
    let name = match code {
        0x80 => "Euro",
        0x82 => "quotesinglbase",
        0x83 => "florin",
        0x84 => "quotedblbase",
        0x85 => "ellipsis",
        0x86 => "dagger",
        0x87 => "daggerdbl",
        0x88 => "circumflex",
        0x89 => "perthousand",
        0x8A => "Scaron",
        0x8B => "guilsinglleft",
        0x8C => "OE",
        0x8E => "Zcaron",
        0x91 => "quoteleft",
        0x92 => "quoteright",
        0x93 => "quotedblleft",
        0x94 => "quotedblright",
        0x95 => "bullet",
        0x96 => "endash",
        0x97 => "emdash",
        0x98 => "tilde",
        0x99 => "trademark",
        0x9A => "scaron",
        0x9B => "guilsinglright",
        0x9C => "oe",
        0x9E => "zcaron",
        0x9F => "Ydieresis",
        // 0xA0 (no-break space) and 0xAD (soft hyphen) reuse the plain names.
        0xA0 => "space",
        0xA1 => "exclamdown",
        0xA2 => "cent",
        0xA3 => "sterling",
        0xA4 => "currency",
        0xA5 => "yen",
        0xA6 => "brokenbar",
        0xA7 => "section",
        0xA8 => "dieresis",
        0xA9 => "copyright",
        0xAA => "ordfeminine",
        0xAB => "guillemotleft",
        0xAC => "logicalnot",
        0xAD => "hyphen",
        0xAE => "registered",
        0xAF => "macron",
        0xB0 => "degree",
        0xB1 => "plusminus",
        0xB2 => "twosuperior",
        0xB3 => "threesuperior",
        0xB4 => "acute",
        0xB5 => "mu",
        0xB6 => "paragraph",
        0xB7 => "periodcentered",
        0xB8 => "cedilla",
        0xB9 => "onesuperior",
        0xBA => "ordmasculine",
        0xBB => "guillemotright",
        0xBC => "onequarter",
        0xBD => "onehalf",
        0xBE => "threequarters",
        0xBF => "questiondown",
        0xC0 => "Agrave",
        0xC1 => "Aacute",
        0xC2 => "Acircumflex",
        0xC3 => "Atilde",
        0xC4 => "Adieresis",
        0xC5 => "Aring",
        0xC6 => "AE",
        0xC7 => "Ccedilla",
        0xC8 => "Egrave",
        0xC9 => "Eacute",
        0xCA => "Ecircumflex",
        0xCB => "Edieresis",
        0xCC => "Igrave",
        0xCD => "Iacute",
        0xCE => "Icircumflex",
        0xCF => "Idieresis",
        0xD0 => "Eth",
        0xD1 => "Ntilde",
        0xD2 => "Ograve",
        0xD3 => "Oacute",
        0xD4 => "Ocircumflex",
        0xD5 => "Otilde",
        0xD6 => "Odieresis",
        0xD7 => "multiply",
        0xD8 => "Oslash",
        0xD9 => "Ugrave",
        0xDA => "Uacute",
        0xDB => "Ucircumflex",
        0xDC => "Udieresis",
        0xDD => "Yacute",
        0xDE => "Thorn",
        0xDF => "germandbls",
        0xE0 => "agrave",
        0xE1 => "aacute",
        0xE2 => "acircumflex",
        0xE3 => "atilde",
        0xE4 => "adieresis",
        0xE5 => "aring",
        0xE6 => "ae",
        0xE7 => "ccedilla",
        0xE8 => "egrave",
        0xE9 => "eacute",
        0xEA => "ecircumflex",
        0xEB => "edieresis",
        0xEC => "igrave",
        0xED => "iacute",
        0xEE => "icircumflex",
        0xEF => "idieresis",
        0xF0 => "eth",
        0xF1 => "ntilde",
        0xF2 => "ograve",
        0xF3 => "oacute",
        0xF4 => "ocircumflex",
        0xF5 => "otilde",
        0xF6 => "odieresis",
        0xF7 => "divide",
        0xF8 => "oslash",
        0xF9 => "ugrave",
        0xFA => "uacute",
        0xFB => "ucircumflex",
        0xFC => "udieresis",
        0xFD => "yacute",
        0xFE => "thorn",
        0xFF => "ydieresis",
        _ => return None,
    };
    Some(name.to_string())
}
}
impl Default for CharacterMapper {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod internal_tests {
    use super::*;

    /// The string `map_character` returns when every lookup stage fails.
    const REPLACEMENT: &str = "\u{FFFD}";

    /// Builds a mapper whose only configured source is a predefined CMap
    /// with the given ordering.
    fn mapper_with_ordering(ordering: &str) -> CharacterMapper {
        let mut mapper = CharacterMapper::new();
        mapper.set_predefined_cmap(Some(PredefinedCMapConfig {
            ordering: ordering.to_string(),
        }));
        mapper
    }

    #[test]
    fn test_ascii_glyph_names() {
        let mapper = CharacterMapper::new();
        assert_eq!(mapper.code_to_glyph_name(0x20), Some("space".to_string()));
        assert_eq!(mapper.code_to_glyph_name(0x41), Some("A".to_string()));
        assert_eq!(mapper.code_to_glyph_name(0x61), Some("a".to_string()));
    }

    #[test]
    fn test_glyph_name_lookup() {
        let mapper = CharacterMapper::new();
        assert!(mapper.map_glyph_name("A").is_some());
        assert!(mapper.map_glyph_name("space").is_some());
    }

    #[test]
    fn test_predefined_cmap_japan1_ascii() {
        let mapper = mapper_with_ordering("Japan1");
        let result = mapper.map_character(34);
        // `map_character` always returns `Some`, so `is_some()` alone proves
        // nothing; require that a real mapping (not the fallback) was found.
        assert!(result.is_some());
        assert_ne!(result.as_deref(), Some(REPLACEMENT));
    }

    #[test]
    fn test_predefined_cmap_japan1_hiragana() {
        let mut mapper = mapper_with_ordering("Japan1");
        mapper.set_tounicode_cmap(None);
        let result = mapper.map_character(843);
        assert_eq!(result, Some("\u{3042}".to_string()));
    }

    #[test]
    fn test_predefined_cmap_gb1_chinese() {
        let mapper = mapper_with_ordering("GB1");
        let result = mapper.map_character(4559);
        assert_eq!(result, Some("\u{4E2D}".to_string()));
    }

    #[test]
    fn test_predefined_cmap_korea1_hangul() {
        let mapper = mapper_with_ordering("Korea1");
        let result = mapper.map_character(1086);
        assert_eq!(result, Some("\u{AC00}".to_string()));
    }

    #[test]
    fn test_predefined_cmap_cns1() {
        let mapper = mapper_with_ordering("CNS1");
        let result = mapper.map_character(34);
        // See test_predefined_cmap_japan1_ascii: reject the U+FFFD fallback.
        assert!(result.is_some());
        assert_ne!(result.as_deref(), Some(REPLACEMENT));
    }

    #[test]
    fn test_predefined_cmap_identity() {
        let mapper = mapper_with_ordering("Identity");
        let result = mapper.map_character(0x4E2D);
        assert_eq!(result, Some("\u{4E2D}".to_string()));
    }

    #[test]
    fn test_predefined_cmap_unknown_ordering() {
        let mapper = mapper_with_ordering("UnknownCollection");
        let result = mapper.map_character(0x4E2D);
        assert_eq!(result, Some(REPLACEMENT.to_string()));
    }

    #[test]
    fn test_predefined_cmap_not_set() {
        let mapper = CharacterMapper::new();
        assert_eq!(mapper.map_character(0x41), Some("A".to_string()));
    }

    #[test]
    fn test_tounicode_overrides_predefined_cmap() {
        use super::super::cmap::parse_tounicode_cmap;
        let mut mapper = mapper_with_ordering("Japan1");
        let cmap_data = b"/CIDInit /ProcSet findresource begin\n\
            12 dict begin\n\
            begincmap\n\
            /CIDSystemInfo << /Registry (Adobe) /Ordering (UCS) /Supplement 0 >> def\n\
            /CMapName /Adobe-Identity-UCS def\n\
            1 beginbfchar\n\
            <034B> <0058>\n\
            endbfchar\n\
            endcmap\n\
            CMapName currentdict /CMap defineresource pop\n\
            end\n\
            end";
        // Fail loudly on a parse error instead of skipping the setup and
        // letting the assertion below fail for an unrelated-looking reason.
        let cmap = parse_tounicode_cmap(cmap_data)
            .unwrap_or_else(|_| panic!("ToUnicode CMap fixture failed to parse"));
        mapper.set_tounicode_cmap(Some(cmap));
        let result = mapper.map_character(843);
        assert_eq!(result, Some("X".to_string()));
    }

    #[test]
    fn test_predefined_cmap_config_clone() {
        let config = PredefinedCMapConfig {
            ordering: "Japan1".to_string(),
        };
        let cloned = config.clone();
        assert_eq!(cloned.ordering, "Japan1");
    }
}