use pdf_oxide::fonts::{CIDSystemInfo, Encoding, FontInfo};
use std::collections::HashMap;
/// Checks that a Type0 font declared with the `Identity-H` encoding name and
/// no `to_unicode` map resolves code `0x4E00` to the string "一".
#[test]
fn test_identity_h_cmap_simple_cid_to_unicode() {
    // Character collection attached to the font: Adobe / Identity / 0.
    let system_info = CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Identity"),
        supplement: 0,
    };

    let cid_font = FontInfo {
        // Font identity and the encoding name under test.
        base_font: String::from("ChineseFont+Identity-H"),
        subtype: String::from("Type0"),
        encoding: Encoding::Standard(String::from("Identity-H")),
        cid_system_info: Some(system_info),
        cid_font_type: Some(String::from("2")),
        // No glyph-index maps, ToUnicode map, or embedded font data are
        // supplied; presumably this leaves the encoding name as the only
        // source for the mapping (confirm against `char_to_unicode`).
        cid_to_gid_map: None,
        cff_gid_map: None,
        to_unicode: None,
        embedded_font_data: None,
        is_truetype_font: false,
        multi_char_map: HashMap::new(),
        // Width-related fields: unset apart from the two 1000.0 defaults.
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 1000.0,
        cid_widths: None,
        cid_default_width: 1000.0,
        // Descriptor-style values are left unset.
        font_weight: None,
        flags: None,
        stem_v: None,
        // Lazily-initialised cells start out empty.
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let lookup = cid_font.char_to_unicode(0x4E00);
    assert!(lookup.is_some(), "Identity-H should map CID 0x4E00");
    assert_eq!(
        lookup.unwrap(),
        "一",
        "CID 0x4E00 should map to Chinese character '一' (U+4E00)"
    );
}
/// Checks that a Type0 font declared with the `UniGB-UCS2-H` encoding name
/// and no `to_unicode` map resolves code `0x2EE5` to the string "\u{2EE5}".
#[test]
fn test_unigb_ucs2_h_cmap_simplified_chinese() {
    // Character collection attached to the font: Adobe / GB1 / 2.
    let system_info = CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("GB1"),
        supplement: 2,
    };

    let cid_font = FontInfo {
        // Font identity and the encoding name under test.
        base_font: String::from("STSong+UniGB-UCS2-H"),
        subtype: String::from("Type0"),
        encoding: Encoding::Standard(String::from("UniGB-UCS2-H")),
        cid_system_info: Some(system_info),
        cid_font_type: Some(String::from("2")),
        // No glyph-index maps, ToUnicode map, or embedded font data are
        // supplied; presumably this leaves the encoding name as the only
        // source for the mapping (confirm against `char_to_unicode`).
        cid_to_gid_map: None,
        cff_gid_map: None,
        to_unicode: None,
        embedded_font_data: None,
        is_truetype_font: false,
        multi_char_map: HashMap::new(),
        // Width-related fields: unset apart from the two 1000.0 defaults.
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 1000.0,
        cid_widths: None,
        cid_default_width: 1000.0,
        // Descriptor-style values are left unset.
        font_weight: None,
        flags: None,
        stem_v: None,
        // Lazily-initialised cells start out empty.
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let lookup = cid_font.char_to_unicode(0x2EE5);
    assert!(lookup.is_some(), "UniGB-UCS2-H should map char code 0x2EE5");
    assert_eq!(
        lookup.unwrap(),
        "\u{2EE5}",
        "char code 0x2EE5 should map to U+2EE5 (⻥) via CID-as-Unicode"
    );
}
/// Checks that a Type0 font declared with the `UniJIS-UCS2-H` encoding name
/// and no `to_unicode` map resolves code `0x3042` to the string "あ".
#[test]
fn test_unijis_ucs2_h_cmap_japanese() {
    // Character collection attached to the font: Adobe / Japan1 / 4.
    let system_info = CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Japan1"),
        supplement: 4,
    };

    let cid_font = FontInfo {
        // Font identity and the encoding name under test.
        base_font: String::from("HeiseiMin-W3+UniJIS-UCS2-H"),
        subtype: String::from("Type0"),
        encoding: Encoding::Standard(String::from("UniJIS-UCS2-H")),
        cid_system_info: Some(system_info),
        cid_font_type: Some(String::from("2")),
        // No glyph-index maps, ToUnicode map, or embedded font data are
        // supplied; presumably this leaves the encoding name as the only
        // source for the mapping (confirm against `char_to_unicode`).
        cid_to_gid_map: None,
        cff_gid_map: None,
        to_unicode: None,
        embedded_font_data: None,
        is_truetype_font: false,
        multi_char_map: HashMap::new(),
        // Width-related fields: unset apart from the two 1000.0 defaults.
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 1000.0,
        cid_widths: None,
        cid_default_width: 1000.0,
        // Descriptor-style values are left unset.
        font_weight: None,
        flags: None,
        stem_v: None,
        // Lazily-initialised cells start out empty.
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let lookup = cid_font.char_to_unicode(0x3042);
    assert!(lookup.is_some(), "UniJIS-UCS2-H should map CID 0x3042");
    assert_eq!(
        lookup.unwrap(),
        "あ",
        "CID 0x3042 should map to Japanese character 'あ' (U+3042)"
    );
}
/// Checks that a Type0 font declared with the `UniCNS-UCS2-H` encoding name
/// and no `to_unicode` map resolves code `0x4E00` to the string "一".
#[test]
fn test_unicns_ucs2_h_cmap_traditional_chinese() {
    // Character collection attached to the font: Adobe / CNS1 / 3.
    let system_info = CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("CNS1"),
        supplement: 3,
    };

    let cid_font = FontInfo {
        // Font identity and the encoding name under test.
        base_font: String::from("MingLiU+UniCNS-UCS2-H"),
        subtype: String::from("Type0"),
        encoding: Encoding::Standard(String::from("UniCNS-UCS2-H")),
        cid_system_info: Some(system_info),
        cid_font_type: Some(String::from("2")),
        // No glyph-index maps, ToUnicode map, or embedded font data are
        // supplied; presumably this leaves the encoding name as the only
        // source for the mapping (confirm against `char_to_unicode`).
        cid_to_gid_map: None,
        cff_gid_map: None,
        to_unicode: None,
        embedded_font_data: None,
        is_truetype_font: false,
        multi_char_map: HashMap::new(),
        // Width-related fields: unset apart from the two 1000.0 defaults.
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 1000.0,
        cid_widths: None,
        cid_default_width: 1000.0,
        // Descriptor-style values are left unset.
        font_weight: None,
        flags: None,
        stem_v: None,
        // Lazily-initialised cells start out empty.
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let lookup = cid_font.char_to_unicode(0x4E00);
    assert!(lookup.is_some(), "UniCNS-UCS2-H should map CID 0x4E00");
    assert_eq!(
        lookup.unwrap(),
        "一",
        "CID 0x4E00 should map to Chinese character '一' (U+4E00)"
    );
}
/// Checks that a Type0 font declared with the `UniKS-UCS2-H` encoding name
/// and no `to_unicode` map resolves code `0xAC00` to the string "가".
#[test]
fn test_uniks_ucs2_h_cmap_korean() {
    // Character collection attached to the font: Adobe / Korea1 / 1.
    let system_info = CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Korea1"),
        supplement: 1,
    };

    let cid_font = FontInfo {
        // Font identity and the encoding name under test.
        base_font: String::from("HYGoThic+UniKS-UCS2-H"),
        subtype: String::from("Type0"),
        encoding: Encoding::Standard(String::from("UniKS-UCS2-H")),
        cid_system_info: Some(system_info),
        cid_font_type: Some(String::from("2")),
        // No glyph-index maps, ToUnicode map, or embedded font data are
        // supplied; presumably this leaves the encoding name as the only
        // source for the mapping (confirm against `char_to_unicode`).
        cid_to_gid_map: None,
        cff_gid_map: None,
        to_unicode: None,
        embedded_font_data: None,
        is_truetype_font: false,
        multi_char_map: HashMap::new(),
        // Width-related fields: unset apart from the two 1000.0 defaults.
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 1000.0,
        cid_widths: None,
        cid_default_width: 1000.0,
        // Descriptor-style values are left unset.
        font_weight: None,
        flags: None,
        stem_v: None,
        // Lazily-initialised cells start out empty.
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let lookup = cid_font.char_to_unicode(0xAC00);
    assert!(lookup.is_some(), "UniKS-UCS2-H should map CID 0xAC00");
    assert_eq!(
        lookup.unwrap(),
        "가",
        "CID 0xAC00 should map to Korean character '가' (U+AC00)"
    );
}