use pdf_oxide::fonts::cmap::LazyCMap;
use pdf_oxide::fonts::FontInfo;
use std::collections::HashMap;
/// Checks that a Type0 font tagged with the Adobe-GB1 character collection
/// resolves character codes through the CMap supplied in `to_unicode`.
///
/// The sample CMap maps codes 0x0020..=0x0029 to the numerals 一 through 十.
#[test]
fn test_predefined_cmap_adobe_gb1_loading() {
    // The integer in front of `beginbfchar` must equal the number of
    // mappings listed before `endbfchar` (ten here).
    let predefined_gb1_sample = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (GB1)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0020> <4E00>
<0021> <4E8C>
<0022> <4E09>
<0023> <56DB>
<0024> <4E94>
<0025> <516D>
<0026> <4E03>
<0027> <516B>
<0028> <4E5D>
<0029> <5341>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    let font = FontInfo {
        base_font: "SimHei".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Standard("GB1-H".to_string()),
        to_unicode: Some(LazyCMap::new(predefined_gb1_sample)),
        font_weight: Some(400),
        flags: None,
        stem_v: Some(80.0),
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "GB1".to_string(),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    assert_eq!(font.char_to_unicode(0x0020), Some("一".to_string()));
    assert_eq!(font.char_to_unicode(0x0021), Some("二".to_string()));
    assert_eq!(font.char_to_unicode(0x0022), Some("三".to_string()));
    // Pins the corrected entry: 0x0027 is 八 (U+516B), not a repeat of 三.
    assert_eq!(font.char_to_unicode(0x0027), Some("八".to_string()));
}
/// Checks that a Type0 font tagged with the Adobe-CNS1 character collection
/// resolves character codes through the CMap supplied in `to_unicode`.
///
/// Only codes 0x0001..=0x0003 (一, 二, 三) are asserted; the remaining
/// mappings just pad out the sample.
#[test]
fn test_predefined_cmap_adobe_cns1_loading() {
    // The integer in front of `beginbfchar` must equal the number of
    // mappings listed before `endbfchar` (ten here).
    let predefined_cns1_sample = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (CNS1)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0001> <4E00>
<0002> <4E8C>
<0003> <4E09>
<0004> <56DB>
<0005> <4E94>
<0010> <6237>
<0020> <7B2C>
<0030> <4E00>
<0040> <4E8C>
<0050> <4E09>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    let font = FontInfo {
        base_font: "MingLiU".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Standard("CNS1-H".to_string()),
        to_unicode: Some(LazyCMap::new(predefined_cns1_sample)),
        font_weight: Some(400),
        flags: None,
        stem_v: Some(85.0),
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "CNS1".to_string(),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    assert_eq!(font.char_to_unicode(0x0001), Some("一".to_string()));
    assert_eq!(font.char_to_unicode(0x0002), Some("二".to_string()));
    assert_eq!(font.char_to_unicode(0x0003), Some("三".to_string()));
}
/// Checks that a Type0 font tagged with the Adobe-Japan1 character collection
/// resolves character codes through the CMap supplied in `to_unicode`.
///
/// The sample maps 0x0001..=0x0005 into the hiragana block (U+30xx) and
/// 0x0086..=0x008A into the katakana block; two codes of each are asserted.
#[test]
fn test_predefined_cmap_adobe_japan1_loading() {
    // The integer in front of `beginbfchar` must equal the number of
    // mappings listed before `endbfchar` (ten here).
    let predefined_japan1_sample = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Japan1)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0001> <3041>
<0002> <3043>
<0003> <3045>
<0004> <3047>
<0005> <3049>
<0086> <30A1>
<0087> <30A3>
<0088> <30A5>
<0089> <30A7>
<008A> <30A9>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    let font = FontInfo {
        base_font: "Hiragino".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Standard("Japan1-H".to_string()),
        to_unicode: Some(LazyCMap::new(predefined_japan1_sample)),
        font_weight: Some(400),
        flags: None,
        stem_v: Some(82.0),
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "Japan1".to_string(),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Hiragana: U+3041, U+3043.
    assert_eq!(font.char_to_unicode(0x0001), Some("ぁ".to_string()));
    assert_eq!(font.char_to_unicode(0x0002), Some("ぃ".to_string()));
    // Katakana: U+30A1, U+30A3.
    assert_eq!(font.char_to_unicode(0x0086), Some("ァ".to_string()));
    assert_eq!(font.char_to_unicode(0x0087), Some("ィ".to_string()));
}
/// Checks that a Type0 font tagged with the Adobe-Korea1 character collection
/// resolves character codes through the CMap supplied in `to_unicode`.
///
/// Codes 0x0001..=0x0005 map to the first five Hangul syllables
/// (U+AC00..=U+AC04); the first and last of those are asserted.
#[test]
fn test_predefined_cmap_adobe_korea1_loading() {
    // The integer in front of `beginbfchar` must equal the number of
    // mappings listed before `endbfchar` (ten here).
    let predefined_korea1_sample = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Korea1)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0001> <AC00>
<0002> <AC01>
<0003> <AC02>
<0004> <AC03>
<0005> <AC04>
<0020> <C911>
<0030> <B098>
<0040> <B2E4>
<0050> <B77C>
<0060> <B9C8>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    let font = FontInfo {
        base_font: "NotoSansKR".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Standard("Korea1-H".to_string()),
        to_unicode: Some(LazyCMap::new(predefined_korea1_sample)),
        font_weight: Some(400),
        flags: None,
        stem_v: Some(85.0),
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "Korea1".to_string(),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // U+AC00 and U+AC04 respectively.
    assert_eq!(font.char_to_unicode(0x0001), Some("가".to_string()));
    assert_eq!(font.char_to_unicode(0x0005), Some("간".to_string()));
}
/// Builds two fonts that differ only in `base_font` but carry byte-identical
/// CMap data, and checks that both resolve the same codes to the same text.
///
/// Each lookup is also compared against the expected character, so the test
/// cannot pass merely because both fonts return `None`.
///
/// NOTE(review): the test name mentions caching, but nothing here observes
/// whether the two fonts share a parsed CMap; only the lookup results are
/// compared.
#[test]
fn test_predefined_cmap_caching_same_identity() {
    let cmap_bytes = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (GB1)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0020> <4E00>
<0021> <4E8C>
<0022> <4E09>
<0023> <56DB>
<0024> <4E94>
<0025> <516D>
<0026> <4E03>
<0027> <4E5D>
<0028> <5341>
<0029> <5343>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    // Both fonts share every field except the name and their own copy of the
    // CMap bytes, so one builder keeps the two definitions in lock-step.
    let build_font = |base_font: &str, cmap: Vec<u8>| {
        FontInfo {
            base_font: base_font.to_string(),
            subtype: "Type0".to_string(),
            encoding: pdf_oxide::fonts::Encoding::Standard("GB1-H".to_string()),
            to_unicode: Some(LazyCMap::new(cmap)),
            font_weight: None,
            flags: None,
            stem_v: None,
            embedded_font_data: None,
            truetype_cmap: std::sync::OnceLock::new(),
            is_truetype_font: false,
            cid_to_gid_map: None,
            cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
                registry: "Adobe".to_string(),
                ordering: "GB1".to_string(),
                supplement: 0,
            }),
            cid_font_type: None,
            cid_widths: None,
            cid_default_width: 1000.0,
            widths: None,
            first_char: None,
            last_char: None,
            default_width: 500.0,
            cff_gid_map: None,
            multi_char_map: HashMap::new(),
            byte_to_char_table: std::sync::OnceLock::new(),
            byte_to_width_table: std::sync::OnceLock::new(),
        }
    };

    let font1 = build_font("SimHei-Page1", cmap_bytes.clone());
    // Last use of the buffer: move it instead of cloning again.
    let font2 = build_font("SimHei-Page2", cmap_bytes);

    for (code, expected) in [(0x0020, "一"), (0x0021, "二")] {
        let from_first = font1.char_to_unicode(code);
        // Anchor the value itself; equality alone would accept None == None.
        assert_eq!(from_first, Some(expected.to_string()));
        assert_eq!(from_first, font2.char_to_unicode(code));
    }
}
/// Checks Unicode lookup for a font whose encoding is named `GB1-V` and whose
/// CMap declares `/CMapName /V`.
///
/// The asserted codes are the CJK punctuation entries of the sample:
/// 0x0023 maps to U+FF0C (fullwidth comma) and 0x0024 to U+3001
/// (ideographic comma).
#[test]
fn test_predefined_cmap_vertical_writing_support() {
    let vertical_cmap = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (GB1)
/Supplement 0
>> def
/CMapName /V def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0020> <4E00>
<0021> <4E8C>
<0022> <4E09>
<0023> <FF0C>
<0024> <3001>
<0025> <3002>
<0026> <4E03>
<0027> <FF1A>
<0028> <FF1F>
<0029> <3006>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    let font = FontInfo {
        base_font: "SimHei-V".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Standard("GB1-V".to_string()),
        to_unicode: Some(LazyCMap::new(vertical_cmap)),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "GB1".to_string(),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // The CMap entry is <FF0C>, the fullwidth comma. It is written as an
    // escape so it cannot be mistaken for (or silently replaced by) the
    // ASCII comma U+002C.
    assert_eq!(font.char_to_unicode(0x0023), Some("\u{FF0C}".to_string()));
    // U+3001 IDEOGRAPHIC COMMA.
    assert_eq!(font.char_to_unicode(0x0024), Some("、".to_string()));
}
/// Checks Unicode lookup for a font whose CMap and `cid_system_info` both
/// declare Supplement 2 of the Adobe-GB1 collection.
///
/// The sample uses higher code values (0x1F8E.., 0x2000..) than the other
/// tests; only 0x2000 and 0x2001 are asserted.
#[test]
fn test_predefined_cmap_large_supplement_versions() {
    // bfchar destinations are UTF-16BE hex strings, so the two
    // supplementary-plane targets U+2F808 and U+2F809 are spelled as
    // surrogate pairs (D87E DC08 / D87E DC09) rather than as five-digit
    // code points, which would not be well-formed hex strings.
    let supplement_2_cmap = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (GB1)
/Supplement 2
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
5 beginbfchar
<1F8E> <D87EDC08>
<1F8F> <D87EDC09>
<2000> <4E00>
<2001> <4E8C>
<2002> <4E09>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    let font = FontInfo {
        base_font: "SimHei-Sup2".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Standard("GB1-H".to_string()),
        to_unicode: Some(LazyCMap::new(supplement_2_cmap)),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: "Adobe".to_string(),
            ordering: "GB1".to_string(),
            supplement: 2,
        }),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    assert_eq!(font.char_to_unicode(0x2000), Some("一".to_string()));
    assert_eq!(font.char_to_unicode(0x2001), Some("二".to_string()));
}
/// Looks up Unicode text on a Type0 Adobe-GB1 font that has no embedded font
/// program (`embedded_font_data` is `None`), leaving the CMap passed via
/// `to_unicode` as the only mapping data this test provides.
///
/// Codes 0x0001..=0x0003 are expected to come back as 一, 二 and 三.
#[test]
fn test_predefined_cmap_fallback_when_embedding_unavailable() {
    use std::sync::OnceLock;

    let cmap_text = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (GB1)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0001> <4E00>
<0002> <4E8C>
<0003> <4E09>
<0004> <56DB>
<0005> <4E94>
<0020> <FF08>
<0021> <FF09>
<0022> <A1A1>
<0023> <A1A2>
<0024> <A1A3>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let cmap_source = Vec::from(cmap_text.as_bytes());

    // Character collection the font claims to use.
    let collection = pdf_oxide::fonts::CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("GB1"),
        supplement: 0,
    };

    let font = FontInfo {
        // Identity of the font.
        base_font: String::from("SimHei"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("GB1-H")),
        to_unicode: Some(LazyCMap::new(cmap_source)),
        // No descriptor metrics and no embedded program.
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: OnceLock::new(),
        is_truetype_font: false,
        // CID-related data.
        cid_to_gid_map: None,
        cid_system_info: Some(collection),
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        // Simple-font width data (unused here).
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: OnceLock::new(),
        byte_to_width_table: OnceLock::new(),
    };

    // (character code, expected text) pairs, checked in order.
    let expectations = [(0x0001, "一"), (0x0002, "二"), (0x0003, "三")];
    for (code, text) in expectations {
        assert_eq!(font.char_to_unicode(code), Some(text.to_string()));
    }
}