use pdf_oxide::fonts::cmap::LazyCMap;
use pdf_oxide::fonts::FontInfo;
use std::collections::HashMap;
/// Wraps a three-entry bfchar ToUnicode CMap in a `LazyCMap`, attaches it to a
/// Type0 `FontInfo`, and checks `char_to_unicode` results for repeated and
/// distinct codes.
///
/// Note: the test only observes lookup results; it does not inspect whether
/// the CMap was left unparsed when the font was constructed.
#[test]
fn test_lazy_cmap_not_parsed_on_creation() {
    // CMap text: codes 0x0041..=0x0043 each map to the same code point.
    let cmap_source = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
3 beginbfchar
<0041> <0041>
<0042> <0042>
<0043> <0043>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    // The lazy wrapper receives an owned copy of the raw CMap bytes.
    let to_unicode = Some(LazyCMap::new(Vec::from(cmap_source.as_bytes())));

    let font = FontInfo {
        base_font: String::from("TestFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // First lookup of 'A'.
    assert_eq!(
        font.char_to_unicode(0x0041).as_deref(),
        Some("A"),
        "First lookup should trigger lazy parse"
    );
    // Same code again must yield the same answer.
    assert_eq!(
        font.char_to_unicode(0x0041).as_deref(),
        Some("A"),
        "Cached result should be available"
    );
    // A different code from the same CMap.
    assert_eq!(
        font.char_to_unicode(0x0042).as_deref(),
        Some("B"),
        "Other characters should also work"
    );
}
/// Looks up codes from a two-entry bfchar ToUnicode CMap through
/// `FontInfo::char_to_unicode`: 'a' twice, then 'b'.
///
/// Note: every lookup here runs sequentially on the calling thread; this test
/// does not spawn threads or otherwise exercise concurrent access.
#[test]
fn test_lazy_cmap_thread_safe_parsing() {
    // CMap text: codes 0x0061 and 0x0062 each map to the same code point.
    let cmap_source = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
2 beginbfchar
<0061> <0061>
<0062> <0062>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    let raw_cmap = cmap_source.as_bytes().to_owned();

    let font = FontInfo {
        base_font: String::from("TestFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode: Some(LazyCMap::new(raw_cmap)),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Checked in order; the repeated 0x0061 entry re-queries an already-used code.
    for (code, expected) in [(0x0061, "a"), (0x0061, "a"), (0x0062, "b")] {
        assert_eq!(font.char_to_unicode(code), Some(expected.to_string()));
    }
}
/// Generates a ToUnicode CMap with 100 bfchar entries (0x0100..=0x0163, each
/// mapped to itself), wraps it in a `LazyCMap`, and checks that the first,
/// a middle, and the last entry resolve through `char_to_unicode`.
#[test]
fn test_lazy_cmap_large_map_deferred_parsing() {
    // Fixed text that precedes the generated bfchar section.
    let header = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
"#;
    // Fixed text that follows the generated bfchar section.
    let footer = r#"
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    // One "<code> <code>" line per entry, starting at 0x0100.
    let bfchar_lines: String = (0..100)
        .map(|offset| format!("<{:04X}> <{:04X}>\n", 0x0100 + offset, 0x0100 + offset))
        .collect();

    let cmap_source = [
        header,
        "100 beginbfchar\n",
        bfchar_lines.as_str(),
        "endbfchar\n",
        footer,
    ]
    .concat();

    let font = FontInfo {
        base_font: String::from("LargeMapFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode: Some(LazyCMap::new(cmap_source.into_bytes())),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // 0x0100 is the first generated entry.
    assert_eq!(
        font.char_to_unicode(0x0100).as_deref(),
        Some("\u{0100}"),
        "First entry accessible"
    );
    // 0x0150 lies inside the generated range.
    assert_eq!(
        font.char_to_unicode(0x0150).as_deref(),
        Some("\u{0150}"),
        "Middle entry accessible"
    );
    // 0x0163 == 0x0100 + 99, the last generated entry.
    assert_eq!(
        font.char_to_unicode(0x0163).as_deref(),
        Some("\u{0163}"),
        "Last entry accessible"
    );
}
/// Queries every digit code (0x30..=0x39) of a ten-entry bfchar ToUnicode CMap
/// twice through `FontInfo::char_to_unicode` and checks the exact character
/// returned on both passes.
///
/// The CMap maps each digit code to the same code point, so code 0x30 must
/// yield "0", 0x31 must yield "1", and so on. Asserting the concrete value
/// (rather than merely `is_some()`) makes the test fail if a lookup returns
/// the wrong character.
///
/// Note: no timing is measured here; only lookup results are checked.
#[test]
fn test_lazy_cmap_cache_hit_performance() {
    let cmap_data = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0030> <0030>
<0031> <0031>
<0032> <0032>
<0033> <0033>
<0034> <0034>
<0035> <0035>
<0036> <0036>
<0037> <0037>
<0038> <0038>
<0039> <0039>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let font = FontInfo {
        base_font: "DigitFont".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode: Some(LazyCMap::new(cmap_data.as_bytes().to_vec())),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // First pass: each digit code paired with the character it must map to.
    for (code, digit) in (0x30..=0x39).zip('0'..='9') {
        assert_eq!(
            font.char_to_unicode(code),
            Some(digit.to_string()),
            "Digit 0x{:02X} should be mapped",
            code
        );
    }

    // Second pass over the same codes: results must be unchanged.
    for (code, digit) in (0x30..=0x39).zip('0'..='9') {
        assert_eq!(
            font.char_to_unicode(code),
            Some(digit.to_string()),
            "Cached access should work for digit 0x{:02X}",
            code
        );
    }
}
/// Uses a ToUnicode CMap containing both a bfchar section (0x0041, 0x0042)
/// and a notdefrange (0x0000..=0x0040 -> U+FFFD), wrapped in a `LazyCMap`,
/// and checks one lookup from each section via `char_to_unicode`.
#[test]
fn test_lazy_cmap_with_notdefrange_lazy_parsing() {
    let cmap_source = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
2 beginbfchar
<0041> <0041>
<0042> <0042>
endbfchar
1 beginnotdefrange
<0000> <0040> <FFFD>
endnotdefrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    let to_unicode = Some(LazyCMap::new(Vec::from(cmap_source.as_bytes())));

    let font = FontInfo {
        base_font: String::from("TestFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // (code, expected text, failure message), evaluated in this order:
    // first a bfchar entry, then a code covered only by the notdefrange.
    let expectations = [
        (0x0041, "A", "Explicit mapping should work"),
        (0x0001, "\u{FFFD}", "Notdefrange should work with lazy loading"),
    ];
    for (code, expected, message) in expectations {
        assert_eq!(
            font.char_to_unicode(code),
            Some(expected.to_string()),
            "{}",
            message
        );
    }
}