use pdf_oxide::fonts::cmap::LazyCMap;
use pdf_oxide::fonts::FontInfo;
use std::collections::HashMap;
/// Two separate `FontInfo` values built from byte-identical ToUnicode CMap
/// streams must each resolve character codes through that CMap.
///
/// NOTE(review): the test name talks about a cache hit, but nothing here
/// inspects a cache directly; only the lookup results are observed.
#[test]
fn test_cmap_cache_hit_same_font_multiple_references() {
    /// Builds a `Type0` / `Identity` font whose only populated optional field
    /// is the ToUnicode CMap wrapping `to_unicode_stream`.
    fn type0_font(name: &str, to_unicode_stream: Vec<u8>) -> FontInfo {
        FontInfo {
            base_font: name.to_string(),
            subtype: "Type0".to_string(),
            encoding: pdf_oxide::fonts::Encoding::Identity,
            to_unicode: Some(LazyCMap::new(to_unicode_stream)),
            font_weight: None,
            flags: None,
            stem_v: None,
            embedded_font_data: None,
            truetype_cmap: std::sync::OnceLock::new(),
            is_truetype_font: false,
            cid_to_gid_map: None,
            cid_system_info: None,
            cid_font_type: None,
            cid_widths: None,
            cid_default_width: 1000.0,
            widths: None,
            first_char: None,
            last_char: None,
            default_width: 500.0,
            cff_gid_map: None,
            multi_char_map: HashMap::new(),
            byte_to_char_table: std::sync::OnceLock::new(),
            byte_to_width_table: std::sync::OnceLock::new(),
        }
    }

    // ToUnicode stream mapping codes 0x0041..=0x0043 to U+0041..=U+0043.
    const SHARED_CMAP: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
3 beginbfchar
<0041> <0041>
<0042> <0042>
<0043> <0043>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    let stream = SHARED_CMAP.as_bytes().to_vec();
    let first = type0_font("Font1", stream.clone());
    let second = type0_font("Font2", stream);

    // Lookups alternate between the two fonts, in this exact order.
    let lookups = [
        (&first, 0x0041, "A"),
        (&second, 0x0041, "A"),
        (&first, 0x0042, "B"),
        (&second, 0x0043, "C"),
    ];
    for &(font, code, glyph) in &lookups {
        assert_eq!(font.char_to_unicode(code), Some(glyph.to_string()));
    }
}
/// Fonts carrying different ToUnicode CMaps must resolve the same character
/// code to different strings, and keep doing so on repeated lookups.
#[test]
fn test_cmap_cache_different_cmaps_separate_entries() {
    /// Builds a `Type0` / `Identity` font around the given ToUnicode stream;
    /// every other optional field is left empty.
    fn identity_type0(name: &str, to_unicode_stream: Vec<u8>) -> FontInfo {
        FontInfo {
            base_font: name.to_string(),
            subtype: "Type0".to_string(),
            encoding: pdf_oxide::fonts::Encoding::Identity,
            to_unicode: Some(LazyCMap::new(to_unicode_stream)),
            font_weight: None,
            flags: None,
            stem_v: None,
            embedded_font_data: None,
            truetype_cmap: std::sync::OnceLock::new(),
            is_truetype_font: false,
            cid_to_gid_map: None,
            cid_system_info: None,
            cid_font_type: None,
            cid_widths: None,
            cid_default_width: 1000.0,
            widths: None,
            first_char: None,
            last_char: None,
            default_width: 500.0,
            cff_gid_map: None,
            multi_char_map: HashMap::new(),
            byte_to_char_table: std::sync::OnceLock::new(),
            byte_to_width_table: std::sync::OnceLock::new(),
        }
    }

    // CMap "TestA": code 0x0001 -> U+0061 ("a").
    const CMAP_A: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /TestA def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfchar
<0001> <0061>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    // CMap "TestB": the same code 0x0001 -> U+0062 ("b").
    const CMAP_B: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /TestB def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfchar
<0001> <0062>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    let font_a = identity_type0("FontA", CMAP_A.as_bytes().to_vec());
    let font_b = identity_type0("FontB", CMAP_B.as_bytes().to_vec());

    // Two a-then-b rounds: the second round repeats the lookups after both
    // fonts have already been queried once.
    for _round in 0..2 {
        assert_eq!(font_a.char_to_unicode(0x0001), Some("a".to_string()));
        assert_eq!(font_b.char_to_unicode(0x0001), Some("b".to_string()));
    }
}
/// Simulates five pages, each with its own `FontInfo` built from the same
/// ToUnicode CMap bytes, and checks every digit code on every page.
///
/// The CMap maps codes 0x0030..=0x0039 to U+0030..=U+0039, so each lookup is
/// compared against the exact digit string rather than merely checked for
/// `Some(_)`; a lookup that returned the wrong character would otherwise pass.
///
/// NOTE(review): despite the name, no timing is measured here.
#[test]
fn test_cmap_cache_multi_page_document_performance() {
    let cmap_bytes = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
10 beginbfchar
<0030> <0030>
<0031> <0031>
<0032> <0032>
<0033> <0033>
<0034> <0034>
<0035> <0035>
<0036> <0036>
<0037> <0037>
<0038> <0038>
<0039> <0039>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#
    .as_bytes()
    .to_vec();

    // One font per page; each gets its own copy of the identical CMap bytes.
    let pages: Vec<FontInfo> = (1..=5)
        .map(|page_num| FontInfo {
            base_font: format!("PageFont{}", page_num),
            subtype: "Type0".to_string(),
            encoding: pdf_oxide::fonts::Encoding::Identity,
            to_unicode: Some(LazyCMap::new(cmap_bytes.clone())),
            font_weight: None,
            flags: None,
            stem_v: None,
            embedded_font_data: None,
            truetype_cmap: std::sync::OnceLock::new(),
            is_truetype_font: false,
            cid_to_gid_map: None,
            cid_system_info: None,
            cid_font_type: None,
            cid_widths: None,
            cid_default_width: 1000.0,
            widths: None,
            first_char: None,
            last_char: None,
            default_width: 500.0,
            cff_gid_map: None,
            multi_char_map: HashMap::new(),
            byte_to_char_table: std::sync::OnceLock::new(),
            byte_to_width_table: std::sync::OnceLock::new(),
        })
        .collect();

    for (page_idx, font) in pages.iter().enumerate() {
        // Pair each code with the digit the CMap assigns to it.
        for (code, digit) in (0x30..=0x39).zip('0'..='9') {
            assert_eq!(
                font.char_to_unicode(code),
                Some(digit.to_string()),
                "Page {} code 0x{:02X} should map",
                page_idx + 1,
                code
            );
        }
    }
}
/// Builds three fonts whose ToUnicode CMaps map the same code (0x0001) to
/// three different targets (U+4100, U+4101, U+4102) and checks that each font
/// resolves to its own target.
///
/// Asserting the exact target, rather than just `is_some()`, is what lets this
/// test notice a font being answered from another font's CMap. The lookups are
/// run twice so the second pass happens after every font has been queried.
///
/// NOTE(review): the name mentions LRU eviction, but nothing visible here
/// establishes a cache capacity; whether three fonts trigger an eviction
/// should be confirmed against the cache implementation.
#[test]
fn test_cmap_cache_lru_eviction_policy() {
    // First Unicode target; font `id` maps code 0x0001 to TARGET_BASE + id.
    const TARGET_BASE: u32 = 0x4100;

    let cmap_template = |id: u8| {
        format!(
            r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /CMap{} def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfchar
<0001> <{:04X}>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#,
            id,
            TARGET_BASE + u32::from(id)
        )
        .into_bytes()
    };

    let fonts: Vec<FontInfo> = (0u8..3)
        .map(|id| FontInfo {
            base_font: format!("Font{}", id),
            subtype: "Type0".to_string(),
            encoding: pdf_oxide::fonts::Encoding::Identity,
            to_unicode: Some(LazyCMap::new(cmap_template(id))),
            font_weight: None,
            flags: None,
            stem_v: None,
            embedded_font_data: None,
            truetype_cmap: std::sync::OnceLock::new(),
            is_truetype_font: false,
            cid_to_gid_map: None,
            cid_system_info: None,
            cid_font_type: None,
            cid_widths: None,
            cid_default_width: 1000.0,
            widths: None,
            first_char: None,
            last_char: None,
            default_width: 500.0,
            cff_gid_map: None,
            multi_char_map: HashMap::new(),
            byte_to_char_table: std::sync::OnceLock::new(),
            byte_to_width_table: std::sync::OnceLock::new(),
        })
        .collect();

    // Second pass re-queries every font after all of them have been used once.
    for _pass in 0..2 {
        for (id, font) in (0u32..).zip(&fonts) {
            let expected = char::from_u32(TARGET_BASE + id).map(String::from);
            assert_eq!(
                font.char_to_unicode(0x0001),
                expected,
                "Font {} should map",
                id
            );
        }
    }
}
/// Single-font smoke test: a ToUnicode CMap with one entry (0x0041 -> U+0041)
/// must make code 0x0041 resolve to "A".
///
/// NOTE(review): the name mentions statistics and diagnostics, but no such
/// API is exercised here; only one lookup result is checked.
#[test]
fn test_cmap_cache_statistics_and_diagnostics() {
    // ToUnicode stream containing a single bfchar entry.
    const SINGLE_ENTRY_CMAP: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfchar
<0041> <0041>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    let to_unicode = LazyCMap::new(SINGLE_ENTRY_CMAP.as_bytes().to_vec());
    let font = FontInfo {
        base_font: String::from("TestFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode: Some(to_unicode),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let mapped = font.char_to_unicode(0x0041);
    assert_eq!(mapped, Some(String::from("A")));
}
/// Shares one `FontInfo` across four threads via `Arc`; every thread performs
/// the same two lookups (0x0041 -> "A", 0x0042 -> "B"). The test fails if any
/// thread panics, either from a failed assertion or during the lookup itself.
#[test]
fn test_cmap_cache_concurrent_access() {
    use std::sync::Arc;
    use std::thread;

    // ToUnicode stream with two bfchar entries.
    const TWO_ENTRY_CMAP: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
2 beginbfchar
<0041> <0041>
<0042> <0042>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;

    let shared_font = Arc::new(FontInfo {
        base_font: String::from("ThreadTestFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode: Some(LazyCMap::new(TWO_ENTRY_CMAP.as_bytes().to_vec())),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    });

    // `collect` is eager, so all four workers are spawned before any is joined.
    let workers: Vec<thread::JoinHandle<()>> = (0..4)
        .map(|_| {
            let font = Arc::clone(&shared_font);
            thread::spawn(move || {
                assert_eq!(font.char_to_unicode(0x0041), Some("A".to_string()));
                assert_eq!(font.char_to_unicode(0x0042), Some("B".to_string()));
            })
        })
        .collect();

    // A panic inside a worker surfaces here as a failed join.
    for worker in workers {
        worker.join().expect("Thread should complete successfully");
    }
}