use pdf_oxide::fonts::cmap::LazyCMap;
use pdf_oxide::fonts::FontInfo;
use std::collections::HashMap;
/// A ToUnicode CMap that carries `%` comments (standalone header lines and
/// trailing text after tokens) must still map code 0x0001 to "A" and code
/// 0x0002 to "B".
#[test]
fn test_cmap_with_comments_and_metadata() {
    // The `%` lines and the trailing `% ...` fragments are the comments under test.
    let cmap_source: &str = r#"
% CMap for test font
% Version 1.0
% Created 2025-12-10
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe) % Adobe registry
/Ordering (Identity) % Identity ordering
/Supplement 0 % Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF> % 2-byte codes
endcodespacerange
% Character mappings
2 beginbfchar
<0001> <0041> % A
<0002> <0042> % B
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let to_unicode = LazyCMap::new(cmap_source.as_bytes().to_vec());
    let system_info = pdf_oxide::fonts::CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Identity"),
        supplement: 0,
    };

    let font = FontInfo {
        // Font identity plus the ToUnicode program being exercised.
        base_font: String::from("CommentedFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(to_unicode),
        cid_system_info: Some(system_info),
        // Width-related fields.
        cid_default_width: 1000.0,
        default_width: 500.0,
        cid_widths: None,
        widths: None,
        first_char: None,
        last_char: None,
        // Remaining fields: nothing supplied, empty map, unset OnceLock cells.
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_font_type: None,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    assert_eq!(font.char_to_unicode(0x0001).as_deref(), Some("A"));
    assert_eq!(font.char_to_unicode(0x0002).as_deref(), Some("B"));
}
/// bfchar entries whose targets are control or escape-like characters
/// (U+000A, U+000D, U+0009, U+003F, U+005C) must not prevent lookups:
/// codes 0x0041 and 0x0042 are both required to resolve to *some* value.
#[test]
fn test_cmap_escape_sequences_in_codes() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
5 beginbfchar
<0041> <000A>
<0042> <000D>
<0043> <0009>
<0044> <003F>
<0045> <005C>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let cmap_bytes: Vec<u8> = cmap_source.as_bytes().to_vec();

    let font = FontInfo {
        base_font: String::from("EscapeFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(LazyCMap::new(cmap_bytes)),
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: String::from("Adobe"),
            ordering: String::from("Identity"),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_to_gid_map: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Only presence is checked; the exact string returned for LF / CR is not pinned.
    for code in [0x0041, 0x0042] {
        assert!(font.char_to_unicode(code).is_some());
    }
}
/// bfchar entries at the lowest (0x0000), middle (0x7FFF) and highest (0xFFFF)
/// codes of a two-byte codespace must all be resolvable.
#[test]
fn test_cmap_edge_case_bfchar_boundaries() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
3 beginbfchar
<0000> <0000>
<7FFF> <0041>
<FFFF> <0042>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let to_unicode = LazyCMap::new(cmap_source.as_bytes().to_vec());
    let system_info = pdf_oxide::fonts::CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Identity"),
        supplement: 0,
    };

    let font = FontInfo {
        base_font: String::from("BoundaryFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(to_unicode),
        cid_system_info: Some(system_info),
        cid_default_width: 1000.0,
        default_width: 500.0,
        cid_widths: None,
        widths: None,
        first_char: None,
        last_char: None,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_font_type: None,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Code 0x0000 is mapped to U+0000 in the CMap, yet the expected result is
    // U+FFFD. NOTE(review): presumably the library substitutes NUL - confirm.
    let expectations = [(0x0000, "\u{FFFD}"), (0x7FFF, "A"), (0xFFFF, "B")];
    for (code, expected) in expectations {
        assert_eq!(font.char_to_unicode(code), Some(expected.to_string()));
    }
}
/// A CMap whose bfchar targets include lone surrogate code units (D800, DFFF)
/// must still map the ordinary entry (0x0001 -> "A"), and looking up the
/// surrogate-mapped codes must complete without panicking.
#[test]
fn test_cmap_surrogate_pair_handling() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
3 beginbfchar
<0001> <0041>
<0002> <D800>
<0003> <DFFF>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let cmap_bytes: Vec<u8> = cmap_source.as_bytes().to_vec();

    let font = FontInfo {
        base_font: String::from("SurrogateFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(LazyCMap::new(cmap_bytes)),
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: String::from("Adobe"),
            ordering: String::from("Identity"),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_to_gid_map: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    assert_eq!(font.char_to_unicode(0x0001).as_deref(), Some("A"));

    // The results for the surrogate-mapped codes are deliberately discarded:
    // the test only requires that these calls return.
    for code in [0x0002, 0x0003] {
        let _ = font.char_to_unicode(code);
    }
}
/// Four bfchar entries spread widely across the two-byte codespace
/// (0x0001, 0x0100, 0x1000, 0xFFFF) must each resolve to their own letter,
/// and a lookup of a code with no entry (0x0050) must complete.
#[test]
fn test_cmap_large_sparse_mapping() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
4 beginbfchar
<0001> <0041>
<0100> <0042>
<1000> <0043>
<FFFF> <0044>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let to_unicode = LazyCMap::new(cmap_source.as_bytes().to_vec());
    let system_info = pdf_oxide::fonts::CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Identity"),
        supplement: 0,
    };

    let font = FontInfo {
        base_font: String::from("SparseFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(to_unicode),
        cid_system_info: Some(system_info),
        cid_default_width: 1000.0,
        default_width: 500.0,
        cid_widths: None,
        widths: None,
        first_char: None,
        last_char: None,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_font_type: None,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let expectations = [(0x0001, "A"), (0x0100, "B"), (0x1000, "C"), (0xFFFF, "D")];
    for (code, expected) in expectations {
        assert_eq!(font.char_to_unicode(code), Some(expected.to_string()));
    }

    // 0x0050 has no bfchar entry; whatever comes back is not asserted on.
    let _unmapped = font.char_to_unicode(0x0050);
}
/// Two bfrange entries where the second (0x0050..=0x0060) lies inside the
/// first (0x0001..=0x0100): a code covered only by the first range and a code
/// covered by both must each resolve to some value. Which range wins for the
/// overlapping code is not asserted.
#[test]
fn test_cmap_overlapping_bfrange_priority() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
2 beginbfrange
<0001> <0100> <0041>
<0050> <0060> <1000>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let cmap_bytes: Vec<u8> = cmap_source.as_bytes().to_vec();

    let font = FontInfo {
        base_font: String::from("OverlapFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(LazyCMap::new(cmap_bytes)),
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: String::from("Adobe"),
            ordering: String::from("Identity"),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_to_gid_map: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Start of the first range: covered by that range only.
    assert!(font.char_to_unicode(0x0001).is_some());
    // Start of the second range: covered by both ranges.
    assert!(font.char_to_unicode(0x0050).is_some());
}
/// A CMap that interleaves sections (bfchar, then bfrange, then bfchar again)
/// must honour all of them: both single-code entries and both ends of the
/// range are checked.
#[test]
fn test_cmap_mixed_bfchar_and_bfrange() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfchar
<0001> <2000>
endbfchar
1 beginbfrange
<0010> <0020> <0041>
endbfrange
1 beginbfchar
<0100> <3000>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let to_unicode = LazyCMap::new(cmap_source.as_bytes().to_vec());
    let system_info = pdf_oxide::fonts::CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Identity"),
        supplement: 0,
    };

    let font = FontInfo {
        base_font: String::from("MixedFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(to_unicode),
        cid_system_info: Some(system_info),
        cid_default_width: 1000.0,
        default_width: 500.0,
        cid_widths: None,
        widths: None,
        first_char: None,
        last_char: None,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_font_type: None,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    let expectations = [
        // The two bfchar sections.
        (0x0001, "\u{2000}"),
        (0x0100, "\u{3000}"),
        // First and last code of the bfrange: 0x0041 + (0x0020 - 0x0010) = 0x0051, i.e. "Q".
        (0x0010, "A"),
        (0x0020, "Q"),
    ];
    for (code, expected) in expectations {
        assert_eq!(font.char_to_unicode(code), Some(expected.to_string()));
    }
}
/// A single bfrange spanning codes 0x0000..=0x2710 must yield a value at its
/// first code, at a code in the middle (0x1388) and at its last code.
/// Only presence is checked; no timing is measured here.
#[test]
fn test_cmap_performance_large_sequential_mapping() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfrange
<0000> <2710> <0000>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let cmap_bytes: Vec<u8> = cmap_source.as_bytes().to_vec();

    let font = FontInfo {
        base_font: String::from("LargeSeqFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(LazyCMap::new(cmap_bytes)),
        cid_system_info: Some(pdf_oxide::fonts::CIDSystemInfo {
            registry: String::from("Adobe"),
            ordering: String::from("Identity"),
            supplement: 0,
        }),
        cid_font_type: None,
        cid_to_gid_map: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Range start, midpoint (0x1388 = 5000) and range end (0x2710 = 10000).
    for code in [0x0000, 0x1388, 0x2710] {
        assert!(font.char_to_unicode(code).is_some());
    }
}
/// A CMap with one bfchar entry (0x0001 -> "A") and a notdefrange covering the
/// whole codespace (0x0000..=0x0100 -> FFFD): the explicit entry must win for
/// 0x0001, and a code with no bfchar entry (0x0050) must still yield a value.
#[test]
fn test_cmap_notdefrange_with_gaps() {
    let cmap_source: &str = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /H def
/CMapType 2 def
1 begincodespacerange
<0000> <0100>
endcodespacerange
1 beginbfchar
<0001> <0041>
endbfchar
1 beginnotdefrange
<0000> <0100> <FFFD>
endnotdefrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let to_unicode = LazyCMap::new(cmap_source.as_bytes().to_vec());
    let system_info = pdf_oxide::fonts::CIDSystemInfo {
        registry: String::from("Adobe"),
        ordering: String::from("Identity"),
        supplement: 0,
    };

    let font = FontInfo {
        base_font: String::from("NotdefGapsFont"),
        subtype: String::from("Type0"),
        encoding: pdf_oxide::fonts::Encoding::Standard(String::from("Identity-H")),
        to_unicode: Some(to_unicode),
        cid_system_info: Some(system_info),
        cid_default_width: 1000.0,
        default_width: 500.0,
        cid_widths: None,
        widths: None,
        first_char: None,
        last_char: None,
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_font_type: None,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        truetype_cmap: std::sync::OnceLock::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    assert_eq!(font.char_to_unicode(0x0001).as_deref(), Some("A"));

    // The value returned for the gap code is not pinned, only its presence.
    let gap_lookup = font.char_to_unicode(0x0050);
    assert!(gap_lookup.is_some());
}