use pdf_oxide::fonts::cmap::LazyCMap;
use pdf_oxide::fonts::FontInfo;
use std::collections::HashMap;
/// Checks that `parse_tounicode_cmap` accepts `bfchar` source codes written
/// with eight hex digits (four bytes) and exposes every mapping the fixture
/// declares.
///
/// The fixture contains three entries: a code whose upper three bytes are
/// zero, a code above `0xFFFF`, and a code near the top of the 32-bit
/// codespace.
#[test]
fn test_4byte_cmap_extended_range_parsing() {
    let cmap_4byte = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
3 beginbfchar
<00000041> <0041>
<00010041> <4E00>
<FFFFFFF0> <FFFD>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    // `expect` reports the parser's error value on failure instead of only
    // stating that the result was not `Ok`.
    let cmap = pdf_oxide::fonts::cmap::parse_tounicode_cmap(cmap_4byte.as_bytes())
        .expect("Should parse 4-byte CMap successfully");

    assert_eq!(
        cmap.get(&0x00000041),
        Some(&"A".to_string()),
        "4-byte code 0x00000041 should map to 'A'"
    );
    assert_eq!(
        cmap.get(&0x00010041),
        Some(&"\u{4E00}".to_string()),
        "4-byte code 0x00010041 should map to CJK character"
    );
    // The third fixture entry sits near the top of the codespace; check it as
    // well so that every declared mapping is covered.
    assert_eq!(
        cmap.get(&0xFFFFFFF0),
        Some(&"\u{FFFD}".to_string()),
        "4-byte code 0xFFFFFFF0 should map to U+FFFD"
    );
}
/// Parses a ToUnicode CMap whose `bfchar` codes include `0x00008000` and the
/// two highest 32-bit values, then checks that each code resolves to the
/// destination written next to it in the fixture.
#[test]
fn test_4byte_large_cid_values() {
    let large_cid_cmap = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
3 beginbfchar
<00008000> <8000>
<FFFFFFFF> <FFFD>
<FFFFFFFE> <FFFE>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let parsed = pdf_oxide::fonts::cmap::parse_tounicode_cmap(large_cid_cmap.as_bytes());
    let Ok(cmap) = parsed else {
        panic!("Should parse large CID CMaps");
    };

    // (source code, expected text) pairs, one per fixture entry.
    let expectations = [
        (0x00008000, "\u{8000}"),
        (0xFFFFFFFF, "\u{FFFD}"),
        (0xFFFFFFFE, "\u{FFFE}"),
    ];
    for (code, text) in expectations {
        assert_eq!(cmap.get(&code), Some(&String::from(text)));
    }
}
/// Verifies that a `bfrange` with four-byte endpoints yields one mapping per
/// source code in the range, with the destination advancing in step with the
/// code (`0x00010000` -> U+4E00 through `0x0001000F` -> U+4E0F).
#[test]
fn test_4byte_cmap_bfrange_extended() {
    let bfrange_4byte = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
1 beginbfrange
<00010000> <0001000F> <4E00>
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    // `expect` reports the parser's error value on failure instead of only
    // stating that the result was not `Ok`.
    let cmap = pdf_oxide::fonts::cmap::parse_tounicode_cmap(bfrange_4byte.as_bytes())
        .expect("Should parse 4-byte bfrange");

    // Walk the whole declared range rather than sampling three points, so a
    // gap in the middle or an off-by-one at either end is caught.
    let codes = 0x00010000..=0x0001000F;
    let destinations = '\u{4E00}'..='\u{4E0F}';
    for (code, destination) in codes.zip(destinations) {
        assert_eq!(
            cmap.get(&code),
            Some(&destination.to_string()),
            "4-byte code {code:#010X} should map to {destination:?}"
        );
    }
}
/// Parses a CMap that combines one explicit `bfchar` entry with a
/// `notdefrange` spanning `0x00000000..=0x0000FFFF`.
///
/// Two lookups are checked: the explicitly mapped code `0x00010000`, and
/// `0x00000001`, which lies inside the `notdefrange` and is expected to
/// resolve to U+FFFD.
#[test]
fn test_4byte_notdefrange_large_cids() {
    let notdef_4byte = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
1 beginbfchar
<00010000> <4E00>
endbfchar
1 beginnotdefrange
<00000000> <0000FFFF> <FFFD>
endnotdefrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let parsed = pdf_oxide::fonts::cmap::parse_tounicode_cmap(notdef_4byte.as_bytes());
    let Ok(cmap) = parsed else {
        panic!("Should parse 4-byte notdefrange");
    };

    let explicit_mapping = String::from("\u{4E00}");
    let notdef_fallback = String::from("\u{FFFD}");

    // Explicit `bfchar` entry.
    assert_eq!(cmap.get(&0x00010000), Some(&explicit_mapping));
    // Code covered only by the `notdefrange`.
    assert_eq!(cmap.get(&0x00000001), Some(&notdef_fallback));
}
/// Feeds the parser a CMap whose second `bfchar` entry, `<110000> <110000>`,
/// names a destination above U+10FFFF (the largest Unicode scalar value).
///
/// The test asserts only that parsing still succeeds and that the valid
/// entry for `0x00000041` is present; what the parser does with the
/// out-of-range entry is not checked here.
#[test]
fn test_4byte_extended_unicode_codepoints() {
    let invalid_cmap = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
2 beginbfchar
<00000041> <0041>
<110000> <110000>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let parsed = pdf_oxide::fonts::cmap::parse_tounicode_cmap(invalid_cmap.as_bytes());
    let Ok(cmap) = parsed else {
        panic!("Should parse CMap with invalid Unicode");
    };

    let latin_a = String::from("A");
    assert_eq!(cmap.get(&0x00000041), Some(&latin_a));
}
/// Exercises a four-byte ToUnicode CMap through `FontInfo::char_to_unicode`,
/// with the CMap handed over as raw bytes wrapped in a `LazyCMap`.
///
/// The first code is looked up a second time at the end; presumably this
/// covers the path taken once the lazy CMap has already been parsed
/// (confirm against `LazyCMap`).
#[test]
fn test_4byte_cmap_with_lazy_loading() {
    let cmap_source = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
2 beginbfchar
<00010000> <4E00>
<00020000> <4E01>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let to_unicode = LazyCMap::new(cmap_source.as_bytes().to_vec());

    let font = FontInfo {
        base_font: "4ByteCMapFont".to_string(),
        subtype: "Type0".to_string(),
        encoding: pdf_oxide::fonts::Encoding::Identity,
        to_unicode: Some(to_unicode),
        font_weight: None,
        flags: None,
        stem_v: None,
        embedded_font_data: None,
        truetype_cmap: std::sync::OnceLock::new(),
        is_truetype_font: false,
        cid_to_gid_map: None,
        cid_system_info: None,
        cid_font_type: None,
        cid_widths: None,
        cid_default_width: 1000.0,
        widths: None,
        first_char: None,
        last_char: None,
        default_width: 500.0,
        cff_gid_map: None,
        multi_char_map: HashMap::new(),
        byte_to_char_table: std::sync::OnceLock::new(),
        byte_to_width_table: std::sync::OnceLock::new(),
    };

    // Lookups in order; the last one repeats the first code on purpose.
    let lookups = [
        (0x00010000, "\u{4E00}"),
        (0x00020000, "\u{4E01}"),
        (0x00010000, "\u{4E00}"),
    ];
    for (code, text) in lookups {
        assert_eq!(font.char_to_unicode(code), Some(String::from(text)));
    }
}
/// Parses a CMap whose `bfchar` source codes are written with one, two and
/// four bytes (`<41>`, `<0042>`, `<00000043>`) and checks that each can be
/// looked up by its numeric value.
#[test]
fn test_4byte_mixed_width_codes() {
    let mixed_cmap = r#"
/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (Identity)
/Supplement 0
>> def
/CMapName /Identity-H def
/CMapType 2 def
1 begincodespacerange
<00000000> <FFFFFFFF>
endcodespacerange
3 beginbfchar
<41> <0041>
<0042> <0042>
<00000043> <0043>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
end
end
"#;
    let parsed = pdf_oxide::fonts::cmap::parse_tounicode_cmap(mixed_cmap.as_bytes());
    let Ok(cmap) = parsed else {
        panic!("Should parse mixed-width codes");
    };

    // One pair per fixture entry, in increasing width of the source code.
    let expectations = [(0x41, "A"), (0x42, "B"), (0x43, "C")];
    for (code, text) in expectations {
        assert_eq!(cmap.get(&code), Some(&String::from(text)));
    }
}