use oxidize_pdf::fonts::cmap_utils::parse_cmap_format_12_filtered;
use std::collections::HashSet;
/// Serializes a cmap format 12 subtable into big-endian bytes for use as parser input.
///
/// Bytes written, in order: format (`u16` = 12), reserved (`u16` = 0), total length
/// (`u32`), language (`u32` = 0), group count (`u32`), then one 12-byte record per
/// entry of `groups`.
///
/// Each tuple in `groups` is `(start_char, end_char, start_glyph)`.
fn build_format12_subtable(groups: &[(u32, u32, u32)]) -> Vec<u8> {
    const HEADER_LEN: u32 = 16;
    const GROUP_LEN: u32 = 12;

    let group_count = groups.len() as u32;
    let byte_len = HEADER_LEN + group_count * GROUP_LEN;

    let mut bytes = Vec::with_capacity(byte_len as usize);

    // Fixed 16-byte header.
    bytes.extend_from_slice(&12u16.to_be_bytes()); // format
    bytes.extend_from_slice(&0u16.to_be_bytes()); // reserved
    bytes.extend_from_slice(&byte_len.to_be_bytes()); // length
    bytes.extend_from_slice(&0u32.to_be_bytes()); // language
    bytes.extend_from_slice(&group_count.to_be_bytes()); // number of groups

    // Group records: three big-endian u32 fields each.
    bytes.extend(
        groups
            .iter()
            .flat_map(|&(first, last, glyph)| [first, last, glyph])
            .flat_map(u32::to_be_bytes),
    );

    bytes
}
/// Parses a single ten-codepoint group with no filter and asserts that every codepoint
/// is present, with glyph IDs counting up from the group's start glyph.
#[test]
fn test_parse_cmap_format_12_no_filter_returns_all() {
    let (first_cp, last_cp) = (0x4E00u32, 0x4E09u32);
    let subtable = build_format12_subtable(&[(first_cp, last_cp, 1)]);

    let glyphs =
        parse_cmap_format_12_filtered(&subtable, 0, None).expect("parse should succeed");

    let expected_count = (last_cp - first_cp + 1) as usize;
    assert_eq!(
        glyphs.len(),
        expected_count,
        "No filter should return all {expected_count} codepoints"
    );

    // The group starts at glyph 1, so the last codepoint lands on 1 + (range width).
    let last_gid = (1 + (last_cp - first_cp)) as u16;
    assert_eq!(glyphs.get(&first_cp), Some(&1u16));
    assert_eq!(glyphs.get(&last_cp), Some(&last_gid));
}
/// Parses one group spanning U+4E00..=U+9FFF with no filter and asserts that the
/// resulting map has one entry per codepoint in that range.
#[test]
fn test_parse_cmap_format_12_no_filter_large_range() {
    let (first_cp, last_cp) = (0x4E00u32, 0x9FFFu32);
    let subtable = build_format12_subtable(&[(first_cp, last_cp, 1)]);

    let glyphs =
        parse_cmap_format_12_filtered(&subtable, 0, None).expect("parse should succeed");

    let expected = (last_cp - first_cp + 1) as usize;
    assert_eq!(
        glyphs.len(),
        expected,
        "Should map all {expected} CJK codepoints"
    );
}
/// Parses a large group with a three-codepoint filter and asserts that exactly those
/// three codepoints come back, each with the glyph ID implied by its offset in the group.
#[test]
fn test_parse_cmap_format_12_filters_by_used_chars() {
    let first_cp = 0x4E00u32;
    let last_cp = 0x9FFFu32;
    let subtable = build_format12_subtable(&[(first_cp, last_cp, 1)]);

    let wanted = [0x4E00u32, 0x4E2D, 0x9FFF];
    let filter: HashSet<u32> = HashSet::from(wanted);

    let glyphs = parse_cmap_format_12_filtered(&subtable, 0, Some(&filter))
        .expect("parse should succeed");

    assert_eq!(
        glyphs.len(),
        3,
        "Filtered parse should return exactly 3 entries, got {}",
        glyphs.len()
    );

    // Start glyph is 1, so each codepoint maps to 1 + its distance from the group start.
    for cp in wanted {
        let expected_gid = (1 + (cp - first_cp)) as u16;
        assert_eq!(glyphs.get(&cp), Some(&expected_gid));
    }
}
/// Uses a filter that mixes codepoints inside and outside the subtable's only group and
/// asserts that just the two in-range codepoints appear in the result.
#[test]
fn test_parse_cmap_format_12_filter_excludes_out_of_range() {
    let (first_cp, last_cp) = (0x4E00u32, 0x4E09u32);
    let subtable = build_format12_subtable(&[(first_cp, last_cp, 1)]);

    // 0x4E00 and 0x4E05 fall inside the group; the other three do not.
    let filter: HashSet<u32> = HashSet::from([0x4E00, 0x4E05, 0x0041, 0x00FF, 0x9FFF]);

    let glyphs = parse_cmap_format_12_filtered(&subtable, 0, Some(&filter))
        .expect("parse should succeed");

    assert_eq!(
        glyphs.len(),
        2,
        "Only in-range filtered chars should be returned, got {}",
        glyphs.len()
    );
    for cp in [0x4E00, 0x4E05] {
        assert!(glyphs.contains_key(&cp));
    }
}
/// Passes an empty (but present) filter set and asserts that the parsed map is empty.
#[test]
fn test_parse_cmap_format_12_empty_filter_returns_empty() {
    let subtable = build_format12_subtable(&[(0x4E00, 0x9FFF, 1)]);
    let no_chars = HashSet::<u32>::new();

    let glyphs = parse_cmap_format_12_filtered(&subtable, 0, Some(&no_chars))
        .expect("parse should succeed");

    assert!(
        glyphs.is_empty(),
        "Empty filter should produce empty map, got {} entries",
        glyphs.len()
    );
}
/// Parses a subtable with two groups (A–Z and three CJK codepoints) without a filter and
/// checks the total entry count plus the glyph IDs at both ends of each group.
#[test]
fn test_parse_cmap_format_12_multiple_groups_no_filter() {
    let subtable = build_format12_subtable(&[(0x41, 0x5A, 1), (0x4E00, 0x4E02, 27)]);

    let glyphs =
        parse_cmap_format_12_filtered(&subtable, 0, None).expect("parse should succeed");

    assert_eq!(glyphs.len(), 29, "26 ASCII + 3 CJK = 29 entries");

    // (codepoint, expected glyph ID) at the boundaries of both groups.
    let boundaries = [
        (0x41u32, 1u16),
        (0x5A, 26),
        (0x4E00, 27),
        (0x4E02, 29),
    ];
    for (cp, gid) in boundaries {
        assert_eq!(glyphs.get(&cp), Some(&gid));
    }
}
/// Parses a two-group subtable with a filter that picks codepoints from both groups and
/// asserts that exactly those codepoints come back with the expected glyph IDs.
#[test]
fn test_parse_cmap_format_12_multiple_groups_with_filter() {
    let subtable = build_format12_subtable(&[(0x41, 0x5A, 1), (0x4E00, 0x4E02, 27)]);
    let filter: HashSet<u32> = HashSet::from([0x41, 0x5A, 0x4E01]);

    let glyphs = parse_cmap_format_12_filtered(&subtable, 0, Some(&filter))
        .expect("parse should succeed");

    assert_eq!(glyphs.len(), 3);

    // (codepoint, expected glyph ID) for every filtered codepoint.
    for (cp, gid) in [(0x41u32, 1u16), (0x5A, 26), (0x4E01, 28)] {
        assert_eq!(glyphs.get(&cp), Some(&gid));
    }
}
/// Builds a group whose first codepoint would map to glyph 0 and asserts that this
/// codepoint is absent from the result while the following two are kept.
#[test]
fn test_parse_cmap_format_12_skips_notdef_glyph() {
    // Start glyph 0 means 0x41 -> 0, 0x42 -> 1, 0x43 -> 2.
    let subtable = build_format12_subtable(&[(0x41, 0x43, 0)]);

    let glyphs =
        parse_cmap_format_12_filtered(&subtable, 0, None).expect("parse should succeed");

    assert!(
        !glyphs.contains_key(&0x41),
        "GID 0 (.notdef) should be excluded"
    );
    for (cp, gid) in [(0x42u32, 1u16), (0x43, 2)] {
        assert_eq!(glyphs.get(&cp), Some(&gid));
    }
    assert_eq!(glyphs.len(), 2);
}
/// Asserts that input shorter than the 16-byte format 12 header is rejected with an error.
///
/// Two inputs are checked: ten zero bytes, and the first ten bytes of a well-formed
/// subtable produced by `build_format12_subtable`.
#[test]
fn test_parse_cmap_format_12_truncated_header_returns_error() {
    // Ten zero bytes: too short to contain the group-count field at bytes 12..16.
    let tiny = vec![0u8; 10];
    let result = parse_cmap_format_12_filtered(&tiny, 0, None);
    assert!(result.is_err(), "Truncated header should return an error");

    // The all-zero buffer also carries a format field of 0, so by itself it cannot
    // distinguish "too short" from "not format 12". Cutting a valid subtable down to
    // ten bytes keeps the format field at 12 and leaves length as the only problem.
    let mut cut_header = build_format12_subtable(&[(0x41, 0x43, 1)]);
    cut_header.truncate(10);
    let result = parse_cmap_format_12_filtered(&cut_header, 0, None);
    assert!(
        result.is_err(),
        "Header truncated from a valid format 12 subtable should return an error"
    );
}
/// Places a subtable after four bytes of zero padding and asserts that parsing with
/// `offset = 4` finds all three codepoints of its single group.
#[test]
fn test_parse_cmap_format_12_offset_nonzero() {
    let subtable = build_format12_subtable(&[(0x41u32, 0x43u32, 1u32)]);

    // Four leading padding bytes, so the subtable itself begins at byte 4.
    let padded: Vec<u8> = [&[0u8; 4][..], &subtable[..]].concat();

    let glyphs =
        parse_cmap_format_12_filtered(&padded, 4, None).expect("parse at offset 4 should succeed");

    assert_eq!(glyphs.len(), 3);
    assert_eq!(glyphs.get(&0x41), Some(&1u16));
}