use oxidize_pdf::fonts::TtfParser;
/// Builds a minimal TrueType font whose `cmap` holds a single format 12
/// subtable (platform 3, encoding 10) describing `groups`.
///
/// Each element of `groups` is `(start_char_code, end_char_code, start_glyph_id)`
/// and is emitted unchanged as one 12-byte group record.
///
/// The returned bytes consist of an sfnt header (version `0x00010000`), a table
/// directory whose checksums are all zero, and five tables in this order:
/// `cmap`, `head`, `hhea`, `hmtx`, `name`. Every table starts on a 4-byte
/// boundary; gaps are filled with zero bytes.
///
/// The glyph count stored at the end of `hhea` is one more than the highest
/// glyph id any group reaches. That id is narrowed to `u16` with `as`, so
/// values above `0xFFFF` wrap instead of saturating.
fn build_cmap_format12_font(groups: &[(u32, u32, u32)]) -> Vec<u8> {
    /// Appends every value to `buf` in big-endian byte order.
    fn push_u16s(buf: &mut Vec<u8>, values: &[u16]) {
        for value in values {
            buf.extend_from_slice(&value.to_be_bytes());
        }
    }

    /// Appends every value to `buf` in big-endian byte order.
    fn push_u32s(buf: &mut Vec<u8>, values: &[u32]) {
        for value in values {
            buf.extend_from_slice(&value.to_be_bytes());
        }
    }

    // Last glyph id of each group is `start_gid + (end - start)`; the maximum
    // over all groups (0 when there are none) is truncated to 16 bits.
    let highest_gid = groups
        .iter()
        .map(|&(first, last, first_gid)| first_gid + (last - first))
        .max()
        .unwrap_or(0) as u16;
    let glyph_count = highest_gid + 1;

    // head: 54 zeroed bytes with version 1.0, 1000 units per em and a 1 in the
    // 16-bit field at offset 50.
    let mut head = vec![0u8; 54];
    head[0..4].copy_from_slice(&0x0001_0000u32.to_be_bytes());
    head[18..20].copy_from_slice(&1000u16.to_be_bytes());
    head[50..52].copy_from_slice(&1u16.to_be_bytes());

    // hhea: 36 zeroed bytes with major version 1, ascent 800, descent -200 and
    // the glyph count in the final 16-bit field.
    let mut hhea = vec![0u8; 36];
    hhea[0..2].copy_from_slice(&1u16.to_be_bytes());
    hhea[4..6].copy_from_slice(&800i16.to_be_bytes());
    hhea[6..8].copy_from_slice(&(-200i16).to_be_bytes());
    hhea[34..36].copy_from_slice(&glyph_count.to_be_bytes());

    // hmtx: one (advance 600, side bearing 0) record per glyph.
    let hmtx = [600u16.to_be_bytes(), 0i16.to_be_bytes()]
        .concat()
        .repeat(glyph_count as usize);

    // name: format 0 with a single record (platform 1, encoding 0, language 0,
    // name id 6) pointing at the string stored right after the 18-byte header.
    let font_name = b"TestFont";
    let mut name_table = Vec::with_capacity(18 + font_name.len());
    push_u16s(&mut name_table, &[0, 1, 18]);
    push_u16s(&mut name_table, &[1, 0, 0, 6, font_name.len() as u16, 0]);
    name_table.extend_from_slice(font_name);

    // cmap: version 0, one encoding record (platform 3, encoding 10) whose
    // subtable begins at byte 12, followed by the format 12 subtable itself.
    let group_count = groups.len() as u32;
    let subtable_length = 16 + group_count * 12;
    let mut cmap = Vec::new();
    push_u16s(&mut cmap, &[0, 1, 3, 10]);
    push_u32s(&mut cmap, &[12]);
    push_u16s(&mut cmap, &[12, 0]); // format, reserved
    push_u32s(&mut cmap, &[subtable_length, 0, group_count]); // length, language, group count
    for &(first, last, first_gid) in groups {
        push_u32s(&mut cmap, &[first, last, first_gid]);
    }

    let tables = [
        (b"cmap", cmap.as_slice()),
        (b"head", head.as_slice()),
        (b"hhea", hhea.as_slice()),
        (b"hmtx", hmtx.as_slice()),
        (b"name", name_table.as_slice()),
    ];

    // sfnt header: version, table count, then three zeroed 16-bit fields.
    let mut font = Vec::new();
    push_u32s(&mut font, &[0x0001_0000]);
    push_u16s(&mut font, &[tables.len() as u16, 0, 0, 0]);

    // Assign each table a 4-byte-aligned offset after the 16-byte-per-entry directory.
    let mut next_free = 12 + tables.len() * 16;
    let placements: Vec<(u32, u32)> = tables
        .iter()
        .map(|entry| {
            next_free += (4 - next_free % 4) % 4;
            let placed = (next_free as u32, entry.1.len() as u32);
            next_free += entry.1.len();
            placed
        })
        .collect();

    // Directory records: tag, checksum (always 0), offset, length.
    for (entry, placed) in tables.iter().zip(&placements) {
        font.extend_from_slice(entry.0);
        push_u32s(&mut font, &[0, placed.0, placed.1]);
    }

    // Table payloads, zero-padded up to their assigned offsets.
    for (entry, placed) in tables.iter().zip(&placements) {
        let offset = placed.0 as usize;
        if font.len() < offset {
            font.resize(offset, 0);
        }
        font.extend_from_slice(entry.1);
    }

    font
}
/// Description of one segment of a cmap format 4 subtable, used as input to
/// `build_cmap_format4_font`. The builder appends the terminating `0xFFFF`
/// segment itself, so callers list only the real segments.
struct Format4Segment {
/// First character code of the segment; written to the start-code array.
start_code: u16,
/// Last character code of the segment; written to the end-code array.
end_code: u16,
/// Signed delta written to the segment's id-delta entry.
id_delta: i16,
/// Glyph ids appended to the subtable's trailing glyph id array. When this is
/// empty the segment's id-range-offset is written as 0; otherwise the offset
/// points at the first of these entries.
glyph_ids: Vec<u16>,
}
/// Builds a minimal TrueType font whose `cmap` holds a single format 4
/// subtable (platform 3, encoding 1) assembled from `segments`.
///
/// A terminating segment (`start = end = 0xFFFF`, delta 1, range offset 0) is
/// appended after the caller's segments. For a segment with a non-empty
/// `glyph_ids` list, the id-range-offset is the byte distance from that
/// segment's own offset slot to its first entry in the trailing glyph id
/// array; segments with no glyph ids get an offset of 0. The search-range,
/// entry-selector and range-shift header fields are written as 0.
///
/// `max_gid` is the highest glyph id the font should claim; `max_gid + 1` is
/// stored in the last 16-bit field of `hhea` and determines the number of
/// `hmtx` records.
///
/// The returned bytes consist of an sfnt header (version `0x00010000`), a table
/// directory whose checksums are all zero, and five tables in this order:
/// `cmap`, `head`, `hhea`, `hmtx`, `name`, each starting on a 4-byte boundary.
fn build_cmap_format4_font(segments: &[Format4Segment], max_gid: u16) -> Vec<u8> {
    /// Appends every value to `buf` in big-endian byte order.
    fn push_u16s(buf: &mut Vec<u8>, values: &[u16]) {
        for value in values {
            buf.extend_from_slice(&value.to_be_bytes());
        }
    }

    /// Appends every value to `buf` in big-endian byte order.
    fn push_u32s(buf: &mut Vec<u8>, values: &[u32]) {
        for value in values {
            buf.extend_from_slice(&value.to_be_bytes());
        }
    }

    let glyph_count = max_gid + 1;

    // head: 54 zeroed bytes with major version 1, 1000 units per em and a 1 in
    // the 16-bit field at offset 50.
    let mut head = vec![0u8; 54];
    head[0..2].copy_from_slice(&1u16.to_be_bytes());
    head[18..20].copy_from_slice(&1000u16.to_be_bytes());
    head[50..52].copy_from_slice(&1u16.to_be_bytes());

    // hhea: 36 zeroed bytes with major version 1, ascent 800, descent -200 and
    // the glyph count in the final 16-bit field.
    let mut hhea = vec![0u8; 36];
    hhea[0..2].copy_from_slice(&1u16.to_be_bytes());
    hhea[4..6].copy_from_slice(&800i16.to_be_bytes());
    hhea[6..8].copy_from_slice(&(-200i16).to_be_bytes());
    hhea[34..36].copy_from_slice(&glyph_count.to_be_bytes());

    // hmtx: one (advance 600, side bearing 0) record per glyph.
    let hmtx = [600u16.to_be_bytes(), 0i16.to_be_bytes()]
        .concat()
        .repeat(glyph_count as usize);

    // name: format 0 with a single record (platform 1, encoding 0, language 0,
    // name id 6) pointing at the string stored right after the 18-byte header.
    let font_name = b"TestFont";
    let mut name_table = Vec::with_capacity(18 + font_name.len());
    push_u16s(&mut name_table, &[0, 1, 18]);
    push_u16s(&mut name_table, &[1, 0, 0, 6, font_name.len() as u16, 0]);
    name_table.extend_from_slice(font_name);

    // Subtable geometry, in bytes from the start of the subtable: a 14-byte
    // header, the end codes, a 2-byte pad, the start codes and the deltas come
    // before the id-range-offset array; the glyph id array follows it.
    let seg_count = segments.len() + 1;
    let range_offsets_start = 14 + seg_count * 2 + 2 + seg_count * 2 + seg_count * 2;
    let glyph_ids_start = range_offsets_start + seg_count * 2;

    let mut glyph_id_array: Vec<u16> = Vec::new();
    let mut id_range_offsets: Vec<u16> = Vec::with_capacity(seg_count);
    for (index, segment) in segments.iter().enumerate() {
        if segment.glyph_ids.is_empty() {
            id_range_offsets.push(0);
            continue;
        }
        // Distance from this segment's offset slot to where its glyph ids will land.
        let slot_pos = range_offsets_start + index * 2;
        let target_pos = glyph_ids_start + glyph_id_array.len() * 2;
        id_range_offsets.push((target_pos - slot_pos) as u16);
        glyph_id_array.extend_from_slice(&segment.glyph_ids);
    }
    id_range_offsets.push(0); // terminating segment

    let subtable_len = glyph_ids_start + glyph_id_array.len() * 2;

    // Every field of a format 4 subtable is 16 bits wide, so collect it as words.
    let mut words: Vec<u16> = vec![
        4,                      // format
        subtable_len as u16,    // length
        0,                      // language
        (seg_count * 2) as u16, // segCountX2
        0,                      // searchRange
        0,                      // entrySelector
        0,                      // rangeShift
    ];
    words.extend(segments.iter().map(|s| s.end_code));
    words.push(0xFFFF);
    words.push(0); // reserved pad between end codes and start codes
    words.extend(segments.iter().map(|s| s.start_code));
    words.push(0xFFFF);
    words.extend(segments.iter().map(|s| s.id_delta as u16));
    words.push(1);
    words.extend_from_slice(&id_range_offsets);
    words.extend_from_slice(&glyph_id_array);

    // cmap: version 0, one encoding record (platform 3, encoding 1) whose
    // subtable begins at byte 12, followed by the subtable words.
    let mut cmap = Vec::with_capacity(12 + words.len() * 2);
    push_u16s(&mut cmap, &[0, 1, 3, 1]);
    push_u32s(&mut cmap, &[12]);
    push_u16s(&mut cmap, &words);

    let tables = [
        (b"cmap", cmap.as_slice()),
        (b"head", head.as_slice()),
        (b"hhea", hhea.as_slice()),
        (b"hmtx", hmtx.as_slice()),
        (b"name", name_table.as_slice()),
    ];

    // sfnt header: version, table count, then three zeroed 16-bit fields.
    let mut font = Vec::new();
    push_u32s(&mut font, &[0x0001_0000]);
    push_u16s(&mut font, &[tables.len() as u16, 0, 0, 0]);

    // Assign each table a 4-byte-aligned offset after the 16-byte-per-entry directory.
    let mut next_free = 12 + tables.len() * 16;
    let placements: Vec<(u32, u32)> = tables
        .iter()
        .map(|entry| {
            next_free += (4 - next_free % 4) % 4;
            let placed = (next_free as u32, entry.1.len() as u32);
            next_free += entry.1.len();
            placed
        })
        .collect();

    // Directory records: tag, checksum (always 0), offset, length.
    for (entry, placed) in tables.iter().zip(&placements) {
        font.extend_from_slice(entry.0);
        push_u32s(&mut font, &[0, placed.0, placed.1]);
    }

    // Table payloads, zero-padded up to their assigned offsets.
    for (entry, placed) in tables.iter().zip(&placements) {
        let offset = placed.0 as usize;
        if font.len() < offset {
            font.resize(offset, 0);
        }
        font.extend_from_slice(entry.1);
    }

    font
}
/// A format 4 segment built without glyph ids (so its id-range-offset is 0)
/// must map each code in `'A'..='C'` to `code + id_delta`, and leave the code
/// just past the segment unmapped.
#[test]
fn test_cmap_format4_id_range_offset_zero() {
    let delta_only = [Format4Segment {
        start_code: 0x41,
        end_code: 0x43,
        id_delta: 4,
        glyph_ids: Vec::new(),
    }];
    let font_bytes = build_cmap_format4_font(&delta_only, 80);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    let expectations = [
        ('A', Some(0x41 + 4)),
        ('B', Some(0x42 + 4)),
        ('C', Some(0x43 + 4)),
        ('D', None),
    ];
    for (ch, expected_glyph) in expectations {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}
/// A single-character format 4 segment that carries an explicit glyph id (and
/// a zero delta) must resolve `'A'` to that id and leave `'B'` unmapped.
#[test]
fn test_cmap_format4_id_range_offset_nonzero() {
    let indexed = [Format4Segment {
        start_code: 0x41,
        end_code: 0x41,
        id_delta: 0,
        glyph_ids: vec![5],
    }];
    let font_bytes = build_cmap_format4_font(&indexed, 10);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    for (ch, expected_glyph) in [('A', Some(5)), ('B', None)] {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}
/// When a format 4 segment supplies both explicit glyph ids and a non-zero
/// delta, the expected result is the listed id plus the delta
/// (3 + 10 and 7 + 10 here); the code after the segment stays unmapped.
#[test]
fn test_cmap_format4_id_range_offset_with_delta() {
    let indexed_with_delta = [Format4Segment {
        start_code: 0x41,
        end_code: 0x42,
        id_delta: 10,
        glyph_ids: vec![3, 7],
    }];
    let font_bytes = build_cmap_format4_font(&indexed_with_delta, 30);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    for (ch, expected_glyph) in [('A', Some(13)), ('B', Some(17)), ('C', None)] {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}
/// One format 12 group covering U+4E00..=U+4E02 starting at glyph 1 must yield
/// consecutive glyph ids, and a character outside the group must be unmapped.
#[test]
fn test_cmap_format12_basic_cjk_range() {
    let cjk_group = [(0x4E00, 0x4E02, 1)];
    let font_bytes = build_cmap_format12_font(&cjk_group);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    let expectations = [
        ('\u{4E00}', Some(1)),
        ('\u{4E01}', Some(2)),
        ('\u{4E02}', Some(3)),
        ('!', None),
    ];
    for (ch, expected_glyph) in expectations {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}
/// Two disjoint format 12 groups (a Latin pair and a single CJK code point)
/// must both be present in the extracted mapping.
#[test]
fn test_cmap_format12_multiple_groups() {
    let latin_and_cjk = [(0x0041, 0x0042, 1), (0x4E00, 0x4E00, 3)];
    let font_bytes = build_cmap_format12_font(&latin_and_cjk);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    for (ch, expected_glyph) in [('A', Some(1)), ('B', Some(2)), ('\u{4E00}', Some(3))] {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}
/// A format 12 group that points `'A'` at glyph 0 must not produce a mapping
/// entry for `'A'`.
#[test]
fn test_cmap_format12_glyph_zero_not_mapped() {
    let notdef_group = [(0x0041, 0x0041, 0)];
    let font_bytes = build_cmap_format12_font(&notdef_group);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    let looked_up = glyph_map.char_to_glyph('A');
    assert_eq!(looked_up, None);
}
/// A format 12 subtable with zero groups must still parse, and lookups must
/// come back empty.
#[test]
fn test_cmap_format12_empty_groups() {
    let font_bytes = build_cmap_format12_font(&[]);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    let looked_up = glyph_map.char_to_glyph('A');
    assert_eq!(looked_up, None);
}
/// A format 12 group for U+20000, a code point above U+FFFF, must be
/// mapped to its glyph.
#[test]
fn test_cmap_format12_supplementary_plane() {
    let plane2_group = [(0x20000, 0x20000, 1)];
    let font_bytes = build_cmap_format12_font(&plane2_group);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    let looked_up = glyph_map.char_to_glyph('\u{20000}');
    assert_eq!(looked_up, Some(1));
}
/// The group starts at glyph 0xFFFF, so only its first character has a glyph
/// id that fits in 16 bits. `'A'` must map to 0xFFFF while `'B'` and `'C'`
/// (ids 0x10000 and 0x10001) must be absent from the mapping.
#[test]
fn test_cmap_format12_gid_above_0xffff_skipped() {
    let overflowing_group = [(0x0041, 0x0043, 0xFFFF)];
    let font_bytes = build_cmap_format12_font(&overflowing_group);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    for (ch, expected_glyph) in [('A', Some(0xFFFF)), ('B', None), ('C', None)] {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}
/// When two format 12 groups cover the same code point, the mapping from the
/// later group is the one expected to survive.
#[test]
fn test_cmap_format12_overlapping_groups_last_wins() {
    let overlapping = [(0x4E00, 0x4E00, 1), (0x4E00, 0x4E01, 5)];
    let font_bytes = build_cmap_format12_font(&overlapping);
    let ttf = TtfParser::new(&font_bytes).unwrap();
    let glyph_map = ttf.extract_glyph_mapping().unwrap();

    for (ch, expected_glyph) in [('\u{4E00}', Some(5)), ('\u{4E01}', Some(6))] {
        assert_eq!(glyph_map.char_to_glyph(ch), expected_glyph);
    }
}