include!("../../generated/generated_cmap.rs");
use std::collections::HashMap;
/// Encoding id used with `PlatformId::Windows` for the BMP-only (format 4)
/// subtable emitted by `Cmap::from_mappings`.
const WINDOWS_BMP_ENCODING: u16 = 1;
/// Encoding id used with `PlatformId::Windows` for the full-repertoire
/// (format 12) subtable emitted by `Cmap::from_mappings`.
const WINDOWS_FULL_REPERTOIRE_ENCODING: u16 = 10;
/// Encoding id used with `PlatformId::Unicode` for the BMP-only (format 4)
/// subtable emitted by `Cmap::from_mappings`.
const UNICODE_BMP_ENCODING: u16 = 3;
/// Encoding id used with `PlatformId::Unicode` for the full-repertoire
/// (format 12) subtable emitted by `Cmap::from_mappings`.
const UNICODE_FULL_REPERTOIRE_ENCODING: u16 = 4;
/// Byte length of a format 4 subtable holding `seg_count` segments and
/// `gid_count` entries in its glyph id array.
///
/// The total is eight 16-bit header words, four parallel 16-bit arrays of
/// `seg_count` entries each, and one 16-bit word per glyph id array entry.
fn size_of_cmap4(seg_count: u16, gid_count: u16) -> u16 {
    const HEADER_BYTES: u16 = 8 * 2;
    const BYTES_PER_SEGMENT: u16 = 4 * 2;
    const BYTES_PER_GLYPH_ID: u16 = 2;

    let header_and_segments = HEADER_BYTES + BYTES_PER_SEGMENT * seg_count;
    header_and_segments + BYTES_PER_GLYPH_ID * gid_count
}
/// Byte length of a format 12 subtable holding `num_groups` groups.
///
/// The total is two 16-bit header words, three 32-bit header words, and
/// three 32-bit words per group.
fn size_of_cmap12(num_groups: u32) -> u32 {
    const HEADER_BYTES: u32 = 2 * 2 + 3 * 4;
    const BYTES_PER_GROUP: u32 = 3 * 4;

    HEADER_BYTES + BYTES_PER_GROUP * num_groups
}
impl CmapSubtable {
    /// Builds a format 4 subtable from the mappings whose codepoint fits in
    /// 16 bits.
    ///
    /// Codepoints above U+FFFF are skipped. Returns `None` when no mapping
    /// remains after that filter.
    ///
    /// `mappings` is expected to be sorted by codepoint. Consecutive entries
    /// whose codepoint and glyph id both advance by exactly one are merged
    /// into a single segment; every other entry starts a new segment. Entries
    /// that repeat a codepoint are not merged here: each one starts its own
    /// segment.
    ///
    /// # Panics
    ///
    /// Panics if the number of segments does not fit the 16-bit fields of the
    /// subtable.
    fn create_format_4(mappings: &[(char, GlyphId)]) -> Option<Self> {
        let mut end_code: Vec<u16> = Vec::new();
        let mut start_code: Vec<u16> = Vec::new();
        let mut id_deltas: Vec<i16> = Vec::new();

        // (codepoint, glyph id) of the previous 16-bit mapping, if any.
        let mut prev: Option<(u16, u16)> = None;
        for (cp, gid) in mappings {
            // Anything that does not fit in 16 bits cannot be expressed here.
            let Ok(cp) = u16::try_from(u32::from(*cp)) else {
                continue;
            };
            let gid = gid.to_u16();

            // `checked_add` yields `None` at 0xFFFF, which simply never
            // matches, so values at the top of the range cannot panic.
            let extends_run = matches!(
                prev,
                Some((prev_cp, prev_gid))
                    if prev_cp.checked_add(1) == Some(cp)
                        && prev_gid.checked_add(1) == Some(gid)
            );

            if extends_run {
                let run_end = end_code
                    .last_mut()
                    .expect("a run is only extended after a segment was started");
                *run_end = cp;
            } else {
                start_code.push(cp);
                end_code.push(cp);
                // The glyph id is computed as (codepoint + idDelta) modulo
                // 65536, so the wrapped 16-bit difference reinterpreted as a
                // signed value is a valid delta for every (cp, gid) pair,
                // including differences outside the i16 range.
                id_deltas.push(gid.wrapping_sub(cp) as i16);
            }
            prev = Some((cp, gid));
        }

        if start_code.is_empty() {
            return None;
        }

        // Closing segment 0xFFFF..=0xFFFF with a delta of 1.
        start_code.push(0xFFFF);
        end_code.push(0xFFFF);
        id_deltas.push(1);

        assert!(
            end_code.len() == start_code.len() && end_code.len() == id_deltas.len(),
            "uneven parallel arrays, very bad. Very very bad."
        );

        let seg_count = u16::try_from(start_code.len())
            .expect("too many segments for a format 4 subtable");
        // floor(log2(seg_count)) in exact integer arithmetic; `seg_count` is
        // at least 2 here (one real segment plus the closing one).
        let entry_selector = (u16::BITS - 1 - seg_count.leading_zeros()) as u16;
        let search_range = 2u16
            .pow(u32::from(entry_selector))
            .checked_mul(2)
            .expect("search range does not fit in 16 bits");
        let range_shift = seg_count * 2 - search_range;
        // Every segment is expressed purely through its delta, so all range
        // offsets are zero and the glyph id array stays empty.
        let id_range_offsets = vec![0; id_deltas.len()];

        Some(CmapSubtable::format_4(
            size_of_cmap4(seg_count, 0),
            0, // presumably the `language` field; always written as zero here
            seg_count * 2,
            search_range,
            entry_selector,
            range_shift,
            end_code,
            start_code,
            id_deltas,
            id_range_offsets,
            vec![], // no glyph id array entries
        ))
    }

    /// Builds a format 12 subtable covering every mapping.
    ///
    /// Consecutive entries whose codepoint and glyph id both advance by
    /// exactly one are merged into a single group. When a codepoint occurs
    /// more than once, the glyph id of its last occurrence in `mappings` is
    /// used.
    ///
    /// `mappings` is expected to be sorted by codepoint: repeated codepoints
    /// are only collapsed when they are adjacent.
    ///
    /// # Panics
    ///
    /// Panics if `mappings` is empty.
    fn create_format_12(mappings: &[(char, GlyphId)]) -> Self {
        let (mut char_codes, gids): (Vec<u32>, Vec<u32>) = mappings
            .iter()
            .map(|(cp, gid)| (*cp as u32, gid.to_u16() as u32))
            .unzip();
        // Later entries overwrite earlier ones, so the last glyph id wins for
        // a repeated codepoint.
        let cmap: HashMap<_, _> = char_codes.iter().cloned().zip(gids).collect();
        char_codes.dedup();

        let mut start_char_code = *char_codes.first().unwrap();
        let mut start_glyph_id = cmap[&start_char_code];
        // Seed the "previous" values one below the first entry so that the
        // first loop iteration continues the initial group instead of
        // emitting an empty one.
        let mut last_glyph_id = start_glyph_id.wrapping_sub(1);
        let mut last_char_code = start_char_code.wrapping_sub(1);

        let mut groups = Vec::new();
        for char_code in char_codes {
            let glyph_id = cmap[&char_code];
            if glyph_id != last_glyph_id.wrapping_add(1)
                || char_code != last_char_code.wrapping_add(1)
            {
                // The run is broken: close the current group, open a new one.
                groups.push((start_char_code, last_char_code, start_glyph_id));
                start_char_code = char_code;
                start_glyph_id = glyph_id;
            }
            last_glyph_id = glyph_id;
            last_char_code = char_code;
        }
        // Close the group that was still open when the input ran out.
        groups.push((start_char_code, last_char_code, start_glyph_id));

        let num_groups: u32 = groups.len().try_into().unwrap();
        let seq_map_groups = groups
            .into_iter()
            .map(|(start_char, end_char, gid)| SequentialMapGroup::new(start_char, end_char, gid))
            .collect::<Vec<_>>();

        CmapSubtable::format_12(
            size_of_cmap12(num_groups),
            0, // presumably the `language` field; always written as zero here
            num_groups,
            seq_map_groups,
        )
    }
}
impl Cmap {
    /// Builds a `cmap` table from `(codepoint, glyph id)` pairs.
    ///
    /// The pairs may be supplied in any order; they are sorted before use.
    /// If a codepoint is supplied more than once, a single mapping is kept
    /// for it: the one with the highest glyph id (the last one after
    /// sorting), so that every emitted subtable agrees on the result.
    ///
    /// Two kinds of subtables are produced, each registered once for
    /// `PlatformId::Unicode` and once for `PlatformId::Windows`:
    ///
    /// * a format 4 subtable, when at least one codepoint is at or below
    ///   U+FFFF;
    /// * a format 12 subtable containing all mappings, when at least one
    ///   codepoint is above U+FFFF.
    ///
    /// The Unicode records precede the Windows records in the result.
    pub fn from_mappings(mappings: impl IntoIterator<Item = (char, GlyphId)>) -> Cmap {
        let mut mappings: Vec<_> = mappings.into_iter().collect();
        mappings.sort();
        // Collapse repeated codepoints, letting the last entry win. Without
        // this the format 4 builder would emit duplicate or overlapping
        // segments for a repeated codepoint.
        mappings.dedup_by(|later, kept| {
            let same_codepoint = later.0 == kept.0;
            if same_codepoint {
                kept.1 = later.1;
            }
            same_codepoint
        });

        let mut uni_records = Vec::new();
        let mut win_records = Vec::new();

        if let Some(bmp_subtable) = CmapSubtable::create_format_4(&mappings) {
            uni_records.push(EncodingRecord::new(
                PlatformId::Unicode,
                UNICODE_BMP_ENCODING,
                bmp_subtable.clone(),
            ));
            win_records.push(EncodingRecord::new(
                PlatformId::Windows,
                WINDOWS_BMP_ENCODING,
                bmp_subtable,
            ));
        }

        // A full-repertoire subtable is only needed when something lies
        // beyond the 16-bit range.
        if mappings.iter().any(|(cp, _)| *cp > '\u{FFFF}') {
            let full_repertoire_subtable = CmapSubtable::create_format_12(&mappings);
            uni_records.push(EncodingRecord::new(
                PlatformId::Unicode,
                UNICODE_FULL_REPERTOIRE_ENCODING,
                full_repertoire_subtable.clone(),
            ));
            win_records.push(EncodingRecord::new(
                PlatformId::Windows,
                WINDOWS_FULL_REPERTOIRE_ENCODING,
                full_repertoire_subtable,
            ));
        }

        Cmap::new(uni_records.into_iter().chain(win_records).collect())
    }
}
#[cfg(test)]
mod tests {
    use std::collections::VecDeque;

    use font_types::GlyphId;
    use read_fonts::{
        tables::cmap::{Cmap, CmapSubtable, PlatformId},
        FontData, FontRead,
    };

    use crate::{
        dump_table,
        tables::cmap::{
            self as write, UNICODE_BMP_ENCODING, UNICODE_FULL_REPERTOIRE_ENCODING,
            WINDOWS_BMP_ENCODING, WINDOWS_FULL_REPERTOIRE_ENCODING,
        },
    };

    /// Builds a cmap from `mappings`, round-trips it through bytes, and checks
    /// that it matches the table expected for `simple_cmap_mappings`.
    fn assert_generates_simple_cmap(mappings: Vec<(char, GlyphId)>) {
        let cmap = write::Cmap::from_mappings(mappings);

        let bytes = dump_table(&cmap).unwrap();
        let font_data = FontData::new(&bytes);
        let cmap = Cmap::read(font_data).unwrap();

        assert_eq!(2, cmap.encoding_records().len(), "{cmap:?}");
        assert_eq!(
            vec![
                (PlatformId::Unicode, UNICODE_BMP_ENCODING),
                (PlatformId::Windows, WINDOWS_BMP_ENCODING)
            ],
            cmap.encoding_records()
                .iter()
                .map(|er| (er.platform_id(), er.encoding_id()))
                .collect::<Vec<_>>(),
            "{cmap:?}"
        );

        for encoding_record in cmap.encoding_records() {
            let CmapSubtable::Format4(cmap4) = encoding_record.subtable(font_data).unwrap() else {
                panic!("Expected a cmap4 in {encoding_record:?}");
            };

            // Three real segments plus the closing 0xFFFF segment.
            assert_eq!(
                (8, 8, 2, 0),
                (
                    cmap4.seg_count_x2(),
                    cmap4.search_range(),
                    cmap4.entry_selector(),
                    cmap4.range_shift()
                )
            );
            assert_eq!(cmap4.start_code(), &[10u16, 30u16, 153u16, 0xffffu16]);
            assert_eq!(cmap4.end_code(), &[20u16, 90u16, 480u16, 0xffffu16]);
            assert_eq!(cmap4.id_delta(), &[-10i16, -19i16, -81i16, 1i16]);
            assert_eq!(cmap4.id_range_offsets(), &[0u16, 0u16, 0u16, 0u16]);
        }
    }

    /// Three codepoint runs (10..=20, 30..=90, 153..=480) mapped to
    /// consecutive glyph ids starting at 0.
    fn simple_cmap_mappings() -> Vec<(char, GlyphId)> {
        (10..=20)
            .chain(30..=90)
            .chain(153..=480)
            .enumerate()
            .map(|(idx, codepoint)| (char::from_u32(codepoint).unwrap(), GlyphId::new(idx as u16)))
            .collect()
    }

    #[test]
    fn generate_simple_cmap4() {
        let mappings = simple_cmap_mappings();
        assert_generates_simple_cmap(mappings);
    }

    #[test]
    fn generate_cmap4_out_of_order_input() {
        // Keep the ordered input intact so the inequality check below compares
        // against the real ordered sequence rather than an emptied vector.
        let ordered = simple_cmap_mappings();

        // Walk the input front to back, alternately prepending and appending,
        // based on how many entries are still left to place.
        let mut shuffled = VecDeque::with_capacity(ordered.len());
        for (idx, mapping) in ordered.iter().copied().enumerate() {
            let remaining = ordered.len() - idx;
            if remaining % 2 == 0 {
                shuffled.push_front(mapping);
            } else {
                shuffled.push_back(mapping);
            }
        }
        let disordered = Vec::from(shuffled);

        assert_ne!(ordered, disordered);
        assert_generates_simple_cmap(disordered);
    }

    #[test]
    fn generate_cmap4_large_values() {
        let mut mappings = simple_cmap_mappings();
        // The distance between this codepoint and its glyph id is outside the
        // i16 range.
        let codepoint = char::from_u32(0xa78b).unwrap();
        let gid = GlyphId::new(153);
        mappings.push((codepoint, gid));

        let cmap = write::Cmap::from_mappings(mappings);

        let bytes = dump_table(&cmap).unwrap();
        let font_data = FontData::new(&bytes);
        let cmap = Cmap::read(font_data).unwrap();
        assert_eq!(cmap.map_codepoint(codepoint), Some(gid));
    }

    #[test]
    fn bytes_are_reused() {
        let mappings = simple_cmap_mappings();
        let cmap_both = write::Cmap::from_mappings(mappings);
        assert_eq!(2, cmap_both.encoding_records.len(), "{cmap_both:?}");

        let bytes_for_both = dump_table(&cmap_both).unwrap().len();

        // Dropping either record must only save the size of one encoding
        // record, i.e. the subtable bytes themselves are shared.
        for i in 0..cmap_both.encoding_records.len() {
            let mut cmap = cmap_both.clone();
            cmap.encoding_records.remove(i);
            let bytes_for_one = dump_table(&cmap).unwrap().len();
            assert_eq!(bytes_for_one + 8, bytes_for_both);
        }
    }

    /// Mappings above U+FFFF, including a repeated codepoint (U+1F136).
    fn non_bmp_cmap_mappings() -> Vec<(char, GlyphId)> {
        vec![
            ('\u{1f12f}', GlyphId::new(481)),
            ('\u{1f130}', GlyphId::new(482)),
            ('\u{1f132}', GlyphId::new(483)),
            ('\u{1f133}', GlyphId::new(484)),
            ('\u{1f134}', GlyphId::new(486)),
            ('\u{1f136}', GlyphId::new(487)),
            ('\u{1f136}', GlyphId::new(488)),
        ]
    }

    fn bmp_and_non_bmp_cmap_mappings() -> Vec<(char, GlyphId)> {
        let mut mappings = simple_cmap_mappings();
        mappings.extend(non_bmp_cmap_mappings());
        mappings
    }

    /// Asserts that encoding record `record_index` holds a format 12 subtable
    /// whose groups are exactly `expected`, given as
    /// `(start_char_code, end_char_code, start_glyph_id)`.
    fn assert_cmap12_groups(
        font_data: FontData,
        cmap: &Cmap,
        record_index: usize,
        expected: &[(u32, u32, u32)],
    ) {
        let rec = &cmap.encoding_records()[record_index];
        let CmapSubtable::Format12(subtable) = rec.subtable(font_data).unwrap() else {
            panic!("Expected a cmap12 in {rec:?}");
        };
        let groups = subtable
            .groups()
            .iter()
            .map(|g| (g.start_char_code(), g.end_char_code(), g.start_glyph_id()))
            .collect::<Vec<_>>();
        assert_eq!(groups.len(), expected.len());
        assert_eq!(groups, expected);
    }

    #[test]
    fn generate_cmap4_and_12() {
        let mappings = bmp_and_non_bmp_cmap_mappings();

        let cmap = write::Cmap::from_mappings(mappings);

        let bytes = dump_table(&cmap).unwrap();
        let font_data = FontData::new(&bytes);
        let cmap = Cmap::read(font_data).unwrap();

        assert_eq!(4, cmap.encoding_records().len(), "{cmap:?}");
        assert_eq!(
            vec![
                (PlatformId::Unicode, UNICODE_BMP_ENCODING),
                (PlatformId::Unicode, UNICODE_FULL_REPERTOIRE_ENCODING),
                (PlatformId::Windows, WINDOWS_BMP_ENCODING),
                (PlatformId::Windows, WINDOWS_FULL_REPERTOIRE_ENCODING)
            ],
            cmap.encoding_records()
                .iter()
                .map(|er| (er.platform_id(), er.encoding_id()))
                .collect::<Vec<_>>(),
            "{cmap:?}"
        );

        let encoding_records = cmap.encoding_records();
        let first_rec = &encoding_records[0];
        assert!(
            matches!(
                first_rec.subtable(font_data).unwrap(),
                CmapSubtable::Format4(_)
            ),
            "Expected a cmap4 in {first_rec:?}"
        );

        let expected_groups = vec![
            (10, 20, 0),
            (30, 90, 11),
            (153, 480, 72),
            (0x1f12f, 0x1f130, 481),
            (0x1f132, 0x1f133, 483),
            (0x1f134, 0x1f134, 486),
            (0x1f136, 0x1f136, 488),
        ];
        assert_cmap12_groups(font_data, &cmap, 1, &expected_groups);
        assert_cmap12_groups(font_data, &cmap, 3, &expected_groups);
    }

    #[test]
    fn generate_cmap12_only() {
        let mappings = non_bmp_cmap_mappings();

        let cmap = write::Cmap::from_mappings(mappings);

        let bytes = dump_table(&cmap).unwrap();
        let font_data = FontData::new(&bytes);
        let cmap = Cmap::read(font_data).unwrap();

        assert_eq!(2, cmap.encoding_records().len(), "{cmap:?}");
        assert_eq!(
            vec![
                (PlatformId::Unicode, UNICODE_FULL_REPERTOIRE_ENCODING),
                (PlatformId::Windows, WINDOWS_FULL_REPERTOIRE_ENCODING)
            ],
            cmap.encoding_records()
                .iter()
                .map(|er| (er.platform_id(), er.encoding_id()))
                .collect::<Vec<_>>(),
            "{cmap:?}"
        );

        let expected_groups = vec![
            (0x1f12f, 0x1f130, 481),
            (0x1f132, 0x1f133, 483),
            (0x1f134, 0x1f134, 486),
            (0x1f136, 0x1f136, 488),
        ];
        assert_cmap12_groups(font_data, &cmap, 0, &expected_groups);
        assert_cmap12_groups(font_data, &cmap, 1, &expected_groups);
    }
}