use std::collections::HashMap;
/// Inverts a font's `cmap` table into a glyph-ID → Unicode code point map.
///
/// `data` holds the raw bytes of either a single sfnt font (version tag
/// `00 01 00 00`, `OTTO` or `true`) or a `ttcf` collection; for a collection
/// the first member font is used.
///
/// The best-ranked Unicode encoding record is selected (see
/// `best_subtable_offset`) and its subtable is decoded when it uses format
/// 0, 4, 6 or 12.
///
/// Glyph 0 and code point 0 are never recorded. When several code points map
/// to the same glyph, a code point outside the Private Use Area
/// (U+E000..=U+F8FF) is preferred, then the numerically smallest one.
///
/// Returns `None` when the data is not a recognised font, has no `cmap` table
/// or no usable encoding record, is truncated, uses an unsupported subtable
/// format, or produces no mappings at all.
pub fn reverse_cmap(data: &[u8]) -> Option<HashMap<u32, u32>> {
    let font = sfnt_slice(data)?;
    // Table offsets are measured from the start of the file. That is the
    // start of `font` for a standalone font but NOT for a collection member,
    // so tables are always sliced out of `data`.
    let cmap = find_table(data, font, b"cmap")?;
    let sub = cmap.get(best_subtable_offset(cmap)?..)?;

    let mut map = HashMap::new();
    match read_u16(sub, 0)? {
        0 => invert_format0(sub, &mut map)?,
        4 => invert_format4(sub, &mut map)?,
        6 => invert_format6(sub, &mut map)?,
        12 => invert_format12(sub, &mut map)?,
        _ => return None,
    }
    (!map.is_empty()).then_some(map)
}

/// Scans the `cmap` encoding records and returns the offset (relative to the
/// start of `cmap`) of the subtable to decode.
///
/// Ranking by (platform, encoding): full-repertoire Unicode records
/// `(3, 10)`, `(0, 4)`, `(0, 6)` beat BMP-only records `(3, 1)`, `(0, 0..=3)`;
/// everything else is ignored. Among equally ranked records the first wins.
/// Returns `None` if a record is truncated or no record qualifies.
fn best_subtable_offset(cmap: &[u8]) -> Option<usize> {
    let num_subtables = usize::from(read_u16(cmap, 2)?);
    let mut best: Option<(u32, u32)> = None;
    for i in 0..num_subtables {
        // Records are 8 bytes each and follow the 4-byte table header.
        let rec = 4 + i * 8;
        let platform = read_u16(cmap, rec)?;
        let encoding = read_u16(cmap, rec + 2)?;
        let offset = read_u32(cmap, rec + 4)?;
        let score = match (platform, encoding) {
            (3, 10) | (0, 4) | (0, 6) => 4,
            (3, 1) | (0, 0..=3) => 3,
            _ => continue,
        };
        match best {
            // Keep the incumbent on a tie so the earliest record wins.
            Some((held, _)) if held >= score => {}
            _ => best = Some((score, offset)),
        }
    }
    // u32 -> usize is a widening (or same-width) conversion on 32/64-bit targets.
    best.map(|(_, offset)| offset as usize)
}

/// Records `glyph -> unicode` in `map`, keeping the preferred code point when
/// the glyph is already present.
///
/// Glyph 0 and code point 0 are ignored. A non-PUA code point beats a PUA
/// one; within the same class the smaller code point wins.
fn record_mapping(map: &mut HashMap<u32, u32>, glyph: u32, unicode: u32) {
    use std::collections::hash_map::Entry;

    fn is_private_use(cp: u32) -> bool {
        (0xE000..=0xF8FF).contains(&cp)
    }

    if glyph == 0 || unicode == 0 {
        return;
    }
    match map.entry(glyph) {
        Entry::Vacant(slot) => {
            slot.insert(unicode);
        }
        Entry::Occupied(mut slot) => {
            let current = *slot.get();
            // Lexicographic order on (is_pua, code point): `false < true`
            // puts non-PUA first, then the smaller code point.
            if (is_private_use(unicode), unicode) < (is_private_use(current), current) {
                slot.insert(unicode);
            }
        }
    }
}

/// Format 0: a 256-entry byte array at offset 6, indexed by character code.
/// Returns `None` if the array is truncated.
fn invert_format0(sub: &[u8], map: &mut HashMap<u32, u32>) -> Option<()> {
    let glyphs = sub.get(6..262)?;
    for (code, &glyph) in (0u32..).zip(glyphs) {
        record_mapping(map, u32::from(glyph), code);
    }
    Some(())
}

/// Format 4: segment mapping to delta values (16-bit code points only).
/// Returns `None` if any array entry or glyph slot lies outside `sub`.
fn invert_format4(sub: &[u8], map: &mut HashMap<u32, u32>) -> Option<()> {
    // Offset 6 stores segCountX2.
    let seg_count = usize::from(read_u16(sub, 6)? / 2);
    // Four parallel u16 arrays start at byte 14; a 2-byte pad follows endCode.
    let end_codes = 14;
    let start_codes = end_codes + seg_count * 2 + 2;
    let id_deltas = start_codes + seg_count * 2;
    let id_range_offsets = id_deltas + seg_count * 2;

    for seg in 0..seg_count {
        let end = u32::from(read_u16(sub, end_codes + seg * 2)?);
        let start = u32::from(read_u16(sub, start_codes + seg * 2)?);
        let delta = u32::from(read_u16(sub, id_deltas + seg * 2)?);
        let range_offset_pos = id_range_offsets + seg * 2;
        let range_offset = usize::from(read_u16(sub, range_offset_pos)?);

        // The terminating 0xFFFF..0xFFFF segment carries no real mapping.
        if start == 0xFFFF && end == 0xFFFF {
            continue;
        }
        for code in start..=end.min(0xFFFE) {
            let glyph = if range_offset == 0 {
                // Glyph id is (code + idDelta) modulo 65536.
                (code + delta) & 0xFFFF
            } else {
                // idRangeOffset is a byte distance from its own position to
                // the glyph-array slot of the segment's first code.
                let slot = range_offset_pos + range_offset + (code - start) as usize * 2;
                match read_u16(sub, slot)? {
                    0 => 0,
                    raw => (u32::from(raw) + delta) & 0xFFFF,
                }
            };
            record_mapping(map, glyph, code);
        }
    }
    Some(())
}

/// Format 6: a dense glyph array for the code range starting at `firstCode`.
/// Returns `None` if the header or array is truncated.
fn invert_format6(sub: &[u8], map: &mut HashMap<u32, u32>) -> Option<()> {
    let first = u32::from(read_u16(sub, 6)?);
    let count = usize::from(read_u16(sub, 8)?);
    // `first` and `count` both fit in 16 bits, so `first..` cannot overflow here.
    for (i, code) in (0..count).zip(first..) {
        let glyph = u32::from(read_u16(sub, 10 + i * 2)?);
        record_mapping(map, glyph, code);
    }
    Some(())
}

/// Format 12: sequential map groups of (startCharCode, endCharCode, startGlyphID).
///
/// At most 100 000 groups are read; groups that are inverted or span more
/// than 0x10000 code points are skipped. Returns `None` if a group record is
/// truncated.
fn invert_format12(sub: &[u8], map: &mut HashMap<u32, u32>) -> Option<()> {
    const MAX_GROUPS: usize = 100_000;
    const MAX_GROUP_SPAN: u32 = 0x10000;

    let n_groups = read_u32(sub, 12)? as usize;
    for i in 0..n_groups.min(MAX_GROUPS) {
        let rec = 16 + i * 12;
        let start = read_u32(sub, rec)?;
        let end = read_u32(sub, rec + 4)?;
        let start_glyph = read_u32(sub, rec + 8)?;
        let span = match end.checked_sub(start) {
            Some(span) if span <= MAX_GROUP_SPAN => span,
            _ => continue,
        };
        for off in 0..=span {
            // `start_glyph` is untrusted: stop this group instead of
            // overflowing (a panic in debug builds, a wrap in release).
            let Some(glyph) = start_glyph.checked_add(off) else {
                break;
            };
            // `start + off <= end`, so this addition cannot overflow.
            record_mapping(map, glyph, start + off);
        }
    }
    Some(())
}

/// Returns the part of `data` that begins at an sfnt table directory.
///
/// For a standalone font that is `data` itself. For a `ttcf` collection it is
/// the suffix starting at the first member font, whose offset is the `u32`
/// at byte 12 of the collection header. Returns `None` unless the slice
/// starts with one of the version tags `00 01 00 00`, `OTTO` or `true`.
fn sfnt_slice(data: &[u8]) -> Option<&[u8]> {
    fn is_sfnt_version(tag: &[u8]) -> bool {
        tag == [0, 1, 0, 0] || tag == b"OTTO" || tag == b"true"
    }

    let font = if data.get(..4)? == b"ttcf" {
        let first = read_u32(data, 12)? as usize;
        data.get(first..)?
    } else {
        data
    };
    is_sfnt_version(font.get(..4)?).then_some(font)
}

/// Looks up `tag` in the table directory at the start of `font` and returns
/// that table's bytes, sliced out of `file`.
///
/// `file` is the complete font file and `font` the part of it that begins at
/// the table directory: the same slice for a standalone font, a suffix of
/// `file` for a collection member. Table-record offsets are relative to the
/// start of `file`. Returns `None` if the directory is truncated, the tag is
/// absent, or the recorded range does not fit inside `file`.
fn find_table<'a>(file: &'a [u8], font: &[u8], tag: &[u8; 4]) -> Option<&'a [u8]> {
    let num_tables = usize::from(read_u16(font, 4)?);
    for i in 0..num_tables {
        // 16-byte records (tag, checksum, offset, length) after a 12-byte header.
        let rec = 12 + i * 16;
        if font.get(rec..rec + 4)? == tag {
            let offset = read_u32(font, rec + 8)? as usize;
            let length = read_u32(font, rec + 12)? as usize;
            return file.get(offset..offset.checked_add(length)?);
        }
    }
    None
}
/// Reads a big-endian `u16` from `data` at byte `offset`.
///
/// Returns `None` when fewer than two bytes are available at `offset`,
/// including when `offset + 2` would overflow `usize`.
fn read_u16(data: &[u8], offset: usize) -> Option<u16> {
    // checked_add: an offset near usize::MAX must yield None, not an
    // arithmetic-overflow panic.
    let end = offset.checked_add(2)?;
    match data.get(offset..end)? {
        [hi, lo] => Some(u16::from_be_bytes([*hi, *lo])),
        _ => None,
    }
}
/// Reads a big-endian `u32` from `data` at byte `offset`.
///
/// Returns `None` when fewer than four bytes are available at `offset`,
/// including when `offset + 4` would overflow `usize`.
fn read_u32(data: &[u8], offset: usize) -> Option<u32> {
    // checked_add: an offset near usize::MAX must yield None, not an
    // arithmetic-overflow panic.
    let end = offset.checked_add(4)?;
    match data.get(offset..end)? {
        [a, b, c, d] => Some(u32::from_be_bytes([*a, *b, *c, *d])),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a one-table sfnt whose `cmap` holds a single (3, 1) encoding
    /// record pointing at a format 4 subtable that maps U+0041..=U+0043 to
    /// glyphs 10..=12.
    fn minimal_format4_font() -> Vec<u8> {
        // The format 4 subtable, written as big-endian 16-bit words.
        let words: [u16; 16] = [
            4, // format
            0, // length
            0, // language
            4, // segCountX2 (two segments)
            0, // searchRange
            0, // entrySelector
            0, // rangeShift
            0x0043, 0xFFFF, // endCode[]
            0, // reservedPad
            0x0041, 0xFFFF, // startCode[]
            0xFFC9, 0x0001, // idDelta[] (0xFFC9 == -55 mod 65536)
            0, 0, // idRangeOffset[]
        ];
        let mut sub: Vec<u8> = Vec::with_capacity(words.len() * 2);
        for word in words.iter() {
            sub.extend_from_slice(&word.to_be_bytes());
        }

        // cmap header (version 0, one record), then the record itself:
        // platform 3, encoding 1, subtable at byte 12.
        let mut cmap: Vec<u8> = vec![0, 0, 0, 1, 0, 3, 0, 1];
        cmap.extend_from_slice(&12u32.to_be_bytes());
        cmap.extend_from_slice(&sub);

        // sfnt header: version 00 01 00 00, one table, six unused bytes.
        let mut font: Vec<u8> = vec![0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0];
        // Table record: tag, checksum, offset (right after the 28-byte
        // header + record), length.
        font.extend_from_slice(b"cmap");
        font.extend_from_slice(&[0u8; 4]);
        font.extend_from_slice(&28u32.to_be_bytes());
        font.extend_from_slice(&(cmap.len() as u32).to_be_bytes());
        font.extend_from_slice(&cmap);
        font
    }

    #[test]
    fn parses_format4_and_inverts() {
        let font = minimal_format4_font();
        let map = reverse_cmap(&font).unwrap();
        assert_eq!(map.get(&10), Some(&0x41));
        assert_eq!(map.get(&11), Some(&0x42));
        assert_eq!(map.get(&12), Some(&0x43));
        assert!(!map.contains_key(&0));
    }

    #[test]
    fn rejects_non_sfnt() {
        assert!(reverse_cmap(b"%!PS-AdobeFont").is_none());
        assert!(reverse_cmap(&[1, 0, 0, 0]).is_none());
        assert!(reverse_cmap(&[]).is_none());
    }
}