use std::collections::HashMap;
const SFNT_TRUETYPE: u32 = 0x00010000;
const SFNT_OTTO: u32 = 0x4F54_544F; const TAG_CMAP: u32 = 0x636D_6170; const TAG_HEAD: u32 = 0x6865_6164; const TAG_HHEA: u32 = 0x6868_6561; const TAG_HMTX: u32 = 0x686D_7478; const TAG_MAXP: u32 = 0x6D61_7870;
pub fn inject_unicode_cmap(
font_bytes: &[u8],
unicode_to_gid: &HashMap<u32, u16>,
) -> Option<Vec<u8>> {
if font_bytes.len() < 12 || unicode_to_gid.is_empty() {
return None;
}
let sfnt_version = read_u32(font_bytes, 0)?;
if sfnt_version != SFNT_TRUETYPE && sfnt_version != SFNT_OTTO {
return None;
}
let num_tables = read_u16(font_bytes, 4)? as usize;
if font_bytes.len() < 12 + num_tables * 16 {
return None;
}
#[derive(Clone)]
struct TableEntry {
tag: u32,
checksum: u32,
offset: u32,
length: u32,
data: Vec<u8>,
}
let mut tables: Vec<TableEntry> = Vec::with_capacity(num_tables + 1);
for i in 0..num_tables {
let rec_off = 12 + i * 16;
let tag = read_u32(font_bytes, rec_off)?;
let checksum = read_u32(font_bytes, rec_off + 4)?;
let offset = read_u32(font_bytes, rec_off + 8)?;
let length = read_u32(font_bytes, rec_off + 12)?;
let end = (offset as usize).checked_add(length as usize)?;
if end > font_bytes.len() {
return None;
}
let data = font_bytes[offset as usize..end].to_vec();
tables.push(TableEntry {
tag,
checksum,
offset,
length,
data,
});
}
let new_cmap_bytes = build_format4_cmap(unicode_to_gid)?;
let new_cmap_checksum = checksum_table(&new_cmap_bytes);
let cmap_idx = tables.iter().position(|t| t.tag == TAG_CMAP);
if let Some(idx) = cmap_idx {
tables[idx].length = new_cmap_bytes.len() as u32;
tables[idx].checksum = new_cmap_checksum;
tables[idx].data = new_cmap_bytes;
} else {
tables.push(TableEntry {
tag: TAG_CMAP,
checksum: new_cmap_checksum,
offset: 0,
length: new_cmap_bytes.len() as u32,
data: new_cmap_bytes,
});
}
tables.sort_by_key(|t| t.tag);
let new_num_tables = tables.len();
let header_size = 12 + new_num_tables * 16;
let mut cur_offset = header_size as u32;
for t in tables.iter_mut() {
t.offset = cur_offset;
let pad = (4 - (t.length as usize & 3)) & 3;
cur_offset += t.length + pad as u32;
}
let entry_selector = (new_num_tables as f64).log2().floor() as u16;
let search_range = (1u16 << entry_selector) * 16;
let range_shift = (new_num_tables as u16) * 16 - search_range;
let total_size = cur_offset as usize;
let mut out = vec![0u8; total_size];
write_u32(&mut out, 0, sfnt_version);
write_u16(&mut out, 4, new_num_tables as u16);
write_u16(&mut out, 6, search_range);
write_u16(&mut out, 8, entry_selector);
write_u16(&mut out, 10, range_shift);
for (i, t) in tables.iter().enumerate() {
let rec_off = 12 + i * 16;
write_u32(&mut out, rec_off, t.tag);
write_u32(&mut out, rec_off + 4, t.checksum);
write_u32(&mut out, rec_off + 8, t.offset);
write_u32(&mut out, rec_off + 12, t.length);
let off = t.offset as usize;
out[off..off + t.data.len()].copy_from_slice(&t.data);
}
if let Some(head) = tables.iter().find(|t| t.tag == TAG_HEAD) {
let head_off = head.offset as usize;
if head.length >= 12 && head_off + 12 <= out.len() {
write_u32(&mut out, head_off + 8, 0);
let sum_total = sum_u32_padded(&out);
let adjustment = 0xB1B0_AFBA_u32.wrapping_sub(sum_total);
write_u32(&mut out, head_off + 8, adjustment);
}
}
Some(out)
}
pub fn inject_hmtx(font_bytes: &[u8], widths_by_gid: &HashMap<u16, u16>) -> Option<Vec<u8>> {
if font_bytes.len() < 12 {
return None;
}
let sfnt_version = read_u32(font_bytes, 0)?;
if sfnt_version != SFNT_TRUETYPE && sfnt_version != SFNT_OTTO {
return None;
}
let num_tables = read_u16(font_bytes, 4)? as usize;
if font_bytes.len() < 12 + num_tables * 16 {
return None;
}
#[derive(Clone)]
struct TableEntry {
tag: u32,
checksum: u32,
offset: u32,
length: u32,
data: Vec<u8>,
}
let mut tables: Vec<TableEntry> = Vec::with_capacity(num_tables + 1);
for i in 0..num_tables {
let rec_off = 12 + i * 16;
let tag = read_u32(font_bytes, rec_off)?;
let checksum = read_u32(font_bytes, rec_off + 4)?;
let offset = read_u32(font_bytes, rec_off + 8)?;
let length = read_u32(font_bytes, rec_off + 12)?;
let end = (offset as usize).checked_add(length as usize)?;
if end > font_bytes.len() {
return None;
}
let data = font_bytes[offset as usize..end].to_vec();
tables.push(TableEntry {
tag,
checksum,
offset,
length,
data,
});
}
let maxp = tables.iter().find(|t| t.tag == TAG_MAXP)?.data.clone();
if maxp.len() < 6 {
return None;
}
let num_glyphs = read_u16(&maxp, 4)? as usize;
if num_glyphs == 0 {
return None;
}
let head_data = tables.iter().find(|t| t.tag == TAG_HEAD)?.data.clone();
if head_data.len() < 20 {
return None;
}
let units_per_em = read_u16(&head_data, 18)? as u32;
if units_per_em == 0 {
return None;
}
let mut new_hmtx = Vec::with_capacity(num_glyphs * 4);
for gid in 0u16..num_glyphs as u16 {
let advance_1000 = widths_by_gid.get(&gid).copied().unwrap_or(500);
let advance_design = ((advance_1000 as u32 * units_per_em) / 1000).min(0xFFFF) as u16;
new_hmtx.extend_from_slice(&advance_design.to_be_bytes());
new_hmtx.extend_from_slice(&[0u8, 0u8]); }
let new_hmtx_checksum = checksum_table(&new_hmtx);
let hmtx_idx = tables.iter().position(|t| t.tag == TAG_HMTX);
if let Some(idx) = hmtx_idx {
tables[idx].length = new_hmtx.len() as u32;
tables[idx].checksum = new_hmtx_checksum;
tables[idx].data = new_hmtx;
} else {
tables.push(TableEntry {
tag: TAG_HMTX,
checksum: new_hmtx_checksum,
offset: 0,
length: new_hmtx.len() as u32,
data: new_hmtx,
});
}
let hhea_idx = tables.iter().position(|t| t.tag == TAG_HHEA)?;
if tables[hhea_idx].data.len() < 36 {
return None;
}
let mut new_hhea = tables[hhea_idx].data.clone();
write_u16(&mut new_hhea, 34, num_glyphs as u16);
tables[hhea_idx].checksum = checksum_table(&new_hhea);
tables[hhea_idx].length = new_hhea.len() as u32;
tables[hhea_idx].data = new_hhea;
tables.sort_by_key(|t| t.tag);
let new_num_tables = tables.len();
let header_size = 12 + new_num_tables * 16;
let mut cur_offset = header_size as u32;
for t in tables.iter_mut() {
t.offset = cur_offset;
let pad = (4 - (t.length as usize & 3)) & 3;
cur_offset += t.length + pad as u32;
}
let entry_selector = (new_num_tables as f64).log2().floor() as u16;
let search_range = (1u16 << entry_selector) * 16;
let range_shift = (new_num_tables as u16) * 16 - search_range;
let total_size = cur_offset as usize;
let mut out = vec![0u8; total_size];
write_u32(&mut out, 0, sfnt_version);
write_u16(&mut out, 4, new_num_tables as u16);
write_u16(&mut out, 6, search_range);
write_u16(&mut out, 8, entry_selector);
write_u16(&mut out, 10, range_shift);
for (i, t) in tables.iter().enumerate() {
let rec_off = 12 + i * 16;
write_u32(&mut out, rec_off, t.tag);
write_u32(&mut out, rec_off + 4, t.checksum);
write_u32(&mut out, rec_off + 8, t.offset);
write_u32(&mut out, rec_off + 12, t.length);
let off = t.offset as usize;
out[off..off + t.data.len()].copy_from_slice(&t.data);
}
if let Some(head) = tables.iter().find(|t| t.tag == TAG_HEAD) {
let head_off = head.offset as usize;
if head.length >= 12 && head_off + 12 <= out.len() {
write_u32(&mut out, head_off + 8, 0);
let sum_total = sum_u32_padded(&out);
let adjustment = 0xB1B0_AFBA_u32.wrapping_sub(sum_total);
write_u32(&mut out, head_off + 8, adjustment);
}
}
Some(out)
}
fn build_format4_cmap(unicode_to_gid: &HashMap<u32, u16>) -> Option<Vec<u8>> {
let mut pairs: Vec<(u16, u16)> = unicode_to_gid
.iter()
.filter_map(|(&cp, &gid)| {
if cp <= 0xFFFF {
Some((cp as u16, gid))
} else {
None
}
})
.collect();
if pairs.is_empty() {
return None;
}
pairs.sort_by_key(|(cp, _)| *cp);
pairs.dedup_by_key(|(cp, _)| *cp);
struct Segment {
start: u16,
end: u16,
delta: i32, }
let mut segs: Vec<Segment> = Vec::new();
for &(cp, gid) in pairs.iter() {
let want_delta = gid as i32 - cp as i32;
let extend = segs
.last_mut()
.map(|s| s.end as u32 + 1 == cp as u32 && s.delta == want_delta)
.unwrap_or(false);
if extend {
let last = segs.last_mut().unwrap();
last.end = cp;
} else {
segs.push(Segment {
start: cp,
end: cp,
delta: want_delta,
});
}
}
if segs.last().is_none_or(|s| s.end != 0xFFFF) {
segs.push(Segment {
start: 0xFFFF,
end: 0xFFFF,
delta: 1,
});
}
let seg_count = segs.len();
let seg_count_x2 = (seg_count as u16) * 2;
let entry_selector = (seg_count as f64).log2().floor() as u16;
let search_range = (1u16 << entry_selector) * 2;
let range_shift = seg_count_x2 - search_range;
let subtable_size = 16 + 8 * seg_count;
let total_size = 4 + 8 + subtable_size;
let mut buf = Vec::with_capacity(total_size);
write_be_u16(&mut buf, 0); write_be_u16(&mut buf, 1);
write_be_u16(&mut buf, 0); write_be_u16(&mut buf, 3); write_be_u32(&mut buf, 12);
write_be_u16(&mut buf, 4); write_be_u16(&mut buf, subtable_size as u16); write_be_u16(&mut buf, 0); write_be_u16(&mut buf, seg_count_x2); write_be_u16(&mut buf, search_range);
write_be_u16(&mut buf, entry_selector);
write_be_u16(&mut buf, range_shift);
for s in &segs {
write_be_u16(&mut buf, s.end);
}
write_be_u16(&mut buf, 0);
for s in &segs {
write_be_u16(&mut buf, s.start);
}
for s in &segs {
let d16 = s.delta.rem_euclid(0x1_0000) as u16;
write_be_u16(&mut buf, d16);
}
for _ in &segs {
write_be_u16(&mut buf, 0);
}
Some(buf)
}
fn read_u16(buf: &[u8], off: usize) -> Option<u16> {
Some(u16::from_be_bytes([*buf.get(off)?, *buf.get(off + 1)?]))
}
fn read_u32(buf: &[u8], off: usize) -> Option<u32> {
Some(u32::from_be_bytes([
*buf.get(off)?,
*buf.get(off + 1)?,
*buf.get(off + 2)?,
*buf.get(off + 3)?,
]))
}
fn write_u16(buf: &mut [u8], off: usize, v: u16) {
buf[off..off + 2].copy_from_slice(&v.to_be_bytes());
}
fn write_u32(buf: &mut [u8], off: usize, v: u32) {
buf[off..off + 4].copy_from_slice(&v.to_be_bytes());
}
fn write_be_u16(buf: &mut Vec<u8>, v: u16) {
buf.extend_from_slice(&v.to_be_bytes());
}
fn write_be_u32(buf: &mut Vec<u8>, v: u32) {
buf.extend_from_slice(&v.to_be_bytes());
}
fn checksum_table(data: &[u8]) -> u32 {
let mut sum = 0u32;
let mut i = 0;
while i < data.len() {
let mut word = [0u8; 4];
let take = (data.len() - i).min(4);
word[..take].copy_from_slice(&data[i..i + take]);
sum = sum.wrapping_add(u32::from_be_bytes(word));
i += 4;
}
sum
}
fn sum_u32_padded(data: &[u8]) -> u32 {
let mut sum = 0u32;
let mut i = 0;
while i + 4 <= data.len() {
sum =
sum.wrapping_add(u32::from_be_bytes([data[i], data[i + 1], data[i + 2], data[i + 3]]));
i += 4;
}
if i < data.len() {
let mut word = [0u8; 4];
let take = data.len() - i;
word[..take].copy_from_slice(&data[i..i + take]);
sum = sum.wrapping_add(u32::from_be_bytes(word));
}
sum
}
#[cfg(test)]
mod tests {
use super::*;
fn minimal_otf() -> Vec<u8> {
let mut buf = Vec::new();
write_be_u32(&mut buf, SFNT_OTTO);
write_be_u16(&mut buf, 0); write_be_u16(&mut buf, 0); write_be_u16(&mut buf, 0); write_be_u16(&mut buf, 0); buf
}
#[test]
fn empty_map_returns_none() {
let font = minimal_otf();
let map = HashMap::new();
assert!(inject_unicode_cmap(&font, &map).is_none());
}
#[test]
fn invalid_sfnt_returns_none() {
let font = vec![0u8; 32];
let mut map = HashMap::new();
map.insert(0x41u32, 65u16);
assert!(inject_unicode_cmap(&font, &map).is_none());
}
#[test]
fn injects_into_empty_otf() {
let font = minimal_otf();
let mut map = HashMap::new();
map.insert(b'A' as u32, 65u16);
map.insert(b'B' as u32, 66u16);
let patched = inject_unicode_cmap(&font, &map).expect("patch ok");
assert_eq!(read_u16(&patched, 4).unwrap(), 1);
assert_eq!(read_u32(&patched, 12).unwrap(), TAG_CMAP);
}
#[test]
fn segment_coalescing_consecutive_cps_consecutive_gids() {
let mut map = HashMap::new();
for i in 0..10u32 {
map.insert(b'A' as u32 + i, 100 + i as u16);
}
let cmap = build_format4_cmap(&map).expect("build cmap");
let seg_count_x2 = read_u16(&cmap, 18).unwrap();
assert_eq!(seg_count_x2, 4);
}
}