use crate::fonts::cmap_utils::parse_cmap_format_12_filtered;
use crate::parser::{ParseError, ParseResult};
use std::collections::{HashMap, HashSet};
/// One lexical element produced while scanning a CFF DICT byte stream.
#[derive(Debug, PartialEq, Clone)]
pub enum CffDictToken {
    /// An integer operand. Real-number operands (lead byte 30) are skipped
    /// over and reported as `Operand(0)`.
    Operand(i32),
    /// A single-byte operator (any byte that is not an operand lead byte or
    /// the escape byte 12).
    Operator(u8),
    /// The second byte of a two-byte operator introduced by the escape byte 12.
    EscapedOperator(u8),
}

/// Streaming tokenizer over the raw bytes of a CFF DICT.
pub struct CffDictScanner<'a> {
    data: &'a [u8],
    pos: usize,
}

impl<'a> CffDictScanner<'a> {
    /// Creates a scanner positioned at the first byte of `data`.
    pub fn new(data: &'a [u8]) -> Self {
        CffDictScanner { data, pos: 0 }
    }

    /// Byte offset of the next unread byte, i.e. just past the last token
    /// returned by `next`.
    pub fn position(&self) -> usize {
        self.pos
    }
}

impl<'a> Iterator for CffDictScanner<'a> {
    type Item = CffDictToken;

    /// Decodes the next token, or returns `None` at end of data or when the
    /// remaining bytes are too short for the token that starts here.
    fn next(&mut self) -> Option<CffDictToken> {
        let tail = self.data.get(self.pos..)?;
        let lead = *tail.first()?;

        // Each arm yields the decoded token and how many bytes it occupied.
        // A `?` on a missing trailing byte leaves `pos` untouched.
        let (token, consumed) = match lead {
            12 => match tail.get(1) {
                Some(&second) => (CffDictToken::EscapedOperator(second), 2),
                None => {
                    // The lone escape byte is still consumed.
                    self.pos += 1;
                    return None;
                }
            },
            28 => {
                let raw = i16::from_be_bytes([*tail.get(1)?, *tail.get(2)?]);
                (CffDictToken::Operand(i32::from(raw)), 3)
            }
            29 => {
                let raw = i32::from_be_bytes([
                    *tail.get(1)?,
                    *tail.get(2)?,
                    *tail.get(3)?,
                    *tail.get(4)?,
                ]);
                (CffDictToken::Operand(raw), 5)
            }
            30 => {
                // Packed-nibble real: skip through the first byte holding an
                // 0xF nibble, or to end of data if there is none.
                let consumed = match tail[1..]
                    .iter()
                    .position(|&byte| byte & 0x0F == 0x0F || byte >> 4 == 0x0F)
                {
                    Some(idx) => idx + 2,
                    None => tail.len(),
                };
                (CffDictToken::Operand(0), consumed)
            }
            32..=246 => (CffDictToken::Operand(i32::from(lead) - 139), 1),
            247..=250 => {
                let low = i32::from(*tail.get(1)?);
                let value = (i32::from(lead) - 247) * 256 + low + 108;
                (CffDictToken::Operand(value), 2)
            }
            251..=254 => {
                let low = i32::from(*tail.get(1)?);
                let value = -(i32::from(lead) - 251) * 256 - low - 108;
                (CffDictToken::Operand(value), 2)
            }
            _ => (CffDictToken::Operator(lead), 1),
        };

        self.pos += consumed;
        Some(token)
    }
}
/// Output of [`subset_cff_font`].
pub struct CffSubsetResult {
/// The resulting font bytes: a rebuilt OpenType file, a bare CFF table, or
/// the unmodified input font when subsetting fell back.
pub font_data: Vec<u8>,
/// Maps each used Unicode code point to the glyph ID to use with `font_data`.
pub glyph_mapping: HashMap<u32, u16>,
/// `true` when `font_data` is a bare CFF table rather than a full OpenType
/// file (set for CID-keyed fonts).
pub is_raw_cff: bool,
}
/// Subsets an OpenType/CFF font to the glyphs needed for `used_chars`.
///
/// Glyph 0 is always retained. Retained glyphs are renumbered densely in
/// ascending order of their original glyph ID.
///
/// If rewriting the CFF table fails, the original font is returned unchanged
/// together with a mapping to the original glyph IDs. For CID-keyed fonts the
/// result is the bare subset CFF table (`is_raw_cff == true`); otherwise a
/// rebuilt OpenType file is returned.
///
/// # Errors
/// Fails when the OpenType container cannot be parsed, when the `CFF ` or
/// `cmap` table is missing or extends past `font_data`, when the cmap cannot
/// be parsed, or when rebuilding the OpenType file fails.
pub fn subset_cff_font(
    font_data: &[u8],
    used_chars: &HashSet<char>,
) -> ParseResult<CffSubsetResult> {
    let otf = OtfFile::parse(font_data)?;

    // Locate and bounds-check the CFF table.
    let cff_entry = otf.find_table(b"CFF ")?;
    let cff_start = cff_entry.offset as usize;
    let cff_end = cff_start + cff_entry.length as usize;
    if cff_end > font_data.len() {
        return Err(ParseError::SyntaxError {
            position: cff_start,
            message: "CFF table extends beyond font data".to_string(),
        });
    }
    let cff_data = &font_data[cff_start..cff_end];

    // Locate and bounds-check the cmap table.
    let cmap_entry = otf.find_table(b"cmap")?;
    let cmap_start = cmap_entry.offset as usize;
    let cmap_end = cmap_start + cmap_entry.length as usize;
    if cmap_end > font_data.len() {
        return Err(ParseError::SyntaxError {
            position: cmap_start,
            message: "cmap table extends beyond font data".to_string(),
        });
    }
    let cmap_data = &font_data[cmap_start..cmap_end];

    let codepoint_filter: HashSet<u32> = used_chars.iter().map(|&ch| ch as u32).collect();
    let unicode_to_gid = parse_cmap(cmap_data, Some(&codepoint_filter))?;

    // Original glyph IDs to keep: glyph 0 plus every mapped, non-zero glyph.
    let mut needed_gids: Vec<u16> = vec![0];
    needed_gids.extend(
        used_chars
            .iter()
            .filter_map(|&ch| unicode_to_gid.get(&(ch as u32)).copied())
            .filter(|&gid| gid != 0),
    );
    needed_gids.sort_unstable();
    needed_gids.dedup();

    // old glyph ID -> new (dense) glyph ID.
    let gid_remap: HashMap<u16, u16> = needed_gids
        .iter()
        .enumerate()
        .map(|(new_gid, &old_gid)| (old_gid, new_gid as u16))
        .collect();

    // code point -> new glyph ID.
    let new_glyph_mapping: HashMap<u32, u16> = used_chars
        .iter()
        .filter_map(|&ch| {
            let codepoint = ch as u32;
            let old_gid = unicode_to_gid.get(&codepoint)?;
            gid_remap.get(old_gid).map(|&new_gid| (codepoint, new_gid))
        })
        .collect();

    // old glyph ID -> first code point seen for it (set iteration order).
    let mut old_gid_to_unicode: HashMap<u16, u32> = HashMap::new();
    for &ch in used_chars {
        let codepoint = ch as u32;
        if let Some(&gid) = unicode_to_gid.get(&codepoint) {
            old_gid_to_unicode.entry(gid).or_insert(codepoint);
        }
    }

    let new_cff = match subset_cff_table(cff_data, &needed_gids, &gid_remap, &old_gid_to_unicode) {
        Ok(subset) => subset,
        Err(e) => {
            tracing::debug!(
                "CFF table subsetting failed: {:?}; falling back to full font",
                e
            );
            // Best-effort fallback: hand back the whole font with original GIDs.
            let mut original_mapping: HashMap<u32, u16> = HashMap::new();
            for &ch in used_chars {
                let codepoint = ch as u32;
                if let Some(&gid) = unicode_to_gid.get(&codepoint) {
                    original_mapping.insert(codepoint, gid);
                }
            }
            return Ok(CffSubsetResult {
                font_data: font_data.to_vec(),
                glyph_mapping: original_mapping,
                is_raw_cff: false,
            });
        }
    };

    // Re-read the original Top DICT to decide whether the font is CID-keyed.
    let name_index = parse_cff_index(cff_data, cff_data[2] as usize)?;
    let top_dict_index = parse_cff_index(cff_data, name_index.end_offset())?;
    let top_dict_bytes = top_dict_index.get_item(0, cff_data).unwrap_or(&[]);
    let top_dict = parse_top_dict(top_dict_bytes);
    let is_cid = top_dict.fd_array_offset.is_some() || top_dict.fd_select_offset.is_some();

    if is_cid {
        return Ok(CffSubsetResult {
            font_data: new_cff,
            glyph_mapping: new_glyph_mapping,
            is_raw_cff: true,
        });
    }

    let rebuilt = otf.rebuild_subset(font_data, &new_cff, needed_gids.len() as u16)?;
    Ok(CffSubsetResult {
        font_data: rebuilt,
        glyph_mapping: new_glyph_mapping,
        is_raw_cff: false,
    })
}
/// One record of the OpenType table directory, as read by `OtfFile::parse`.
struct OtfTableEntry {
/// Four-byte table tag, e.g. `b"CFF "`.
tag: [u8; 4],
/// Byte offset of the table, used as an index into the font data.
offset: u32,
/// Length of the table in bytes.
length: u32,
}
/// Parsed OpenType header: the version tag plus the table directory.
struct OtfFile {
/// First four bytes of the file, copied verbatim into rebuilt fonts.
sfnt_version: u32,
/// Table directory records in file order.
tables: Vec<OtfTableEntry>,
}
impl OtfFile {
/// Reads the 12-byte header and the table directory from `data`.
///
/// # Errors
/// Returns a `SyntaxError` when `data` is shorter than the header or than
/// the directory the header announces.
fn parse(data: &[u8]) -> ParseResult<Self> {
    const HEADER_LEN: usize = 12;
    const RECORD_LEN: usize = 16;

    if data.len() < HEADER_LEN {
        return Err(ParseError::SyntaxError {
            position: 0,
            message: "OTF file too small".to_string(),
        });
    }
    let sfnt_version = read_u32(data, 0)?;
    let num_tables = read_u16(data, 4)? as usize;
    if data.len() < HEADER_LEN + num_tables * RECORD_LEN {
        return Err(ParseError::SyntaxError {
            position: 0,
            message: "OTF table directory truncated".to_string(),
        });
    }

    // Each record: tag (4), checksum (4, ignored), offset (4), length (4).
    let tables = (0..num_tables)
        .map(|i| {
            let base = HEADER_LEN + i * RECORD_LEN;
            let mut tag = [0u8; 4];
            tag.copy_from_slice(&data[base..base + 4]);
            let offset = read_u32(data, base + 8)?;
            let length = read_u32(data, base + 12)?;
            Ok(OtfTableEntry {
                tag,
                offset,
                length,
            })
        })
        .collect::<ParseResult<Vec<_>>>()?;

    Ok(Self {
        sfnt_version,
        tables,
    })
}
/// Returns the first directory entry whose tag equals `tag`.
///
/// # Errors
/// Returns a `SyntaxError` naming the tag when no such table exists.
fn find_table(&self, tag: &[u8; 4]) -> ParseResult<&OtfTableEntry> {
    for entry in &self.tables {
        if entry.tag == *tag {
            return Ok(entry);
        }
    }
    Err(ParseError::SyntaxError {
        position: 0,
        message: format!("Table {} not found", String::from_utf8_lossy(tag)),
    })
}
/// Tags of the only tables `rebuild_subset` carries over into the rebuilt
/// font; every other table of the original is dropped.
const REQUIRED_TABLES: &'static [&'static [u8; 4]] =
&[b"CFF ", b"head", b"maxp", b"hhea", b"hmtx", b"cmap"];
/// Assembles a new OpenType file around a subset CFF table.
///
/// Only the tables listed in `REQUIRED_TABLES` that exist in the original are
/// kept, in their original directory order. `CFF ` is replaced by `new_cff`,
/// `cmap` by a minimal stub, and `maxp`/`hhea`/`hmtx` by versions patched for
/// `new_glyph_count` when patching succeeds (otherwise the original bytes are
/// copied). Tables are placed on 4-byte boundaries.
///
/// Checksums follow the OpenType procedure: `head.checkSumAdjustment` is
/// zeroed, the `head` directory checksum is computed over that zeroed table,
/// the whole-file sum is then taken, and finally the adjustment
/// `0xB1B0AFBA - sum` is stored. The directory is not touched afterwards, so
/// the finished file sums to `0xB1B0AFBA`.
///
/// # Errors
/// Returns a `SyntaxError` when a table that must be copied from `original`
/// lies outside it.
fn rebuild_subset(
    &self,
    original: &[u8],
    new_cff: &[u8],
    new_glyph_count: u16,
) -> ParseResult<Vec<u8>> {
    let patched_maxp = self.patch_maxp(original, new_glyph_count);
    let patched_hhea = self.patch_hhea(original, new_glyph_count);
    let patched_hmtx = self.truncate_hmtx(original, new_glyph_count);
    let minimal_cmap = Self::build_minimal_cmap();

    // Replacement bytes by tag. Empty entries (failed patches) are filtered
    // out so the original table is copied instead.
    let replacements: std::collections::HashMap<&[u8; 4], &[u8]> = [
        (b"CFF ", new_cff as &[u8]),
        (b"maxp", patched_maxp.as_deref().unwrap_or(&[])),
        (b"hhea", patched_hhea.as_deref().unwrap_or(&[])),
        (b"hmtx", patched_hmtx.as_deref().unwrap_or(&[])),
        (b"cmap", &minimal_cmap as &[u8]),
    ]
    .into_iter()
    .filter(|(_, data)| !data.is_empty())
    .collect();

    let kept_tables: Vec<&OtfTableEntry> = self
        .tables
        .iter()
        .filter(|e| Self::REQUIRED_TABLES.iter().any(|t| *t == &e.tag))
        .collect();

    // Directory header fields (binary-search helpers).
    let num_tables = kept_tables.len() as u16;
    let header_size = 12 + num_tables as usize * 16;
    let entry_selector = if num_tables > 0 {
        (u16::BITS - num_tables.leading_zeros() - 1) as u16
    } else {
        0
    };
    let search_range = (1u16 << entry_selector) * 16;
    // Saturating: with zero tables the plain subtraction would underflow.
    let range_shift = (num_tables * 16).saturating_sub(search_range);

    // Layout pass: assign each table a 4-byte-aligned offset.
    let mut offsets: Vec<u32> = Vec::with_capacity(kept_tables.len());
    let mut current = header_size;
    for entry in &kept_tables {
        while current % 4 != 0 {
            current += 1;
        }
        offsets.push(current as u32);
        let len = if let Some(data) = replacements.get(&entry.tag) {
            data.len()
        } else {
            entry.length as usize
        };
        current += len;
    }
    let total_size = current;

    let mut out = vec![0u8; total_size];
    out[0..4].copy_from_slice(&self.sfnt_version.to_be_bytes());
    out[4..6].copy_from_slice(&num_tables.to_be_bytes());
    out[6..8].copy_from_slice(&search_range.to_be_bytes());
    out[8..10].copy_from_slice(&entry_selector.to_be_bytes());
    out[10..12].copy_from_slice(&range_shift.to_be_bytes());

    // Copy pass: write each directory record and the table bytes.
    for (i, entry) in kept_tables.iter().enumerate() {
        let offset = offsets[i] as usize;
        let table_data: &[u8] = if let Some(data) = replacements.get(&entry.tag) {
            data
        } else {
            let src_start = entry.offset as usize;
            let src_end = src_start + entry.length as usize;
            if src_end > original.len() {
                return Err(ParseError::SyntaxError {
                    position: src_start,
                    message: format!(
                        "Table {} data out of bounds",
                        String::from_utf8_lossy(&entry.tag)
                    ),
                });
            }
            &original[src_start..src_end]
        };
        let checksum = otf_checksum(table_data);
        let dir_base = 12 + i * 16;
        out[dir_base..dir_base + 4].copy_from_slice(&entry.tag);
        out[dir_base + 4..dir_base + 8].copy_from_slice(&checksum.to_be_bytes());
        out[dir_base + 8..dir_base + 12].copy_from_slice(&(offsets[i]).to_be_bytes());
        out[dir_base + 12..dir_base + 16]
            .copy_from_slice(&(table_data.len() as u32).to_be_bytes());
        out[offset..offset + table_data.len()].copy_from_slice(table_data);
    }

    // Fix up `head`: its directory checksum and the file-wide adjustment.
    if let Some(head_idx) = kept_tables.iter().position(|e| &e.tag == b"head") {
        let head_offset = offsets[head_idx] as usize;
        let head_len = replacements
            .get(b"head")
            .map_or(kept_tables[head_idx].length as usize, |data| data.len());
        // checkSumAdjustment occupies bytes 8..12; skip a table too short to
        // hold it rather than overwrite whatever follows.
        if head_len >= 12 {
            let adjustment_field = head_offset + 8..head_offset + 12;

            // 1. Zero the adjustment and checksum `head` in that state.
            out[adjustment_field.clone()].copy_from_slice(&[0u8; 4]);
            let head_checksum = otf_checksum(&out[head_offset..head_offset + head_len]);
            let dir_base = 12 + head_idx * 16;
            out[dir_base + 4..dir_base + 8].copy_from_slice(&head_checksum.to_be_bytes());

            // 2. Sum the whole file (directory now final) and store the
            //    adjustment; nothing else is modified after this.
            let total_checksum = otf_checksum(&out);
            let adjustment = 0xB1B0_AFBAu32.wrapping_sub(total_checksum);
            out[adjustment_field].copy_from_slice(&adjustment.to_be_bytes());
        }
    }
    Ok(out)
}
/// Returns a copy of the `maxp` table with the 16-bit field at bytes 4..6
/// (numGlyphs) set to `new_glyph_count`.
///
/// Returns `None` when the table is absent, lies outside `original`, or is
/// shorter than 6 bytes.
fn patch_maxp(&self, original: &[u8], new_glyph_count: u16) -> Option<Vec<u8>> {
    let entry = self.tables.iter().find(|e| e.tag == *b"maxp")?;
    let start = entry.offset as usize;
    let end = start + entry.length as usize;
    let table = original.get(start..end)?;
    if table.len() < 6 {
        return None;
    }
    let mut patched = table.to_vec();
    patched[4..6].copy_from_slice(&new_glyph_count.to_be_bytes());
    Some(patched)
}
/// Returns a copy of the `hhea` table with the 16-bit field at bytes 34..36
/// (numberOfHMetrics) set to `new_glyph_count`.
///
/// Returns `None` when the table is absent, lies outside `original`, or is
/// shorter than 36 bytes.
fn patch_hhea(&self, original: &[u8], new_glyph_count: u16) -> Option<Vec<u8>> {
    let entry = self.tables.iter().find(|e| e.tag == *b"hhea")?;
    let start = entry.offset as usize;
    let end = start + entry.length as usize;
    let table = original.get(start..end)?;
    if table.len() < 36 {
        return None;
    }
    let mut patched = table.to_vec();
    patched[34..36].copy_from_slice(&new_glyph_count.to_be_bytes());
    Some(patched)
}
/// Returns the leading `new_glyph_count * 4` bytes of the `hmtx` table
/// (fewer if the table is shorter), or `None` when the table is absent or
/// that span lies outside `original`.
///
/// NOTE(review): this keeps the records of the first `new_glyph_count`
/// original glyphs, not those of the glyphs retained by the subset —
/// confirm callers do not rely on these widths.
fn truncate_hmtx(&self, original: &[u8], new_glyph_count: u16) -> Option<Vec<u8>> {
    let entry = self.tables.iter().find(|e| e.tag == *b"hmtx")?;
    let start = entry.offset as usize;
    let wanted = usize::from(new_glyph_count) * 4;
    let take = wanted.min(entry.length as usize);
    original.get(start..start + take).map(|bytes| bytes.to_vec())
}
/// Builds a minimal `cmap` table: one (platform 3, encoding 1) record
/// pointing at a format 4 subtable that holds only the mandatory terminating
/// segment (0xFFFF..=0xFFFF).
///
/// The subtable `length` field is 16 fixed bytes (seven header fields plus
/// `reservedPad`) plus four 16-bit arrays of `segCount` entries, which
/// matches the 24 bytes actually emitted.
fn build_minimal_cmap() -> Vec<u8> {
    let seg_count: u16 = 1;
    let seg_count_x2 = seg_count * 2;
    let subtable_length: u16 = 16 + seg_count_x2 * 4;

    let mut cmap = Vec::with_capacity(12 + usize::from(subtable_length));

    // cmap header and its single encoding record.
    cmap.extend_from_slice(&0u16.to_be_bytes()); // version
    cmap.extend_from_slice(&1u16.to_be_bytes()); // numTables
    cmap.extend_from_slice(&3u16.to_be_bytes()); // platformID
    cmap.extend_from_slice(&1u16.to_be_bytes()); // encodingID
    cmap.extend_from_slice(&12u32.to_be_bytes()); // subtable offset

    // Format 4 subtable header.
    cmap.extend_from_slice(&4u16.to_be_bytes()); // format
    cmap.extend_from_slice(&subtable_length.to_be_bytes()); // length
    cmap.extend_from_slice(&0u16.to_be_bytes()); // language
    cmap.extend_from_slice(&seg_count_x2.to_be_bytes()); // segCountX2
    cmap.extend_from_slice(&2u16.to_be_bytes()); // searchRange
    cmap.extend_from_slice(&0u16.to_be_bytes()); // entrySelector
    cmap.extend_from_slice(&0u16.to_be_bytes()); // rangeShift

    // The single terminating segment.
    cmap.extend_from_slice(&0xFFFFu16.to_be_bytes()); // endCode[0]
    cmap.extend_from_slice(&0u16.to_be_bytes()); // reservedPad
    cmap.extend_from_slice(&0xFFFFu16.to_be_bytes()); // startCode[0]
    cmap.extend_from_slice(&1u16.to_be_bytes()); // idDelta[0]
    cmap.extend_from_slice(&0u16.to_be_bytes()); // idRangeOffset[0]
    cmap
}
}
/// Location and item boundaries of one CFF INDEX structure.
struct CffIndex {
    /// Offset of the INDEX's count field within the CFF data.
    start_offset: usize,
    /// Total size of the INDEX in bytes (header, offset array and data).
    byte_length: usize,
    /// Absolute offsets of item boundaries: `count + 1` entries, or empty
    /// for an empty INDEX.
    item_offsets: Vec<usize>,
}

impl CffIndex {
    /// Offset of the first byte after this INDEX.
    fn end_offset(&self) -> usize {
        self.start_offset + self.byte_length
    }

    /// Number of items stored in the INDEX.
    fn count(&self) -> usize {
        self.item_offsets.len().saturating_sub(1)
    }

    /// Bytes of item `index`, or `None` when the index is out of range or
    /// the recorded boundaries do not describe a valid span of `cff`.
    fn get_item<'a>(&self, index: usize, cff: &'a [u8]) -> Option<&'a [u8]> {
        let start = *self.item_offsets.get(index)?;
        let end = *self.item_offsets.get(index + 1)?;
        cff.get(start..end)
    }

    /// The whole INDEX as stored, with its end clamped to the end of `cff`.
    fn raw_bytes<'a>(&self, cff: &'a [u8]) -> &'a [u8] {
        let clamped_end = self.end_offset().min(cff.len());
        &cff[self.start_offset..clamped_end]
    }
}
/// Parses the CFF INDEX that starts at byte `pos` of `cff`.
///
/// The returned `item_offsets` are absolute offsets into `cff`. An INDEX with
/// a zero count occupies just its 2-byte count field. Item data extending
/// past the end of `cff` is tolerated here; accessors on `CffIndex` deal with
/// out-of-range spans.
///
/// # Errors
/// Returns a `SyntaxError` when the count, offSize or offset array is
/// truncated, when offSize is not in 1..=4, or when a stored offset is 0
/// (INDEX offsets are 1-based, so 0 cannot be converted to a position).
fn parse_cff_index(cff: &[u8], pos: usize) -> ParseResult<CffIndex> {
    // Written as a subtraction so a huge `pos` cannot overflow the check.
    if pos > cff.len() || cff.len() - pos < 2 {
        return Err(ParseError::SyntaxError {
            position: pos,
            message: "CFF INDEX truncated (count)".to_string(),
        });
    }
    let count = u16::from_be_bytes([cff[pos], cff[pos + 1]]) as usize;
    if count == 0 {
        return Ok(CffIndex {
            start_offset: pos,
            byte_length: 2,
            item_offsets: vec![],
        });
    }
    if cff.len() - pos < 3 {
        return Err(ParseError::SyntaxError {
            position: pos + 2,
            message: "CFF INDEX truncated (offSize)".to_string(),
        });
    }
    let off_size = cff[pos + 2] as usize;
    if !(1..=4).contains(&off_size) {
        return Err(ParseError::SyntaxError {
            position: pos + 2,
            message: format!("CFF INDEX invalid offSize: {}", off_size),
        });
    }
    let offsets_start = pos + 3;
    // At most 65536 * 4 bytes, so this product cannot overflow.
    let offsets_len = (count + 1) * off_size;
    if cff.len() - offsets_start < offsets_len {
        return Err(ParseError::SyntaxError {
            position: offsets_start,
            message: "CFF INDEX offset array truncated".to_string(),
        });
    }
    // Stored offsets are relative to the byte *before* the data area.
    let data_base = offsets_start + offsets_len;
    let mut item_offsets = Vec::with_capacity(count + 1);
    for i in 0..=count {
        let off_pos = offsets_start + i * off_size;
        let raw_offset = read_offset(cff, off_pos, off_size)? as usize;
        let abs_offset = raw_offset
            .checked_sub(1)
            .and_then(|rel| data_base.checked_add(rel))
            .ok_or_else(|| ParseError::SyntaxError {
                position: off_pos,
                message: format!("CFF INDEX invalid offset: {}", raw_offset),
            })?;
        item_offsets.push(abs_offset);
    }
    // Every entry is >= data_base, so this subtraction cannot underflow.
    let data_len = item_offsets[count] - data_base;
    let byte_length = 3 + offsets_len + data_len;
    Ok(CffIndex {
        start_offset: pos,
        byte_length,
        item_offsets,
    })
}
/// Reads a big-endian unsigned integer of `off_size` bytes (1 to 4) starting
/// at `data[pos]`.
///
/// # Errors
/// Returns a `SyntaxError` when the bytes lie outside `data` or when
/// `off_size` is not in 1..=4 (the bounds check is reported first).
fn read_offset(data: &[u8], pos: usize, off_size: usize) -> ParseResult<u32> {
    if pos + off_size > data.len() {
        return Err(ParseError::SyntaxError {
            position: pos,
            message: "read_offset: out of bounds".to_string(),
        });
    }
    if !(1..=4).contains(&off_size) {
        return Err(ParseError::SyntaxError {
            position: pos,
            message: format!("read_offset: invalid off_size {}", off_size),
        });
    }
    // Accumulate most-significant byte first.
    let value = data[pos..pos + off_size]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte));
    Ok(value)
}
/// Converts a byte position to the `i32` used for CFF DICT offset operands.
///
/// # Errors
/// Returns a `SyntaxError` when `val` does not fit in an `i32`.
pub fn usize_to_cff_offset(val: usize) -> ParseResult<i32> {
    match i32::try_from(val) {
        Ok(offset) => Ok(offset),
        Err(_) => Err(ParseError::SyntaxError {
            position: 0,
            message: format!("CFF offset {} exceeds i32 range", val),
        }),
    }
}
/// Serializes `items` as a CFF INDEX: a 16-bit count, the offset size, the
/// `count + 1` one-based offsets, then the item bytes back to back.
///
/// An empty `items` slice yields just the two-byte zero count. The offset
/// size is the smallest of 1..=4 bytes able to hold the final offset
/// (total item length + 1).
pub fn build_cff_index(items: &[&[u8]]) -> Vec<u8> {
    let mut out = Vec::new();
    out.extend_from_slice(&(items.len() as u16).to_be_bytes());
    if items.is_empty() {
        return out;
    }

    let payload_len: usize = items.iter().map(|item| item.len()).sum();
    let off_size: u8 = match payload_len + 1 {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        0x1_0000..=0xFF_FFFF => 3,
        _ => 4,
    };
    out.push(off_size);

    // First offset is always 1; each later one is the running end position.
    let mut next_offset: u32 = 1;
    write_offset(&mut out, next_offset, off_size);
    for item in items {
        next_offset += item.len() as u32;
        write_offset(&mut out, next_offset, off_size);
    }

    for item in items {
        out.extend_from_slice(item);
    }
    out
}
/// Appends the low `off_size` bytes of `value` to `out` in big-endian order.
///
/// # Panics
/// Panics when `off_size` is not in 1..=4.
fn write_offset(out: &mut Vec<u8>, value: u32, off_size: u8) {
    let be = value.to_be_bytes();
    match off_size {
        // The last `off_size` bytes of the big-endian form are the low bytes.
        1..=4 => out.extend_from_slice(&be[4 - off_size as usize..]),
        _ => unreachable!("write_offset called with invalid off_size {}", off_size),
    }
}
/// Offsets read from a Top DICT by `parse_top_dict`; a field is `None` when
/// its operator was absent or had no operand.
#[derive(Debug, Default)]
struct TopDictOffsets {
/// Operand of operator 17 (CharStrings).
charstrings_offset: Option<i32>,
/// Operand of operator 15 (charset).
charset_offset: Option<i32>,
/// Operands of operator 18 (Private), stored as `(size, offset)`.
private_dict: Option<(i32, i32)>,
/// Operand of escaped operator 12 36 (FDArray).
fd_array_offset: Option<i32>,
/// Operand of escaped operator 12 37 (FDSelect).
fd_select_offset: Option<i32>,
}
/// Extracts the offsets this module cares about from Top DICT bytes.
///
/// Operands accumulate until an operator is met; the operator then reads the
/// trailing operand(s) and the accumulator is reset. Operators with too few
/// operands leave their field unchanged.
fn parse_top_dict(data: &[u8]) -> TopDictOffsets {
    let mut found = TopDictOffsets::default();
    let mut pending: Vec<i32> = Vec::new();

    for token in CffDictScanner::new(data) {
        match token {
            CffDictToken::Operand(value) => {
                pending.push(value);
                continue;
            }
            CffDictToken::EscapedOperator(36) => {
                if let Some(&offset) = pending.last() {
                    found.fd_array_offset = Some(offset);
                }
            }
            CffDictToken::EscapedOperator(37) => {
                if let Some(&offset) = pending.last() {
                    found.fd_select_offset = Some(offset);
                }
            }
            CffDictToken::Operator(15) => {
                if let Some(&offset) = pending.last() {
                    found.charset_offset = Some(offset);
                }
            }
            CffDictToken::Operator(17) => {
                if let Some(&offset) = pending.last() {
                    found.charstrings_offset = Some(offset);
                }
            }
            CffDictToken::Operator(18) => {
                // Private takes two operands: size then offset.
                if let [.., size, offset] = pending.as_slice() {
                    found.private_dict = Some((*size, *offset));
                }
            }
            CffDictToken::EscapedOperator(_) | CffDictToken::Operator(_) => {}
        }
        // Reached only for operators: their operands are now consumed.
        pending.clear();
    }
    found
}
/// Encodes `value` as a fixed-width DICT integer: lead byte 29 followed by
/// the four big-endian bytes of `value`.
fn encode_cff_int_5byte(value: i32) -> [u8; 5] {
    let mut encoded = [29u8; 5];
    encoded[1..].copy_from_slice(&value.to_be_bytes());
    encoded
}
/// Re-emits a CID-keyed Top DICT with fresh offsets.
///
/// Entries for charset (15), CharStrings (17), FDArray (12 36) and FDSelect
/// (12 37) are rewritten with the given offsets in fixed 5-byte form; a
/// Private entry (18) is dropped; every other entry is copied byte for byte.
/// Trailing operands not followed by an operator are discarded.
fn rebuild_cid_top_dict(
    original: &[u8],
    charset_offset: i32,
    charstrings_offset: i32,
    fd_array_offset: i32,
    fd_select_offset: i32,
) -> Vec<u8> {
    let mut out = Vec::new();
    let mut scanner = CffDictScanner::new(original);
    // Start of the current entry (its operands followed by its operator).
    let mut entry_start = 0usize;

    while let Some(token) = scanner.next() {
        let entry_end = scanner.position();
        match token {
            // Operands stay part of the entry until its operator arrives.
            CffDictToken::Operand(_) => continue,
            CffDictToken::EscapedOperator(36) => {
                out.extend_from_slice(&encode_cff_int_5byte(fd_array_offset));
                out.extend_from_slice(&[12, 36]);
            }
            CffDictToken::EscapedOperator(37) => {
                out.extend_from_slice(&encode_cff_int_5byte(fd_select_offset));
                out.extend_from_slice(&[12, 37]);
            }
            CffDictToken::Operator(15) => {
                out.extend_from_slice(&encode_cff_int_5byte(charset_offset));
                out.push(15);
            }
            CffDictToken::Operator(17) => {
                out.extend_from_slice(&encode_cff_int_5byte(charstrings_offset));
                out.push(17);
            }
            // Private entry is omitted from the output.
            CffDictToken::Operator(18) => {}
            CffDictToken::EscapedOperator(_) | CffDictToken::Operator(_) => {
                out.extend_from_slice(&original[entry_start..entry_end]);
            }
        }
        entry_start = entry_end;
    }
    out
}
/// Re-emits a Font DICT, replacing the operands of its Private entry (18)
/// with `private_size` and `private_offset` in fixed 5-byte form.
///
/// All other entries are copied byte for byte. If the original has no
/// Private entry, none is added. Trailing operands not followed by an
/// operator are discarded.
fn rebuild_fd_dict(original: &[u8], private_size: i32, private_offset: i32) -> Vec<u8> {
    let mut out = Vec::new();
    let mut scanner = CffDictScanner::new(original);
    // Start of the current entry (its operands followed by its operator).
    let mut entry_start = 0usize;

    while let Some(token) = scanner.next() {
        let entry_end = scanner.position();
        match token {
            // Operands stay part of the entry until its operator arrives.
            CffDictToken::Operand(_) => continue,
            CffDictToken::Operator(18) => {
                out.extend_from_slice(&encode_cff_int_5byte(private_size));
                out.extend_from_slice(&encode_cff_int_5byte(private_offset));
                out.push(18);
            }
            CffDictToken::Operator(_) | CffDictToken::EscapedOperator(_) => {
                out.extend_from_slice(&original[entry_start..entry_end]);
            }
        }
        entry_start = entry_end;
    }
    out
}
/// Re-emits a (non-CID) Top DICT with fresh offsets.
///
/// Entries for charset (15) and CharStrings (17) are rewritten with the
/// given offsets in fixed 5-byte form. A Private entry (18) is rewritten
/// with `private_size`/`private_offset` when `has_private` is true and
/// dropped otherwise. Every other entry is copied byte for byte; trailing
/// operands not followed by an operator are discarded.
fn rebuild_top_dict(
    original: &[u8],
    charset_offset: i32,
    charstrings_offset: i32,
    private_size: i32,
    private_offset: i32,
    has_private: bool,
) -> Vec<u8> {
    let mut out = Vec::new();
    let mut scanner = CffDictScanner::new(original);
    // Start of the current entry (its operands followed by its operator).
    let mut entry_start = 0usize;

    while let Some(token) = scanner.next() {
        let entry_end = scanner.position();
        match token {
            // Operands stay part of the entry until its operator arrives.
            CffDictToken::Operand(_) => continue,
            CffDictToken::Operator(15) => {
                out.extend_from_slice(&encode_cff_int_5byte(charset_offset));
                out.push(15);
            }
            CffDictToken::Operator(17) => {
                out.extend_from_slice(&encode_cff_int_5byte(charstrings_offset));
                out.push(17);
            }
            CffDictToken::Operator(18) => {
                if has_private {
                    out.extend_from_slice(&encode_cff_int_5byte(private_size));
                    out.extend_from_slice(&encode_cff_int_5byte(private_offset));
                    out.push(18);
                }
            }
            CffDictToken::Operator(_) | CffDictToken::EscapedOperator(_) => {
                out.extend_from_slice(&original[entry_start..entry_end]);
            }
        }
        entry_start = entry_end;
    }
    out
}
/// Expands the FDSelect structure at `offset` into one FD index per glyph.
///
/// Supports format 0 (one byte per glyph) and format 3 (ranges closed by a
/// sentinel glyph ID). For format 3, glyphs covered by no range keep FD 0
/// and ranges are clipped to `num_glyphs`.
///
/// # Errors
/// Returns a `SyntaxError` when `offset` is outside `cff`, the data is
/// truncated, or the format byte is neither 0 nor 3.
fn parse_fd_select(cff: &[u8], offset: usize, num_glyphs: usize) -> ParseResult<Vec<u8>> {
    let fail = |message: &str| ParseError::SyntaxError {
        position: offset,
        message: message.to_string(),
    };

    let format = match cff.get(offset) {
        Some(&byte) => byte,
        None => return Err(fail("FDSelect offset out of range")),
    };

    match format {
        0 => {
            let body = offset + 1;
            match cff.get(body..body + num_glyphs) {
                Some(fds) => Ok(fds.to_vec()),
                None => Err(fail("FDSelect Format 0 truncated")),
            }
        }
        3 => {
            if offset + 3 > cff.len() {
                return Err(fail("FDSelect Format 3 header truncated"));
            }
            let n_ranges = read_u16(cff, offset + 1)? as usize;
            let first_record = offset + 3;
            // Records are 3 bytes each and are followed by a 2-byte sentinel.
            if first_record + n_ranges * 3 + 2 > cff.len() {
                return Err(fail("FDSelect Format 3 ranges truncated"));
            }

            let mut fd_by_gid = vec![0u8; num_glyphs];
            for i in 0..n_ranges {
                let record = first_record + i * 3;
                let first_gid = read_u16(cff, record)? as usize;
                let fd_idx = cff[record + 2];
                // A range ends where the next record begins; for the last
                // record the sentinel sits at that same position.
                let end_gid = (read_u16(cff, record + 3)? as usize).min(num_glyphs);
                if let Some(span) = fd_by_gid.get_mut(first_gid..end_gid) {
                    span.fill(fd_idx);
                }
            }
            Ok(fd_by_gid)
        }
        _ => Err(ParseError::SyntaxError {
            position: offset,
            message: format!("FDSelect format {} not supported", format),
        }),
    }
}
/// Finds the Private entry (operator 18) of a Font DICT and returns its
/// operands as `(size, offset)`.
///
/// Returns `None` when no Private operator preceded by at least two
/// operands is present.
fn parse_fd_private(fd_dict: &[u8]) -> Option<(i32, i32)> {
    let mut pending: Vec<i32> = Vec::new();
    for token in CffDictScanner::new(fd_dict) {
        match token {
            CffDictToken::Operand(value) => pending.push(value),
            CffDictToken::Operator(18) => {
                if let [.., size, offset] = pending.as_slice() {
                    return Some((*size, *offset));
                }
                pending.clear();
            }
            CffDictToken::Operator(_) | CffDictToken::EscapedOperator(_) => pending.clear(),
        }
    }
    None
}
/// Bytes gathered for one retained Font DICT while subsetting a CID font.
struct FdData {
/// The Font DICT item as stored in the original FDArray.
fd_dict_bytes: Vec<u8>,
/// The Private DICT the Font DICT points at; empty when it has none.
private_bytes: Vec<u8>,
/// Raw bytes of the local subroutine INDEX; empty when none was found.
local_subr_bytes: Vec<u8>,
}
fn subset_cid_cff_table(
cff: &[u8],
needed_gids: &[u16],
gid_remap: &HashMap<u16, u16>,
old_gid_to_unicode: &HashMap<u16, u32>,
top_dict_bytes: &[u8],
top_dict_offsets: &TopDictOffsets,
name_index: &CffIndex,
_top_dict_index: &CffIndex,
string_index: &CffIndex,
global_subr_index: &CffIndex,
) -> ParseResult<Vec<u8>> {
let hdr_size = cff[2] as usize;
let header_bytes = &cff[0..hdr_size];
let fd_array_off = top_dict_offsets
.fd_array_offset
.ok_or_else(|| ParseError::SyntaxError {
position: 0,
message: "CIDFont missing FDArray offset".to_string(),
})?;
let fd_select_off =
top_dict_offsets
.fd_select_offset
.ok_or_else(|| ParseError::SyntaxError {
position: 0,
message: "CIDFont missing FDSelect offset".to_string(),
})?;
let charstrings_off =
top_dict_offsets
.charstrings_offset
.ok_or_else(|| ParseError::SyntaxError {
position: 0,
message: "CIDFont missing CharStrings offset".to_string(),
})?;
if charstrings_off <= 0 || charstrings_off as usize >= cff.len() {
return Err(ParseError::SyntaxError {
position: 0,
message: format!("CharStrings offset out of range: {}", charstrings_off),
});
}
let charstrings_index = parse_cff_index(cff, charstrings_off as usize)?;
let total_glyphs = charstrings_index.count();
tracing::debug!(
"CID CFF subsetting: {} total glyphs, {} needed",
total_glyphs,
needed_gids.len()
);
for &gid in needed_gids {
if gid as usize >= total_glyphs {
return Err(ParseError::SyntaxError {
position: 0,
message: format!(
"GID {} out of range (font has {} glyphs)",
gid, total_glyphs
),
});
}
}
if fd_select_off <= 0 || fd_select_off as usize >= cff.len() {
return Err(ParseError::SyntaxError {
position: 0,
message: format!("FDSelect offset out of range: {}", fd_select_off),
});
}
let fd_select = parse_fd_select(cff, fd_select_off as usize, total_glyphs)?;
if fd_array_off <= 0 || fd_array_off as usize >= cff.len() {
return Err(ParseError::SyntaxError {
position: 0,
message: format!("FDArray offset out of range: {}", fd_array_off),
});
}
let fd_array_index = parse_cff_index(cff, fd_array_off as usize)?;
let num_fds = fd_array_index.count();
tracing::debug!("CID CFF: {} FDs in FDArray", num_fds);
let mut needed_fd_set: std::collections::BTreeSet<u8> = std::collections::BTreeSet::new();
for &gid in needed_gids {
let fd = if (gid as usize) < fd_select.len() {
fd_select[gid as usize]
} else {
0
};
needed_fd_set.insert(fd);
}
let needed_fds: Vec<u8> = needed_fd_set.into_iter().collect();
tracing::debug!("CID CFF: needed FDs: {:?}", needed_fds);
let fd_remap: HashMap<u8, u8> = needed_fds
.iter()
.enumerate()
.map(|(new_fd, &old_fd)| (old_fd, new_fd as u8))
.collect();
let sorted_old_gids: Vec<u16> = {
let mut pairs: Vec<(u16, u16)> = needed_gids
.iter()
.filter_map(|&old_gid| gid_remap.get(&old_gid).map(|&new_gid| (new_gid, old_gid)))
.collect();
pairs.sort_by_key(|&(new, _)| new);
let mut gids: Vec<u16> = pairs.iter().map(|&(_, old)| old).collect();
if gids.first().copied() != Some(0) {
gids.retain(|&g| g != 0);
gids.insert(0, 0);
}
gids
};
let new_charstrings: Vec<&[u8]> = sorted_old_gids
.iter()
.map(|&old_gid| {
charstrings_index
.get_item(old_gid as usize, cff)
.unwrap_or(&[0x0E])
})
.collect();
let new_charstrings_index = build_cff_index(&new_charstrings);
let mut new_fd_select: Vec<u8> = Vec::new();
new_fd_select.push(0); for &old_gid in &sorted_old_gids {
let old_fd = if (old_gid as usize) < fd_select.len() {
fd_select[old_gid as usize]
} else {
0
};
let new_fd = fd_remap.get(&old_fd).copied().unwrap_or(0);
new_fd_select.push(new_fd);
}
let mut new_charset: Vec<u8> = Vec::new();
new_charset.push(0); for (new_gid_idx, &old_gid) in sorted_old_gids.iter().enumerate() {
if new_gid_idx == 0 {
continue; }
let cid = old_gid_to_unicode
.get(&old_gid)
.copied()
.unwrap_or(old_gid as u32);
let cid16 = if cid > 0xFFFF {
tracing::debug!(
"CFF subsetter: U+{:X} exceeds 16-bit CID range for GID {}, using GID as CID",
cid,
old_gid
);
old_gid
} else {
cid as u16
};
new_charset.extend_from_slice(&cid16.to_be_bytes());
}
let mut fd_data_list: Vec<FdData> = Vec::new();
for &old_fd in &needed_fds {
let fd_dict = fd_array_index
.get_item(old_fd as usize, cff)
.ok_or_else(|| ParseError::SyntaxError {
position: 0,
message: format!("FD {} not found in FDArray", old_fd),
})?;
let (private_bytes, local_subr_bytes) =
if let Some((priv_size, priv_off)) = parse_fd_private(fd_dict) {
if priv_off > 0 && priv_size > 0 {
let start = priv_off as usize;
let end = (start + priv_size as usize).min(cff.len());
let pb = cff[start..end].to_vec();
let ls = if let Some(subrs_rel) = parse_local_subrs_offset(&pb) {
let subrs_abs = start + subrs_rel;
match parse_cff_index(cff, subrs_abs) {
Ok(idx) if idx.count() > 0 => idx.raw_bytes(cff).to_vec(),
_ => vec![],
}
} else if end < cff.len() {
match parse_cff_index(cff, end) {
Ok(idx) if idx.count() > 0 => idx.raw_bytes(cff).to_vec(),
_ => vec![],
}
} else {
vec![]
};
(pb, ls)
} else {
(vec![], vec![])
}
} else {
(vec![], vec![])
};
fd_data_list.push(FdData {
fd_dict_bytes: fd_dict.to_vec(),
private_bytes,
local_subr_bytes,
});
}
for fd in &mut fd_data_list {
if !fd.local_subr_bytes.is_empty() {
let orig_len = fd.private_bytes.len();
patch_private_subrs_offset(&mut fd.private_bytes, 0);
let after_first = fd.private_bytes.len();
let subrs_offset = after_first as i32;
patch_private_subrs_offset(&mut fd.private_bytes, subrs_offset);
tracing::debug!(
"Private DICT patch: {} → {} bytes, op19={}",
orig_len,
fd.private_bytes.len(),
subrs_offset
);
}
}
let name_bytes = name_index.raw_bytes(cff);
let string_bytes = string_index.raw_bytes(cff);
let global_subr_bytes = global_subr_index.raw_bytes(cff);
let placeholder_offset = 100_000i32;
let placeholder_top_dict = rebuild_cid_top_dict(
top_dict_bytes,
placeholder_offset,
placeholder_offset,
placeholder_offset,
placeholder_offset,
);
let placeholder_top_dict_ref: &[u8] = &placeholder_top_dict;
let placeholder_top_dict_index = build_cff_index(&[placeholder_top_dict_ref]);
let placeholder_fd_dicts: Vec<Vec<u8>> = fd_data_list
.iter()
.map(|fd| {
let private_size = usize_to_cff_offset(fd.private_bytes.len())?;
Ok(rebuild_fd_dict(
&fd.fd_dict_bytes,
private_size,
placeholder_offset,
))
})
.collect::<ParseResult<Vec<_>>>()?;
let placeholder_fd_refs: Vec<&[u8]> =
placeholder_fd_dicts.iter().map(|v| v.as_slice()).collect();
let placeholder_fd_array_index = build_cff_index(&placeholder_fd_refs);
let after_header = header_bytes.len();
let after_name = after_header + name_bytes.len();
let after_top_dict = after_name + placeholder_top_dict_index.len();
let after_string = after_top_dict + string_bytes.len();
let after_global_subr = after_string + global_subr_bytes.len();
let new_charset_offset = usize_to_cff_offset(after_global_subr)?;
let new_fd_select_offset = usize_to_cff_offset(after_global_subr + new_charset.len())?;
let new_charstrings_offset =
usize_to_cff_offset(new_fd_select_offset as usize + new_fd_select.len())?;
let new_fd_array_offset =
usize_to_cff_offset(new_charstrings_offset as usize + new_charstrings_index.len())?;
let after_fd_array = new_fd_array_offset as usize + placeholder_fd_array_index.len();
let mut private_offsets: Vec<i32> = Vec::new();
let mut cursor = after_fd_array;
for fd in &fd_data_list {
private_offsets.push(usize_to_cff_offset(cursor)?);
cursor += fd.private_bytes.len() + fd.local_subr_bytes.len();
}
let real_fd_dicts: Vec<Vec<u8>> = fd_data_list
.iter()
.zip(private_offsets.iter())
.map(|(fd, &priv_off)| {
let private_size = usize_to_cff_offset(fd.private_bytes.len())?;
Ok(rebuild_fd_dict(&fd.fd_dict_bytes, private_size, priv_off))
})
.collect::<ParseResult<Vec<_>>>()?;
let real_fd_refs: Vec<&[u8]> = real_fd_dicts.iter().map(|v| v.as_slice()).collect();
let real_fd_array_index = build_cff_index(&real_fd_refs);
if real_fd_array_index.len() != placeholder_fd_array_index.len() {
return Err(ParseError::SyntaxError {
position: 0,
message: format!(
"FDArray size changed between passes ({} vs {})",
real_fd_array_index.len(),
placeholder_fd_array_index.len()
),
});
}
let real_top_dict = rebuild_cid_top_dict(
top_dict_bytes,
new_charset_offset,
new_charstrings_offset,
new_fd_array_offset,
new_fd_select_offset,
);
let real_top_dict_ref: &[u8] = &real_top_dict;
let real_top_dict_index = build_cff_index(&[real_top_dict_ref]);
if real_top_dict_index.len() != placeholder_top_dict_index.len() {
return Err(ParseError::SyntaxError {
position: 0,
message: format!(
"CID Top DICT size changed between passes ({} vs {})",
real_top_dict_index.len(),
placeholder_top_dict_index.len()
),
});
}
let mut new_cff: Vec<u8> = Vec::new();
new_cff.extend_from_slice(header_bytes);
new_cff.extend_from_slice(name_bytes);
new_cff.extend_from_slice(&real_top_dict_index);
new_cff.extend_from_slice(string_bytes);
new_cff.extend_from_slice(global_subr_bytes);
new_cff.extend_from_slice(&new_charset);
new_cff.extend_from_slice(&new_fd_select);
new_cff.extend_from_slice(&new_charstrings_index);
new_cff.extend_from_slice(&real_fd_array_index);
for fd in &fd_data_list {
new_cff.extend_from_slice(&fd.private_bytes);
new_cff.extend_from_slice(&fd.local_subr_bytes);
}
tracing::debug!(
"CID CFF subset: {} bytes → {} bytes ({} glyphs, {} FDs)",
cff.len(),
new_cff.len(),
sorted_old_gids.len(),
needed_fds.len()
);
Ok(new_cff)
}
/// Subsets a CFF table so that it only carries the glyphs in `needed_gids`.
///
/// The header, Name INDEX, String INDEX and Global Subr INDEX are copied
/// verbatim. The CharStrings INDEX and the charset are rebuilt in new-GID
/// order (as given by `gid_remap`, with glyph 0 always first), and the Top DICT
/// is re-encoded with the new charset / CharStrings / Private offsets.
///
/// When the Top DICT carries an FDArray or FDSelect offset the work is handed
/// to `subset_cid_cff_table`; `old_gid_to_unicode` is only forwarded to that
/// path and is otherwise unused here.
///
/// # Errors
/// Returns `ParseError::SyntaxError` when the table is shorter than four
/// bytes, when `hdrSize` exceeds the table, when one of the INDEX structures
/// cannot be parsed, when the Top DICT INDEX is empty or unreadable, when the
/// CharStrings offset is missing or out of range, when a requested GID is not
/// present in the font, when `usize_to_cff_offset` rejects a computed offset,
/// or when the re-encoded Top DICT changes size between the two layout passes.
fn subset_cff_table(
    cff: &[u8],
    needed_gids: &[u16],
    gid_remap: &HashMap<u16, u16>,
    old_gid_to_unicode: &HashMap<u16, u32>,
) -> ParseResult<Vec<u8>> {
    if cff.len() < 4 {
        return Err(ParseError::SyntaxError {
            position: 0,
            message: "CFF table too small".to_string(),
        });
    }
    // Byte 2 of the header is hdrSize; the Name INDEX starts right after it.
    let hdr_size = cff[2] as usize;
    if hdr_size > cff.len() {
        return Err(ParseError::SyntaxError {
            position: 2,
            message: "CFF hdrSize larger than table".to_string(),
        });
    }
    let header_bytes = &cff[0..hdr_size];
    let name_index = parse_cff_index(cff, hdr_size)?;
    let top_dict_index = parse_cff_index(cff, name_index.end_offset())?;
    if top_dict_index.count() == 0 {
        return Err(ParseError::SyntaxError {
            position: name_index.end_offset(),
            message: "CFF Top DICT INDEX is empty".to_string(),
        });
    }
    let top_dict_bytes =
        top_dict_index
            .get_item(0, cff)
            .ok_or_else(|| ParseError::SyntaxError {
                position: top_dict_index.start_offset,
                message: "Cannot read Top DICT item 0".to_string(),
            })?;
    let top_dict_offsets = parse_top_dict(top_dict_bytes);
    let string_index = parse_cff_index(cff, top_dict_index.end_offset())?;
    let global_subr_index = parse_cff_index(cff, string_index.end_offset())?;

    // An FDArray or FDSelect offset in the Top DICT selects the CID code path.
    if top_dict_offsets.fd_array_offset.is_some() || top_dict_offsets.fd_select_offset.is_some() {
        return subset_cid_cff_table(
            cff,
            needed_gids,
            gid_remap,
            old_gid_to_unicode,
            top_dict_bytes,
            &top_dict_offsets,
            &name_index,
            &top_dict_index,
            &string_index,
            &global_subr_index,
        );
    }

    let charstrings_offset =
        top_dict_offsets
            .charstrings_offset
            .ok_or_else(|| ParseError::SyntaxError {
                position: 0,
                message: "Top DICT has no CharStrings offset".to_string(),
            })?;
    if charstrings_offset <= 0 || charstrings_offset as usize >= cff.len() {
        return Err(ParseError::SyntaxError {
            position: 0,
            message: format!("CharStrings offset out of range: {}", charstrings_offset),
        });
    }
    let charstrings_index = parse_cff_index(cff, charstrings_offset as usize)?;
    let total_glyphs = charstrings_index.count();
    for &gid in needed_gids {
        if gid as usize >= total_glyphs {
            return Err(ParseError::SyntaxError {
                position: 0,
                message: format!(
                    "GID {} out of range (font has {} glyphs)",
                    gid, total_glyphs
                ),
            });
        }
    }

    // Old GIDs ordered by the new GID they are remapped to; GIDs without a
    // remap entry are dropped.
    let mut sorted_new: Vec<u16> = {
        let mut pairs: Vec<(u16, u16)> = needed_gids
            .iter()
            .filter_map(|&old_gid| gid_remap.get(&old_gid).map(|&new_gid| (new_gid, old_gid)))
            .collect();
        pairs.sort_by_key(|&(new, _)| new);
        pairs.iter().map(|&(_, old)| old).collect()
    };
    // Glyph 0 must occupy slot 0 of the subset.
    if sorted_new.first().copied() != Some(0) {
        sorted_new.retain(|&g| g != 0);
        sorted_new.insert(0, 0);
    }

    // A charstring that cannot be read is replaced by the single byte 0x0E.
    let new_charstrings: Vec<&[u8]> = sorted_new
        .iter()
        .map(|&old_gid| {
            charstrings_index
                .get_item(old_gid as usize, cff)
                .unwrap_or(&[0x0E])
        })
        .collect();
    let new_charstrings_index = build_cff_index(&new_charstrings);
    let orig_charset_offset = top_dict_offsets.charset_offset.unwrap_or(0);
    let new_charset = build_subset_charset(cff, orig_charset_offset, &sorted_new, total_glyphs);

    // Copy the Private DICT verbatim. An offset at or beyond the end of the
    // table is treated like a missing Private DICT: slicing there used to
    // panic because the clamped end fell below the start.
    let (private_bytes, private_orig_offset) = match top_dict_offsets.private_dict {
        Some((size, offset)) if offset > 0 && size > 0 && (offset as usize) < cff.len() => {
            let start = offset as usize;
            let end = start.saturating_add(size as usize).min(cff.len());
            (cff[start..end].to_vec(), offset)
        }
        _ => (vec![], 0),
    };

    // Local subrs: follow the Private DICT's Subrs operand when present,
    // otherwise probe for an INDEX directly after the Private DICT.
    // NOTE(review): the Subrs operand inside the copied Private DICT is not
    // rewritten here, while the local subrs are re-emitted immediately after
    // the Private DICT. That only lines up when the original Subrs offset
    // equals the Private DICT size; confirm whether it should be patched.
    let local_subr_bytes = if !private_bytes.is_empty() && private_orig_offset > 0 {
        let priv_start = private_orig_offset as usize;
        let priv_end = priv_start + private_bytes.len();
        if let Some(subrs_rel) = parse_local_subrs_offset(&private_bytes) {
            let subrs_abs = priv_start + subrs_rel;
            match parse_cff_index(cff, subrs_abs) {
                Ok(idx) if idx.count() > 0 => idx.raw_bytes(cff).to_vec(),
                _ => vec![],
            }
        } else if priv_end < cff.len() {
            match parse_cff_index(cff, priv_end) {
                Ok(idx) if idx.count() > 0 => idx.raw_bytes(cff).to_vec(),
                _ => vec![],
            }
        } else {
            vec![]
        }
    } else {
        vec![]
    };

    let name_bytes = name_index.raw_bytes(cff);
    let string_bytes = string_index.raw_bytes(cff);
    let global_subr_bytes = global_subr_index.raw_bytes(cff);

    // Pass 1: encode the Top DICT with placeholder offsets to learn its size,
    // which determines where everything after it lands.
    let placeholder_offset = 100_000i32;
    let has_private = !private_bytes.is_empty();
    let private_size = usize_to_cff_offset(private_bytes.len())?;
    let placeholder_top_dict = rebuild_top_dict(
        top_dict_bytes,
        placeholder_offset,
        placeholder_offset,
        private_size,
        placeholder_offset,
        has_private,
    );
    let placeholder_top_dict_ref: &[u8] = &placeholder_top_dict;
    let placeholder_top_dict_index = build_cff_index(&[placeholder_top_dict_ref]);

    // Output layout: header, Name, Top DICT, String, Global Subr, charset,
    // CharStrings, then (optionally) Private DICT and local subrs.
    let after_header = header_bytes.len();
    let after_name = after_header + name_bytes.len();
    let after_top_dict_index = after_name + placeholder_top_dict_index.len();
    let after_string = after_top_dict_index + string_bytes.len();
    let after_global_subr = after_string + global_subr_bytes.len();
    let new_charset_offset = usize_to_cff_offset(after_global_subr)?;
    let new_charstrings_offset = usize_to_cff_offset(after_global_subr + new_charset.len())?;
    let new_private_offset = if has_private {
        usize_to_cff_offset(new_charstrings_offset as usize + new_charstrings_index.len())?
    } else {
        0
    };

    // Pass 2: encode with the real offsets; the size must match pass 1 or the
    // offsets computed above would be wrong.
    let real_top_dict = rebuild_top_dict(
        top_dict_bytes,
        new_charset_offset,
        new_charstrings_offset,
        private_size,
        new_private_offset,
        has_private,
    );
    let real_top_dict_ref: &[u8] = &real_top_dict;
    let real_top_dict_index = build_cff_index(&[real_top_dict_ref]);
    if real_top_dict_index.len() != placeholder_top_dict_index.len() {
        return Err(ParseError::SyntaxError {
            position: 0,
            message: format!(
                "Top DICT size changed between passes ({} vs {}), \
this indicates a bug in offset encoding",
                real_top_dict_index.len(),
                placeholder_top_dict_index.len()
            ),
        });
    }

    let mut new_cff = Vec::new();
    new_cff.extend_from_slice(header_bytes);
    new_cff.extend_from_slice(name_bytes);
    new_cff.extend_from_slice(&real_top_dict_index);
    new_cff.extend_from_slice(string_bytes);
    new_cff.extend_from_slice(global_subr_bytes);
    new_cff.extend_from_slice(&new_charset);
    new_cff.extend_from_slice(&new_charstrings_index);
    if has_private {
        new_cff.extend_from_slice(&private_bytes);
        if !local_subr_bytes.is_empty() {
            new_cff.extend_from_slice(&local_subr_bytes);
        }
    }
    Ok(new_cff)
}
/// Builds a format-0 charset for the subset font.
///
/// `sorted_old_gids` lists the original GIDs in new-GID order; its first
/// entry is skipped because the charset carries no entry for glyph 0. For
/// every other glyph the SID is looked up in the original charset (read only
/// when `orig_charset_offset` is greater than 2); when no SID is available
/// the old GID itself is written instead.
///
/// Returns the format byte `0` followed by one big-endian `u16` per glyph.
fn build_subset_charset(
    cff: &[u8],
    orig_charset_offset: i32,
    sorted_old_gids: &[u16],
    total_orig_glyphs: usize,
) -> Vec<u8> {
    let known_sids: Vec<u16> = match orig_charset_offset {
        off if off > 2 => read_charset_format0(cff, off as usize, total_orig_glyphs),
        _ => Vec::new(),
    };

    // Leading byte is the charset format (0).
    let mut out = vec![0u8];
    for &gid in sorted_old_gids.iter().skip(1) {
        // SIDs are stored for glyphs 1.., so glyph `g` lives in slot `g - 1`.
        let sid = usize::from(gid)
            .checked_sub(1)
            .and_then(|slot| known_sids.get(slot).copied())
            .unwrap_or(gid);
        out.extend_from_slice(&sid.to_be_bytes());
    }
    out
}
/// Scans a Private DICT for operator 19 and returns its operand.
///
/// Only the operand immediately preceding the operator is considered. The
/// scan stops at the first operator 19; `None` is returned when that operand
/// is missing or not strictly positive, or when the DICT holds no operator 19.
fn parse_local_subrs_offset(private_dict: &[u8]) -> Option<usize> {
    // Most recent operand seen since the last operator, if any.
    let mut pending: Option<i32> = None;
    for token in CffDictScanner::new(private_dict) {
        pending = match token {
            CffDictToken::Operand(value) => Some(value),
            CffDictToken::Operator(19) => {
                return pending.filter(|&v| v > 0).map(|v| v as usize);
            }
            // Any other operator consumes the operands gathered so far.
            CffDictToken::Operator(_) | CffDictToken::EscapedOperator(_) => None,
        };
    }
    None
}
/// Rewrites the operator-19 entry of a Private DICT to carry `new_offset`.
///
/// The operand is always emitted through `encode_cff_int_5byte`. If the DICT
/// already holds an operator 19 (the last one wins), its operands and the
/// operator byte are replaced in place; otherwise the new entry is appended
/// to the end of the DICT.
fn patch_private_subrs_offset(private_dict: &mut Vec<u8>, new_offset: i32) {
    // Byte span (operands + operator) of the last operator-19 entry, if any.
    let subrs_span: Option<(usize, usize)> = {
        let mut tokens = CffDictScanner::new(private_dict);
        let mut entry_start = 0usize;
        let mut found = None;
        while let Some(token) = tokens.next() {
            let is_subrs = match token {
                // Operands belong to the entry closed by the next operator.
                CffDictToken::Operand(_) => continue,
                CffDictToken::Operator(op) => op == 19,
                CffDictToken::EscapedOperator(_) => false,
            };
            let entry_end = tokens.position();
            if is_subrs {
                found = Some((entry_start, entry_end));
            }
            entry_start = entry_end;
        }
        found
    };

    let mut encoded = encode_cff_int_5byte(new_offset).to_vec();
    encoded.push(19);
    match subrs_span {
        Some((from, to)) => {
            private_dict.splice(from..to, encoded);
        }
        None => private_dict.extend_from_slice(&encoded),
    }
}
/// Reads the SIDs of a CFF charset located at `offset`.
///
/// Despite the name, charset formats 0, 1 and 2 are all handled. The result
/// holds at most `num_glyphs - 1` SIDs (glyph 0 has no charset entry), with
/// index `i` corresponding to glyph `i + 1`. Reading stops early, returning
/// what was gathered, when the table is truncated. An out-of-range `offset`
/// or an unknown format byte yields an empty vector.
fn read_charset_format0(cff: &[u8], offset: usize, num_glyphs: usize) -> Vec<u16> {
    let format = match cff.get(offset) {
        Some(&byte) => byte,
        None => return Vec::new(),
    };
    let wanted = num_glyphs.saturating_sub(1);
    // Everything after the format byte.
    let payload = &cff[offset + 1..];

    match format {
        // Format 0: one SID per glyph.
        0 => payload
            .chunks_exact(2)
            .take(wanted)
            .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
            .collect(),
        // Formats 1 and 2: (first SID, nLeft) ranges; nLeft is one byte in
        // format 1 and two bytes in format 2.
        1 | 2 => {
            let wide = format == 2;
            let record_len = if wide { 4 } else { 3 };
            let mut sids = Vec::with_capacity(wanted);
            for record in payload.chunks_exact(record_len) {
                if sids.len() >= wanted {
                    break;
                }
                let first_sid = u16::from_be_bytes([record[0], record[1]]);
                let n_left = if wide {
                    u16::from_be_bytes([record[2], record[3]])
                } else {
                    u16::from(record[2])
                };
                let room = wanted - sids.len();
                sids.extend(
                    (0..=n_left)
                        .take(room)
                        .map(|step| first_sid.wrapping_add(step)),
                );
            }
            sids
        }
        _ => {
            tracing::debug!(
                "CFF charset format {} not supported; SIDs will be approximated",
                format
            );
            Vec::new()
        }
    }
}
/// Parses a `cmap` table into a code point → glyph ID map.
///
/// The encoding records are ranked by (platform, encoding): (3, 10) is
/// preferred over (3, 1), which is preferred over any platform-0 record; all
/// other records are ignored. On equal rank the earliest record wins. The
/// chosen subtable is decoded when it is format 4 or format 12 (the latter
/// restricted by `used_codepoints`); any other format produces an empty map.
///
/// # Errors
/// Returns `ParseError::SyntaxError` when the table is shorter than four
/// bytes, when no record has a usable (platform, encoding) pair, when the
/// chosen subtable offset lies outside the table, or when the subtable
/// decoder fails.
fn parse_cmap(
    cmap: &[u8],
    used_codepoints: Option<&HashSet<u32>>,
) -> ParseResult<HashMap<u32, u16>> {
    if cmap.len() < 4 {
        return Err(ParseError::SyntaxError {
            position: 0,
            message: "cmap table too small".to_string(),
        });
    }

    let record_count = read_u16(cmap, 2)? as usize;
    // (rank, subtable offset) of the best record seen so far.
    let mut chosen: Option<(u8, u32)> = None;
    for record in 0..record_count {
        let rec_start = 4 + record * 8;
        if rec_start + 8 > cmap.len() {
            break;
        }
        let platform_id = read_u16(cmap, rec_start)?;
        let encoding_id = read_u16(cmap, rec_start + 2)?;
        let subtable = read_u32(cmap, rec_start + 4)?;
        let rank: u8 = match (platform_id, encoding_id) {
            (3, 10) => 3,
            (3, 1) => 2,
            (0, _) => 1,
            _ => 0,
        };
        // Strictly greater: rank 0 is never selected and ties keep the first.
        let current_rank = chosen.map_or(0, |(best, _)| best);
        if rank > current_rank {
            chosen = Some((rank, subtable));
        }
    }

    let subtable_offset = match chosen {
        Some((_, found)) => found as usize,
        None => {
            return Err(ParseError::SyntaxError {
                position: 0,
                message: "No usable cmap subtable found".to_string(),
            });
        }
    };
    if subtable_offset + 2 > cmap.len() {
        return Err(ParseError::SyntaxError {
            position: subtable_offset,
            message: "cmap subtable offset out of range".to_string(),
        });
    }

    match read_u16(cmap, subtable_offset)? {
        4 => parse_cmap_format_4(cmap, subtable_offset),
        12 => match parse_cmap_format_12_filtered(cmap, subtable_offset, used_codepoints) {
            Ok(mapping) => Ok(mapping),
            Err(e) => Err(ParseError::SyntaxError {
                position: subtable_offset,
                message: e.to_string(),
            }),
        },
        // Unsupported subtable formats yield no mappings rather than an error.
        _ => Ok(HashMap::new()),
    }
}
/// Decodes a `cmap` format 4 subtable starting at `offset` into a
/// code point → glyph ID map.
///
/// Code points that resolve to glyph 0 are omitted. Entries whose
/// `idRangeOffset` lookup would fall outside `cmap` are skipped.
///
/// The segment whose `endCode` is 0xFFFF terminates the scan. Its code points
/// below 0xFFFF are still decoded, because that last segment may carry real
/// mappings (e.g. 0xFFF9..=0xFFFF); the 0xFFFF sentinel itself is never
/// mapped.
///
/// # Errors
/// Returns `ParseError::SyntaxError` when the 14-byte header or the four
/// per-segment arrays do not fit inside `cmap`.
fn parse_cmap_format_4(cmap: &[u8], offset: usize) -> ParseResult<HashMap<u32, u16>> {
    if offset + 14 > cmap.len() {
        return Err(ParseError::SyntaxError {
            position: offset,
            message: "cmap Format 4 header truncated".to_string(),
        });
    }
    // segCountX2 lives at offset + 6.
    let seg_count = (read_u16(cmap, offset + 6)? / 2) as usize;
    let seg_arrays_start = offset + 14;
    let end_code_start = seg_arrays_start;
    // +2 skips the reservedPad field between endCode[] and startCode[].
    let start_code_start = seg_arrays_start + seg_count * 2 + 2;
    let id_delta_start = start_code_start + seg_count * 2;
    let id_range_offset_start = id_delta_start + seg_count * 2;
    let glyph_id_array_start = id_range_offset_start + seg_count * 2;
    if glyph_id_array_start > cmap.len() {
        return Err(ParseError::SyntaxError {
            position: offset,
            message: "cmap Format 4 data truncated".to_string(),
        });
    }

    let mut map = HashMap::new();
    for i in 0..seg_count {
        let end_code = read_u16(cmap, end_code_start + i * 2)? as u32;
        let start_code = read_u16(cmap, start_code_start + i * 2)? as u32;
        let id_delta = read_i16(cmap, id_delta_start + i * 2)? as i32;
        let id_range_offset = read_u16(cmap, id_range_offset_start + i * 2)? as usize;

        let is_final_segment = end_code == 0xFFFF;
        // Never map the 0xFFFF sentinel itself, only the codes before it.
        let last_code = if is_final_segment { 0xFFFE } else { end_code };

        if start_code <= last_code {
            for code in start_code..=last_code {
                let gid = if id_range_offset == 0 {
                    // Glyph IDs are computed modulo 65536.
                    ((code as i32 + id_delta) & 0xFFFF) as u16
                } else {
                    // idRangeOffset is relative to its own slot in the array.
                    let range_offset_pos = id_range_offset_start + i * 2;
                    let glyph_idx = range_offset_pos
                        + id_range_offset
                        + (code as usize - start_code as usize) * 2;
                    if glyph_idx + 2 > cmap.len() {
                        continue;
                    }
                    let raw_gid = read_u16(cmap, glyph_idx)?;
                    if raw_gid == 0 {
                        0
                    } else {
                        ((raw_gid as i32 + id_delta) & 0xFFFF) as u16
                    }
                };
                if gid != 0 {
                    map.insert(code, gid);
                }
            }
        }

        if is_final_segment {
            break;
        }
    }
    Ok(map)
}
/// Reads a big-endian `u16` from `data` at `offset`.
///
/// # Errors
/// Returns `ParseError::SyntaxError` positioned at `offset` when fewer than
/// two bytes are available there.
fn read_u16(data: &[u8], offset: usize) -> ParseResult<u16> {
    // `checked_add` + `get` keep an offset near `usize::MAX` from overflowing
    // the bounds arithmetic and panicking instead of reporting an error.
    match offset.checked_add(2).and_then(|end| data.get(offset..end)) {
        Some(&[hi, lo]) => Ok(u16::from_be_bytes([hi, lo])),
        _ => Err(ParseError::SyntaxError {
            position: offset,
            message: "read_u16: out of bounds".to_string(),
        }),
    }
}
/// Reads a big-endian `u32` from `data` at `offset`.
///
/// # Errors
/// Returns `ParseError::SyntaxError` positioned at `offset` when fewer than
/// four bytes are available there.
fn read_u32(data: &[u8], offset: usize) -> ParseResult<u32> {
    // `checked_add` + `get` keep an offset near `usize::MAX` from overflowing
    // the bounds arithmetic and panicking instead of reporting an error.
    match offset.checked_add(4).and_then(|end| data.get(offset..end)) {
        Some(&[b0, b1, b2, b3]) => Ok(u32::from_be_bytes([b0, b1, b2, b3])),
        _ => Err(ParseError::SyntaxError {
            position: offset,
            message: "read_u32: out of bounds".to_string(),
        }),
    }
}
/// Reads a big-endian `i16` from `data` at `offset`.
///
/// # Errors
/// Returns `ParseError::SyntaxError` positioned at `offset` when fewer than
/// two bytes are available there.
fn read_i16(data: &[u8], offset: usize) -> ParseResult<i16> {
    // `checked_add` + `get` keep an offset near `usize::MAX` from overflowing
    // the bounds arithmetic and panicking instead of reporting an error.
    match offset.checked_add(2).and_then(|end| data.get(offset..end)) {
        Some(&[hi, lo]) => Ok(i16::from_be_bytes([hi, lo])),
        _ => Err(ParseError::SyntaxError {
            position: offset,
            message: "read_i16: out of bounds".to_string(),
        }),
    }
}
/// Computes a wrapping 32-bit checksum over `data`.
///
/// The bytes are summed as big-endian `u32` words with wrapping addition; a
/// trailing partial word is padded with zero bytes on the right before being
/// added. Empty input yields 0.
fn otf_checksum(data: &[u8]) -> u32 {
    let words = data.chunks_exact(4);
    // Up to three bytes left over after the last complete word.
    let tail = words.remainder();

    let body = words.fold(0u32, |acc, w| {
        acc.wrapping_add(u32::from_be_bytes([w[0], w[1], w[2], w[3]]))
    });
    if tail.is_empty() {
        return body;
    }

    let mut padded = [0u8; 4];
    padded[..tail.len()].copy_from_slice(tail);
    body.wrapping_add(u32::from_be_bytes(padded))
}