use crate::font::FontError;
/// Reads a big-endian `u16` from `data` starting at byte `offset`.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when fewer than two bytes are available
/// at `offset`, including when `offset + 2` does not fit in a `usize`.
fn read_u16(data: &[u8], offset: usize) -> Result<u16, FontError> {
    // `checked_add` stops a huge offset from wrapping (release) or panicking
    // (debug) before the bounds check has a chance to reject it.
    let bytes = offset
        .checked_add(2)
        .and_then(|end| data.get(offset..end))
        .ok_or(FontError::InvalidData("unexpected end of data (u16)"))?;
    Ok(u16::from_be_bytes([bytes[0], bytes[1]]))
}
/// Reads a big-endian `i16` from `data` starting at byte `offset`.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when fewer than two bytes are available
/// at `offset`, including when `offset + 2` does not fit in a `usize`.
fn read_i16(data: &[u8], offset: usize) -> Result<i16, FontError> {
    // `checked_add` stops a huge offset from wrapping (release) or panicking
    // (debug) before the bounds check has a chance to reject it.
    let bytes = offset
        .checked_add(2)
        .and_then(|end| data.get(offset..end))
        .ok_or(FontError::InvalidData("unexpected end of data (i16)"))?;
    Ok(i16::from_be_bytes([bytes[0], bytes[1]]))
}
/// Reads a big-endian `u32` from `data` starting at byte `offset`.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when fewer than four bytes are available
/// at `offset`, including when `offset + 4` does not fit in a `usize`.
fn read_u32(data: &[u8], offset: usize) -> Result<u32, FontError> {
    // `checked_add` stops a huge offset from wrapping (release) or panicking
    // (debug) before the bounds check has a chance to reject it.
    let bytes = offset
        .checked_add(4)
        .and_then(|end| data.get(offset..end))
        .ok_or(FontError::InvalidData("unexpected end of data (u32)"))?;
    Ok(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
}
/// Location of one table inside the font data, as listed in the table directory.
#[derive(Debug, Clone, Copy)]
struct TableRecord {
/// Byte offset of the table, measured from the start of the data buffer.
offset: u32,
/// Length of the table in bytes.
length: u32,
}
/// The tables listed in an sfnt table directory.
struct TableDirectory {
/// `(tag, record)` pairs in the order they were read from the directory.
records: Vec<(u32, TableRecord)>,
}
impl TableDirectory {
    /// Parses the sfnt header and table records that begin at byte `start`.
    ///
    /// Only sfnt versions `0x00010000` and `'true'` are accepted. Every
    /// record's `offset + length` is checked against `data.len()`.
    ///
    /// # Errors
    ///
    /// Returns [`FontError::InvalidData`] when the header or directory does
    /// not fit in `data`, the version is unsupported, or a record points past
    /// the end of `data`.
    fn parse(data: &[u8], start: usize) -> Result<Self, FontError> {
        const HEADER_LEN: usize = 12;
        const RECORD_LEN: usize = 16;

        // `start` may come straight from a TTC header, so the addition is
        // checked instead of being allowed to wrap or panic.
        let records_start = match start.checked_add(HEADER_LEN) {
            Some(end) if end <= data.len() => end,
            _ => return Err(FontError::InvalidData("data too short for sfnt header")),
        };

        let sfnt_version = read_u32(data, start)?;
        if sfnt_version != 0x0001_0000 && sfnt_version != 0x7472_7565 {
            return Err(FontError::InvalidData("unsupported sfnt version"));
        }

        let num_tables = read_u16(data, start + 4)? as usize;
        // `num_tables` is at most 0xFFFF, so the product itself cannot overflow.
        let directory_fits = records_start
            .checked_add(num_tables * RECORD_LEN)
            .map_or(false, |end| end <= data.len());
        if !directory_fits {
            return Err(FontError::InvalidData("data too short for table directory"));
        }

        let mut records = Vec::with_capacity(num_tables);
        for i in 0..num_tables {
            let base = records_start + i * RECORD_LEN;
            let tag = read_u32(data, base)?;
            // Bytes base+4..base+8 (the checksum) are not used.
            let offset = read_u32(data, base + 8)?;
            let length = read_u32(data, base + 12)?;
            // Widen to u64 so `offset + length` cannot wrap.
            if u64::from(offset) + u64::from(length) > data.len() as u64 {
                return Err(FontError::InvalidData("table record exceeds data length"));
            }
            records.push((tag, TableRecord { offset, length }));
        }
        Ok(Self { records })
    }

    /// Returns the first record whose tag equals `tag`, if any.
    fn find(&self, tag: u32) -> Option<TableRecord> {
        self.records
            .iter()
            .find(|entry| entry.0 == tag)
            .map(|entry| entry.1)
    }

    /// Like [`find`](Self::find), but a missing table is an error.
    fn require(&self, tag: u32) -> Result<TableRecord, FontError> {
        self.find(tag)
            .ok_or(FontError::InvalidData("required table not found"))
    }
}
/// Packs a four-byte table tag into a `u32`, first byte most significant.
const fn tag(b: &[u8; 4]) -> u32 {
    u32::from_be_bytes(*b)
}
// Tags of the tables that `parse_all` requires, packed by `tag` into `u32`s
// so they can be compared directly with the tags read from the table directory.
const TAG_HEAD: u32 = tag(b"head");
const TAG_MAXP: u32 = tag(b"maxp");
const TAG_HHEA: u32 = tag(b"hhea");
const TAG_HMTX: u32 = tag(b"hmtx");
const TAG_LOCA: u32 = tag(b"loca");
const TAG_CMAP: u32 = tag(b"cmap");
const TAG_GLYF: u32 = tag(b"glyf");
/// The fields of the `head` table that this parser reads.
pub(crate) struct HeadTable {
/// Value read from byte 18 of the table; `parse_head` rejects zero.
pub units_per_em: u16,
/// Value read from byte 50 of the table; `parse_loca` treats 0 as the
/// short (`u16`) layout and any other value as the long (`u32`) layout.
pub index_to_loc_format: i16,
}
/// Extracts `units_per_em` and `index_to_loc_format` from the `head` table.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the record is shorter than 54
/// bytes, a field cannot be read, or `units_per_em` is zero.
fn parse_head(data: &[u8], rec: TableRecord) -> Result<HeadTable, FontError> {
    const MIN_HEAD_LEN: u32 = 54;
    const UNITS_PER_EM_POS: usize = 18;
    const INDEX_TO_LOC_FORMAT_POS: usize = 50;

    if rec.length < MIN_HEAD_LEN {
        return Err(FontError::InvalidData("head table too short"));
    }
    let table_start = rec.offset as usize;

    match read_u16(data, table_start + UNITS_PER_EM_POS)? {
        // Rejected at parse time rather than handed on to callers.
        0 => Err(FontError::InvalidData("units_per_em is zero")),
        units_per_em => Ok(HeadTable {
            units_per_em,
            index_to_loc_format: read_i16(data, table_start + INDEX_TO_LOC_FORMAT_POS)?,
        }),
    }
}
/// Reads the `u16` stored at byte 4 of the `maxp` table, which `parse_all`
/// uses as the glyph count.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the record is shorter than 6 bytes
/// or the value cannot be read.
fn parse_maxp(data: &[u8], rec: TableRecord) -> Result<u16, FontError> {
    const MIN_MAXP_LEN: u32 = 6;
    const NUM_GLYPHS_POS: usize = 4;

    if rec.length >= MIN_MAXP_LEN {
        read_u16(data, rec.offset as usize + NUM_GLYPHS_POS)
    } else {
        Err(FontError::InvalidData("maxp table too short"))
    }
}
/// The fields of the `hhea` table that this parser reads.
pub(crate) struct HheaTable {
/// Value read from byte 4 of the table.
pub ascent: i16,
/// Value read from byte 6 of the table.
pub descent: i16,
/// Value read from byte 8 of the table.
pub line_gap: i16,
/// Value read from byte 34 of the table; `parse_hmtx` reads this many
/// (advance width, left side bearing) pairs.
pub number_of_h_metrics: u16,
}
/// Extracts the vertical metrics and the long-metric count from the `hhea` table.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the record is shorter than 36
/// bytes or a field cannot be read.
fn parse_hhea(data: &[u8], rec: TableRecord) -> Result<HheaTable, FontError> {
    const MIN_HHEA_LEN: u32 = 36;

    if rec.length < MIN_HHEA_LEN {
        return Err(FontError::InvalidData("hhea table too short"));
    }
    let table_start = rec.offset as usize;
    let signed_at = |pos: usize| read_i16(data, table_start + pos);

    // Struct fields are evaluated top to bottom, so the read order is
    // ascent, descent, line gap, metric count.
    Ok(HheaTable {
        ascent: signed_at(4)?,
        descent: signed_at(6)?,
        line_gap: signed_at(8)?,
        number_of_h_metrics: read_u16(data, table_start + 34)?,
    })
}
/// Horizontal metrics expanded to one entry per glyph by `parse_hmtx`.
#[derive(Clone)]
pub(crate) struct HmtxTable {
/// Advance width per glyph; glyphs past the long-metric run repeat the
/// last width that was read.
pub advance_widths: Vec<u16>,
/// Left side bearing per glyph.
#[allow(dead_code)]
pub lsbs: Vec<i16>,
}
/// Expands the `hmtx` table into per-glyph advance widths and left side bearings.
///
/// The table starts with `num_h_metrics` (advance width, lsb) pairs. Any
/// remaining glyphs have only an lsb stored and reuse the last advance width.
/// If `num_h_metrics` exceeds `num_glyphs`, all `num_h_metrics` pairs are
/// still read and no trailing lsbs are expected.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the record is too short for the
/// declared counts or a value cannot be read.
fn parse_hmtx(
    data: &[u8],
    rec: TableRecord,
    num_h_metrics: u16,
    num_glyphs: u16,
) -> Result<HmtxTable, FontError> {
    let off = rec.offset as usize;
    let nh = num_h_metrics as usize;
    let ng = num_glyphs as usize;

    // Saturating: a font whose hhea claims more metrics than maxp has glyphs
    // must not underflow here. The same count drives the length check and
    // the trailing-lsb loop so the two can never disagree.
    let extra = ng.saturating_sub(nh);
    let min_len = nh * 4 + extra * 2;
    if (rec.length as usize) < min_len {
        return Err(FontError::InvalidData("hmtx table too short"));
    }

    let total = nh + extra;
    let mut advance_widths = Vec::with_capacity(total);
    let mut lsbs = Vec::with_capacity(total);
    for i in 0..nh {
        let base = off + i * 4;
        advance_widths.push(read_u16(data, base)?);
        lsbs.push(read_i16(data, base + 2)?);
    }

    // With zero long metrics there is no width to repeat; fall back to 0.
    let last_aw = advance_widths.last().copied().unwrap_or(0);
    let extra_base = off + nh * 4;
    for i in 0..extra {
        advance_widths.push(last_aw);
        lsbs.push(read_i16(data, extra_base + i * 2)?);
    }

    Ok(HmtxTable {
        advance_widths,
        lsbs,
    })
}
/// Reads the `num_glyphs + 1` entries of the `loca` table as byte offsets.
///
/// When `index_to_loc_format` is 0 the entries are `u16` values that are
/// doubled; for any other value they are read as `u32` and used as-is.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the record is too short for the
/// entry count or an entry cannot be read.
fn parse_loca(
    data: &[u8],
    rec: TableRecord,
    num_glyphs: u16,
    index_to_loc_format: i16,
) -> Result<Vec<u32>, FontError> {
    let table_start = rec.offset as usize;
    let table_len = rec.length as usize;
    // One more entry than glyphs.
    let entry_count = num_glyphs as usize + 1;

    if index_to_loc_format == 0 {
        if table_len < entry_count * 2 {
            return Err(FontError::InvalidData("loca table too short (short)"));
        }
        let mut offsets = Vec::with_capacity(entry_count);
        for slot in 0..entry_count {
            let halved = read_u16(data, table_start + slot * 2)?;
            offsets.push(u32::from(halved) * 2);
        }
        return Ok(offsets);
    }

    if table_len < entry_count * 4 {
        return Err(FontError::InvalidData("loca table too short (long)"));
    }
    let mut offsets = Vec::with_capacity(entry_count);
    for slot in 0..entry_count {
        offsets.push(read_u32(data, table_start + slot * 4)?);
    }
    Ok(offsets)
}
/// A parsed `cmap` subtable in one of the two formats `parse_cmap` accepts.
#[derive(Clone)]
pub(crate) enum CmapLookup {
/// Subtable format 4; its lookup returns 0 for code points above 0xFFFF.
Format4(CmapFormat4),
/// Subtable format 12, keyed by full 32-bit code points.
Format12(CmapFormat12),
}
/// Contents of a format 4 `cmap` subtable as stored by `parse_cmap_format4`.
#[derive(Clone)]
pub(crate) struct CmapFormat4 {
/// Segments in file order; lookups binary-search them by `end_code`.
segments: Vec<CmapFormat4Segment>,
/// The `u16` values that follow the idRangeOffset array, up to the end of the subtable.
glyph_id_array: Vec<u16>,
/// Byte position within the font data where `glyph_id_array` starts.
glyph_id_array_offset: usize,
/// Byte position within the font data of the first idRangeOffset entry.
id_range_offset_base: usize,
}
/// One segment of a format 4 `cmap` subtable.
#[derive(Clone)]
struct CmapFormat4Segment {
/// Last code point covered by the segment (inclusive).
end_code: u16,
/// First code point covered by the segment.
start_code: u16,
/// Delta added (wrapping to 16 bits) to produce the glyph index.
id_delta: i16,
/// Zero means "use `id_delta` on the code point directly"; otherwise a byte
/// offset, relative to this segment's idRangeOffset entry, into the glyph id array.
id_range_offset: u16,
}
/// Contents of a format 12 `cmap` subtable as stored by `parse_cmap_format12`.
#[derive(Clone)]
pub(crate) struct CmapFormat12 {
/// Groups in file order; lookups binary-search them by `start_char`.
groups: Vec<CmapFormat12Group>,
}
/// One group of a format 12 `cmap` subtable: a run of code points mapped to
/// consecutive glyph indices.
#[derive(Clone)]
struct CmapFormat12Group {
/// First code point of the run.
start_char: u32,
/// Last code point of the run (inclusive).
end_char: u32,
/// Glyph index assigned to `start_char`; later code points count up from it.
start_glyph_id: u32,
}
impl CmapLookup {
pub fn map(&self, codepoint: u32) -> u16 {
match self {
CmapLookup::Format4(f4) => f4.map(codepoint),
CmapLookup::Format12(f12) => f12.map(codepoint),
}
}
}
impl CmapFormat4 {
    /// Maps `codepoint` to a glyph index, returning 0 when it is not covered.
    ///
    /// Code points above 0xFFFF are never covered. Malformed range offsets
    /// that point outside the glyph id array, on either side, also yield 0.
    fn map(&self, codepoint: u32) -> u16 {
        if codepoint > 0xFFFF {
            return 0;
        }
        let cp = codepoint as u16;

        // Either an exact `end_code` hit or the insertion point, i.e. the
        // first segment whose `end_code` is >= `cp`.
        let idx = match self.segments.binary_search_by(|seg| seg.end_code.cmp(&cp)) {
            Ok(i) | Err(i) => i,
        };
        let seg = match self.segments.get(idx) {
            Some(seg) if seg.start_code <= cp => seg,
            _ => return 0,
        };

        if seg.id_range_offset == 0 {
            // The delta is applied modulo 65536.
            return cp.wrapping_add(seg.id_delta as u16);
        }

        // `id_range_offset` is relative to the position of this segment's own
        // idRangeOffset entry in the file.
        let seg_byte_offset = self.id_range_offset_base + idx * 2;
        let target_byte_offset = seg_byte_offset
            + seg.id_range_offset as usize
            + usize::from(cp - seg.start_code) * 2;

        // A bad offset can land before the glyph id array; `checked_sub`
        // turns that into "unmapped" instead of an underflow.
        let glyph_id = target_byte_offset
            .checked_sub(self.glyph_id_array_offset)
            .and_then(|rel| self.glyph_id_array.get(rel / 2))
            .copied()
            .unwrap_or(0);

        if glyph_id == 0 {
            0
        } else {
            glyph_id.wrapping_add(seg.id_delta as u16)
        }
    }
}
impl CmapFormat12 {
    /// Maps `codepoint` to a glyph index, returning 0 when it is not covered.
    ///
    /// A group whose computed glyph index overflows `u32` or does not fit in
    /// the `u16` return type is treated as unmapped rather than being
    /// truncated to an unrelated glyph.
    fn map(&self, codepoint: u32) -> u16 {
        // Pick the last group whose `start_char` is <= `codepoint`.
        let idx = match self
            .groups
            .binary_search_by(|g| g.start_char.cmp(&codepoint))
        {
            Ok(i) => i,
            Err(0) => return 0,
            Err(i) => i - 1,
        };
        let g = &self.groups[idx];
        // The lower bound holds for sorted groups; it is re-checked so that
        // unsorted data from a malformed font cannot underflow below.
        if codepoint < g.start_char || codepoint > g.end_char {
            return 0;
        }
        g.start_glyph_id
            .checked_add(codepoint - g.start_char)
            .filter(|&glyph_id| glyph_id <= u32::from(u16::MAX))
            .map_or(0, |glyph_id| glyph_id as u16)
    }
}
/// Selects and parses the best supported subtable of the `cmap` table.
///
/// Encoding records are ranked by (platform, encoding): Windows full Unicode
/// (3, 10) first, then Unicode-platform encodings 4..=6, Windows BMP (3, 1),
/// and Unicode BMP (0, 3). Records with the same rank keep directory order.
/// The highest-ranked record whose subtable is format 4 or 12 is parsed. A
/// preferred record with an unusable subtable no longer fails the whole
/// table: for example Unicode-platform encoding 5, which the OpenType spec
/// pairs with format 14, is skipped in favour of the next candidate.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the table is too short, no record
/// has a recognised encoding, or no candidate is usable. In the last case the
/// error describes the highest-ranked candidate.
fn parse_cmap(data: &[u8], rec: TableRecord) -> Result<CmapLookup, FontError> {
    let off = rec.offset as usize;
    if rec.length < 4 {
        return Err(FontError::InvalidData("cmap table too short"));
    }
    let num_tables = read_u16(data, off + 2)? as usize;
    if rec.length < (4 + num_tables * 8) as u32 {
        return Err(FontError::InvalidData(
            "cmap table too short for encoding records",
        ));
    }

    // (priority, subtable offset relative to the cmap table)
    let mut candidates: Vec<(u8, u32)> = Vec::new();
    for i in 0..num_tables {
        let base = off + 4 + i * 8;
        let platform_id = read_u16(data, base)?;
        let encoding_id = read_u16(data, base + 2)?;
        let subtable_offset = read_u32(data, base + 4)?;
        let priority: u8 = match (platform_id, encoding_id) {
            (3, 10) => 4,
            (0, 4..=6) => 3,
            (3, 1) => 2,
            (0, 3) => 1,
            _ => 0,
        };
        if priority > 0 {
            candidates.push((priority, subtable_offset));
        }
    }
    // Stable sort, highest priority first: equal priorities keep directory
    // order, so the first record of a given rank still wins.
    candidates.sort_by(|a, b| b.0.cmp(&a.0));

    let mut first_failure = None;
    for &(_, subtable_offset) in &candidates {
        let subtable_off = off + subtable_offset as usize;
        let failure = if subtable_off + 2 > data.len() {
            FontError::InvalidData("cmap subtable offset out of range")
        } else {
            match read_u16(data, subtable_off)? {
                4 => return parse_cmap_format4(data, subtable_off),
                12 => return parse_cmap_format12(data, subtable_off),
                _ => FontError::InvalidData("unsupported cmap format"),
            }
        };
        // Remember only the problem with the most-preferred candidate.
        if first_failure.is_none() {
            first_failure = Some(failure);
        }
    }
    Err(first_failure.unwrap_or(FontError::InvalidData("no suitable cmap subtable")))
}
fn parse_cmap_format4(data: &[u8], off: usize) -> Result<CmapLookup, FontError> {
if off + 14 > data.len() {
return Err(FontError::InvalidData("cmap format 4 header too short"));
}
let length = read_u16(data, off + 2)? as usize;
if off + length > data.len() {
return Err(FontError::InvalidData("cmap format 4 length exceeds data"));
}
let seg_count_x2 = read_u16(data, off + 6)? as usize;
let seg_count = seg_count_x2 / 2;
let end_codes_off = off + 14;
let start_codes_off = end_codes_off + seg_count * 2 + 2;
let id_deltas_off = start_codes_off + seg_count * 2;
let id_range_offsets_off = id_deltas_off + seg_count * 2;
let glyph_id_array_off = id_range_offsets_off + seg_count * 2;
let table_end = off + length;
if glyph_id_array_off > table_end {
return Err(FontError::InvalidData(
"cmap format 4 arrays exceed table length",
));
}
let mut segments = Vec::with_capacity(seg_count);
for i in 0..seg_count {
let end_code = read_u16(data, end_codes_off + i * 2)?;
let start_code = read_u16(data, start_codes_off + i * 2)?;
let id_delta = read_i16(data, id_deltas_off + i * 2)?;
let id_range_offset = read_u16(data, id_range_offsets_off + i * 2)?;
segments.push(CmapFormat4Segment {
end_code,
start_code,
id_delta,
id_range_offset,
});
}
let glyph_id_count = (table_end - glyph_id_array_off) / 2;
let mut glyph_id_array = Vec::with_capacity(glyph_id_count);
for i in 0..glyph_id_count {
glyph_id_array.push(read_u16(data, glyph_id_array_off + i * 2)?);
}
Ok(CmapLookup::Format4(CmapFormat4 {
segments,
glyph_id_array,
glyph_id_array_offset: glyph_id_array_off,
id_range_offset_base: id_range_offsets_off,
}))
}
/// Parses the format 12 `cmap` subtable that starts at byte `off` of `data`.
///
/// Reads the group count from byte 12 of the subtable and then that many
/// 12-byte (start char, end char, start glyph id) groups from byte 16 on.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the 16-byte header or the declared
/// number of groups does not fit in `data`.
fn parse_cmap_format12(data: &[u8], off: usize) -> Result<CmapLookup, FontError> {
    const HEADER_LEN: usize = 16;
    const GROUP_LEN: usize = 12;

    let groups_off = match off.checked_add(HEADER_LEN) {
        Some(end) if end <= data.len() => end,
        _ => return Err(FontError::InvalidData("cmap format 12 header too short")),
    };

    let num_groups = read_u32(data, off + 12)? as usize;
    // The group count is untrusted. Checked arithmetic keeps a huge value
    // from wrapping on 32-bit targets, passing the bounds check, and then
    // driving an enormous `Vec::with_capacity`.
    let groups_fit = num_groups
        .checked_mul(GROUP_LEN)
        .and_then(|bytes| groups_off.checked_add(bytes))
        .map_or(false, |end| end <= data.len());
    if !groups_fit {
        return Err(FontError::InvalidData("cmap format 12 groups exceed data"));
    }

    let mut groups = Vec::with_capacity(num_groups);
    for i in 0..num_groups {
        let base = groups_off + i * GROUP_LEN;
        let start_char = read_u32(data, base)?;
        let end_char = read_u32(data, base + 4)?;
        let start_glyph_id = read_u32(data, base + 8)?;
        groups.push(CmapFormat12Group {
            start_char,
            end_char,
            start_glyph_id,
        });
    }
    Ok(CmapLookup::Format12(CmapFormat12 { groups }))
}
/// Everything `parse_all` extracts from a font.
#[derive(Clone)]
pub(crate) struct ParsedTables {
/// From the `head` table; never zero.
pub units_per_em: u16,
/// Value returned by `parse_maxp`.
#[allow(dead_code)]
pub num_glyphs: u16,
/// From the `hhea` table.
pub ascent: i16,
/// From the `hhea` table.
pub descent: i16,
/// From the `hhea` table.
pub line_gap: i16,
/// `num_glyphs + 1` offsets from the `loca` table, already scaled to bytes.
pub loca_offsets: Vec<u32>,
/// Offset of the `glyf` table, taken from its directory record.
pub glyf_offset: u32,
/// Length in bytes of the `glyf` table, taken from its directory record.
#[allow(dead_code)]
pub glyf_length: u32,
/// Code point to glyph index lookup.
pub cmap: CmapLookup,
/// Per-glyph horizontal metrics.
pub hmtx: HmtxTable,
}
// Value `parse_all` compares with the first four bytes of the data to decide
// whether to look up the font's start through the TTC header.
const TAG_TTCF: u32 = tag(b"ttcf");
/// Looks up where font number `index` starts according to the TTC header.
///
/// The font count is read from byte 8 and the per-font `u32` offsets from
/// byte 12 onwards. The returned offset is not validated here.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when the header is shorter than 12
/// bytes, `index` is not below the font count, or the offset entry lies
/// outside `data`.
fn ttc_font_offset(data: &[u8], index: u32) -> Result<usize, FontError> {
    const HEADER_LEN: usize = 12;

    if data.len() < HEADER_LEN {
        return Err(FontError::InvalidData("TTC header too short"));
    }
    if read_u32(data, 8)? <= index {
        return Err(FontError::InvalidData("TTC font index out of range"));
    }

    let entry_pos = HEADER_LEN + 4 * index as usize;
    if data.len() < entry_pos + 4 {
        return Err(FontError::InvalidData("TTC offset table too short"));
    }
    read_u32(data, entry_pos).map(|offset| offset as usize)
}
pub(crate) fn parse_all(data: &[u8], index: u32) -> Result<ParsedTables, FontError> {
let sfnt_start = if data.len() >= 4 && read_u32(data, 0)? == TAG_TTCF {
ttc_font_offset(data, index)?
} else {
0
};
let dir = TableDirectory::parse(data, sfnt_start)?;
let head_rec = dir.require(TAG_HEAD)?;
let maxp_rec = dir.require(TAG_MAXP)?;
let hhea_rec = dir.require(TAG_HHEA)?;
let hmtx_rec = dir.require(TAG_HMTX)?;
let loca_rec = dir.require(TAG_LOCA)?;
let cmap_rec = dir.require(TAG_CMAP)?;
let glyf_rec = dir.require(TAG_GLYF)?;
let head = parse_head(data, head_rec)?;
let num_glyphs = parse_maxp(data, maxp_rec)?;
let hhea = parse_hhea(data, hhea_rec)?;
let hmtx = parse_hmtx(data, hmtx_rec, hhea.number_of_h_metrics, num_glyphs)?;
let loca_offsets = parse_loca(data, loca_rec, num_glyphs, head.index_to_loc_format)?;
let cmap = parse_cmap(data, cmap_rec)?;
Ok(ParsedTables {
units_per_em: head.units_per_em,
num_glyphs,
ascent: hhea.ascent,
descent: hhea.descent,
line_gap: hhea.line_gap,
loca_offsets,
glyf_offset: glyf_rec.offset,
glyf_length: glyf_rec.length,
cmap,
hmtx,
})
}
/// Crate-visible entry point for reading a big-endian `u16` at `offset`.
///
/// # Errors
///
/// Propagates the error from `read_u16` unchanged.
pub(crate) fn be_u16(data: &[u8], offset: usize) -> Result<u16, FontError> {
    let value = read_u16(data, offset)?;
    Ok(value)
}
/// Crate-visible entry point for reading a big-endian `i16` at `offset`.
///
/// # Errors
///
/// Propagates the error from `read_i16` unchanged.
pub(crate) fn be_i16(data: &[u8], offset: usize) -> Result<i16, FontError> {
    let value = read_i16(data, offset)?;
    Ok(value)
}
/// Reads the byte at `offset`.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when `offset` is past the end of `data`.
pub(crate) fn be_u8(data: &[u8], offset: usize) -> Result<u8, FontError> {
    data.get(offset)
        .copied()
        .ok_or(FontError::InvalidData("unexpected end of data (u8)"))
}
/// Reads the byte at `offset` and reinterprets it as a signed value.
///
/// # Errors
///
/// Returns [`FontError::InvalidData`] when `offset` is past the end of `data`.
pub(crate) fn be_i8(data: &[u8], offset: usize) -> Result<i8, FontError> {
    data.get(offset)
        .map(|&byte| byte as i8)
        .ok_or(FontError::InvalidData("unexpected end of data (i8)"))
}