use fontcull_read_fonts::{
tables::cmap::{
self, Cmap, Cmap12, Cmap12Iter, Cmap14, Cmap14Iter, Cmap4, Cmap4Iter, CmapIterLimits,
CmapSubtable, EncodingRecord, PlatformId,
},
types::GlyphId,
FontData, FontRef, TableProvider,
};
pub use fontcull_read_fonts::tables::cmap::MapVariant;
/// Mapping from codepoints to glyph identifiers, backed by a font's `cmap` table.
///
/// The `Default` value holds no subtables, so every lookup through it yields
/// `None` and its iterators produce nothing.
#[derive(Clone, Default)]
pub struct Charmap<'a> {
/// Subtable used for plain codepoint lookups, if a supported one was selected.
codepoint_subtable: Option<CodepointSubtable<'a>>,
/// Format 14 subtable used for (codepoint, variation selector) lookups, if present.
variant_subtable: Option<Cmap14<'a>>,
/// Limits handed to the format 12 iterator when enumerating mappings.
cmap12_limits: CmapIterLimits,
}
impl<'a> Charmap<'a> {
pub fn new(font: &FontRef<'a>) -> Self {
let Ok(cmap) = font.cmap() else {
return Default::default();
};
let selection = MappingSelection::new(font, &cmap);
Self {
codepoint_subtable: selection
.codepoint_subtable
.map(|subtable| CodepointSubtable {
subtable,
is_symbol: selection.mapping_index.codepoint_subtable_is_symbol,
}),
variant_subtable: selection.variant_subtable,
cmap12_limits: selection.mapping_index.cmap12_limits,
}
}
pub fn has_map(&self) -> bool {
self.codepoint_subtable.is_some()
}
pub fn is_symbol(&self) -> bool {
self.codepoint_subtable
.as_ref()
.map(|x| x.is_symbol)
.unwrap_or(false)
}
pub fn has_variant_map(&self) -> bool {
self.variant_subtable.is_some()
}
pub fn map(&self, ch: impl Into<u32>) -> Option<GlyphId> {
self.codepoint_subtable.as_ref()?.map(ch.into())
}
pub fn mappings(&self) -> Mappings<'a> {
self.codepoint_subtable
.as_ref()
.map(|subtable| {
Mappings(match &subtable.subtable {
SupportedSubtable::Format4(cmap4) => MappingsInner::Format4(cmap4.iter()),
SupportedSubtable::Format12(cmap12) => {
MappingsInner::Format12(cmap12.iter_with_limits(self.cmap12_limits))
}
})
})
.unwrap_or(Mappings(MappingsInner::None))
}
pub fn map_variant(&self, ch: impl Into<u32>, selector: impl Into<u32>) -> Option<MapVariant> {
self.variant_subtable.as_ref()?.map_variant(ch, selector)
}
pub fn variant_mappings(&self) -> VariantMappings<'a> {
VariantMappings(self.variant_subtable.clone().map(|cmap14| cmap14.iter()))
}
}
/// Indices of the `cmap` encoding records selected for a font.
///
/// Holds no borrowed font data. `MappingIndex::charmap` turns the stored
/// indices back into a `Charmap` for a given font.
// NOTE(review): presumably the font passed to `charmap` must be the one the
// index was computed from; nothing here enforces that -- confirm with callers.
#[derive(Copy, Clone, Default, Debug)]
pub struct MappingIndex {
/// Position of the selected codepoint subtable in the encoding record list.
codepoint_subtable: Option<u16>,
/// Whether the selected codepoint subtable came from a symbol encoding record.
codepoint_subtable_is_symbol: bool,
/// Position of the selected format 14 subtable in the encoding record list.
variant_subtable: Option<u16>,
/// Iteration limits forwarded to the resulting `Charmap`.
cmap12_limits: CmapIterLimits,
}
impl MappingIndex {
pub fn new(font: &FontRef) -> Self {
let Ok(cmap) = font.cmap() else {
return Default::default();
};
MappingSelection::new(font, &cmap).mapping_index
}
pub fn charmap<'a>(&self, font: &FontRef<'a>) -> Charmap<'a> {
let Ok(cmap) = font.cmap() else {
return Default::default();
};
let records = cmap.encoding_records();
let data = cmap.offset_data();
Charmap {
codepoint_subtable: self
.codepoint_subtable
.and_then(|index| get_subtable(data, records, index))
.and_then(SupportedSubtable::new)
.map(|subtable| CodepointSubtable {
subtable,
is_symbol: self.codepoint_subtable_is_symbol,
}),
variant_subtable: self
.variant_subtable
.and_then(|index| get_subtable(data, records, index))
.and_then(|subtable| match subtable {
CmapSubtable::Format14(cmap14) => Some(cmap14),
_ => None,
}),
cmap12_limits: self.cmap12_limits,
}
}
}
/// Iterator over the `(codepoint, glyph id)` pairs of a character map.
///
/// Pairs whose glyph is `GlyphId::NOTDEF` are skipped by the `Iterator` impl.
#[derive(Clone)]
pub struct Mappings<'a>(MappingsInner<'a>);
impl Iterator for Mappings<'_> {
    type Item = (u32, GlyphId);

    /// Advances the wrapped subtable iterator to the next pair whose glyph is
    /// not `GlyphId::NOTDEF`, or returns `None` once it is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let is_mapped = |(_, gid): &(u32, GlyphId)| *gid != GlyphId::NOTDEF;
        match &mut self.0 {
            MappingsInner::None => None,
            MappingsInner::Format4(iter) => iter.find(is_mapped),
            MappingsInner::Format12(iter) => iter.find(is_mapped),
        }
    }
}
/// Concrete iterator state behind `Mappings`, one variant per supported
/// subtable format.
#[derive(Clone)]
enum MappingsInner<'a> {
/// No codepoint subtable is available; iteration yields nothing.
None,
/// Iterating a format 4 subtable.
Format4(Cmap4Iter<'a>),
/// Iterating a format 12 subtable.
Format12(Cmap12Iter<'a>),
}
/// Iterator over the `(codepoint, selector, variant)` triples of a character
/// map's format 14 subtable. Wraps `None` (and yields nothing) when the map
/// has no such subtable.
#[derive(Clone)]
pub struct VariantMappings<'a>(Option<Cmap14Iter<'a>>);
impl Iterator for VariantMappings<'_> {
    type Item = (u32, u32, MapVariant);

    /// Yields the next triple from the wrapped format 14 iterator, or `None`
    /// when there is no iterator or it is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.0 {
            Some(iter) => iter.next(),
            None => None,
        }
    }
}
/// Resolves the subtable referenced by the encoding record at `index`.
///
/// Returns `None` if `index` is out of bounds for `records` or if the
/// record's subtable cannot be read from `data`.
fn get_subtable<'a>(
    data: FontData<'a>,
    records: &[EncodingRecord],
    index: u16,
) -> Option<CmapSubtable<'a>> {
    let record = records.get(usize::from(index))?;
    record.subtable(data).ok()
}
/// The subtable chosen for codepoint lookups, together with the flag that
/// enables the symbol fallback in `CodepointSubtable::map`.
#[derive(Clone)]
struct CodepointSubtable<'a> {
/// The underlying format 4 or format 12 subtable.
subtable: SupportedSubtable<'a>,
/// `true` when the subtable was selected from a symbol encoding record.
is_symbol: bool,
}
impl CodepointSubtable<'_> {
    /// Maps `codepoint` to a glyph.
    ///
    /// For symbol subtables, a codepoint in `0..=0xFF` that has no direct
    /// mapping is retried at `codepoint + 0xF000`.
    fn map(&self, codepoint: u32) -> Option<GlyphId> {
        if let Some(gid) = self.map_impl(codepoint) {
            return Some(gid);
        }
        if !self.is_symbol || codepoint > 0x00FF {
            return None;
        }
        self.map_impl(codepoint + 0xF000)
    }

    /// Direct lookup in the underlying subtable; a result of
    /// `GlyphId::NOTDEF` is reported as "no mapping".
    fn map_impl(&self, codepoint: u32) -> Option<GlyphId> {
        let found = match &self.subtable {
            SupportedSubtable::Format4(table) => table.map_codepoint(codepoint),
            SupportedSubtable::Format12(table) => table.map_codepoint(codepoint),
        };
        found.filter(|gid| *gid != GlyphId::NOTDEF)
    }
}
/// The `cmap` subtable formats this module can use for codepoint lookups.
#[derive(Clone)]
enum SupportedSubtable<'a> {
/// A format 4 subtable.
Format4(Cmap4<'a>),
/// A format 12 subtable.
Format12(Cmap12<'a>),
}
impl<'a> SupportedSubtable<'a> {
    /// Wraps `subtable` if it is format 4 or format 12; any other format
    /// yields `None`.
    fn new(subtable: CmapSubtable<'a>) -> Option<Self> {
        match subtable {
            CmapSubtable::Format4(cmap4) => Some(Self::Format4(cmap4)),
            CmapSubtable::Format12(cmap12) => Some(Self::Format12(cmap12)),
            _ => None,
        }
    }

    /// Reads the subtable referenced by `record` from `cmap` and wraps it.
    ///
    /// Returns `None` if the subtable cannot be read or has an unsupported
    /// format.
    fn from_cmap_record(cmap: &Cmap<'a>, record: &cmap::EncodingRecord) -> Option<Self> {
        let subtable = record.subtable(cmap.offset_data()).ok()?;
        Self::new(subtable)
    }
}
/// Category of a candidate codepoint subtable.
///
/// The derived `PartialOrd` follows declaration order, and selection only
/// replaces the current choice with a strictly greater kind, so later
/// variants take priority over earlier ones. Do not reorder.
#[derive(Copy, Clone, PartialEq, PartialOrd)]
enum MappingKind {
/// Nothing selected yet.
None = 0,
/// Assigned to ISO records, Windows encoding 1, and Unicode-platform
/// records not matched by a more specific case.
UnicodeBmp = 1,
/// Assigned to Windows encoding 10 and Unicode-platform encoding 4.
UnicodeFull = 2,
/// Assigned to Windows encoding 0 (symbol).
Symbol = 3,
}
/// Result of scanning a `cmap` table: the chosen record indices plus the
/// already-parsed subtables they refer to.
struct MappingSelection<'a> {
/// Indices and flags describing the selection, without borrowed data.
mapping_index: MappingIndex,
/// Parsed codepoint subtable for the selected record, if any.
codepoint_subtable: Option<SupportedSubtable<'a>>,
/// Parsed format 14 subtable for the selected record, if any.
variant_subtable: Option<Cmap14<'a>>,
}
impl<'a> MappingSelection<'a> {
    /// Scans the encoding records of `cmap` and picks the codepoint and
    /// variant subtables to use.
    ///
    /// Records are visited from last to first. A codepoint candidate replaces
    /// the current choice only when its `MappingKind` ranks strictly higher,
    /// so among equally ranked records the one latest in the table wins. The
    /// variant slot keeps the first format 14 subtable encountered in that
    /// same reverse order.
    fn new(font: &FontRef<'a>, cmap: &Cmap<'a>) -> Self {
        const ENCODING_MS_SYMBOL: u16 = 0;
        const ENCODING_MS_UNICODE_CS: u16 = 1;
        const ENCODING_APPLE_ID_UNICODE_32: u16 = 4;
        const ENCODING_APPLE_ID_VARIANT_SELECTOR: u16 = 5;
        const ENCODING_MS_ID_UCS_4: u16 = 10;

        let mut mapping_index = MappingIndex::default();
        let mut best_kind = MappingKind::None;
        let mut codepoint_subtable = None;
        let mut variant_subtable = None;

        for (i, record) in cmap.encoding_records().iter().enumerate().rev() {
            // Classify the record first; variant records are handled on the
            // spot and irrelevant records are skipped.
            let kind = match (record.platform_id(), record.encoding_id()) {
                (PlatformId::Unicode, ENCODING_APPLE_ID_VARIANT_SELECTOR) => {
                    if let Ok(CmapSubtable::Format14(cmap14)) = record.subtable(cmap.offset_data())
                    {
                        if variant_subtable.is_none() {
                            mapping_index.variant_subtable = Some(i as u16);
                            variant_subtable = Some(cmap14);
                        }
                    }
                    continue;
                }
                (PlatformId::Windows, ENCODING_MS_SYMBOL) => MappingKind::Symbol,
                (PlatformId::Windows, ENCODING_MS_ID_UCS_4)
                | (PlatformId::Unicode, ENCODING_APPLE_ID_UNICODE_32) => MappingKind::UnicodeFull,
                (PlatformId::ISO, _)
                | (PlatformId::Unicode, _)
                | (PlatformId::Windows, ENCODING_MS_UNICODE_CS) => MappingKind::UnicodeBmp,
                _ => continue,
            };

            // Only records with a readable, supported subtable are candidates.
            let Some(subtable) = SupportedSubtable::from_cmap_record(cmap, record) else {
                continue;
            };
            if kind > best_kind {
                best_kind = kind;
                mapping_index.codepoint_subtable_is_symbol = kind == MappingKind::Symbol;
                mapping_index.codepoint_subtable = Some(i as u16);
                codepoint_subtable = Some(subtable);
            }
        }

        mapping_index.cmap12_limits = CmapIterLimits::default_for_font(font);
        Self {
            mapping_index,
            codepoint_subtable,
            variant_subtable,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::MetadataProvider;
    use fontcull_read_fonts::FontRef;

    /// Parses one of the bundled test fonts, panicking if it is malformed.
    fn load(data: &[u8]) -> FontRef<'_> {
        FontRef::new(data).unwrap()
    }

    #[test]
    fn choose_format_12_over_4() {
        let font = load(fontcull_font_test_data::CMAP12_FONT1);
        let selected = font.charmap().codepoint_subtable.unwrap().subtable;
        assert!(matches!(selected, SupportedSubtable::Format12(..)));
    }

    #[test]
    fn choose_format_4() {
        let font = load(fontcull_font_test_data::VAZIRMATN_VAR);
        let selected = font.charmap().codepoint_subtable.unwrap().subtable;
        assert!(matches!(selected, SupportedSubtable::Format4(..)));
    }

    #[test]
    fn choose_symbol() {
        let font = load(fontcull_font_test_data::CMAP4_SYMBOL_PUA);
        let charmap = font.charmap();
        assert!(charmap.is_symbol());
        let selected = charmap.codepoint_subtable.unwrap().subtable;
        assert!(matches!(selected, SupportedSubtable::Format4(..)));
    }

    #[test]
    fn map_format_4() {
        let font = load(fontcull_font_test_data::VAZIRMATN_VAR);
        let charmap = font.charmap();
        let cases = [
            ('A', Some(GlyphId::new(1))),
            ('À', Some(GlyphId::new(2))),
            ('`', Some(GlyphId::new(3))),
            ('B', None),
        ];
        for (ch, expected) in cases {
            assert_eq!(charmap.map(ch), expected);
        }
    }

    #[test]
    fn map_format_12() {
        let font = load(fontcull_font_test_data::CMAP12_FONT1);
        let charmap = font.charmap();
        let cases = [
            (u32::from(' '), None),
            (0x101723, Some(GlyphId::new(1))),
            (0x101725, Some(GlyphId::new(3))),
            (0x102523, Some(GlyphId::new(6))),
            (0x102526, Some(GlyphId::new(9))),
            (0x102527, Some(GlyphId::new(10))),
        ];
        for (codepoint, expected) in cases {
            assert_eq!(charmap.map(codepoint), expected);
        }
    }

    #[test]
    fn map_symbol_pua() {
        let font = load(fontcull_font_test_data::CMAP4_SYMBOL_PUA);
        let charmap = font.charmap();
        assert!(charmap.codepoint_subtable.as_ref().unwrap().is_symbol);
        let cases = [
            (0xF001_u32, GlyphId::new(1)),
            (0xF002, GlyphId::new(2)),
            (0xF003, GlyphId::new(3)),
            (0xF0FE, GlyphId::new(4)),
        ];
        // Codepoints stored directly in the subtable.
        for &(pua, gid) in &cases {
            assert_eq!(charmap.map(pua), Some(gid));
        }
        // The same glyphs reached through the 0xF000 fallback.
        for &(pua, gid) in &cases {
            assert_eq!(charmap.map(pua - 0xF000), Some(gid));
        }
    }

    #[test]
    fn map_variants() {
        let font = load(fontcull_font_test_data::CMAP14_FONT1);
        let charmap = font.charmap();
        let selector = '\u{e0100}';
        let cases = [
            ('a', None),
            ('\u{4e00}', Some(MapVariant::UseDefault)),
            ('\u{4e06}', Some(MapVariant::UseDefault)),
            ('\u{4e08}', Some(MapVariant::Variant(GlyphId::new(25)))),
            ('\u{4e09}', Some(MapVariant::Variant(GlyphId::new(26)))),
        ];
        for (ch, expected) in cases {
            assert_eq!(charmap.map_variant(ch, selector), expected);
        }
    }

    #[test]
    fn mappings() {
        let fonts = [
            fontcull_font_test_data::VAZIRMATN_VAR,
            fontcull_font_test_data::CMAP12_FONT1,
            fontcull_font_test_data::SIMPLE_GLYF,
            fontcull_font_test_data::CMAP4_SYMBOL_PUA,
        ];
        for data in fonts {
            let font = load(data);
            let charmap = font.charmap();
            for (codepoint, gid) in charmap.mappings() {
                assert_ne!(
                    gid,
                    GlyphId::NOTDEF,
                    "we should never encounter notdef glyphs"
                );
                // Every enumerated pair must agree with a direct lookup.
                assert_eq!(charmap.map(codepoint), Some(gid));
            }
        }
    }

    #[test]
    fn variant_mappings() {
        let font = load(fontcull_font_test_data::CMAP14_FONT1);
        let charmap = font.charmap();
        for (codepoint, selector, expected) in charmap.variant_mappings() {
            assert_eq!(charmap.map_variant(codepoint, selector), Some(expected));
        }
    }
}