use crate::font::blob::{CffFontBlob, OpenTypeFontBlob};
use crate::font::generated::{glyph_names, mac_os_roman, mac_roman, standard};
use crate::font::standard_font::StandardKind;
use crate::font::{
Encoding, FallbackFontQuery, FontFlags, glyph_name_to_unicode, read_to_unicode,
strip_subset_prefix, synthesize_unicode_map_from_encoding, unicode_from_name,
};
use crate::util::OptionLog;
use crate::{CMapResolverFn, CacheKey, FontResolverFn};
use kurbo::BezPath;
use log::warn;
use pdf_font::cmap::{BfString, CMap};
use pdf_syntax::object::Array;
use pdf_syntax::object::Dict;
use pdf_syntax::object::Name;
use pdf_syntax::object::Object;
use pdf_syntax::object::Stream;
use pdf_syntax::object::dict::keys::*;
use skrifa::attribute::Style;
use skrifa::raw::TableProvider;
use skrifa::raw::tables::cmap::PlatformId;
use skrifa::{GlyphId, GlyphId16, MetadataProvider};
use std::cell::RefCell;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
#[derive(Debug)]
pub(crate) struct TrueTypeFont {
cache_key: u128,
kind: Kind,
to_unicode: Option<CMap>,
encoding_unicode: Option<[Option<char>; 256]>,
}
#[derive(Debug)]
enum Kind {
Embedded(EmbeddedKind),
Standard(StandardKind),
TextOnly(TextOnlyKind),
}
impl TrueTypeFont {
pub(crate) fn new(
dict: &Dict<'_>,
font_resolver: &FontResolverFn,
cmap_resolver: &CMapResolverFn,
) -> Option<Self> {
let cache_key = dict.cache_key();
let to_unicode = read_to_unicode(dict, cmap_resolver);
let encoding_unicode = to_unicode
.is_none()
.then(|| synthesize_unicode_map_from_encoding(dict))
.flatten();
if let Some(embedded) = EmbeddedKind::new(dict) {
return Some(Self {
cache_key,
kind: Kind::Embedded(embedded),
to_unicode,
encoding_unicode,
});
}
let fallback = || {
let fallback_query = FallbackFontQuery::new(dict);
let standard_font = fallback_query.pick_standard_font();
warn!(
"unable to load TrueType font {}, falling back to {}",
fallback_query
.post_script_name
.unwrap_or("(no name)".to_string()),
standard_font.as_str()
);
Some(Self {
cache_key,
kind: Kind::Standard(StandardKind::new_with_standard(
dict,
standard_font,
true,
font_resolver,
)?),
to_unicode: to_unicode.clone(),
encoding_unicode,
})
};
if let Some(standard) = StandardKind::new(dict, font_resolver) {
return Some(Self {
cache_key,
kind: Kind::Standard(standard),
to_unicode,
encoding_unicode,
});
}
if let Some(font) = fallback() {
return Some(font);
}
if to_unicode.is_some() || encoding_unicode.is_some() {
let text_only = TextOnlyKind::new(dict);
warn!(
"TrueType font {} has no embedded program and no substitute; \
falling back to text-only extraction (no rendering)",
text_only.postscript_name.as_deref().unwrap_or("(no name)")
);
return Some(Self {
cache_key,
kind: Kind::TextOnly(text_only),
to_unicode,
encoding_unicode,
});
}
None
}
pub(crate) fn outline_glyph(&self, glyph: GlyphId) -> BezPath {
match &self.kind {
Kind::Embedded(e) => e.outline_glyph(glyph),
Kind::Standard(s) => s.outline_glyph(glyph),
Kind::TextOnly(_) => BezPath::new(),
}
}
pub(crate) fn font_data(&self) -> Option<crate::font::FontData> {
match &self.kind {
Kind::Embedded(e) => Some(e.base_font.font_data()),
Kind::Standard(_) | Kind::TextOnly(_) => None,
}
}
pub(crate) fn postscript_name(&self) -> Option<&str> {
match &self.kind {
Kind::Embedded(e) => e.postscript_name.as_deref(),
Kind::Standard(_) => None,
Kind::TextOnly(t) => t.postscript_name.as_deref(),
}
}
pub(crate) fn weight(&self) -> Option<u32> {
match &self.kind {
Kind::Embedded(e) => {
let weight = e.base_font.font_ref().attributes().weight.value().round() as u32;
if weight > 0 { Some(weight) } else { None }
}
Kind::Standard(s) => Some(if s.is_bold() { 700 } else { 400 }),
Kind::TextOnly(t) => Some(t.weight),
}
}
pub(crate) fn is_italic(&self) -> bool {
match &self.kind {
Kind::Embedded(e) => {
if let Some(flags) = &e.font_flags
&& flags.contains(FontFlags::ITALIC)
{
return true;
}
e.base_font.font_ref().attributes().style != Style::Normal
}
Kind::Standard(s) => s.is_italic(),
Kind::TextOnly(t) => t.is_italic,
}
}
pub(crate) fn is_serif(&self) -> bool {
match &self.kind {
Kind::Embedded(e) => e
.font_flags
.as_ref()
.is_some_and(|f| f.contains(FontFlags::SERIF)),
Kind::Standard(s) => s.is_serif(),
Kind::TextOnly(t) => t.is_serif,
}
}
pub(crate) fn is_monospace(&self) -> bool {
match &self.kind {
Kind::Embedded(e) => {
if let Some(flags) = &e.font_flags
&& flags.contains(FontFlags::FIXED_PITCH)
{
return true;
}
e.base_font
.font_ref()
.metrics(
skrifa::instance::Size::unscaled(),
skrifa::instance::LocationRef::default(),
)
.is_monospace
}
Kind::Standard(s) => s.is_monospace(),
Kind::TextOnly(t) => t.is_monospace,
}
}
pub(crate) fn map_code(&self, code: u8) -> GlyphId {
match &self.kind {
Kind::Embedded(e) => e.map_code(code),
Kind::Standard(s) => s.map_code(code),
Kind::TextOnly(_) => GlyphId::new(code as u32),
}
}
pub(crate) fn glyph_width(&self, code: u8) -> f32 {
match &self.kind {
Kind::Embedded(e) => e.glyph_width(code),
Kind::Standard(s) => s.glyph_width(code).unwrap_or(0.0),
Kind::TextOnly(t) => t.glyph_width(code),
}
}
pub(crate) fn char_code_to_unicode(&self, code: u32) -> Option<BfString> {
if let Some(to_unicode) = &self.to_unicode
&& let Some(c) = to_unicode.lookup_bf_string(code)
{
return Some(c);
}
if let Some(map) = &self.encoding_unicode
&& let Some(ch) = usize::try_from(code)
.ok()
.and_then(|char_code| map.get(char_code))
.copied()
.flatten()
{
return Some(BfString::Char(ch));
}
match &self.kind {
Kind::Embedded(e) => e
.code_to_name(code as u8)
.and_then(glyph_name_to_unicode)
.map(BfString::Char),
Kind::Standard(s) => s.char_code_to_unicode(code as u8).map(BfString::Char),
Kind::TextOnly(_) => None,
}
}
}
#[derive(Debug)]
pub(crate) struct TextOnlyKind {
widths: Vec<Width>,
missing_width: f32,
postscript_name: Option<String>,
weight: u32,
is_italic: bool,
is_serif: bool,
is_monospace: bool,
}
impl TextOnlyKind {
fn new(dict: &Dict<'_>) -> Self {
let descriptor = dict.get::<Dict<'_>>(FONT_DESC).unwrap_or_default();
let (widths, missing_width) =
read_widths(dict, &descriptor).unwrap_or_else(|| (Vec::new(), 0.0));
let query = FallbackFontQuery::new(dict);
Self {
widths,
missing_width,
postscript_name: query.post_script_name.clone(),
weight: query.font_weight,
is_italic: query.is_italic,
is_serif: query.is_serif,
is_monospace: query.is_fixed_pitch,
}
}
fn glyph_width(&self, code: u8) -> f32 {
match self.widths.get(code as usize).copied() {
Some(Width::Value(w)) => w,
_ => self.missing_width,
}
}
}
#[derive(Debug)]
struct EmbeddedKind {
base_font: OpenTypeFontBlob,
widths: Vec<Width>,
missing_width: f32,
font_flags: Option<FontFlags>,
glyph_names: HashMap<String, GlyphId>,
encoding: Encoding,
cff_blob: Option<CffFontBlob>,
differences: HashMap<u8, String>,
cached_mappings: RefCell<HashMap<u8, GlyphId>>,
postscript_name: Option<String>,
}
impl EmbeddedKind {
fn new(dict: &Dict<'_>) -> Option<Self> {
let descriptor = dict.get::<Dict<'_>>(FONT_DESC).unwrap_or_default();
let font_flags = descriptor.get::<u32>(FLAGS).and_then(FontFlags::from_bits);
let (widths, missing_width) = read_widths(dict, &descriptor)?;
let (encoding, differences) = read_encoding(dict);
let base_font = descriptor
.get::<Stream<'_>>(FONT_FILE2)
.and_then(|s| s.decoded().ok())
.and_then(|d| OpenTypeFontBlob::new(Arc::new(d.to_vec()), 0))?;
let mut glyph_names = HashMap::new();
if let Ok(post) = base_font.font_ref().post() {
for i in 0..base_font.num_glyphs() {
if let Some(str) = post.glyph_name(GlyphId16::new(i)) {
glyph_names.insert(str.to_string(), GlyphId::new(i as u32));
}
}
}
let cff_font_blob = base_font
.font_ref()
.cff()
.ok()
.and_then(|cff| CffFontBlob::new(Arc::new(cff.offset_data().as_ref().to_vec())));
let postscript_name = dict
.get::<Name>(BASE_FONT)
.map(|n| strip_subset_prefix(n.as_str()).to_string());
Some(Self {
base_font,
differences,
cff_blob: cff_font_blob,
widths,
missing_width,
glyph_names,
font_flags,
encoding,
cached_mappings: RefCell::new(HashMap::new()),
postscript_name,
})
}
fn is_non_symbolic(&self) -> bool {
self.font_flags
.as_ref()
.map(|f| f.contains(FontFlags::NON_SYMBOLIC))
.unwrap_or(false)
}
fn code_to_name(&self, code: u8) -> Option<&str> {
self.differences
.get(&code)
.map(|s| s.as_str())
.or_else(|| self.encoding.map_code(code))
.or_else(|| standard::get(code))
}
fn outline_glyph(&self, glyph: GlyphId) -> BezPath {
self.base_font.outline_glyph(glyph)
}
fn map_code(&self, code: u8) -> GlyphId {
if let Some(glyph) = self.cached_mappings.borrow().get(&code) {
return *glyph;
}
if let Some(blob) = self.cff_blob.as_ref() {
let table = blob.table();
return self
.code_to_name(code)
.and_then(|name| table.glyph_index_by_name(name))
.map(|g| GlyphId::new(g.0 as u32))
.unwrap_or(GlyphId::NOTDEF);
}
let mut glyph = None;
if self.is_non_symbolic() {
let Some(lookup) = self.code_to_name(code) else {
return GlyphId::NOTDEF;
};
if let Ok(cmap) = self.base_font.font_ref().cmap() {
for record in cmap.encoding_records() {
if record.platform_id() == PlatformId::Windows
&& record.encoding_id() == 1
&& let Ok(subtable) = record.subtable(cmap.offset_data())
{
glyph = glyph.or_else(|| {
glyph_names::get(lookup)
.map(|n| n.to_string())
.or_else(|| unicode_from_name(lookup).map(|n| n.to_string()))
.and_then(|n| n.chars().next())
.and_then(|c| subtable.map_codepoint(c))
.filter(|g| *g != GlyphId::NOTDEF)
});
}
}
for record in cmap.encoding_records() {
if record.platform_id() == PlatformId::Macintosh
&& record.encoding_id() == 0
&& let Ok(subtable) = record.subtable(cmap.offset_data())
{
glyph = glyph.or_else(|| {
mac_os_roman::get_inverse(lookup)
.or_else(|| mac_roman::get_inverse(lookup))
.and_then(|c| subtable.map_codepoint(c))
.filter(|g| *g != GlyphId::NOTDEF)
});
}
}
}
if glyph.is_none() {
if let Some(gid) = self.glyph_names.get(lookup) {
glyph = Some(*gid);
} else if let Some(gid) = glyph_num_string(lookup) {
glyph = Some(GlyphId::new(gid));
}
}
} else if let Ok(cmap) = self.base_font.font_ref().cmap() {
for record in cmap.encoding_records() {
if record.platform_id() == PlatformId::Windows
&& matches!(record.encoding_id(), 0 | 1)
{
if let Ok(subtable) = record.subtable(cmap.offset_data()) {
for offset in [0x0000_u32, 0xF000, 0xF100, 0xF200] {
glyph = glyph
.or_else(|| subtable.map_codepoint(code as u32 + offset))
.filter(|g| *g != GlyphId::NOTDEF);
}
}
} else if matches!(
record.platform_id(),
PlatformId::Macintosh | PlatformId::Unicode
) && record.encoding_id() == 0
&& let Ok(subtable) = record.subtable(cmap.offset_data())
{
glyph = glyph
.or_else(|| subtable.map_codepoint(code))
.filter(|g| *g != GlyphId::NOTDEF);
}
}
}
let glyph = glyph.unwrap_or(GlyphId::NOTDEF);
self.cached_mappings.borrow_mut().insert(code, glyph);
glyph
}
fn glyph_width(&self, code: u8) -> f32 {
match self.widths.get(code as usize).copied() {
Some(Width::Value(w)) => w,
Some(Width::Missing) => self.missing_width,
_ => self
.base_font
.glyph_metrics()
.advance_width(self.map_code(code))
.warn_none(&format!("failed to find advance width for code {code}"))
.unwrap_or(0.0),
}
}
}
#[derive(Debug, Clone, Copy)]
pub(crate) enum Width {
Value(f32),
Missing,
}
pub(crate) fn read_widths(dict: &Dict<'_>, descriptor: &Dict<'_>) -> Option<(Vec<Width>, f32)> {
let mut widths = Vec::new();
let first_char = dict.get::<usize>(FIRST_CHAR);
let last_char = dict.get::<usize>(LAST_CHAR);
let widths_arr = dict.get::<Array<'_>>(WIDTHS);
let missing_width = descriptor.get::<f32>(MISSING_WIDTH).unwrap_or(0.0);
if let (Some(fc), Some(lc), Some(w)) = (first_char, last_char, widths_arr) {
let iter = w.iter::<f32>().take(lc.checked_sub(fc)?.checked_add(1)?);
for _ in 0..fc {
widths.push(Width::Missing);
}
for w in iter {
widths.push(Width::Value(w));
}
while widths.len() <= (u8::MAX as usize) + 1 {
widths.push(Width::Missing);
}
}
Some((widths, missing_width))
}
fn glyph_num_string(s: &str) -> Option<u32> {
if !s.starts_with('g') || s.len() < 2 {
return None;
}
s[1..].parse::<u32>().ok()
}
impl CacheKey for TrueTypeFont {
fn cache_key(&self) -> u128 {
self.cache_key
}
}
pub(crate) fn read_encoding(dict: &Dict<'_>) -> (Encoding, HashMap<u8, String>) {
fn get_encoding_base(dict: &Dict<'_>, name: Name) -> Encoding {
match dict.get::<Name>(name.clone()) {
Some(n) => match n.deref() {
WIN_ANSI_ENCODING => Encoding::WinAnsi,
MAC_ROMAN_ENCODING => Encoding::MacRoman,
MAC_EXPERT_ENCODING => Encoding::MacExpert,
_ => {
warn!("Unknown font encoding {}", name.as_str());
Encoding::Standard
}
},
None => Encoding::BuiltIn,
}
}
let mut map = HashMap::new();
if let Some(encoding_dict) = dict.get::<Dict<'_>>(ENCODING) {
if let Some(differences) = encoding_dict.get::<Array<'_>>(DIFFERENCES) {
let entries = differences.iter::<Object<'_>>();
let mut code = 0;
for obj in entries {
if let Some(num) = obj.clone().into_i32() {
code = num;
} else if let Some(name) = obj.into_name() {
map.insert(code as u8, name.as_str().to_string());
code += 1;
}
}
}
(
get_encoding_base(&encoding_dict, Name::new(BASE_ENCODING)),
map,
)
} else {
(get_encoding_base(dict, Name::new(ENCODING)), HashMap::new())
}
}