use crate::cache::Cache;
use crate::context::Context;
use crate::device::Device;
use crate::font::cid::Type0Font;
use crate::font::generated::{
glyph_names, mac_expert, mac_os_roman, mac_roman, standard, win_ansi,
};
use crate::font::true_type::TrueTypeFont;
use crate::font::type1::Type1Font;
use crate::font::type3::Type3;
use crate::interpret::state::State;
use crate::{CMapResolverFn, CacheKey, FontResolverFn, InterpreterSettings, Paint};
use bitflags::bitflags;
use kurbo::{Affine, BezPath, Vec2};
use log::warn;
use outline::OutlineFont;
use pdf_syntax::object::Name;
use pdf_syntax::object::dict::keys::SUBTYPE;
use pdf_syntax::object::dict::keys::*;
use pdf_syntax::object::{Dict, Stream};
use pdf_syntax::page::Resources;
use pdf_syntax::xref::XRef;
use skrifa::GlyphId;
use std::fmt::Debug;
use std::ops::Deref;
use std::rc::Rc;
use std::sync::Arc;
mod blob;
mod cid;
mod generated;
mod glyph_simulator;
pub(crate) mod outline;
mod standard_font;
mod true_type;
mod type1;
pub(crate) mod type3;
/// Number of font units per em assumed by this module.
///
/// `Font::get_glyph` scales glyph coordinates by `1 / UNITS_PER_EM` when it
/// builds the glyph transform.
pub(crate) const UNITS_PER_EM: f32 = 1000.0;
/// Horizontally rescales `path` by the ratio `expected_width / actual_width`.
///
/// The path is handed back untouched when `actual_width` is zero (no usable
/// ratio exists) or when both widths already agree. The vertical axis is
/// never scaled.
pub(crate) fn stretch_glyph(path: BezPath, expected_width: f32, actual_width: f32) -> BezPath {
    let needs_no_scaling = actual_width == 0.0 || actual_width == expected_width;
    if needs_no_scaling {
        return path;
    }

    let horizontal_ratio = f64::from(expected_width / actual_width);
    Affine::scale_non_uniform(horizontal_ratio, 1.0) * path
}
/// Shared handle to a byte buffer (anything viewable as `[u8]`) that can be
/// sent and shared across threads (`Send + Sync`).
pub type FontData = Arc<dyn AsRef<[u8]> + Send + Sync>;
/// Removes a leading subset tag of the form `XXXXXX+` from a font name.
///
/// The text before the first `+` is treated as a tag only when it is exactly
/// six bytes long; in every other case `name` is returned unchanged.
pub(crate) fn strip_subset_prefix(name: &str) -> &str {
    name.split_once('+')
        .filter(|(tag, _)| tag.len() == 6)
        .map_or(name, |(_, font_name)| font_name)
}
use crate::util::hash128;
pub use outline::OutlineFontData;
use pdf_font::cmap::{BfString, CMap, CMapName, CharacterCollection};
pub use standard_font::StandardFont;
/// A glyph produced by a font, in one of the two representations this module
/// creates in `Font::get_glyph`.
pub enum Glyph<'a> {
/// A glyph backed by an outline font (Type1, TrueType or Type0).
Outline(OutlineGlyph),
/// A glyph of a Type3 font; boxed, carrying the state needed to interpret it.
Type3(Box<Type3Glyph<'a>>),
}
impl Glyph<'_> {
pub fn as_unicode(&self) -> Option<BfString> {
match self {
Glyph::Outline(g) => g.as_unicode(),
Glyph::Type3(g) => g.as_unicode(),
}
}
}
/// Identifies an outline glyph by its glyph ID and the font it belongs to.
///
/// Its `CacheKey` implementation hashes both parts together.
#[derive(Clone, Debug)]
pub struct GlyphIdentifier {
// Glyph ID within `font`.
id: GlyphId,
// The outline font the glyph comes from.
font: OutlineFont,
}
impl CacheKey for GlyphIdentifier {
    /// Hashes the glyph ID together with the owning font's cache key.
    fn cache_key(&self) -> u128 {
        let Self { id, font } = self;
        hash128(&(*id, font.cache_key()))
    }
}
/// A glyph of an outline font, together with the character code that
/// selected it.
#[derive(Clone, Debug)]
pub struct OutlineGlyph {
// Glyph ID within `font`.
pub(crate) id: GlyphId,
// The outline font that owns the glyph.
pub(crate) font: OutlineFont,
// Character code the glyph was looked up with; used for Unicode and
// advance-width queries.
pub(crate) char_code: u32,
}
impl OutlineGlyph {
    /// Returns the glyph outline, as produced by the owning font for this
    /// glyph ID and character code.
    pub fn outline(&self) -> BezPath {
        let Self {
            font,
            id,
            char_code,
        } = self;
        font.outline_glyph(*id, *char_code)
    }

    /// Returns an identifier (glyph ID plus a clone of the font) for this glyph.
    pub fn identifier(&self) -> GlyphIdentifier {
        let (id, font) = (self.id, self.font.clone());
        GlyphIdentifier { id, font }
    }

    /// Returns the Unicode text the font maps this glyph's character code to.
    pub fn as_unicode(&self) -> Option<BfString> {
        let code = self.char_code;
        self.font.char_code_to_unicode(code)
    }

    /// Returns the font data reported by the owning font, if any.
    pub fn font_data(&self) -> Option<OutlineFontData> {
        let font = &self.font;
        font.font_data()
    }

    /// Returns the glyph ID.
    pub fn glyph_id(&self) -> GlyphId {
        let Self { id, .. } = self;
        *id
    }

    /// Returns the advance width the font reports for this glyph.
    ///
    /// The lookup is keyed by the character code, not by the glyph ID.
    pub fn advance_width(&self) -> Option<f32> {
        let code = self.char_code;
        self.font.glyph_advance_width(code)
    }

    /// Returns the cache key of the owning font.
    pub fn font_cache_key(&self) -> u128 {
        let font = &self.font;
        font.cache_key()
    }
}
/// A glyph of a Type3 font, bundled with the context captured when it was
/// created in `Font::get_glyph`.
#[derive(Clone)]
pub struct Type3Glyph<'a> {
// The Type3 font that renders the glyph.
pub(crate) font: Rc<Type3<'a>>,
// Glyph ID within `font`.
pub(crate) glyph_id: GlyphId,
// Clone of the interpreter state at the time the glyph was created.
pub(crate) state: State<'a>,
// Clone of the resources that were passed in when the glyph was created.
pub(crate) parent_resources: Resources<'a>,
// Clone of the context's object cache.
pub(crate) cache: Cache,
// Cross-reference table taken from the context.
pub(crate) xref: &'a XRef,
// Clone of the interpreter settings taken from the context.
pub(crate) settings: InterpreterSettings,
// Character code the glyph was looked up with.
pub(crate) char_code: u32,
}
impl<'a> Type3Glyph<'a> {
    /// Renders this glyph onto `device` by delegating to the owning Type3 font.
    ///
    /// `transform`, `glyph_transform` and `paint` are forwarded unchanged.
    pub fn interpret(
        &self,
        device: &mut impl Device<'a>,
        transform: Affine,
        glyph_transform: Affine,
        paint: &Paint<'a>,
    ) {
        let type3_font = &self.font;
        type3_font.render_glyph(self, transform, glyph_transform, paint, device);
    }

    /// Returns the Unicode text the font maps this glyph's character code to.
    pub fn as_unicode(&self) -> Option<BfString> {
        let code = self.char_code;
        self.font.char_code_to_unicode(code)
    }
}
impl CacheKey for Type3Glyph<'_> {
    /// Hashes the owning font's cache key together with the glyph ID.
    fn cache_key(&self) -> u128 {
        let font_key = self.font.cache_key();
        hash128(&(font_key, self.glyph_id))
    }
}
/// A font of any supported kind.
///
/// Field `0` is the cache key (the font dictionary's cache key for fonts
/// built by `Font::new`, `0` for fonts built by `Font::new_standard`);
/// field `1` is the concrete font implementation.
#[derive(Clone, Debug)]
pub(crate) struct Font<'a>(u128, FontType<'a>);
impl<'a> Font<'a> {
pub(crate) fn new(
dict: &Dict<'a>,
font_resolver: &FontResolverFn,
cmap_resolver: &CMapResolverFn,
) -> Option<Self> {
let f_type = match dict.get::<Name>(SUBTYPE)?.deref() {
TYPE1 | MM_TYPE1 => {
FontType::Type1(Rc::new(Type1Font::new(dict, font_resolver, cmap_resolver)?))
}
TRUE_TYPE | OPEN_TYPE => FontType::TrueType(Rc::new(TrueTypeFont::new(
dict,
font_resolver,
cmap_resolver,
)?)),
TYPE0 => FontType::Type0(Rc::new(Type0Font::new(dict, font_resolver, cmap_resolver)?)),
TYPE3 => FontType::Type3(Rc::new(Type3::new(dict, cmap_resolver)?)),
f => {
warn!(
"unimplemented font type {:?}",
std::str::from_utf8(f).unwrap_or("unknown type")
);
return None;
}
};
let cache_key = dict.cache_key();
Some(Self(cache_key, f_type))
}
pub(crate) fn new_standard(
standard_font: StandardFont,
font_resolver: &FontResolverFn,
) -> Option<Self> {
let font = Type1Font::new_standard(standard_font, font_resolver)?;
Some(Self(0, FontType::Type1(Rc::new(font))))
}
pub(crate) fn map_code(&self, code: u32) -> GlyphId {
match &self.1 {
FontType::Type1(f) => {
debug_assert!(code <= u8::MAX as u32);
f.map_code(code as u8)
}
FontType::TrueType(t) => {
debug_assert!(code <= u8::MAX as u32);
t.map_code(code as u8)
}
FontType::Type0(t) => t.map_code(code),
FontType::Type3(t) => {
debug_assert!(code <= u8::MAX as u32);
t.map_code(code as u8)
}
}
}
pub(crate) fn get_glyph(
&self,
glyph: GlyphId,
char_code: u32,
ctx: &mut Context<'a>,
resources: &Resources<'a>,
origin_displacement: Vec2,
) -> (Glyph<'a>, Affine) {
let glyph_transform = ctx.get().text_state.full_transform()
* Affine::scale(1.0 / UNITS_PER_EM as f64)
* Affine::translate(origin_displacement);
let glyph = match &self.1 {
FontType::Type1(t) => {
let font = OutlineFont::Type1(t.clone());
Glyph::Outline(OutlineGlyph {
id: glyph,
font,
char_code,
})
}
FontType::TrueType(t) => {
let font = OutlineFont::TrueType(t.clone());
Glyph::Outline(OutlineGlyph {
id: glyph,
font,
char_code,
})
}
FontType::Type0(t) => {
let font = OutlineFont::Type0(t.clone());
Glyph::Outline(OutlineGlyph {
id: glyph,
font,
char_code,
})
}
FontType::Type3(t) => {
let shape_glyph = Type3Glyph {
font: t.clone(),
glyph_id: glyph,
state: ctx.get().clone(),
parent_resources: resources.clone(),
cache: ctx.object_cache.clone(),
xref: ctx.xref,
settings: ctx.settings.clone(),
char_code,
};
Glyph::Type3(Box::new(shape_glyph))
}
};
(glyph, glyph_transform)
}
pub(crate) fn code_advance(&self, code: u32) -> Vec2 {
match &self.1 {
FontType::Type1(t) => {
debug_assert!(code <= u8::MAX as u32);
Vec2::new(t.glyph_width(code as u8).unwrap_or(0.0) as f64, 0.0)
}
FontType::TrueType(t) => {
debug_assert!(code <= u8::MAX as u32);
Vec2::new(t.glyph_width(code as u8) as f64, 0.0)
}
FontType::Type0(t) => t.code_advance(code),
FontType::Type3(t) => {
debug_assert!(code <= u8::MAX as u32);
Vec2::new(t.glyph_width(code as u8) as f64, 0.0)
}
}
}
pub(crate) fn origin_displacement(&self, code: u32) -> Vec2 {
match &self.1 {
FontType::Type1(_) => Vec2::default(),
FontType::TrueType(_) => Vec2::default(),
FontType::Type0(t) => t.origin_displacement(code),
FontType::Type3(_) => Vec2::default(),
}
}
pub(crate) fn read_code(&self, bytes: &[u8], offset: usize) -> (u32, usize) {
match &self.1 {
FontType::Type1(_) => (bytes[offset] as u32, 1),
FontType::TrueType(_) => (bytes[offset] as u32, 1),
FontType::Type0(t) => t.read_code(bytes, offset),
FontType::Type3(_) => (bytes[offset] as u32, 1),
}
}
pub(crate) fn is_horizontal(&self) -> bool {
match &self.1 {
FontType::Type1(_) => true,
FontType::TrueType(_) => true,
FontType::Type0(t) => t.is_horizontal(),
FontType::Type3(_) => true,
}
}
}
impl CacheKey for Font<'_> {
    /// Returns the cache key stored when the font was constructed.
    fn cache_key(&self) -> u128 {
        let Self(stored_key, _) = self;
        *stored_key
    }
}
/// The concrete font implementation behind a `Font`, one variant per
/// supported font kind. Each payload is reference-counted, so cloning is cheap.
#[derive(Clone, Debug)]
enum FontType<'a> {
// Built for the `TYPE1` / `MM_TYPE1` subtypes and for standard fonts.
Type1(Rc<Type1Font>),
// Built for the `TRUE_TYPE` / `OPEN_TYPE` subtypes.
TrueType(Rc<TrueTypeFont>),
// Built for the `TYPE0` subtype.
Type0(Rc<Type0Font>),
// Built for the `TYPE3` subtype.
Type3(Rc<Type3<'a>>),
}
/// Base encodings used to turn a single-byte character code into a glyph
/// name (see `Encoding::map_code`).
#[derive(Debug)]
enum Encoding {
// Looked up in the generated `standard` table.
Standard,
// Looked up in `mac_roman`, falling back to `mac_os_roman`.
MacRoman,
// Looked up in the generated `win_ansi` table.
WinAnsi,
// Looked up in the generated `mac_expert` table.
MacExpert,
// No table in this module; `map_code` yields `None` for non-zero codes.
BuiltIn,
}
impl Encoding {
    /// Maps a single-byte character code to a glyph name.
    ///
    /// Code `0` always maps to `".notdef"`, whatever the encoding. Other
    /// codes are looked up in the encoding's table; `MacRoman` falls back to
    /// the Mac OS Roman table, and `BuiltIn` has no table and yields `None`.
    fn map_code(&self, code: u8) -> Option<&'static str> {
        match (code, self) {
            (0, _) => Some(".notdef"),
            (_, Self::Standard) => standard::get(code),
            (_, Self::MacRoman) => mac_roman::get(code).or_else(|| mac_os_roman::get(code)),
            (_, Self::WinAnsi) => win_ansi::get(code),
            (_, Self::MacExpert) => mac_expert::get(code),
            (_, Self::BuiltIn) => None,
        }
    }
}
/// Font stretch (width class) of a font, as named by a font descriptor's
/// font-stretch entry.
#[derive(Debug, Copy, Clone)]
pub enum FontStretch {
    /// Default; also used for any unrecognised name.
    Normal,
    /// Parsed from `"UltraCondensed"`.
    UltraCondensed,
    /// Parsed from `"ExtraCondensed"`.
    ExtraCondensed,
    /// Parsed from `"Condensed"`.
    Condensed,
    /// Parsed from `"SemiCondensed"`.
    SemiCondensed,
    /// Parsed from `"SemiExpanded"`.
    SemiExpanded,
    /// Parsed from `"Expanded"`.
    Expanded,
    /// Parsed from `"ExtraExpanded"`.
    ExtraExpanded,
    /// Parsed from `"UltraExpanded"`.
    UltraExpanded,
}

impl FontStretch {
    /// Parses a font-stretch name.
    ///
    /// Matching is exact and case-sensitive; anything that is not one of the
    /// eight non-normal names (including `"Normal"` itself) yields
    /// [`FontStretch::Normal`].
    fn from_string(s: &str) -> Self {
        const NAMED_STRETCHES: [(&str, FontStretch); 8] = [
            ("UltraCondensed", FontStretch::UltraCondensed),
            ("ExtraCondensed", FontStretch::ExtraCondensed),
            ("Condensed", FontStretch::Condensed),
            ("SemiCondensed", FontStretch::SemiCondensed),
            ("SemiExpanded", FontStretch::SemiExpanded),
            ("Expanded", FontStretch::Expanded),
            ("ExtraExpanded", FontStretch::ExtraExpanded),
            ("UltraExpanded", FontStretch::UltraExpanded),
        ];

        NAMED_STRETCHES
            .iter()
            .find(|(label, _)| *label == s)
            .map_or(Self::Normal, |&(_, stretch)| stretch)
    }
}
bitflags! {
/// Font descriptor flags, parsed from the descriptor's `FLAGS` integer with
/// unknown bits dropped (`from_bits_truncate` in `FallbackFontQuery::new`).
///
/// Bit positions are zero-based here; the PDF specification numbers the same
/// bits starting from 1.
#[derive(Debug)]
pub(crate) struct FontFlags: u32 {
/// Fixed-pitch font; makes the fallback pick a Courier variant.
const FIXED_PITCH = 1 << 0;
/// Serif font; makes the fallback pick a Times variant.
const SERIF = 1 << 1;
/// Symbolic font flag (not read in this file).
const SYMBOLIC = 1 << 2;
/// Script font flag (not read in this file).
const SCRIPT = 1 << 3;
/// Non-symbolic font flag (not read in this file).
const NON_SYMBOLIC = 1 << 5;
/// Italic font; sets `FallbackFontQuery::is_italic`.
const ITALIC = 1 << 6;
/// All-caps font flag (not read in this file).
const ALL_CAP = 1 << 16;
/// Small-caps font; sets `FallbackFontQuery::is_small_cap`.
const SMALL_CAP = 1 << 17;
/// Force-bold flag (not read in this file).
const FORCE_BOLD = 1 << 18;
}
}
/// A request for font data: either a specific standard font or a described
/// fallback font.
pub enum FontQuery {
/// One specific standard font.
Standard(StandardFont),
/// A fallback font described by the properties in `FallbackFontQuery`.
Fallback(FallbackFontQuery),
}
/// Properties describing a font for which a substitute has to be found.
///
/// Built from a font dictionary by `FallbackFontQuery::new`; see
/// `pick_standard_font` for mapping the properties to a standard font.
#[derive(Debug, Clone)]
pub struct FallbackFontQuery {
/// PostScript name (the `BASE_FONT` entry) with any subset prefix stripped.
pub post_script_name: Option<String>,
/// Font name with any subset prefix stripped.
pub font_name: Option<String>,
/// Font family name, if present.
pub font_family: Option<String>,
/// Font stretch; `FontStretch::Normal` when absent or unrecognised.
pub font_stretch: FontStretch,
/// Font weight; defaults to 400. Values of 700 and above count as bold.
pub font_weight: u32,
/// Whether the fixed-pitch descriptor flag is set.
pub is_fixed_pitch: bool,
/// Whether the serif descriptor flag is set.
pub is_serif: bool,
/// Whether the font is italic, according to the descriptor flag or the
/// PostScript name.
pub is_italic: bool,
/// Whether the font is bold, according to the weight or the PostScript name.
pub is_bold: bool,
/// Whether the small-cap descriptor flag is set.
pub is_small_cap: bool,
/// Character collection of the font, if known (`None` after `new`).
pub character_collection: Option<CharacterCollection>,
}
impl FallbackFontQuery {
    /// Builds a fallback query from a font dictionary.
    ///
    /// The PostScript name comes from the dictionary's `BASE_FONT` entry (with
    /// any subset prefix stripped). When the dictionary has a font descriptor,
    /// the font name, family, stretch, weight and flag-derived properties are
    /// read from that descriptor. Finally, bold and italic are also inferred
    /// from the weight (`>= 700`) and from keywords in the PostScript name.
    pub(crate) fn new(dict: &Dict<'_>) -> Self {
        let post_script_name = dict
            .get::<Name>(BASE_FONT)
            .map(|n| strip_subset_prefix(n.as_str()).to_string());
        let mut data = Self {
            post_script_name,
            ..Default::default()
        };

        if let Some(descriptor) = dict.get::<Dict<'_>>(FONT_DESC) {
            // `FontName` is an entry of the font descriptor, not of the font
            // dictionary, so it must be looked up on `descriptor`.
            data.font_name = descriptor
                .get::<Name>(FONT_NAME)
                .map(|n| strip_subset_prefix(n.as_str()).to_string());
            // NOTE(review): the PDF specification defines `FontFamily` as a
            // byte string rather than a name; confirm that reading it as
            // `Name` actually matches real documents.
            data.font_family = descriptor
                .get::<Name>(FONT_FAMILY)
                .map(|n| n.as_str().to_string());
            data.font_stretch = descriptor
                .get::<Name>(FONT_STRETCH)
                .map(|n| FontStretch::from_string(n.as_str()))
                .unwrap_or(FontStretch::Normal);
            data.font_weight = descriptor.get::<u32>(FONT_WEIGHT).unwrap_or(400);

            if let Some(flags) = descriptor
                .get::<u32>(FLAGS)
                .map(FontFlags::from_bits_truncate)
            {
                data.is_fixed_pitch = flags.contains(FontFlags::FIXED_PITCH);
                data.is_serif = flags.contains(FontFlags::SERIF);
                data.is_italic = flags.contains(FontFlags::ITALIC);
                data.is_small_cap = flags.contains(FontFlags::SMALL_CAP);
            }
        }

        data.is_bold |= data.font_weight >= 700;

        // The PostScript name often encodes the style even when the
        // descriptor is missing or incomplete.
        if let Some(name) = &data.post_script_name {
            let lower = name.to_ascii_lowercase();
            data.is_italic |=
                lower.contains("italic") || lower.contains("oblique") || lower.contains("slant");
            data.is_bold |= lower.contains("bold")
                || lower.contains("demi")
                || lower.contains("semibold")
                || lower.contains("heavy")
                || lower.contains("black");
        }

        data
    }

    /// Picks the standard font that best matches this query.
    ///
    /// Fixed-pitch fonts map to the Courier family, non-serif fonts to
    /// Helvetica and serif fonts to Times; the bold and italic properties
    /// select the variant within the family.
    pub fn pick_standard_font(&self) -> StandardFont {
        if self.is_fixed_pitch {
            match (self.is_bold, self.is_italic) {
                (true, true) => StandardFont::CourierBoldOblique,
                (true, false) => StandardFont::CourierBold,
                (false, true) => StandardFont::CourierOblique,
                (false, false) => StandardFont::Courier,
            }
        } else if !self.is_serif {
            match (self.is_bold, self.is_italic) {
                (true, true) => StandardFont::HelveticaBoldOblique,
                (true, false) => StandardFont::HelveticaBold,
                (false, true) => StandardFont::HelveticaOblique,
                (false, false) => StandardFont::Helvetica,
            }
        } else {
            match (self.is_bold, self.is_italic) {
                (true, true) => StandardFont::TimesBoldItalic,
                (true, false) => StandardFont::TimesBold,
                (false, true) => StandardFont::TimesItalic,
                (false, false) => StandardFont::TimesRoman,
            }
        }
    }
}
impl Default for FallbackFontQuery {
/// Returns a query with no names, normal stretch, weight 400 and every
/// style property unset.
fn default() -> Self {
Self {
post_script_name: None,
font_name: None,
font_family: None,
font_stretch: FontStretch::Normal,
// Below the 700 threshold that `new` treats as bold.
font_weight: 400,
is_fixed_pitch: false,
is_serif: false,
is_italic: false,
is_bold: false,
is_small_cap: false,
character_collection: None,
}
}
}
/// Maps a glyph name to a single Unicode character.
///
/// The following strategies are tried in order:
/// 1. a lookup of the full name in the glyph-name table (first character of
///    the mapped text),
/// 2. the `uniXXXX` / `uXXXX` hexadecimal forms,
/// 3. the same two lookups on the part before the first `.` (variant
///    suffixes such as `A.swash`),
/// 4. the `a<decimal>` form, accepted only for non-control characters.
///
/// Logs a warning and returns `None` when nothing matches.
pub(crate) fn glyph_name_to_unicode(name: &str) -> Option<char> {
    // A table hit is final, even when the mapped text is empty.
    if let Some(mapped) = glyph_names::get(name) {
        return mapped.chars().next();
    }

    if let found @ Some(_) = unicode_from_name(name) {
        return found;
    }

    // Retry with the variant suffix removed; a name that starts with `.`
    // (e.g. `.notdef`) has no usable stem.
    let stem = name
        .split_once('.')
        .map(|(head, _)| head)
        .filter(|head| !head.is_empty());
    if let Some(stem) = stem {
        let from_stem = glyph_names::get(stem)
            .and_then(|text| text.chars().next())
            .or_else(|| unicode_from_name(stem));
        if from_stem.is_some() {
            return from_stem;
        }
    }

    // `a<decimal>` names: the number is taken as a code point.
    let from_decimal = name
        .strip_prefix('a')
        .and_then(|digits| digits.parse::<u32>().ok())
        .and_then(char::from_u32)
        .filter(|c| !c.is_control() || *c == ' ');
    if from_decimal.is_some() {
        return from_decimal;
    }

    warn!("failed to map glyph name {} to unicode", name);
    None
}
/// Maps a glyph name to Unicode text, with support for ligature names.
///
/// Names that resolve to a single character via `glyph_name_to_unicode` are
/// returned as a one-character string. Otherwise the part before the first
/// `.` is split on `_` and every component must resolve to a character
/// (e.g. `f_i.alt` becomes `"fi"`). Returns `None` when the name contains no
/// `_`, has an empty component, or has a component that cannot be resolved.
pub(crate) fn glyph_name_to_string(name: &str) -> Option<String> {
    if let Some(single) = glyph_name_to_unicode(name) {
        return Some(single.to_string());
    }

    let stem = match name.split_once('.') {
        Some((head, _)) => head,
        None => name,
    };
    if !stem.contains('_') {
        return None;
    }

    // Collecting into `Option<String>` stops at the first component that is
    // empty or unresolvable.
    let text = stem
        .split('_')
        .map(|component| {
            if component.is_empty() {
                None
            } else {
                glyph_name_to_unicode(component)
            }
        })
        .collect::<Option<String>>()?;

    (!text.is_empty()).then_some(text)
}
/// Decodes glyph names of the form `uni<hex>` or `u<hex>` into a character.
///
/// A name starting with `uni` is only ever interpreted in the `uni` form;
/// otherwise a leading `u` is stripped. The remainder must parse as a
/// hexadecimal number that is a valid Unicode scalar value, else `None` is
/// returned.
pub(crate) fn unicode_from_name(name: &str) -> Option<char> {
    let hex_digits = name
        .strip_prefix("uni")
        .or_else(|| name.strip_prefix('u'))?;

    let code_point = u32::from_str_radix(hex_digits, 16).ok()?;
    char::from_u32(code_point)
}
/// Reads and parses the `TO_UNICODE` CMap of a font dictionary.
///
/// The entry is first read as a stream and decoded. If there is no stream or
/// decoding fails, the entry is read as a name and handed to `cmap_resolver`.
/// The resulting bytes are parsed as a CMap, with `cmap_resolver` also
/// supplied to the parser for resolving CMaps by name. Returns `None` when no
/// data can be obtained or parsing fails.
pub(crate) fn read_to_unicode(dict: &Dict<'_>, cmap_resolver: &CMapResolverFn) -> Option<CMap> {
    let embedded = dict
        .get::<Stream<'_>>(TO_UNICODE)
        .and_then(|stream| stream.decoded().ok());

    let data = match embedded {
        Some(bytes) => bytes,
        None => {
            let name = dict.get::<Name>(TO_UNICODE)?;
            (cmap_resolver)(CMapName::from_bytes(name.as_ref()))?.to_vec()
        }
    };

    let resolver = cmap_resolver.clone();
    CMap::parse(&data, move |name| (resolver)(name))
}
pub(crate) fn synthesize_unicode_map_from_encoding(dict: &Dict<'_>) -> Option<[Option<char>; 256]> {
let (base_encoding, differences) = true_type::read_encoding(dict);
if matches!(base_encoding, Encoding::BuiltIn) && differences.is_empty() {
return None;
}
let mut table: [Option<char>; 256] = [None; 256];
for code in 0u8..=255 {
let glyph_name = differences
.get(&code)
.map(String::as_str)
.or_else(|| base_encoding.map_code(code));
if let Some(name) = glyph_name {
table[code as usize] = glyph_name_to_unicode(name);
}
}
Some(table)
}
/// Replaces aliases of the space and hyphen glyph names with their canonical
/// names.
///
/// `nbspace` and `nonbreakingspace` become `space`; `sfthyphen` and
/// `softhyphen` become `hyphen`. Any other name is returned unchanged.
pub(crate) fn normalized_glyph_name(name: &str) -> &str {
    match name {
        "nbspace" | "nonbreakingspace" => "space",
        "sfthyphen" | "softhyphen" => "hyphen",
        unchanged => unchanged,
    }
}
// Unit tests for `normalized_glyph_name`.
#[cfg(test)]
mod normalized_glyph_name_tests {
use super::normalized_glyph_name;
// Both non-breaking-space aliases collapse to `space`.
#[test]
fn maps_space_aliases() {
assert_eq!(normalized_glyph_name("nbspace"), "space");
assert_eq!(normalized_glyph_name("nonbreakingspace"), "space");
}
// Both soft-hyphen aliases collapse to `hyphen`.
#[test]
fn maps_hyphen_aliases() {
assert_eq!(normalized_glyph_name("sfthyphen"), "hyphen");
assert_eq!(normalized_glyph_name("softhyphen"), "hyphen");
}
// Names that are not aliases, including the canonical names themselves,
// pass through unchanged.
#[test]
fn preserves_unrelated_names() {
assert_eq!(normalized_glyph_name("A"), "A");
assert_eq!(normalized_glyph_name(".notdef"), ".notdef");
assert_eq!(normalized_glyph_name("hyphen"), "hyphen");
assert_eq!(normalized_glyph_name("space"), "space");
}
}
// Unit tests for `glyph_name_to_unicode` and `glyph_name_to_string`.
#[cfg(test)]
mod glyph_name_to_unicode_tests {
use super::{glyph_name_to_string, glyph_name_to_unicode};
// Names found directly in the glyph-name table.
#[test]
fn standard_agl_name() {
assert_eq!(glyph_name_to_unicode("A"), Some('A'));
assert_eq!(glyph_name_to_unicode("space"), Some(' '));
assert_eq!(glyph_name_to_unicode("hyphen"), Some('-'));
}
// Underscore-joined components are resolved one by one and concatenated.
#[test]
fn ligature_name_underscore_joined() {
assert_eq!(glyph_name_to_string("f_i"), Some("fi".to_string()));
assert_eq!(glyph_name_to_string("f_f_i"), Some("ffi".to_string()));
assert_eq!(glyph_name_to_string("A_B_C"), Some("ABC".to_string()));
}
// A `.suffix` is dropped before the ligature components are split.
#[test]
fn ligature_name_with_suffix() {
assert_eq!(glyph_name_to_string("f_i.alt"), Some("fi".to_string()));
}
// Names that resolve as a single character take precedence over ligature
// splitting (`fi` maps to the U+FB01 ligature character).
#[test]
fn ligature_name_falls_back_to_single_char_path() {
assert_eq!(glyph_name_to_string("fi"), Some("\u{FB01}".to_string()));
assert_eq!(glyph_name_to_string("A"), Some("A".to_string()));
}
// Any unknown or empty component makes the whole ligature name fail.
#[test]
fn ligature_name_rejects_unresolvable_component() {
assert!(glyph_name_to_string("A_totallyUnknownGlyph").is_none());
assert!(glyph_name_to_string("_").is_none());
assert!(glyph_name_to_string("A__B").is_none());
}
// `uni` followed by hexadecimal digits.
#[test]
fn uni_prefix() {
assert_eq!(glyph_name_to_unicode("uni0041"), Some('A'));
assert_eq!(glyph_name_to_unicode("uni00E9"), Some('é'));
}
// `u` followed by hexadecimal digits.
#[test]
fn u_prefix() {
assert_eq!(glyph_name_to_unicode("u0041"), Some('A'));
assert_eq!(glyph_name_to_unicode("u2022"), Some('•'));
}
// Variant suffixes after the first `.` are ignored when the full name is
// unknown.
#[test]
fn variant_suffix_stripped() {
assert_eq!(glyph_name_to_unicode("A.swash"), Some('A'));
assert_eq!(glyph_name_to_unicode("comma.alt"), Some(','));
assert_eq!(glyph_name_to_unicode("space.narrow"), Some(' '));
}
// Suffix stripping also applies to the `uni` form.
#[test]
fn variant_suffix_with_uni_prefix() {
assert_eq!(glyph_name_to_unicode("uni0041.ss01"), Some('A'));
}
// `a` followed by a decimal code point.
#[test]
fn a_decimal_glyph_name() {
assert_eq!(glyph_name_to_unicode("a65"), Some('A'));
assert_eq!(glyph_name_to_unicode("a32"), Some(' '));
assert_eq!(glyph_name_to_unicode("a97"), Some('a'));
}
// Control characters are not accepted through the `a<decimal>` form.
#[test]
fn a_decimal_rejects_control_chars() {
assert_eq!(glyph_name_to_unicode("a0"), None);
assert_eq!(glyph_name_to_unicode("a7"), None);
}
// Names matching none of the strategies yield `None`.
#[test]
fn unknown_name_returns_none() {
assert_eq!(glyph_name_to_unicode("xyzzynonexistent"), None);
}
}
// Unit tests for the style inference and standard-font selection of
// `FallbackFontQuery`.
#[cfg(test)]
mod fallback_font_query_tests {
use super::*;
// Builds a query from a PostScript name, raw descriptor flags and a weight.
//
// NOTE: this re-implements the flag, weight and name inference of
// `FallbackFontQuery::new` instead of calling it, so it must be kept in
// sync with that function by hand.
fn query_with(name: &str, flags: u32, weight: u32) -> FallbackFontQuery {
let mut q = FallbackFontQuery {
post_script_name: Some(name.to_string()),
font_weight: weight,
..Default::default()
};
let font_flags = FontFlags::from_bits_truncate(flags);
q.is_fixed_pitch = font_flags.contains(FontFlags::FIXED_PITCH);
q.is_serif = font_flags.contains(FontFlags::SERIF);
q.is_italic = font_flags.contains(FontFlags::ITALIC);
q.is_small_cap = font_flags.contains(FontFlags::SMALL_CAP);
q.is_bold |= q.font_weight >= 700;
if let Some(name) = &q.post_script_name {
let lower = name.to_ascii_lowercase();
q.is_italic |=
lower.contains("italic") || lower.contains("oblique") || lower.contains("slant");
q.is_bold |= lower.contains("bold")
|| lower.contains("demi")
|| lower.contains("semibold")
|| lower.contains("heavy")
|| lower.contains("black");
}
q
}
// The fixed-pitch flag alone selects the Courier family.
#[test]
fn fixed_pitch_flag_selects_courier() {
let q = query_with("LetterGothic", FontFlags::FIXED_PITCH.bits(), 400);
assert!(matches!(q.pick_standard_font(), StandardFont::Courier));
}
// "Demi" in the name counts as bold; the serif flag selects Times.
#[test]
fn demi_in_name_selects_bold() {
let q = query_with("FranklinGothic-Demi", FontFlags::SERIF.bits(), 400);
assert!(q.is_bold);
assert!(matches!(q.pick_standard_font(), StandardFont::TimesBold));
}
// "Oblique" in the name counts as italic; no flags selects Helvetica.
#[test]
fn oblique_detected_as_italic() {
let q = query_with("HelveticaNeue-LightOblique", 0, 400);
assert!(q.is_italic);
assert!(matches!(
q.pick_standard_font(),
StandardFont::HelveticaOblique
));
}
// A weight of exactly 700 is already bold.
#[test]
fn font_weight_700_detected_as_bold() {
let q = query_with("CustomFont", 0, 700);
assert!(q.is_bold);
assert!(matches!(
q.pick_standard_font(),
StandardFont::HelveticaBold
));
}
// "Semibold" in the name counts as bold.
#[test]
fn semibold_detected_as_bold() {
let q = query_with("AGaramond-Semibold", FontFlags::SERIF.bits(), 400);
assert!(q.is_bold);
assert!(matches!(q.pick_standard_font(), StandardFont::TimesBold));
}
}