use crate::font::blob::{CffFontBlob, OpenTypeFontBlob, Type1FontBlob};
use crate::font::generated::glyph_names;
use crate::font::standard_font::select_standard_font;
use crate::font::{
FallbackFontQuery, FontFlags, FontQuery, glyph_name_to_string, read_to_unicode, stretch_glyph,
strip_subset_prefix,
};
use crate::{CMapResolverFn, CacheKey, FontResolverFn};
use kurbo::{BezPath, Vec2};
use log::warn;
use pdf_font::cmap::{BfString, CMap, CidFamily, WritingMode};
use pdf_syntax::object::Dict;
use pdf_syntax::object::Name;
use pdf_syntax::object::Stream;
use pdf_syntax::object::dict::keys::*;
use pdf_syntax::object::{Array, Object};
use skrifa::attribute::Style;
use skrifa::raw::collections::int_set::Domain;
use skrifa::{FontRef, GlyphId, MetadataProvider};
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
#[derive(Debug)]
pub(crate) struct Type0Font {
font_type: FontType,
horizontal: bool,
cache_key: u128,
dw: f32,
dw2: (f32, f32),
widths: HashMap<u32, f32>,
encoding: CMap,
to_unicode: Option<CMap>,
widths2: HashMap<u32, [f32; 3]>,
cid_to_gid_map: CidToGIdMap,
postscript_name: Option<String>,
font_flags: Option<FontFlags>,
fallback: bool,
to_unicode_is_cid_indexed: bool,
}
impl Type0Font {
pub(crate) fn new(
dict: &Dict<'_>,
font_resolver: &FontResolverFn,
cmap_resolver: &CMapResolverFn,
) -> Option<Self> {
let cmap = read_encoding(&dict.get::<Object<'_>>(ENCODING)?, cmap_resolver)?;
let horizontal = cmap.metadata().writing_mode != Some(WritingMode::Vertical);
let descendant_font = dict
.get::<Array<'_>>(DESCENDANT_FONTS)?
.iter::<Dict<'_>>()
.next()?;
let font_descriptor = descendant_font
.get::<Dict<'_>>(FONT_DESC)
.unwrap_or_default();
let (font_type, fallback, _is_standard_fallback) = match FontType::new(&font_descriptor) {
Some(ft) => (ft, false, false),
None => {
let (query, is_standard) =
if let Some((standard, _)) = select_standard_font(dict, &font_descriptor) {
(FontQuery::Standard(standard), true)
} else {
let mut query = FallbackFontQuery::new(dict);
query.character_collection = cmap.metadata().character_collection.clone();
warn!(
"unable to load CID font {} ({:?}), attempting fallback",
query.post_script_name.as_deref().unwrap_or("(no name)"),
dict.obj_id()
);
(FontQuery::Fallback(query), false)
};
let (data, index) = font_resolver(&query)?;
let blob = OpenTypeFontBlob::new(data.clone(), index)
.map(FontType::OpenType)
.or_else(|| CffFontBlob::new(data).map(FontType::Cff))?;
(blob, true, is_standard)
}
};
let default_width = descendant_font.get::<f32>(DW).unwrap_or(1000.0);
let dw2 = descendant_font
.get::<[f32; 2]>(DW2)
.map(|v| (v[0], v[1]))
.unwrap_or((880.0, -1000.0));
let widths = descendant_font
.get::<Array<'_>>(W)
.and_then(|a| read_widths(&a))
.unwrap_or_default();
let widths2 = descendant_font
.get::<Array<'_>>(W2)
.and_then(|a| read_widths2(&a))
.unwrap_or_default();
let cid_to_gid_map = CidToGIdMap::new(&descendant_font).unwrap_or_default();
let cache_key = dict.cache_key();
let mut to_unicode = read_to_unicode(dict, cmap_resolver);
let mut to_unicode_is_cid_indexed = false;
if to_unicode.is_none()
&& let Some(cc) = cmap.metadata().character_collection.as_ref()
&& cc.family != CidFamily::AdobeIdentity
&& let Some(ucs2_name) = cc.family.ucs2_cmap()
&& let Some(data) = (cmap_resolver)(ucs2_name)
{
let resolver = cmap_resolver.clone();
if let Some(ucs2_cmap) = CMap::parse(data, move |n| (resolver)(n)) {
to_unicode = Some(ucs2_cmap);
to_unicode_is_cid_indexed = true;
}
}
let postscript_name = dict
.get::<Name>(BASE_FONT)
.map(|n| strip_subset_prefix(n.as_str()).to_string());
let font_flags = font_descriptor
.get::<u32>(FLAGS)
.and_then(FontFlags::from_bits);
Some(Self {
cache_key,
horizontal,
encoding: cmap,
to_unicode,
font_type,
dw: default_width,
dw2,
widths,
widths2,
cid_to_gid_map,
postscript_name,
font_flags,
fallback,
to_unicode_is_cid_indexed,
})
}
pub(crate) fn map_code(&self, code: u32) -> GlyphId {
let Some(cid) = self.code_to_cid(code) else {
return GlyphId::NOTDEF;
};
let to_unicode_key = if self.to_unicode_is_cid_indexed {
cid
} else {
code
};
if self.fallback
&& let Some(glyph) = self.map_via_unicode(to_unicode_key)
{
return glyph;
}
match &self.font_type {
FontType::OpenType(_) => self.cid_to_gid_map.map(cid as u16),
FontType::Cff(c) => {
let table = c.table();
if table.is_cid() {
if matches!(self.cid_to_gid_map, CidToGIdMap::Identity) {
table
.glyph_index_by_cid(cid as u16)
.map(|g| GlyphId::new(g.0 as u32))
.unwrap_or(GlyphId::NOTDEF)
} else {
GlyphId::new(self.cid_to_gid_map.inverse_map(GlyphId::new(cid)) as u32)
}
} else {
self.cid_to_gid_map.map(cid as u16)
}
}
FontType::Type1(_) => GlyphId::new(cid),
}
}
fn map_via_unicode(&self, key: u32) -> Option<GlyphId> {
let to_unicode = self.to_unicode.as_ref()?;
let character = to_unicode
.lookup_bf_string(key)
.or_else(|| {
for len in 0..4 {
if let Some(code) = to_unicode.lookup_cid_code(key, len)
&& let Some(code) = char::from_u32(code.to_u32())
{
return Some(BfString::Char(code));
}
}
None
})
.and_then(|bf| match bf {
BfString::Char(c) => Some(c),
BfString::String(_) => None,
})?;
match &self.font_type {
FontType::OpenType(t) => t.font_ref().charmap().map(character),
FontType::Cff(c) => {
let table = c.table();
if let Some(name) = glyph_names::get_reverse(character)
&& let Some(gid) = table.glyph_index_by_name(name)
{
Some(GlyphId::new(gid.0 as u32))
} else {
None
}
}
FontType::Type1(t) => {
let name = glyph_names::get_reverse(character)?;
let idx = t.table().charstring_index(name)?;
Some(GlyphId::new(idx as u32))
}
}
}
fn code_to_cid(&self, code: u32) -> Option<u32> {
for byte_len in 1..=4_u8 {
if let Some(cid) = self.encoding.lookup_cid_code(code, byte_len) {
return Some(cid);
}
}
None
}
pub(crate) fn outline_glyph(&self, glyph: GlyphId, code: u32) -> BezPath {
let path = match &self.font_type {
FontType::OpenType(t) => t.outline_glyph(glyph),
FontType::Cff(c) => c.outline_glyph(glyph),
FontType::Type1(t) => {
let name = t
.table()
.charstring_names()
.get(glyph.to_u32() as usize)
.map(|n| n.as_str())
.unwrap_or(".notdef");
t.outline_glyph(name)
}
};
if self.fallback
&& let FontType::OpenType(t) = &self.font_type
&& let Some(cid) = self.code_to_cid(code)
&& let Some(actual_width) = t.glyph_metrics().advance_width(glyph)
&& let Some(expected_width) = self.widths.get(&cid).copied()
{
return stretch_glyph(path, expected_width, actual_width);
}
path
}
pub(crate) fn font_data(&self) -> Option<crate::font::FontData> {
match &self.font_type {
FontType::OpenType(t) => Some(t.font_data()),
FontType::Cff(c) => Some(c.font_data()),
FontType::Type1(_) => None,
}
}
pub(crate) fn postscript_name(&self) -> Option<&str> {
self.postscript_name.as_deref()
}
pub(crate) fn weight(&self) -> Option<u32> {
match &self.font_type {
FontType::OpenType(t) => {
let weight = t.font_ref().attributes().weight.value().round() as u32;
if weight > 0 { Some(weight) } else { None }
}
FontType::Cff(_) | FontType::Type1(_) => None,
}
}
pub(crate) fn is_italic(&self) -> bool {
if let Some(flags) = &self.font_flags
&& flags.contains(FontFlags::ITALIC)
{
return true;
}
match &self.font_type {
FontType::OpenType(t) => t.font_ref().attributes().style != Style::Normal,
FontType::Cff(_) | FontType::Type1(_) => false,
}
}
pub(crate) fn is_serif(&self) -> bool {
self.font_flags
.as_ref()
.is_some_and(|f| f.contains(FontFlags::SERIF))
}
pub(crate) fn is_monospace(&self) -> bool {
if let Some(flags) = &self.font_flags
&& flags.contains(FontFlags::FIXED_PITCH)
{
return true;
}
match &self.font_type {
FontType::OpenType(t) => {
t.font_ref()
.metrics(
skrifa::instance::Size::unscaled(),
skrifa::instance::LocationRef::default(),
)
.is_monospace
}
FontType::Cff(_) | FontType::Type1(_) => false,
}
}
pub(crate) fn code_advance(&self, code: u32) -> Vec2 {
let cid = self.code_to_cid(code).unwrap_or(0);
if self.horizontal {
Vec2::new(self.horizontal_width(cid) as f64, 0.0)
} else if let Some([w, _, _]) = self.widths2.get(&cid) {
Vec2::new(0.0, *w as f64)
} else {
Vec2::new(0.0, self.dw2.1 as f64)
}
}
fn horizontal_width(&self, cid: u32) -> f32 {
self.widths.get(&cid).copied().unwrap_or(self.dw)
}
pub(crate) fn is_horizontal(&self) -> bool {
self.horizontal
}
pub(crate) fn read_code(&self, bytes: &[u8], offset: usize) -> (u32, usize) {
let mut code = 0_u32;
let remaining = bytes.len() - offset;
for n in 0..4.min(remaining) {
code = (code << 8) | bytes[offset + n] as u32;
if self.encoding.lookup_cid_code(code, (n + 1) as u8).is_some() {
return (code, n + 1);
}
}
(0, 1)
}
pub(crate) fn origin_displacement(&self, code: u32) -> Vec2 {
let cid = self.code_to_cid(code).unwrap_or(0);
if self.is_horizontal() {
Vec2::default()
} else if let Some([_, v1, v2]) = self.widths2.get(&cid) {
Vec2::new(-*v1 as f64, -*v2 as f64)
} else {
Vec2::new(-self.horizontal_width(cid) as f64 / 2.0, -self.dw2.0 as f64)
}
}
pub(crate) fn char_code_to_unicode(&self, code: u32) -> Option<BfString> {
if let Some(to_unicode) = &self.to_unicode {
let key = if self.to_unicode_is_cid_indexed {
match self.code_to_cid(code) {
Some(cid) => cid,
None => return self.unicode_from_font_program(code),
}
} else {
code
};
if let Some(mapped) = to_unicode.lookup_bf_string(key) {
return Some(mapped);
}
}
self.unicode_from_font_program(code)
}
fn unicode_from_font_program(&self, code: u32) -> Option<BfString> {
fn bf_string_from(s: String) -> BfString {
let mut chars = s.chars();
match (chars.next(), chars.next()) {
(Some(c), None) => BfString::Char(c),
_ => BfString::String(s),
}
}
let glyph = self.map_code(code);
if glyph == GlyphId::NOTDEF {
return self.identity_unicode_fallback(code).map(BfString::Char);
}
match &self.font_type {
FontType::OpenType(t) => t
.glyph_id_to_unicode(glyph)
.or_else(|| self.identity_unicode_fallback(code))
.map(BfString::Char),
FontType::Cff(c) => {
let table = c.table();
if table.is_cid() {
self.identity_unicode_fallback(code).map(BfString::Char)
} else {
table
.glyph_name(pdf_font::GlyphId(glyph.to_u32() as u16))
.and_then(glyph_name_to_string)
.map(bf_string_from)
}
}
FontType::Type1(t) => t
.table()
.charstring_names()
.get(glyph.to_u32() as usize)
.map(|n| n.as_str())
.and_then(glyph_name_to_string)
.map(bf_string_from),
}
}
fn identity_unicode_fallback(&self, code: u32) -> Option<char> {
let is_identity = self
.encoding
.metadata()
.character_collection
.as_ref()
.is_some_and(|cc| cc.family == CidFamily::AdobeIdentity);
if !is_identity {
return None;
}
let cid = self.code_to_cid(code).unwrap_or(code);
let c = char::from_u32(cid)?;
if c.is_control() && c != ' ' && c != '\t' {
return None;
}
Some(c)
}
}
impl CacheKey for Type0Font {
fn cache_key(&self) -> u128 {
self.cache_key
}
}
#[derive(Debug)]
enum FontType {
OpenType(OpenTypeFontBlob),
Cff(CffFontBlob),
Type1(Type1FontBlob),
}
impl FontType {
fn new(descriptor: &Dict<'_>) -> Option<Self> {
let data = descriptor
.get::<Stream<'_>>(FONT_FILE2)
.or_else(|| descriptor.get::<Stream<'_>>(FONT_FILE3))
.or_else(|| descriptor.get::<Stream<'_>>(FONT_FILE))?
.decoded()
.ok()?;
let data = Arc::new(data);
let parsed = if let Ok(_font_ref) = FontRef::from_index(data.as_ref(), 0) {
Self::OpenType(OpenTypeFontBlob::new(data, 0)?)
} else if let Some(cff) = CffFontBlob::new(data.clone()) {
Self::Cff(cff)
} else if let Some(t1) = Type1FontBlob::new(data) {
Self::Type1(t1)
} else {
return None;
};
Some(parsed)
}
}
#[derive(Debug, Default)]
enum CidToGIdMap {
#[default]
Identity,
Mapped {
forward: HashMap<u16, GlyphId>,
inverse: HashMap<GlyphId, u16>,
},
}
impl CidToGIdMap {
fn new(dict: &Dict<'_>) -> Option<Self> {
if let Some(name) = dict.get::<Name>(CID_TO_GID_MAP) {
if name.deref() == IDENTITY {
Some(Self::Identity)
} else {
None
}
} else if let Some(stream) = dict.get::<Stream<'_>>(CID_TO_GID_MAP) {
let decoded = stream.decoded().ok()?;
let mut forward = HashMap::new();
let mut inverse = HashMap::new();
for (cid, gid) in decoded.chunks_exact(2).enumerate() {
let gid = GlyphId::new(u16::from_be_bytes([gid[0], gid[1]]) as u32);
forward.insert(cid as u16, gid);
inverse.insert(gid, cid as u16);
}
Some(Self::Mapped { forward, inverse })
} else {
None
}
}
fn map(&self, code: u16) -> GlyphId {
match self {
Self::Identity => GlyphId::new(code as u32),
Self::Mapped { forward, .. } => forward.get(&code).copied().unwrap_or(GlyphId::NOTDEF),
}
}
fn inverse_map(&self, gid: GlyphId) -> u16 {
match self {
Self::Identity => gid.to_u32() as u16,
Self::Mapped { inverse, .. } => {
inverse.get(&gid).copied().unwrap_or(gid.to_u32() as u16)
}
}
}
}
fn read_widths(arr: &Array<'_>) -> Option<HashMap<u32, f32>> {
let mut map = HashMap::new();
let mut iter = arr.flex_iter();
loop {
if let Some((mut first, range)) = iter.next::<(u32, Array<'_>)>() {
for width in range.iter::<f32>() {
map.insert(first, width);
first = first.checked_add(1)?;
}
} else if let Some((first, second, width)) = iter.next::<(u32, u32, f32)>() {
for i in first..=second {
map.insert(i, width);
}
} else {
break;
}
}
Some(map)
}
fn read_widths2(arr: &Array<'_>) -> Option<HashMap<u32, [f32; 3]>> {
let mut map = HashMap::new();
let mut iter = arr.flex_iter();
loop {
if let Some((mut first, range)) = iter.next::<(u32, Array<'_>)>() {
let mut iter = range.iter::<f32>();
while let Some(w) = iter.next() {
let v1 = iter.next()?;
let v2 = iter.next()?;
map.insert(first, [w, v1, v2]);
first = first.checked_add(1)?;
}
} else if let Some((first, second, w, v1, v2)) = iter.next::<(u32, u32, f32, f32, f32)>() {
for i in first..=second {
map.insert(i, [w, v1, v2]);
}
} else {
break;
}
}
Some(map)
}
#[cfg(test)]
mod tests {
use super::*;
use pdf_syntax::object::FromBytes;
fn parse_widths(bytes: &[u8]) -> HashMap<u32, f32> {
let arr = Array::from_bytes(bytes).expect("valid array bytes");
read_widths(&arr).expect("read_widths returned None")
}
#[test]
fn test_w_range_form() {
let map = parse_widths(b"[40 60 600]");
assert_eq!(map.len(), 21); for cid in 40..=60 {
assert_eq!(
map.get(&cid).copied(),
Some(600.0),
"cid {cid} should have width 600"
);
}
}
#[test]
fn test_w_array_form() {
let map = parse_widths(b"[40 [600 700 800]]");
assert_eq!(map.len(), 3);
assert_eq!(map.get(&40).copied(), Some(600.0));
assert_eq!(map.get(&41).copied(), Some(700.0));
assert_eq!(map.get(&42).copied(), Some(800.0));
}
#[test]
fn test_dw_fallback() {
let map = parse_widths(b"[40 [600]]");
let dw = 1000.0_f32;
let width = map.get(&99).copied().unwrap_or(dw);
assert_eq!(width, dw, "missing cid should fall back to /DW");
}
#[test]
fn test_w_mixed_form() {
let map = parse_widths(b"[40 [600 600] 100 200 500]");
assert_eq!(map.get(&40).copied(), Some(600.0));
assert_eq!(map.get(&41).copied(), Some(600.0));
for cid in 100..=200 {
assert_eq!(
map.get(&cid).copied(),
Some(500.0),
"cid {cid} should have width 500"
);
}
}
#[test]
fn test_w_range_single_glyph() {
let map = parse_widths(b"[50 50 250]");
assert_eq!(map.get(&50).copied(), Some(250.0));
assert_eq!(map.len(), 1);
}
#[test]
fn test_w_empty_array() {
let map = parse_widths(b"[]");
assert!(map.is_empty());
}
}
fn read_encoding(object: &Object<'_>, cmap_resolver: &CMapResolverFn) -> Option<CMap> {
match object {
Object::Name(n) => {
let cmap_type = pdf_font::cmap::CMapName::from_bytes(n.deref());
match cmap_type {
pdf_font::cmap::CMapName::IdentityH => Some(CMap::identity_h()),
pdf_font::cmap::CMapName::IdentityV => Some(CMap::identity_v()),
_ => {
let data = (cmap_resolver)(cmap_type)?;
let resolver = cmap_resolver.clone();
CMap::parse(data, move |n| (resolver)(n))
}
}
}
Object::Stream(s) => {
let decoded = s.decoded().ok()?;
let resolver = cmap_resolver.clone();
CMap::parse(&decoded, move |n| (resolver)(n))
}
_ => None,
}
}