1use zpdf_core::{ObjectId, PdfObject, Result};
2use zpdf_font::{CidWidths, FontCache, LoadedFont, PdfFontType};
3use zpdf_parser::PdfFile;
4
5use crate::page::PdfPage;
6
7pub fn load_page_fonts(file: &PdfFile, page: &PdfPage) -> FontCache {
9 let mut cache = FontCache::new();
10
11 for (name, &font_ref) in &page.resources.fonts {
12 match load_single_font(file, font_ref) {
13 Ok(font) => {
14 cache.insert(name.clone(), font);
15 }
16 Err(e) => {
17 tracing::debug!("font {name} ({font_ref}): fallback - {e}");
18 cache.insert(name.clone(), LoadedFont::new_placeholder(name.clone()));
19 }
20 }
21 }
22
23 cache
24}
25
26pub fn load_single_font(file: &PdfFile, font_ref: ObjectId) -> Result<LoadedFont> {
27 let obj = file.resolve(font_ref)?;
28 let dict = obj.as_dict()?;
29
30 let subtype = dict.get_name("Subtype").unwrap_or("");
31 let base_font = dict.get_name("BaseFont").unwrap_or("Unknown").to_string();
32
33 let mut font = match subtype {
34 "Type0" => load_type0_font(file, dict, base_font)?,
35 "TrueType" => load_truetype_font(file, dict, base_font)?,
36 "Type3" => load_type3_font(file, dict, base_font)?,
37 "Type1" | "MMType1" => load_type1_font(file, dict, base_font)?,
38 _ => LoadedFont::new_placeholder(base_font),
39 };
40
41 attach_text_mappings(file, dict, subtype, &mut font);
42 font.build_substitute_cid_to_gid();
45 Ok(font)
46}
47
48fn substitute_hints(
50 file: &PdfFile,
51 dict: &zpdf_core::PdfDict,
52) -> zpdf_font::system::SubstituteHints {
53 let mut hints = zpdf_font::system::SubstituteHints::default();
54 if let Ok(fd_ref) = dict.get_ref("FontDescriptor") {
55 if let Ok(fd) = file.resolve(fd_ref) {
56 if let Ok(fd) = fd.as_dict() {
57 if let Ok(flags) = fd.get_i64("Flags") {
58 hints.fixed_pitch = flags & 1 != 0;
59 hints.serif = flags & 2 != 0;
60 hints.italic = flags & 64 != 0;
61 hints.bold = flags & (1 << 18) != 0; }
63 if let Ok(w) = fd.get_f64("StemV") {
64 hints.bold |= w >= 160.0;
65 }
66 }
67 }
68 }
69 hints
70}
71
72fn try_system_substitute_simple(
76 file: &PdfFile,
77 dict: &zpdf_core::PdfDict,
78 base_font: &str,
79 font_type: PdfFontType,
80 mut cid_widths: CidWidths,
81) -> Option<LoadedFont> {
82 let hints = substitute_hints(file, dict);
83 let m = zpdf_font::system::find_system_font(base_font, hints, None)?;
84 if cid_widths.is_empty() {
85 if let Some(metrics) = zpdf_font::standard_fonts::lookup(base_font) {
86 for (code, &w) in metrics.widths.iter().enumerate() {
87 if w > 0 {
88 cid_widths.set(code as u16, w as f64);
89 }
90 }
91 }
92 }
93 LoadedFont::new_substitute(
94 font_type,
95 base_font.to_string(),
96 m.data,
97 m.face_index,
98 cid_widths,
99 )
100}
101
102fn attach_text_mappings(
105 file: &PdfFile,
106 dict: &zpdf_core::PdfDict,
107 subtype: &str,
108 font: &mut LoadedFont,
109) {
110 if let Ok(tu_ref) = dict.get_ref("ToUnicode") {
112 if let Ok(data) = file.resolve_stream_data(tu_ref) {
113 let map = zpdf_font::cmap::ToUnicodeMap::parse(&data);
114 if !map.is_empty() {
115 font.to_unicode = Some(map);
116 }
117 }
118 }
119
120 if subtype == "Type0" {
122 return;
123 }
124
125 font.symbolic = font_descriptor_symbolic(file, dict);
126
127 let encoding = if dict.get("Encoding").is_none() {
128 builtin_symbol_encoding(&font.base_font)
131 .or_else(|| parse_encoding(file, dict, subtype, font.symbolic))
132 } else {
133 parse_encoding(file, dict, subtype, font.symbolic)
134 };
135 if let Some(enc) = encoding {
136 font.encoding = Some(enc);
137 }
138
139 font.map_unencoded_orphans();
142}
143
144fn builtin_symbol_encoding(base_font: &str) -> Option<zpdf_font::encoding::Encoding> {
148 use zpdf_font::encoding::{base_encoding_by_name, Encoding};
149 let name = base_font.rsplit('+').next().unwrap_or(base_font);
150 let canonical = if name.contains("ZapfDingbats") || name.contains("Dingbats") {
151 "ZapfDingbats"
152 } else if name.contains("Symbol") {
153 "Symbol"
154 } else {
155 return None;
156 };
157 base_encoding_by_name(canonical).map(Encoding::from_base)
158}
159
160fn font_descriptor_symbolic(file: &PdfFile, dict: &zpdf_core::PdfDict) -> bool {
163 let fd_ref = match dict.get_ref("FontDescriptor") {
164 Ok(r) => r,
165 Err(_) => return false,
166 };
167 let flags = file
168 .resolve(fd_ref)
169 .ok()
170 .and_then(|o| o.as_dict().ok().and_then(|d| d.get_i64("Flags").ok()));
171 matches!(flags, Some(f) if (f & 4) != 0 && (f & 32) == 0)
172}
173
174fn parse_encoding(
177 file: &PdfFile,
178 dict: &zpdf_core::PdfDict,
179 subtype: &str,
180 symbolic: bool,
181) -> Option<zpdf_font::encoding::Encoding> {
182 use zpdf_font::encoding::{base_encoding_by_name, Encoding};
183
184 let enc_obj = match dict.get("Encoding").cloned() {
185 Some(PdfObject::Ref(r)) => file.resolve(r).ok(),
186 other => other,
187 };
188
189 match enc_obj {
190 Some(PdfObject::Name(n)) => base_encoding_by_name(n.as_str()).map(Encoding::from_base),
191 Some(PdfObject::Dict(enc_dict)) => {
192 let base = enc_dict
193 .get_name("BaseEncoding")
194 .ok()
195 .and_then(base_encoding_by_name)
196 .unwrap_or_else(|| default_simple_base(subtype));
197 let mut encoding = Encoding::from_base(base);
198 apply_differences(&enc_dict, &mut encoding);
199 Some(encoding)
200 }
201 _ if symbolic => None,
203 _ => Some(Encoding::from_base(default_simple_base(subtype))),
204 }
205}
206
207fn default_simple_base(subtype: &str) -> &'static zpdf_font::encoding::EncodingTable {
208 match subtype {
209 "TrueType" => &zpdf_font::encoding::WIN_ANSI_ENCODING,
210 _ => &zpdf_font::encoding::STANDARD_ENCODING,
211 }
212}
213
214fn apply_differences(enc_dict: &zpdf_core::PdfDict, encoding: &mut zpdf_font::encoding::Encoding) {
215 if let Ok(diffs) = enc_dict.get_array("Differences") {
216 let mut code = 0u32;
217 for obj in diffs {
218 match obj {
219 PdfObject::Integer(n) => code = (*n).max(0) as u32,
220 PdfObject::Name(name) => {
221 if code <= 255 {
222 encoding.apply_difference(code as u8, name.as_str());
223 }
224 code += 1;
225 }
226 _ => {}
227 }
228 }
229 }
230}
231
232fn parse_type0_encoding(file: &PdfFile, dict: &zpdf_core::PdfDict) -> zpdf_font::cmap::CidCMap {
236 use zpdf_font::cmap::CidCMap;
237 fn identity_fallback(name: &str) -> CidCMap {
240 let wmode = name.ends_with("-V") as u8;
241 tracing::warn!(
242 "unsupported predefined CMap {name}; using Identity-{}",
243 if wmode == 1 { "V" } else { "H" }
244 );
245 CidCMap::identity(wmode)
246 }
247 match dict.get("Encoding") {
248 Some(PdfObject::Name(n)) => {
249 CidCMap::predefined(n.as_str()).unwrap_or_else(|| identity_fallback(n.as_str()))
250 }
251 Some(PdfObject::Ref(r)) => match file.resolve(*r) {
252 Ok(PdfObject::Name(n)) => {
253 CidCMap::predefined(n.as_str()).unwrap_or_else(|| identity_fallback(n.as_str()))
254 }
255 Ok(PdfObject::Stream(s)) => {
256 let data = file
257 .resolve_stream_data(*r)
258 .or_else(|_| zpdf_parser::filters::decode_stream(&s.data, &s.dict));
259 let mut cmap = match data {
260 Ok(d) => CidCMap::parse(&d),
261 Err(e) => {
262 tracing::warn!("undecodable embedded CMap: {e}; using Identity-H");
263 CidCMap::identity(0)
264 }
265 };
266 if let Ok(1) = s.dict.get_i64("WMode") {
268 cmap.wmode = 1;
269 }
270 cmap
271 }
272 _ => CidCMap::identity(0),
273 },
274 _ => CidCMap::identity(0),
275 }
276}
277
278fn parse_dw2(file: &PdfFile, desc_dict: &zpdf_core::PdfDict) -> (f64, f64) {
280 resolve_array(file, desc_dict, "DW2")
281 .and_then(|arr| {
282 let v: Vec<f64> = arr.iter().filter_map(|o| o.as_f64().ok()).collect();
283 (v.len() >= 2).then(|| (v[0], v[1]))
284 })
285 .unwrap_or((880.0, -1000.0))
286}
287
288fn load_type0_font(
289 file: &PdfFile,
290 dict: &zpdf_core::PdfDict,
291 base_font: String,
292) -> Result<LoadedFont> {
293 let descendants = resolve_array(file, dict, "DescendantFonts")
295 .ok_or_else(|| zpdf_core::Error::MissingKey("DescendantFonts".into()))?;
296 let desc_ref = descendants
297 .first()
298 .ok_or_else(|| zpdf_core::Error::MissingKey("DescendantFonts[0]".into()))?
299 .as_ref()?;
300
301 let desc_obj = file.resolve(desc_ref)?;
302 let desc_dict = desc_obj.as_dict()?;
303
304 let cid_widths = parse_cid_widths(file, desc_dict);
305 let cmap = parse_type0_encoding(file, dict);
306 let dw2 = parse_dw2(file, desc_dict);
307
308 let font_data = extract_font_file(file, desc_dict);
309
310 let mut font = match font_data {
311 Some(data) => {
312 let mut font =
313 LoadedFont::new_with_data(PdfFontType::Type0CidType2, base_font, data, cid_widths);
314 if let Some(map) = parse_cid_to_gid_stream(file, desc_dict) {
319 let subtype = desc_dict.get_name("Subtype").unwrap_or("");
320 if subtype == "CIDFontType2" || font.cid_to_gid.is_none() {
321 font.cid_to_gid = Some(map);
322 }
323 }
324 font
325 }
326 None => {
327 let ordering = resolve_dict(file, desc_dict, "CIDSystemInfo").and_then(|csi| match csi
331 .get("Ordering")
332 {
333 Some(PdfObject::String(s)) => Some(s.to_string_lossy()),
334 Some(PdfObject::Name(n)) => Some(n.as_str().to_string()),
335 _ => None,
336 });
337 let hints = substitute_hints(file, desc_dict);
338 let substituted =
339 zpdf_font::system::find_system_font(&base_font, hints, ordering.as_deref())
340 .and_then(|m| {
341 LoadedFont::new_substitute(
342 PdfFontType::Type0CidType2,
343 base_font.clone(),
344 m.data,
345 m.face_index,
346 cid_widths,
347 )
348 });
349 substituted.unwrap_or_else(|| LoadedFont::new_placeholder(base_font))
350 }
351 };
352 font.cid_cmap = Some(cmap);
353 font.dw2 = dw2;
354 font.validate_cid_cmap();
357 Ok(font)
358}
359
360fn parse_cid_to_gid_stream(
366 file: &PdfFile,
367 desc_dict: &zpdf_core::PdfDict,
368) -> Option<std::collections::HashMap<u16, u16>> {
369 let stream_ref = match desc_dict.get("CIDToGIDMap") {
370 Some(PdfObject::Ref(r)) => *r,
371 _ => return None,
373 };
374 let data = match file.resolve_stream_data(stream_ref) {
375 Ok(d) => d,
376 Err(e) => {
377 tracing::debug!("CIDToGIDMap {stream_ref}: not a decodable stream - {e}");
379 return None;
380 }
381 };
382 let mut map = std::collections::HashMap::new();
383 for (cid, gid_bytes) in data.chunks_exact(2).enumerate().take(u16::MAX as usize + 1) {
384 let gid = u16::from_be_bytes([gid_bytes[0], gid_bytes[1]]);
385 if gid != 0 {
386 map.insert(cid as u16, gid);
387 }
388 }
389 if map.is_empty() {
390 None
391 } else {
392 Some(map)
393 }
394}
395
396fn load_truetype_font(
397 file: &PdfFile,
398 dict: &zpdf_core::PdfDict,
399 base_font: String,
400) -> Result<LoadedFont> {
401 let cid_widths = parse_simple_widths(file, dict);
402 let font_data = extract_font_file_from_descriptor(file, dict);
403
404 match font_data {
405 Some(data) => Ok(LoadedFont::new_with_data(
406 PdfFontType::TrueType,
407 base_font,
408 data,
409 cid_widths,
410 )),
411 None => Ok(try_system_substitute_simple(
412 file,
413 dict,
414 &base_font,
415 PdfFontType::TrueType,
416 cid_widths,
417 )
418 .or_else(|| LoadedFont::new_standard(base_font.clone()))
419 .unwrap_or_else(|| LoadedFont::new_placeholder(base_font))),
420 }
421}
422
423fn load_type3_font(
424 file: &PdfFile,
425 dict: &zpdf_core::PdfDict,
426 base_font: String,
427) -> Result<LoadedFont> {
428 use std::sync::Arc;
429
430 let font_matrix = {
435 let mut m = [0.001, 0.0, 0.0, -0.001, 0.0, 0.0];
436 if let Some(arr) = resolve_array(file, dict, "FontMatrix") {
437 for (i, obj) in arr.iter().enumerate().take(6) {
438 if let Ok(v) = obj.as_f64() {
439 m[i] = v;
440 }
441 }
442 }
443 m
444 };
445
446 let mut encoding = Vec::new();
448 if let Some(enc_dict) = resolve_dict(file, dict, "Encoding") {
449 if let Some(diffs) = resolve_array(file, &enc_dict, "Differences") {
450 let mut current_code = 0usize;
451 for obj in &diffs {
452 match obj {
453 PdfObject::Integer(n) => {
454 current_code = *n as usize;
455 while encoding.len() < current_code {
456 encoding.push(String::new());
457 }
458 }
459 PdfObject::Name(n) => {
460 while encoding.len() <= current_code {
461 encoding.push(String::new());
462 }
463 encoding[current_code] = n.0.clone();
464 current_code += 1;
465 }
466 _ => {}
467 }
468 }
469 }
470 }
471
472 let mut char_procs = std::collections::HashMap::new();
474 if let Some(cp_dict) = resolve_dict(file, dict, "CharProcs") {
475 for (name, obj) in &cp_dict.0 {
476 if let PdfObject::Ref(r) = obj {
477 if let Ok(data) = file.resolve_stream_data(*r) {
478 char_procs.insert(name.0.clone(), Arc::from(data));
479 }
480 }
481 }
482 }
483
484 let first_char = dict.get_i64("FirstChar").unwrap_or(0) as u16;
486 let widths: Vec<f64> = resolve_array(file, dict, "Widths")
487 .unwrap_or_default()
488 .iter()
489 .map(|o| o.as_f64().unwrap_or(0.0))
490 .collect();
491
492 let font = LoadedFont {
493 font_type: zpdf_font::PdfFontType::Type3 {
494 font_matrix,
495 char_procs,
496 encoding,
497 widths,
498 first_char,
499 },
500 base_font,
501 font_data: None,
502 face_index: 0,
503 is_substitute: false,
504 cid_widths: CidWidths::new(1000.0),
505 units_per_em: 1000.0,
506 ascent: 880.0,
507 descent: -120.0,
508 cid_to_gid: None,
509 builtin_encoding_gids: None,
510 orphan_gids: Vec::new(),
511 encoding: None,
512 to_unicode: None,
513 symbolic: false,
514 type1: None,
515 cid_cmap: None,
516 dw2: (880.0, -1000.0),
517 };
518
519 Ok(font)
520}
521
522fn load_type1_font(
523 file: &PdfFile,
524 dict: &zpdf_core::PdfDict,
525 base_font: String,
526) -> Result<LoadedFont> {
527 let cid_widths = parse_simple_widths(file, dict);
528 let font_data = extract_font_file_from_descriptor(file, dict);
529
530 match font_data {
531 Some(data) => Ok(LoadedFont::new_with_data(
532 PdfFontType::Type1,
533 base_font,
534 data,
535 cid_widths,
536 )),
537 None => Ok(try_system_substitute_simple(
538 file,
539 dict,
540 &base_font,
541 PdfFontType::Type1,
542 cid_widths,
543 )
544 .or_else(|| LoadedFont::new_standard(base_font.clone()))
545 .unwrap_or_else(|| LoadedFont::new_placeholder(base_font))),
546 }
547}
548
549fn extract_font_file(file: &PdfFile, cid_dict: &zpdf_core::PdfDict) -> Option<Vec<u8>> {
551 let fd_ref = cid_dict.get_ref("FontDescriptor").ok()?;
552 let fd_obj = file.resolve(fd_ref).ok()?;
553 let fd_dict = fd_obj.as_dict().ok()?;
554
555 for key in &["FontFile2", "FontFile3", "FontFile"] {
557 if let Ok(ff_ref) = fd_dict.get_ref(key) {
558 if let Ok(data) = file.resolve_stream_data(ff_ref) {
559 if !data.is_empty() {
560 return Some(data);
561 }
562 }
563 }
564 }
565 None
566}
567
568fn extract_font_file_from_descriptor(
569 file: &PdfFile,
570 font_dict: &zpdf_core::PdfDict,
571) -> Option<Vec<u8>> {
572 let fd_ref = font_dict.get_ref("FontDescriptor").ok()?;
573 let fd_obj = file.resolve(fd_ref).ok()?;
574 let fd_dict = fd_obj.as_dict().ok()?;
575
576 for key in &["FontFile2", "FontFile3", "FontFile"] {
577 if let Ok(ff_ref) = fd_dict.get_ref(key) {
578 if let Ok(data) = file.resolve_stream_data(ff_ref) {
579 if !data.is_empty() {
580 return Some(data);
581 }
582 }
583 }
584 }
585 None
586}
587
588fn resolve_array(file: &PdfFile, dict: &zpdf_core::PdfDict, key: &str) -> Option<Vec<PdfObject>> {
592 match dict.get(key) {
593 Some(PdfObject::Array(a)) => Some(a.clone()),
594 Some(PdfObject::Ref(id)) => file
595 .resolve(*id)
596 .ok()
597 .and_then(|o| o.as_array().ok().map(|a| a.to_vec())),
598 _ => None,
599 }
600}
601
602fn resolve_dict(
606 file: &PdfFile,
607 dict: &zpdf_core::PdfDict,
608 key: &str,
609) -> Option<zpdf_core::PdfDict> {
610 match dict.get(key) {
611 Some(PdfObject::Dict(d)) => Some(d.clone()),
612 Some(PdfObject::Ref(id)) => file
613 .resolve(*id)
614 .ok()
615 .and_then(|o| o.as_dict().ok().cloned()),
616 _ => None,
617 }
618}
619
620fn parse_cid_widths(file: &PdfFile, dict: &zpdf_core::PdfDict) -> CidWidths {
622 let dw = dict.get_f64("DW").unwrap_or(1000.0);
623 let mut widths = CidWidths::new(dw);
624
625 let w_array = match resolve_array(file, dict, "W") {
626 Some(arr) => arr,
627 None => return widths,
628 };
629
630 let mut i = 0;
631 while i < w_array.len() {
632 let cid_start = match w_array[i].as_i64() {
633 Ok(v) => v as u16,
634 Err(_) => break,
635 };
636 i += 1;
637 if i >= w_array.len() {
638 break;
639 }
640
641 match &w_array[i] {
642 PdfObject::Array(arr) => {
643 for (j, obj) in arr.iter().enumerate() {
645 let Some(cid) = cid_start.checked_add(j as u16) else {
646 break;
647 };
648 if let Ok(w) = obj.as_f64() {
649 widths.set(cid, w);
650 }
651 }
652 i += 1;
653 }
654 PdfObject::Integer(_) | PdfObject::Real(_) => {
655 let cid_end = w_array[i].as_i64().unwrap_or(cid_start as i64) as u16;
657 i += 1;
658 if i < w_array.len() {
659 let w = w_array[i].as_f64().unwrap_or(dw);
660 for cid in cid_start..=cid_end {
661 widths.set(cid, w);
662 }
663 i += 1;
664 }
665 }
666 _ => {
667 i += 1;
668 }
669 }
670 }
671
672 widths
673}
674
675fn parse_simple_widths(file: &PdfFile, dict: &zpdf_core::PdfDict) -> CidWidths {
676 let first_char = dict.get_i64("FirstChar").unwrap_or(0) as u16;
677 let mut widths = CidWidths::new(1000.0);
678
679 if let Some(arr) = resolve_array(file, dict, "Widths") {
680 for (j, obj) in arr.iter().enumerate() {
681 let Some(code) = first_char.checked_add(j as u16) else {
682 break;
683 };
684 if let Ok(w) = obj.as_f64() {
685 widths.set(code, w);
686 }
687 }
688 }
689
690 widths
691}