Skip to main content

zpdf_document/
font_loader.rs

1use zpdf_core::{ObjectId, PdfObject, Result};
2use zpdf_font::{CidWidths, FontCache, LoadedFont, PdfFontType};
3use zpdf_parser::PdfFile;
4
5use crate::page::PdfPage;
6
7/// Load all fonts referenced by a page into a FontCache.
8pub fn load_page_fonts(file: &PdfFile, page: &PdfPage) -> FontCache {
9    let mut cache = FontCache::new();
10
11    for (name, &font_ref) in &page.resources.fonts {
12        match load_single_font(file, font_ref) {
13            Ok(font) => {
14                cache.insert(name.clone(), font);
15            }
16            Err(e) => {
17                tracing::debug!("font {name} ({font_ref}): fallback - {e}");
18                cache.insert(name.clone(), LoadedFont::new_placeholder(name.clone()));
19            }
20        }
21    }
22
23    cache
24}
25
26pub fn load_single_font(file: &PdfFile, font_ref: ObjectId) -> Result<LoadedFont> {
27    let obj = file.resolve(font_ref)?;
28    let dict = obj.as_dict()?;
29    load_single_font_dict(file, dict)
30}
31
32/// Load a font from its (already-resolved) font dictionary. Used both by
33/// [`load_single_font`] and for inline font dicts in form resources (e.g. a
34/// synthesized field appearance referencing a standard Helvetica).
35pub fn load_single_font_dict(file: &PdfFile, dict: &zpdf_core::PdfDict) -> Result<LoadedFont> {
36    let subtype = dict.get_name("Subtype").unwrap_or("");
37    let base_font = dict.get_name("BaseFont").unwrap_or("Unknown").to_string();
38
39    let mut font = match subtype {
40        "Type0" => load_type0_font(file, dict, base_font)?,
41        "TrueType" => load_truetype_font(file, dict, base_font)?,
42        "Type3" => load_type3_font(file, dict, base_font)?,
43        "Type1" | "MMType1" => load_type1_font(file, dict, base_font)?,
44        _ => LoadedFont::new_placeholder(base_font),
45    };
46
47    attach_text_mappings(file, dict, subtype, &mut font);
48    // A substituted composite font needs /ToUnicode (attached just above) to
49    // route CIDs through the system face's Unicode cmap.
50    font.build_substitute_cid_to_gid();
51    Ok(font)
52}
53
54/// FontDescriptor-derived hints for system-font substitution.
55fn substitute_hints(
56    file: &PdfFile,
57    dict: &zpdf_core::PdfDict,
58) -> zpdf_font::system::SubstituteHints {
59    let mut hints = zpdf_font::system::SubstituteHints::default();
60    if let Ok(fd_ref) = dict.get_ref("FontDescriptor") {
61        if let Ok(fd) = file.resolve(fd_ref) {
62            if let Ok(fd) = fd.as_dict() {
63                if let Ok(flags) = fd.get_i64("Flags") {
64                    hints.fixed_pitch = flags & 1 != 0;
65                    hints.serif = flags & 2 != 0;
66                    hints.italic = flags & 64 != 0;
67                    hints.bold = flags & (1 << 18) != 0; // ForceBold
68                }
69                if let Ok(w) = fd.get_f64("StemV") {
70                    hints.bold |= w >= 160.0;
71                }
72            }
73        }
74    }
75    hints
76}
77
78/// Try to substitute an installed system font for a non-embedded simple font.
79/// The PDF /Widths stay authoritative for advances when present; otherwise the
80/// standard-14 metrics (if the name matches one) seed the widths.
81fn try_system_substitute_simple(
82    file: &PdfFile,
83    dict: &zpdf_core::PdfDict,
84    base_font: &str,
85    font_type: PdfFontType,
86    mut cid_widths: CidWidths,
87) -> Option<LoadedFont> {
88    let hints = substitute_hints(file, dict);
89    let m = zpdf_font::system::find_system_font(base_font, hints, None)?;
90    if cid_widths.is_empty() {
91        if let Some(metrics) = zpdf_font::standard_fonts::lookup(base_font) {
92            for (code, &w) in metrics.widths.iter().enumerate() {
93                if w > 0 {
94                    cid_widths.set(code as u16, w as f64);
95                }
96            }
97        }
98    }
99    LoadedFont::new_substitute(
100        font_type,
101        base_font.to_string(),
102        m.data,
103        m.face_index,
104        cid_widths,
105    )
106}
107
108/// Attach the simple-font /Encoding, the symbolic flag, and /ToUnicode (for
109/// text extraction) to a freshly-loaded font.
110fn attach_text_mappings(
111    file: &PdfFile,
112    dict: &zpdf_core::PdfDict,
113    subtype: &str,
114    font: &mut LoadedFont,
115) {
116    // /ToUnicode lives at the top-level font dict for both simple and Type0 fonts.
117    if let Ok(tu_ref) = dict.get_ref("ToUnicode") {
118        if let Ok(data) = file.resolve_stream_data(tu_ref) {
119            let map = zpdf_font::cmap::ToUnicodeMap::parse(&data);
120            if !map.is_empty() {
121                font.to_unicode = Some(map);
122            }
123        }
124    }
125
126    // /Encoding and the symbolic flag apply only to simple (non-composite) fonts.
127    if subtype == "Type0" {
128        return;
129    }
130
131    font.symbolic = font_descriptor_symbolic(file, dict);
132
133    let encoding = if dict.get("Encoding").is_none() {
134        // No explicit /Encoding: the Symbol/ZapfDingbats standard fonts carry their
135        // own built-in encoding; other symbolic fonts use the font program's cmap.
136        builtin_symbol_encoding(&font.base_font)
137            .or_else(|| parse_encoding(file, dict, subtype, font.symbolic))
138    } else {
139        parse_encoding(file, dict, subtype, font.symbolic)
140    };
141    if let Some(enc) = encoding {
142        font.encoding = Some(enc);
143    }
144
145    // With encoding and widths in place, recover Quartz-subset glyphs that are
146    // reachable through no declared encoding (charset entries named ".notdef").
147    font.map_unencoded_orphans();
148}
149
150/// The built-in encoding for the Symbol / ZapfDingbats standard fonts, matched by
151/// BaseFont (ignoring any subset prefix). Used when no explicit /Encoding is given,
152/// so symbolic Symbol/Dingbats text is still extractable via the glyph list.
153fn builtin_symbol_encoding(base_font: &str) -> Option<zpdf_font::encoding::Encoding> {
154    use zpdf_font::encoding::{base_encoding_by_name, Encoding};
155    let name = base_font.rsplit('+').next().unwrap_or(base_font);
156    let canonical = if name.contains("ZapfDingbats") || name.contains("Dingbats") {
157        "ZapfDingbats"
158    } else if name.contains("Symbol") {
159        "Symbol"
160    } else {
161        return None;
162    };
163    base_encoding_by_name(canonical).map(Encoding::from_base)
164}
165
166/// Read the FontDescriptor /Flags and decide whether the font is symbolic
167/// (bit 3 set, bit 6 clear).
168fn font_descriptor_symbolic(file: &PdfFile, dict: &zpdf_core::PdfDict) -> bool {
169    let fd_ref = match dict.get_ref("FontDescriptor") {
170        Ok(r) => r,
171        Err(_) => return false,
172    };
173    let flags = file
174        .resolve(fd_ref)
175        .ok()
176        .and_then(|o| o.as_dict().ok().and_then(|d| d.get_i64("Flags").ok()));
177    matches!(flags, Some(f) if (f & 4) != 0 && (f & 32) == 0)
178}
179
180/// Build the effective simple-font encoding from /Encoding (a name, a dict with
181/// /BaseEncoding + /Differences, or absent).
182fn parse_encoding(
183    file: &PdfFile,
184    dict: &zpdf_core::PdfDict,
185    subtype: &str,
186    symbolic: bool,
187) -> Option<zpdf_font::encoding::Encoding> {
188    use zpdf_font::encoding::{base_encoding_by_name, Encoding};
189
190    let enc_obj = match dict.get("Encoding").cloned() {
191        Some(PdfObject::Ref(r)) => file.resolve(r).ok(),
192        other => other,
193    };
194
195    match enc_obj {
196        Some(PdfObject::Name(n)) => base_encoding_by_name(n.as_str()).map(Encoding::from_base),
197        Some(PdfObject::Dict(enc_dict)) => {
198            let base = enc_dict
199                .get_name("BaseEncoding")
200                .ok()
201                .and_then(base_encoding_by_name)
202                .unwrap_or_else(|| default_simple_base(subtype));
203            let mut encoding = Encoding::from_base(base);
204            apply_differences(&enc_dict, &mut encoding);
205            Some(encoding)
206        }
207        // No /Encoding: symbolic fonts use their built-in cmap; others get a default.
208        _ if symbolic => None,
209        _ => Some(Encoding::from_base(default_simple_base(subtype))),
210    }
211}
212
213fn default_simple_base(subtype: &str) -> &'static zpdf_font::encoding::EncodingTable {
214    match subtype {
215        "TrueType" => &zpdf_font::encoding::WIN_ANSI_ENCODING,
216        _ => &zpdf_font::encoding::STANDARD_ENCODING,
217    }
218}
219
220fn apply_differences(enc_dict: &zpdf_core::PdfDict, encoding: &mut zpdf_font::encoding::Encoding) {
221    if let Ok(diffs) = enc_dict.get_array("Differences") {
222        let mut code = 0u32;
223        for obj in diffs {
224            match obj {
225                PdfObject::Integer(n) => code = (*n).max(0) as u32,
226                PdfObject::Name(name) => {
227                    if code <= 255 {
228                        encoding.apply_difference(code as u8, name.as_str());
229                    }
230                    code += 1;
231                }
232                _ => {}
233            }
234        }
235    }
236}
237
238/// Resolve a Type0 font's /Encoding into a code → CID CMap: a predefined
239/// name, or an embedded CMap stream. Unknown legacy CMaps fall back to
240/// Identity-H with a warning.
241fn parse_type0_encoding(file: &PdfFile, dict: &zpdf_core::PdfDict) -> zpdf_font::cmap::CidCMap {
242    use zpdf_font::cmap::CidCMap;
243    // Unknown (legacy byte-encoded) CMaps degrade to Identity, but the
244    // writing mode is still known from the -V suffix and kept.
245    fn identity_fallback(name: &str) -> CidCMap {
246        let wmode = name.ends_with("-V") as u8;
247        tracing::warn!(
248            "unsupported predefined CMap {name}; using Identity-{}",
249            if wmode == 1 { "V" } else { "H" }
250        );
251        CidCMap::identity(wmode)
252    }
253    match dict.get("Encoding") {
254        Some(PdfObject::Name(n)) => {
255            CidCMap::predefined(n.as_str()).unwrap_or_else(|| identity_fallback(n.as_str()))
256        }
257        Some(PdfObject::Ref(r)) => match file.resolve(*r) {
258            Ok(PdfObject::Name(n)) => {
259                CidCMap::predefined(n.as_str()).unwrap_or_else(|| identity_fallback(n.as_str()))
260            }
261            Ok(PdfObject::Stream(s)) => {
262                let data = file
263                    .resolve_stream_data(*r)
264                    .or_else(|_| zpdf_parser::filters::decode_stream(&s.data, &s.dict));
265                let mut cmap = match data {
266                    Ok(d) => CidCMap::parse(&d),
267                    Err(e) => {
268                        tracing::warn!("undecodable embedded CMap: {e}; using Identity-H");
269                        CidCMap::identity(0)
270                    }
271                };
272                // /WMode may also live on the stream dict.
273                if let Ok(1) = s.dict.get_i64("WMode") {
274                    cmap.wmode = 1;
275                }
276                cmap
277            }
278            _ => CidCMap::identity(0),
279        },
280        _ => CidCMap::identity(0),
281    }
282}
283
284/// /DW2 vertical metrics from a CID font dict: [vy w1y], default [880 −1000].
285fn parse_dw2(file: &PdfFile, desc_dict: &zpdf_core::PdfDict) -> (f64, f64) {
286    resolve_array(file, desc_dict, "DW2")
287        .and_then(|arr| {
288            let v: Vec<f64> = arr.iter().filter_map(|o| o.as_f64().ok()).collect();
289            (v.len() >= 2).then(|| (v[0], v[1]))
290        })
291        .unwrap_or((880.0, -1000.0))
292}
293
294fn load_type0_font(
295    file: &PdfFile,
296    dict: &zpdf_core::PdfDict,
297    base_font: String,
298) -> Result<LoadedFont> {
299    // /DescendantFonts is commonly an indirect reference to the array.
300    let descendants = resolve_array(file, dict, "DescendantFonts")
301        .ok_or_else(|| zpdf_core::Error::MissingKey("DescendantFonts".into()))?;
302    let desc_ref = descendants
303        .first()
304        .ok_or_else(|| zpdf_core::Error::MissingKey("DescendantFonts[0]".into()))?
305        .as_ref()?;
306
307    let desc_obj = file.resolve(desc_ref)?;
308    let desc_dict = desc_obj.as_dict()?;
309
310    let mut cid_widths = parse_cid_widths(file, desc_dict);
311    parse_cid_w2(file, desc_dict, &mut cid_widths);
312    let cmap = parse_type0_encoding(file, dict);
313    let dw2 = parse_dw2(file, desc_dict);
314
315    let font_data = extract_font_file(file, desc_dict);
316
317    let mut font = match font_data {
318        Some(data) => {
319            let mut font = LoadedFont::new_with_data(
320                PdfFontType::Type0CidType2,
321                base_font.clone(),
322                data,
323                cid_widths.clone(),
324            );
325            // /CIDToGIDMap stream: explicit CID → GID table, authoritative for
326            // CIDFontType2 (TrueType-based) descendants. A raw-CFF CIDFontType0
327            // descendant keeps its charset-derived map built in new_with_data —
328            // there /CIDToGIDMap is not even a legal key.
329            if let Some(map) = parse_cid_to_gid_stream(file, desc_dict) {
330                let subtype = desc_dict.get_name("Subtype").unwrap_or("");
331                if subtype == "CIDFontType2" || font.cid_to_gid.is_none() {
332                    font.cid_to_gid = Some(map);
333                }
334            }
335            // Some embedded CID-keyed CFF subsets are defective and cannot be
336            // outlined (unparseable per-FD Private DICTs strand the local subrs),
337            // so most glyphs render blank. When the font is identifiably CJK and
338            // the embedded program fails to outline most sampled glyphs, fall
339            // back to a system CJK face (glyphs then route CID→Unicode→GID via
340            // /ToUnicode, attached later in load_single_font).
341            let cjk = is_cjk_ordering(desc_ordering(file, desc_dict).as_deref())
342                || zpdf_font::system::cjk_ordering_for(&base_font).is_some();
343            if cjk && font.embedded_outline_failure_rate() > 0.5 {
344                if let Some(sub) = substitute_type0_font(file, desc_dict, &base_font, cid_widths) {
345                    font = sub;
346                }
347            }
348            font
349        }
350        None => {
351            // Non-embedded composite font (typically CJK): substitute a system
352            // face. CIDs are remapped through /ToUnicode once it is attached
353            // (see build_substitute_cid_to_gid in load_single_font).
354            substitute_type0_font(file, desc_dict, &base_font, cid_widths)
355                .unwrap_or_else(|| LoadedFont::new_placeholder(base_font))
356        }
357    };
358    font.cid_cmap = Some(cmap);
359    font.dw2 = dw2;
360    // A Unicode-coded CMap is only usable when the font program can resolve
361    // Unicode; otherwise fall back to Identity (codes pass through as CIDs).
362    font.validate_cid_cmap();
363    Ok(font)
364}
365
366/// The descendant CIDFont's `/CIDSystemInfo /Ordering` (e.g. "GB1", "Identity").
367fn desc_ordering(file: &PdfFile, desc_dict: &zpdf_core::PdfDict) -> Option<String> {
368    resolve_dict(file, desc_dict, "CIDSystemInfo").and_then(|csi| match csi.get("Ordering") {
369        Some(PdfObject::String(s)) => Some(s.to_string_lossy()),
370        Some(PdfObject::Name(n)) => Some(n.as_str().to_string()),
371        _ => None,
372    })
373}
374
/// Whether `ordering` names one of the CJK character-collection orderings
/// recognised here: "GB1", "CNS1", "Japan1", "Korea1" or "KR".
///
/// The comparison is exact and case-sensitive; `None` and any other value
/// (including "Identity") are not CJK.
fn is_cjk_ordering(ordering: Option<&str>) -> bool {
    const CJK_ORDERINGS: [&str; 5] = ["GB1", "CNS1", "Japan1", "Korea1", "KR"];
    ordering.map_or(false, |name| CJK_ORDERINGS.contains(&name))
}
379
380/// Build a system-font substitute for a composite (Type0) font, carrying over
381/// the PDF's authoritative /W advances. Returns `None` when no installed face
382/// matches (caller keeps the embedded font or a placeholder).
383fn substitute_type0_font(
384    file: &PdfFile,
385    desc_dict: &zpdf_core::PdfDict,
386    base_font: &str,
387    cid_widths: CidWidths,
388) -> Option<LoadedFont> {
389    let ordering = desc_ordering(file, desc_dict);
390    let hints = substitute_hints(file, desc_dict);
391    zpdf_font::system::find_system_font(base_font, hints, ordering.as_deref()).and_then(|m| {
392        LoadedFont::new_substitute(
393            PdfFontType::Type0CidType2,
394            base_font.to_string(),
395            m.data,
396            m.face_index,
397            cid_widths,
398        )
399    })
400}
401
402/// Decode a /CIDToGIDMap stream into a CID → GID table: two bytes per CID,
403/// big-endian, indexed by CID. Returns `None` for /Identity, absence, or any
404/// non-stream form, which keeps the identity (or charset-derived) behavior.
405/// CIDs mapped to GID 0 (.notdef) are omitted — `glyph_outline` treats a
406/// missing entry as "no glyph", which matches the spec semantics.
407fn parse_cid_to_gid_stream(
408    file: &PdfFile,
409    desc_dict: &zpdf_core::PdfDict,
410) -> Option<std::collections::HashMap<u16, u16>> {
411    let stream_ref = match desc_dict.get("CIDToGIDMap") {
412        Some(PdfObject::Ref(r)) => *r,
413        // /Identity (the common name form), absent, or malformed.
414        _ => return None,
415    };
416    let data = match file.resolve_stream_data(stream_ref) {
417        Ok(d) => d,
418        Err(e) => {
419            // e.g. an indirect /Identity name, or an undecodable stream.
420            tracing::debug!("CIDToGIDMap {stream_ref}: not a decodable stream - {e}");
421            return None;
422        }
423    };
424    let mut map = std::collections::HashMap::new();
425    for (cid, gid_bytes) in data.chunks_exact(2).enumerate().take(u16::MAX as usize + 1) {
426        let gid = u16::from_be_bytes([gid_bytes[0], gid_bytes[1]]);
427        if gid != 0 {
428            map.insert(cid as u16, gid);
429        }
430    }
431    if map.is_empty() {
432        None
433    } else {
434        Some(map)
435    }
436}
437
438fn load_truetype_font(
439    file: &PdfFile,
440    dict: &zpdf_core::PdfDict,
441    base_font: String,
442) -> Result<LoadedFont> {
443    let cid_widths = parse_simple_widths(file, dict);
444    let font_data = extract_font_file_from_descriptor(file, dict);
445
446    match font_data {
447        Some(data) => Ok(LoadedFont::new_with_data(
448            PdfFontType::TrueType,
449            base_font,
450            data,
451            cid_widths,
452        )),
453        None => Ok(try_system_substitute_simple(
454            file,
455            dict,
456            &base_font,
457            PdfFontType::TrueType,
458            cid_widths,
459        )
460        .or_else(|| LoadedFont::new_standard(base_font.clone()))
461        .unwrap_or_else(|| LoadedFont::new_placeholder(base_font))),
462    }
463}
464
465fn load_type3_font(
466    file: &PdfFile,
467    dict: &zpdf_core::PdfDict,
468    base_font: String,
469) -> Result<LoadedFont> {
470    use std::sync::Arc;
471
472    // All four Type3 keys are commonly emitted as indirect objects; a direct-only
473    // read would silently drop every glyph, so resolve one level of indirection.
474
475    // FontMatrix: typically [0.001 0 0 -0.001 0 0] for 1000-unit glyph space
476    let font_matrix = {
477        let mut m = [0.001, 0.0, 0.0, -0.001, 0.0, 0.0];
478        if let Some(arr) = resolve_array(file, dict, "FontMatrix") {
479            for (i, obj) in arr.iter().enumerate().take(6) {
480                if let Ok(v) = obj.as_f64() {
481                    m[i] = v;
482                }
483            }
484        }
485        m
486    };
487
488    // Encoding/Differences → glyph name list
489    let mut encoding = Vec::new();
490    if let Some(enc_dict) = resolve_dict(file, dict, "Encoding") {
491        if let Some(diffs) = resolve_array(file, &enc_dict, "Differences") {
492            let mut current_code = 0usize;
493            for obj in &diffs {
494                match obj {
495                    PdfObject::Integer(n) => {
496                        current_code = *n as usize;
497                        while encoding.len() < current_code {
498                            encoding.push(String::new());
499                        }
500                    }
501                    PdfObject::Name(n) => {
502                        while encoding.len() <= current_code {
503                            encoding.push(String::new());
504                        }
505                        encoding[current_code] = n.0.clone();
506                        current_code += 1;
507                    }
508                    _ => {}
509                }
510            }
511        }
512    }
513
514    // CharProcs: name → stream ref
515    let mut char_procs = std::collections::HashMap::new();
516    if let Some(cp_dict) = resolve_dict(file, dict, "CharProcs") {
517        for (name, obj) in &cp_dict.0 {
518            if let PdfObject::Ref(r) = obj {
519                if let Ok(data) = file.resolve_stream_data(*r) {
520                    char_procs.insert(name.0.clone(), Arc::from(data));
521                }
522            }
523        }
524    }
525
526    // Widths
527    let first_char = dict.get_i64("FirstChar").unwrap_or(0) as u16;
528    let widths: Vec<f64> = resolve_array(file, dict, "Widths")
529        .unwrap_or_default()
530        .iter()
531        .map(|o| o.as_f64().unwrap_or(0.0))
532        .collect();
533
534    let font = LoadedFont {
535        font_type: zpdf_font::PdfFontType::Type3 {
536            font_matrix,
537            char_procs,
538            encoding,
539            widths,
540            first_char,
541        },
542        base_font,
543        font_data: None,
544        face_index: 0,
545        is_substitute: false,
546        cid_widths: CidWidths::new(1000.0),
547        units_per_em: 1000.0,
548        ascent: 880.0,
549        descent: -120.0,
550        cid_to_gid: None,
551        builtin_encoding_gids: None,
552        orphan_gids: Vec::new(),
553        encoding: None,
554        to_unicode: None,
555        symbolic: false,
556        type1: None,
557        cid_cmap: None,
558        dw2: (880.0, -1000.0),
559    };
560
561    Ok(font)
562}
563
564fn load_type1_font(
565    file: &PdfFile,
566    dict: &zpdf_core::PdfDict,
567    base_font: String,
568) -> Result<LoadedFont> {
569    let cid_widths = parse_simple_widths(file, dict);
570    let font_data = extract_font_file_from_descriptor(file, dict);
571
572    match font_data {
573        Some(data) => Ok(LoadedFont::new_with_data(
574            PdfFontType::Type1,
575            base_font,
576            data,
577            cid_widths,
578        )),
579        None => Ok(try_system_substitute_simple(
580            file,
581            dict,
582            &base_font,
583            PdfFontType::Type1,
584            cid_widths,
585        )
586        .or_else(|| LoadedFont::new_standard(base_font.clone()))
587        .unwrap_or_else(|| LoadedFont::new_placeholder(base_font))),
588    }
589}
590
591/// Extract embedded font binary from FontDescriptor → FontFile2 (TrueType).
592fn extract_font_file(file: &PdfFile, cid_dict: &zpdf_core::PdfDict) -> Option<Vec<u8>> {
593    let fd_ref = cid_dict.get_ref("FontDescriptor").ok()?;
594    let fd_obj = file.resolve(fd_ref).ok()?;
595    let fd_dict = fd_obj.as_dict().ok()?;
596
597    // Try FontFile2 (TrueType), then FontFile3 (OpenType/CFF), then FontFile (Type1)
598    for key in &["FontFile2", "FontFile3", "FontFile"] {
599        if let Ok(ff_ref) = fd_dict.get_ref(key) {
600            if let Ok(data) = file.resolve_stream_data(ff_ref) {
601                if !data.is_empty() {
602                    return Some(data);
603                }
604            }
605        }
606    }
607    None
608}
609
610fn extract_font_file_from_descriptor(
611    file: &PdfFile,
612    font_dict: &zpdf_core::PdfDict,
613) -> Option<Vec<u8>> {
614    let fd_ref = font_dict.get_ref("FontDescriptor").ok()?;
615    let fd_obj = file.resolve(fd_ref).ok()?;
616    let fd_dict = fd_obj.as_dict().ok()?;
617
618    for key in &["FontFile2", "FontFile3", "FontFile"] {
619        if let Ok(ff_ref) = fd_dict.get_ref(key) {
620            if let Ok(data) = file.resolve_stream_data(ff_ref) {
621                if !data.is_empty() {
622                    return Some(data);
623                }
624            }
625        }
626    }
627    None
628}
629
630/// Fetch an array value, resolving one level of indirect reference. pdftex (and
631/// many other producers) commonly emit `/Widths` and `/W` as indirect objects,
632/// which a plain `get_array` would miss (leaving every glyph at the default width).
633fn resolve_array(file: &PdfFile, dict: &zpdf_core::PdfDict, key: &str) -> Option<Vec<PdfObject>> {
634    match dict.get(key) {
635        Some(PdfObject::Array(a)) => Some(a.clone()),
636        Some(PdfObject::Ref(id)) => file
637            .resolve(*id)
638            .ok()
639            .and_then(|o| o.as_array().ok().map(|a| a.to_vec())),
640        _ => None,
641    }
642}
643
644/// Fetch a dictionary value, resolving one level of indirect reference, in the
645/// same spirit as [`resolve_array`] (Type3 producers commonly emit /CharProcs
646/// and /Encoding as indirect objects).
647fn resolve_dict(
648    file: &PdfFile,
649    dict: &zpdf_core::PdfDict,
650    key: &str,
651) -> Option<zpdf_core::PdfDict> {
652    match dict.get(key) {
653        Some(PdfObject::Dict(d)) => Some(d.clone()),
654        Some(PdfObject::Ref(id)) => file
655            .resolve(*id)
656            .ok()
657            .and_then(|o| o.as_dict().ok().cloned()),
658        _ => None,
659    }
660}
661
662/// Parse CID /W array: format is [cid [w1 w2 ...]] or [cid_first cid_last w]
663fn parse_cid_widths(file: &PdfFile, dict: &zpdf_core::PdfDict) -> CidWidths {
664    let dw = dict.get_f64("DW").unwrap_or(1000.0);
665    let mut widths = CidWidths::new(dw);
666
667    let w_array = match resolve_array(file, dict, "W") {
668        Some(arr) => arr,
669        None => return widths,
670    };
671
672    let mut i = 0;
673    while i < w_array.len() {
674        let cid_start = match w_array[i].as_i64() {
675            Ok(v) => v as u16,
676            Err(_) => break,
677        };
678        i += 1;
679        if i >= w_array.len() {
680            break;
681        }
682
683        match &w_array[i] {
684            PdfObject::Array(arr) => {
685                // [cid_start [w1 w2 w3 ...]]
686                for (j, obj) in arr.iter().enumerate() {
687                    let Some(cid) = cid_start.checked_add(j as u16) else {
688                        break;
689                    };
690                    if let Ok(w) = obj.as_f64() {
691                        widths.set(cid, w);
692                    }
693                }
694                i += 1;
695            }
696            PdfObject::Integer(_) | PdfObject::Real(_) => {
697                // [cid_start cid_end width]
698                let cid_end = w_array[i].as_i64().unwrap_or(cid_start as i64) as u16;
699                i += 1;
700                if i < w_array.len() {
701                    let w = w_array[i].as_f64().unwrap_or(dw);
702                    for cid in cid_start..=cid_end {
703                        widths.set(cid, w);
704                    }
705                    i += 1;
706                }
707            }
708            _ => {
709                i += 1;
710            }
711        }
712    }
713
714    widths
715}
716
717/// Parse the CID /W2 array (PDF 9.7.4.3) into per-CID vertical metrics.
718/// Two element forms, mirroring /W but with THREE numbers per glyph:
719///   `c [ w1y_1 vx_1 vy_1  w1y_2 vx_2 vy_2 ... ]`   (list form)
720///   `cFirst cLast w1y vx vy`                         (range form)
721/// where `w1y` is the vertical displacement and `(vx, vy)` the position vector.
722fn parse_cid_w2(file: &PdfFile, dict: &zpdf_core::PdfDict, widths: &mut CidWidths) {
723    if let Some(arr) = resolve_array(file, dict, "W2") {
724        apply_w2_array(&arr, widths);
725    }
726}
727
728fn apply_w2_array(w2_array: &[PdfObject], widths: &mut CidWidths) {
729    let mut i = 0;
730    while i < w2_array.len() {
731        let cid_start = match w2_array[i].as_i64() {
732            Ok(v) => v as u16,
733            Err(_) => break,
734        };
735        i += 1;
736        if i >= w2_array.len() {
737            break;
738        }
739
740        match &w2_array[i] {
741            PdfObject::Array(arr) => {
742                // List form: triples (w1y, vx, vy) starting at cid_start.
743                let mut k = 0;
744                while k + 2 < arr.len() {
745                    let (Ok(w1y), Ok(vx), Ok(vy)) =
746                        (arr[k].as_f64(), arr[k + 1].as_f64(), arr[k + 2].as_f64())
747                    else {
748                        break;
749                    };
750                    let Some(cid) = cid_start.checked_add((k / 3) as u16) else {
751                        break;
752                    };
753                    widths.set_v(cid, w1y, vx, vy);
754                    k += 3;
755                }
756                i += 1;
757            }
758            PdfObject::Integer(_) | PdfObject::Real(_) => {
759                // Range form: cFirst cLast w1y vx vy.
760                let cid_end = w2_array[i].as_i64().unwrap_or(cid_start as i64) as u16;
761                if i + 3 < w2_array.len() {
762                    let (Ok(w1y), Ok(vx), Ok(vy)) = (
763                        w2_array[i + 1].as_f64(),
764                        w2_array[i + 2].as_f64(),
765                        w2_array[i + 3].as_f64(),
766                    ) else {
767                        break;
768                    };
769                    for cid in cid_start..=cid_end {
770                        widths.set_v(cid, w1y, vx, vy);
771                    }
772                    i += 4;
773                } else {
774                    break;
775                }
776            }
777            _ => {
778                i += 1;
779            }
780        }
781    }
782}
783
784fn parse_simple_widths(file: &PdfFile, dict: &zpdf_core::PdfDict) -> CidWidths {
785    let first_char = dict.get_i64("FirstChar").unwrap_or(0) as u16;
786    let mut widths = CidWidths::new(1000.0);
787
788    if let Some(arr) = resolve_array(file, dict, "Widths") {
789        for (j, obj) in arr.iter().enumerate() {
790            let Some(code) = first_char.checked_add(j as u16) else {
791                break;
792            };
793            if let Ok(w) = obj.as_f64() {
794                widths.set(code, w);
795            }
796        }
797    }
798
799    widths
800}
801
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `apply_w2_array` over `entries` against a fresh table whose default
    /// width is 1000, and hand back the resulting table.
    fn vertical_metrics_for(entries: &[PdfObject]) -> CidWidths {
        let mut table = CidWidths::new(1000.0);
        apply_w2_array(entries, &mut table);
        table
    }

    #[test]
    fn w2_list_form_assigns_consecutive_cids() {
        // 120 [w1y vx vy  w1y vx vy] covers CIDs 120 and 121; reals and
        // integers are both accepted as metric values.
        let triples = vec![
            PdfObject::Real(-1000.0),
            PdfObject::Real(500.0),
            PdfObject::Real(880.0),
            PdfObject::Integer(-900),
            PdfObject::Integer(450),
            PdfObject::Integer(820),
        ];
        let table = vertical_metrics_for(&[PdfObject::Integer(120), PdfObject::Array(triples)]);

        assert_eq!(table.get_v(120), Some((-1000.0, 500.0, 880.0)));
        assert_eq!(table.get_v(121), Some((-900.0, 450.0, 820.0)));
        assert_eq!(table.get_v(122), None);
    }

    #[test]
    fn w2_range_form_assigns_inclusive_range() {
        // cFirst cLast w1y vx vy
        let entries: Vec<PdfObject> = [10, 12, -1000, 500, 880]
            .into_iter()
            .map(PdfObject::Integer)
            .collect();
        let table = vertical_metrics_for(&entries);

        for cid in 10..=12 {
            assert_eq!(table.get_v(cid), Some((-1000.0, 500.0, 880.0)));
        }
        // Neighbours just outside the range stay unset.
        assert_eq!(table.get_v(9), None);
        assert_eq!(table.get_v(13), None);
    }

    #[test]
    fn w2_truncated_entry_is_ignored_not_panic() {
        // A range header missing its trailing metric numbers must be dropped
        // without panicking.
        let entries: Vec<PdfObject> = [10, 12, -1000]
            .into_iter()
            .map(PdfObject::Integer)
            .collect();
        let table = vertical_metrics_for(&entries);

        assert_eq!(table.get_v(10), None);
    }
}