// justpdf_core/font/mod.rs

1pub mod cff;
2pub mod cjk;
3pub mod cmap;
4mod encoding;
5pub mod opentype;
6pub mod recovery;
7mod standard14;
8pub mod subset;
9pub mod type3;
10
11pub use cmap::ToUnicodeCMap;
12pub use encoding::{Encoding, decode_text};
13pub use standard14::{is_standard14, standard14_widths};
14
15use crate::object::{IndirectRef, PdfDict, PdfObject};
16
17/// Basic font information extracted from a PDF font dictionary.
18#[derive(Debug, Clone)]
19pub struct FontInfo {
20    /// Font name (BaseFont).
21    pub base_font: Vec<u8>,
22    /// Font subtype: Type1, TrueType, Type0, Type3, etc.
23    pub subtype: Vec<u8>,
24    /// Encoding used by this font.
25    pub encoding: Encoding,
26    /// Glyph widths (indexed by char code).
27    pub widths: FontWidths,
28    /// ToUnicode CMap data (raw, not parsed yet).
29    pub to_unicode: Option<Vec<u8>>,
30    /// Whether this is one of the Standard 14 fonts.
31    pub is_standard14: bool,
32    /// Font descriptor (PDF spec 7.6).
33    pub descriptor: Option<FontDescriptor>,
34}
35
36/// Font descriptor containing font metrics and flags (PDF spec section 7.6).
37#[derive(Debug, Clone)]
38pub struct FontDescriptor {
39    /// /FontName — the PostScript name of the font.
40    pub font_name: Vec<u8>,
41    /// /FontFamily — the font family name.
42    pub font_family: Option<Vec<u8>>,
43    /// /Flags — a collection of boolean attributes (see flag constants).
44    pub flags: u32,
45    /// /FontBBox — bounding box for all glyphs [llx lly urx ury].
46    pub font_b_box: Option<[f64; 4]>,
47    /// /ItalicAngle — angle in degrees counter-clockwise from vertical.
48    pub italic_angle: f64,
49    /// /Ascent — maximum height above the baseline.
50    pub ascent: f64,
51    /// /Descent — maximum depth below the baseline (typically negative).
52    pub descent: f64,
53    /// /CapHeight — top of flat capital letters.
54    pub cap_height: Option<f64>,
55    /// /XHeight — top of flat non-ascending lowercase letters.
56    pub x_height: Option<f64>,
57    /// /StemV — dominant vertical stem width.
58    pub stem_v: f64,
59    /// /StemH — dominant horizontal stem width.
60    pub stem_h: Option<f64>,
61    /// /AvgWidth — average glyph width.
62    pub avg_width: Option<f64>,
63    /// /MaxWidth — maximum glyph width.
64    pub max_width: Option<f64>,
65    /// /MissingWidth — width to use for undefined character codes.
66    pub missing_width: Option<f64>,
67    /// /Leading — desired spacing between lines of text.
68    pub leading: Option<f64>,
69    /// /FontFile — reference to embedded Type1 font program.
70    pub font_file_ref: Option<IndirectRef>,
71    /// /FontFile2 — reference to embedded TrueType font program.
72    pub font_file2_ref: Option<IndirectRef>,
73    /// /FontFile3 — reference to embedded CFF / OpenType font program.
74    pub font_file3_ref: Option<IndirectRef>,
75}
76
77impl FontDescriptor {
78    // Font flag constants (PDF spec Table 123).
79    /// All glyphs have the same width.
80    pub const FIXED_PITCH: u32 = 1;
81    /// Glyphs have serifs.
82    pub const SERIF: u32 = 1 << 1;
83    /// Font uses a symbol character set.
84    pub const SYMBOLIC: u32 = 1 << 2;
85    /// Glyphs resemble cursive handwriting.
86    pub const SCRIPT: u32 = 1 << 3;
87    /// Font uses a standard (non-symbol) character set.
88    pub const NONSYMBOLIC: u32 = 1 << 5;
89    /// Glyphs have dominant slant.
90    pub const ITALIC: u32 = 1 << 6;
91    /// No lowercase letters; small letters rendered as small capitals.
92    pub const ALL_CAP: u32 = 1 << 16;
93    /// Lowercase letters rendered as smaller versions of uppercase.
94    pub const SMALL_CAP: u32 = 1 << 17;
95    /// Bold glyphs shall be painted with extra thickness at small sizes.
96    pub const FORCE_BOLD: u32 = 1 << 18;
97
98    /// Check whether the given flag bit is set.
99    pub fn has_flag(&self, flag: u32) -> bool {
100        self.flags & flag != 0
101    }
102
103    /// Returns true if the font is fixed-pitch.
104    pub fn is_fixed_pitch(&self) -> bool {
105        self.has_flag(Self::FIXED_PITCH)
106    }
107
108    /// Returns true if the font is symbolic.
109    pub fn is_symbolic(&self) -> bool {
110        self.has_flag(Self::SYMBOLIC)
111    }
112
113    /// Returns true if the font is italic.
114    pub fn is_italic(&self) -> bool {
115        self.has_flag(Self::ITALIC)
116    }
117}
118
/// Glyph advance widths for a font, in whichever form the font dictionary
/// supplied them.
#[derive(Clone, Debug)]
pub enum FontWidths {
    /// Simple font: a dense table taken from `/Widths`, whose first element
    /// belongs to character code `first_char`.
    Simple {
        first_char: u32,
        widths: Vec<f64>,
        default_width: f64,
    },
    /// CID-keyed font: entries in the style of the `/W` array (not yet fully
    /// parsed from font dictionaries).
    CID {
        default_width: f64,
        w_entries: Vec<CIDWidthEntry>,
    },
    /// No per-glyph information; every code gets `default_width`.
    None { default_width: f64 },
}

/// One element of a CID width table.
#[derive(Clone, Debug)]
pub enum CIDWidthEntry {
    /// Every CID in `first..=last` shares the same `width`.
    Range { first: u32, last: u32, width: f64 },
    /// Consecutive CIDs starting at `first` take the listed widths in order.
    List { first: u32, widths: Vec<f64> },
}

impl FontWidths {
    /// Look up the advance width for `char_code` (font units, typically
    /// 1/1000 of text space).
    ///
    /// Codes that no table entry covers yield the variant's `default_width`.
    /// For `CID` tables the entries are tried in order and the first one that
    /// covers the code wins.
    pub fn get_width(&self, char_code: u32) -> f64 {
        match self {
            Self::None { default_width } => *default_width,
            Self::Simple {
                first_char,
                widths,
                default_width,
            } => char_code
                // `None` when the code lies below the table's first code.
                .checked_sub(*first_char)
                .and_then(|offset| widths.get(offset as usize))
                .copied()
                .unwrap_or(*default_width),
            Self::CID {
                default_width,
                w_entries,
            } => w_entries
                .iter()
                .find_map(|entry| match entry {
                    CIDWidthEntry::Range { first, last, width } => {
                        (*first..=*last).contains(&char_code).then_some(*width)
                    }
                    // A list that starts at or below the code but is too short
                    // to reach it yields `None`, so later entries still get a
                    // chance.
                    CIDWidthEntry::List { first, widths } => char_code
                        .checked_sub(*first)
                        .and_then(|offset| widths.get(offset as usize))
                        .copied(),
                })
                .unwrap_or(*default_width),
        }
    }
}
188
189/// Parse basic font info from a font dictionary.
190pub fn parse_font_info(dict: &PdfDict) -> FontInfo {
191    let base_font = dict
192        .get(b"BaseFont")
193        .and_then(|o| o.as_name())
194        .unwrap_or(b"Unknown")
195        .to_vec();
196
197    let subtype = dict
198        .get(b"Subtype")
199        .and_then(|o| o.as_name())
200        .unwrap_or(b"Type1")
201        .to_vec();
202
203    let is_std14 = is_standard14(&base_font);
204
205    let encoding = parse_encoding(dict);
206    let widths = parse_widths(dict, &base_font, is_std14);
207
208    let descriptor = dict
209        .get_dict(b"FontDescriptor")
210        .and_then(parse_font_descriptor);
211
212    FontInfo {
213        base_font,
214        subtype,
215        encoding,
216        widths,
217        to_unicode: None, // Resolved later by the document
218        is_standard14: is_std14,
219        descriptor,
220    }
221}
222
223/// Parse a font descriptor dictionary (PDF spec section 7.6).
224///
225/// Returns `None` if the dictionary lacks the required `/FontName` entry.
226pub fn parse_font_descriptor(dict: &PdfDict) -> Option<FontDescriptor> {
227    let font_name = dict
228        .get(b"FontName")
229        .and_then(|o| o.as_name())
230        .map(|n| n.to_vec())?;
231
232    let font_family = dict
233        .get(b"FontFamily")
234        .and_then(|o| match o {
235            PdfObject::String(s) => Some(s.clone()),
236            _ => o.as_name().map(|n| n.to_vec()),
237        });
238
239    let flags = dict.get_i64(b"Flags").unwrap_or(0) as u32;
240    let italic_angle = dict.get_f64(b"ItalicAngle").unwrap_or(0.0);
241    let ascent = dict.get_f64(b"Ascent").unwrap_or(0.0);
242    let descent = dict.get_f64(b"Descent").unwrap_or(0.0);
243    let stem_v = dict.get_f64(b"StemV").unwrap_or(0.0);
244
245    let font_b_box = dict.get_array(b"FontBBox").and_then(|arr| {
246        if arr.len() == 4 {
247            Some([
248                arr[0].as_f64().unwrap_or(0.0),
249                arr[1].as_f64().unwrap_or(0.0),
250                arr[2].as_f64().unwrap_or(0.0),
251                arr[3].as_f64().unwrap_or(0.0),
252            ])
253        } else {
254            None
255        }
256    });
257
258    let cap_height = dict.get_f64(b"CapHeight");
259    let x_height = dict.get_f64(b"XHeight");
260    let stem_h = dict.get_f64(b"StemH");
261    let avg_width = dict.get_f64(b"AvgWidth");
262    let max_width = dict.get_f64(b"MaxWidth");
263    let missing_width = dict.get_f64(b"MissingWidth");
264    let leading = dict.get_f64(b"Leading");
265
266    let font_file_ref = dict.get_ref(b"FontFile").cloned();
267    let font_file2_ref = dict.get_ref(b"FontFile2").cloned();
268    let font_file3_ref = dict.get_ref(b"FontFile3").cloned();
269
270    Some(FontDescriptor {
271        font_name,
272        font_family,
273        flags,
274        font_b_box,
275        italic_angle,
276        ascent,
277        descent,
278        cap_height,
279        x_height,
280        stem_v,
281        stem_h,
282        avg_width,
283        max_width,
284        missing_width,
285        leading,
286        font_file_ref,
287        font_file2_ref,
288        font_file3_ref,
289    })
290}
291
292fn parse_encoding(dict: &PdfDict) -> Encoding {
293    match dict.get(b"Encoding") {
294        Some(PdfObject::Name(name)) => Encoding::from_name(name),
295        // TODO: handle encoding dict with /Differences
296        _ => Encoding::StandardEncoding,
297    }
298}
299
300fn parse_widths(dict: &PdfDict, base_font: &[u8], is_std14: bool) -> FontWidths {
301    // Simple font widths
302    if let (Some(first_char), Some(widths_arr)) =
303        (dict.get_i64(b"FirstChar"), dict.get_array(b"Widths"))
304    {
305        let widths: Vec<f64> = widths_arr
306            .iter()
307            .map(|o| o.as_f64().unwrap_or(0.0))
308            .collect();
309        return FontWidths::Simple {
310            first_char: first_char as u32,
311            widths,
312            default_width: if is_std14 { 600.0 } else { 1000.0 },
313        };
314    }
315
316    // Standard 14: use built-in widths
317    if is_std14 {
318        let widths = standard14_widths(base_font);
319        if !widths.is_empty() {
320            return FontWidths::Simple {
321                first_char: 0,
322                widths,
323                default_width: 600.0,
324            };
325        }
326    }
327
328    FontWidths::None {
329        default_width: 1000.0,
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap raw bytes in a PDF name object.
    fn name(bytes: &[u8]) -> PdfObject {
        PdfObject::Name(bytes.to_vec())
    }

    /// Insert `value` under `key`, hiding the key's `Vec<u8>` conversion.
    fn put(dict: &mut PdfDict, key: &[u8], value: PdfObject) {
        dict.insert(key.to_vec(), value);
    }

    /// Descriptor dictionary containing only `/FontName`.
    fn descriptor_named(font_name: &[u8]) -> PdfDict {
        let mut desc = PdfDict::new();
        put(&mut desc, b"FontName", name(font_name));
        desc
    }

    // Width lookup in a dense simple-font table, including both out-of-range sides.
    #[test]
    fn test_simple_widths() {
        let table = FontWidths::Simple {
            first_char: 32,
            widths: vec![250.0, 333.0, 408.0],
            default_width: 0.0,
        };
        assert_eq!(table.get_width(32), 250.0);
        assert_eq!(table.get_width(33), 333.0);
        assert_eq!(table.get_width(34), 408.0);
        assert_eq!(table.get_width(35), 0.0); // default
        assert_eq!(table.get_width(0), 0.0); // below first_char
    }

    // Width lookup across range and list entries of a CID table.
    #[test]
    fn test_cid_widths() {
        let entries = vec![
            CIDWidthEntry::Range {
                first: 1,
                last: 10,
                width: 500.0,
            },
            CIDWidthEntry::List {
                first: 20,
                widths: vec![600.0, 700.0, 800.0],
            },
        ];
        let table = FontWidths::CID {
            default_width: 1000.0,
            w_entries: entries,
        };
        assert_eq!(table.get_width(5), 500.0);
        assert_eq!(table.get_width(20), 600.0);
        assert_eq!(table.get_width(21), 700.0);
        assert_eq!(table.get_width(99), 1000.0); // default
    }

    // A Standard 14 font with a named encoding and no descriptor.
    #[test]
    fn test_parse_font_info_standard14() {
        let mut font = PdfDict::new();
        put(&mut font, b"Type", name(b"Font"));
        put(&mut font, b"Subtype", name(b"Type1"));
        put(&mut font, b"BaseFont", name(b"Helvetica"));
        put(&mut font, b"Encoding", name(b"WinAnsiEncoding"));

        let info = parse_font_info(&font);
        assert_eq!(info.base_font, b"Helvetica");
        assert!(info.is_standard14);
        assert_eq!(info.encoding, Encoding::WinAnsiEncoding);
        assert!(info.descriptor.is_none());
    }

    // Every supported descriptor entry is present and read back.
    #[test]
    fn test_parse_font_descriptor_full() {
        let mut desc = descriptor_named(b"ArialMT");
        put(&mut desc, b"FontFamily", PdfObject::String(b"Arial".to_vec()));
        put(&mut desc, b"Flags", PdfObject::Integer(32)); // NONSYMBOLIC
        put(
            &mut desc,
            b"FontBBox",
            PdfObject::Array(vec![
                PdfObject::Integer(-665),
                PdfObject::Integer(-210),
                PdfObject::Integer(2000),
                PdfObject::Integer(728),
            ]),
        );
        put(&mut desc, b"ItalicAngle", PdfObject::Integer(0));
        put(&mut desc, b"Ascent", PdfObject::Integer(905));
        put(&mut desc, b"Descent", PdfObject::Integer(-212));
        put(&mut desc, b"CapHeight", PdfObject::Integer(728));
        put(&mut desc, b"XHeight", PdfObject::Integer(517));
        put(&mut desc, b"StemV", PdfObject::Integer(88));
        put(&mut desc, b"StemH", PdfObject::Integer(76));
        put(&mut desc, b"AvgWidth", PdfObject::Integer(441));
        put(&mut desc, b"MaxWidth", PdfObject::Integer(2000));
        put(&mut desc, b"MissingWidth", PdfObject::Integer(250));
        put(&mut desc, b"Leading", PdfObject::Integer(33));
        put(
            &mut desc,
            b"FontFile2",
            PdfObject::Reference(IndirectRef {
                obj_num: 42,
                gen_num: 0,
            }),
        );

        let fd = parse_font_descriptor(&desc).expect("should parse");

        // Identity and flags.
        assert_eq!(fd.font_name, b"ArialMT");
        assert_eq!(fd.font_family.as_deref(), Some(b"Arial".as_slice()));
        assert_eq!(fd.flags, 32);
        assert!(fd.has_flag(FontDescriptor::NONSYMBOLIC));
        assert!(!fd.has_flag(FontDescriptor::SYMBOLIC));
        assert!(!fd.is_italic());
        assert!(!fd.is_fixed_pitch());
        assert!(!fd.is_symbolic());

        // Bounding box.
        let bbox = fd.font_b_box.expect("should have bbox");
        assert_eq!(bbox, [-665.0, -210.0, 2000.0, 728.0]);

        // Metrics.
        assert_eq!(fd.italic_angle, 0.0);
        assert_eq!(fd.ascent, 905.0);
        assert_eq!(fd.descent, -212.0);
        assert_eq!(fd.cap_height, Some(728.0));
        assert_eq!(fd.x_height, Some(517.0));
        assert_eq!(fd.stem_v, 88.0);
        assert_eq!(fd.stem_h, Some(76.0));
        assert_eq!(fd.avg_width, Some(441.0));
        assert_eq!(fd.max_width, Some(2000.0));
        assert_eq!(fd.missing_width, Some(250.0));
        assert_eq!(fd.leading, Some(33.0));

        // Embedded font program references.
        assert!(fd.font_file_ref.is_none());
        let ff2 = fd.font_file2_ref.as_ref().expect("should have FontFile2");
        assert_eq!(ff2.obj_num, 42);
        assert_eq!(ff2.gen_num, 0);
        assert!(fd.font_file3_ref.is_none());
    }

    // Only /FontName present: numeric fields are zero, optional fields are None.
    #[test]
    fn test_parse_font_descriptor_minimal() {
        let desc = descriptor_named(b"MyFont");

        let fd = parse_font_descriptor(&desc).expect("should parse minimal");
        assert_eq!(fd.font_name, b"MyFont");
        assert_eq!(fd.flags, 0);
        assert_eq!(fd.italic_angle, 0.0);
        assert_eq!(fd.ascent, 0.0);
        assert_eq!(fd.descent, 0.0);
        assert_eq!(fd.stem_v, 0.0);
        assert!(fd.font_family.is_none());
        assert!(fd.font_b_box.is_none());
        assert!(fd.cap_height.is_none());
        assert!(fd.x_height.is_none());
        assert!(fd.stem_h.is_none());
        assert!(fd.avg_width.is_none());
        assert!(fd.max_width.is_none());
        assert!(fd.missing_width.is_none());
        assert!(fd.leading.is_none());
        assert!(fd.font_file_ref.is_none());
        assert!(fd.font_file2_ref.is_none());
        assert!(fd.font_file3_ref.is_none());
    }

    // Without /FontName the descriptor is rejected.
    #[test]
    fn test_parse_font_descriptor_missing_font_name() {
        let mut desc = PdfDict::new();
        put(&mut desc, b"Flags", PdfObject::Integer(32));
        put(&mut desc, b"Ascent", PdfObject::Integer(800));

        assert!(parse_font_descriptor(&desc).is_none());
    }

    // Flag helpers against a hand-built descriptor.
    #[test]
    fn test_font_descriptor_flags() {
        let combined = FontDescriptor::FIXED_PITCH
            | FontDescriptor::SERIF
            | FontDescriptor::ITALIC
            | FontDescriptor::FORCE_BOLD;
        let fd = FontDescriptor {
            font_name: b"TestFont".to_vec(),
            font_family: None,
            flags: combined,
            font_b_box: None,
            italic_angle: -12.0,
            ascent: 800.0,
            descent: -200.0,
            cap_height: None,
            x_height: None,
            stem_v: 80.0,
            stem_h: None,
            avg_width: None,
            max_width: None,
            missing_width: None,
            leading: None,
            font_file_ref: None,
            font_file2_ref: None,
            font_file3_ref: None,
        };

        // Flags that were set.
        assert!(fd.is_fixed_pitch());
        assert!(fd.has_flag(FontDescriptor::SERIF));
        assert!(fd.is_italic());
        assert!(fd.has_flag(FontDescriptor::FORCE_BOLD));

        // Flags that were not.
        assert!(!fd.is_symbolic());
        assert!(!fd.has_flag(FontDescriptor::NONSYMBOLIC));
        assert!(!fd.has_flag(FontDescriptor::SCRIPT));
        assert!(!fd.has_flag(FontDescriptor::ALL_CAP));
        assert!(!fd.has_flag(FontDescriptor::SMALL_CAP));
    }

    // A /FontBBox that does not have exactly four elements is dropped.
    #[test]
    fn test_parse_font_descriptor_bbox_wrong_length() {
        let mut desc = descriptor_named(b"Test");
        put(
            &mut desc,
            b"FontBBox",
            PdfObject::Array(vec![PdfObject::Integer(0), PdfObject::Integer(0)]),
        );

        let fd = parse_font_descriptor(&desc).expect("should parse");
        assert!(fd.font_b_box.is_none());
    }

    // A font dictionary with an inline descriptor dictionary.
    #[test]
    fn test_parse_font_info_with_descriptor() {
        let mut desc_dict = descriptor_named(b"TimesNewRomanPSMT");
        put(&mut desc_dict, b"Flags", PdfObject::Integer(34)); // SERIF | NONSYMBOLIC
        put(&mut desc_dict, b"Ascent", PdfObject::Integer(891));
        put(&mut desc_dict, b"Descent", PdfObject::Integer(-216));
        put(&mut desc_dict, b"StemV", PdfObject::Integer(82));
        put(&mut desc_dict, b"ItalicAngle", PdfObject::Integer(0));
        put(&mut desc_dict, b"CapHeight", PdfObject::Integer(662));
        put(
            &mut desc_dict,
            b"FontFile2",
            PdfObject::Reference(IndirectRef {
                obj_num: 100,
                gen_num: 0,
            }),
        );

        let mut font_dict = PdfDict::new();
        put(&mut font_dict, b"Type", name(b"Font"));
        put(&mut font_dict, b"Subtype", name(b"TrueType"));
        put(&mut font_dict, b"BaseFont", name(b"TimesNewRomanPSMT"));
        put(&mut font_dict, b"Encoding", name(b"WinAnsiEncoding"));
        put(&mut font_dict, b"FontDescriptor", PdfObject::Dict(desc_dict));

        let info = parse_font_info(&font_dict);
        assert_eq!(info.base_font, b"TimesNewRomanPSMT");
        assert_eq!(info.subtype, b"TrueType");

        let fd = info.descriptor.expect("should have descriptor");
        assert_eq!(fd.font_name, b"TimesNewRomanPSMT");
        assert_eq!(fd.flags, 34);
        assert!(fd.has_flag(FontDescriptor::SERIF));
        assert!(fd.has_flag(FontDescriptor::NONSYMBOLIC));
        assert_eq!(fd.ascent, 891.0);
        assert_eq!(fd.descent, -216.0);
        assert_eq!(fd.stem_v, 82.0);
        assert_eq!(fd.cap_height, Some(662.0));
        let ff2 = fd.font_file2_ref.as_ref().expect("should have FontFile2");
        assert_eq!(ff2.obj_num, 100);
    }

    // All three font-file references are read independently.
    #[test]
    fn test_font_descriptor_all_font_file_refs() {
        let mut desc = descriptor_named(b"Test");
        put(
            &mut desc,
            b"FontFile",
            PdfObject::Reference(IndirectRef { obj_num: 10, gen_num: 0 }),
        );
        put(
            &mut desc,
            b"FontFile2",
            PdfObject::Reference(IndirectRef { obj_num: 20, gen_num: 0 }),
        );
        put(
            &mut desc,
            b"FontFile3",
            PdfObject::Reference(IndirectRef { obj_num: 30, gen_num: 0 }),
        );

        let fd = parse_font_descriptor(&desc).expect("should parse");
        assert_eq!(fd.font_file_ref.as_ref().unwrap().obj_num, 10);
        assert_eq!(fd.font_file2_ref.as_ref().unwrap().obj_num, 20);
        assert_eq!(fd.font_file3_ref.as_ref().unwrap().obj_num, 30);
    }
}